# 使用 BERT 模型做情感分类

模型：distilbert-base-uncased-finetuned-sst-2-english

默认代码会通过 huggingface.co 自动下载模型，但国内无法直接访问。

因此这里将模型下载到本地，再通过设置本地路径执行。

注意：这里使用的模型是在英文数据集 SST-2 上微调的 DistilBERT（uncased 英文词表），只适用于英文文本，不支持中文；如需对中文做情感分类，需要换用中文预训练模型。

## 使用 cpu 做推理的版本


In [1]:
# Sentiment classification of a single English sentence with a local
# DistilBERT SST-2 checkpoint, forced to run on the CPU.
import os
import time

# Force CPU inference by hiding all CUDA devices from this process. The
# variable is set before torch is imported so that it is guaranteed to be in
# place before any CUDA initialization can happen.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# perf_counter() is a monotonic clock intended for measuring intervals.
# The measured span covers model loading, tokenization and inference.
start_time = time.perf_counter()

# Load the DistilBERT model and tokenizer from the local directory
# (no download from huggingface.co is attempted for a local path).
model_name = "/root/notebook/huggingface/distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)

# Input text
text = "Tottenham 2-0 Fulham: Ange Postecoglou is 'bringing the excitement back to Tottenham'"

# Tokenize and encode the text as PyTorch tensors
inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)

# Run the forward pass without tracking gradients (inference only)
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# Turn logits into class probabilities and pick the most likely class.
# Index 0 is mapped to negative and index 1 to positive.
probabilities = torch.softmax(logits, dim=1)
predicted_class = torch.argmax(probabilities, dim=1).item()
class_labels = ['消极', '积极']

print(f"文本: {text}")
print(f"情感: {class_labels[predicted_class]}")

# Report the device the model weights actually live on, rather than
# inferring it from CUDA availability.
device = next(model.parameters()).device
print(f"使用设备: {device}")
end_time = time.perf_counter()
delta_time = end_time - start_time
print("耗时:", delta_time, "秒")


文本: Tottenham 2-0 Fulham: Ange Postecoglou is 'bringing the excitement back to Tottenham'
情感: 积极
使用设备: cpu
耗时: 0.9387893676757812 秒


## 使用 gpu 做推理的版本

In [2]:
# Sentiment classification of a single English sentence with a local
# DistilBERT SST-2 checkpoint, running on the GPU when one is available.
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import time

# perf_counter() is a monotonic clock intended for measuring intervals.
# The measured span covers model loading, device transfer and inference.
start_time = time.perf_counter()

# NOTE(review): if CUDA_VISIBLE_DEVICES="-1" was set earlier in this same
# process (e.g. by a CPU-only cell), CUDA will most likely stay hidden and
# this cell will fall back to the CPU; restart the kernel before running it.

# Pick the device up front so the model and its inputs can be placed on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the DistilBERT model and tokenizer from the local directory
model_name = "/root/notebook/huggingface/distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)
# Move the weights to the selected device; without this the forward pass
# runs on the CPU even when a GPU is present.
model.to(device)

# Input text
text = "Tottenham 2-0 Fulham: Ange Postecoglou is 'bringing the excitement back to Tottenham'"

# Tokenize and encode the text, then move every input tensor to the same
# device as the model (mismatched devices raise a runtime error).
inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

# Run the forward pass without tracking gradients (inference only)
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# Turn logits into class probabilities and pick the most likely class.
# .item() copies the result back to the host as a Python int.
probabilities = torch.softmax(logits, dim=1)
predicted_class = torch.argmax(probabilities, dim=1).item()
class_labels = ['消极', '积极']

print(f"文本: {text}")
print(f"情感: {class_labels[predicted_class]}")

# `device` is the device the model and inputs were actually placed on.
print(f"使用设备: {device}")
end_time = time.perf_counter()
delta_time = end_time - start_time
print("耗时:", delta_time, "秒")

文本: Tottenham 2-0 Fulham: Ange Postecoglou is 'bringing the excitement back to Tottenham'
情感: 积极
使用设备: cpu
耗时: 0.5708107948303223 秒
