In [42]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer that ships with the "Chinese-Emotion-Small" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Chinese-Emotion-Small")

# Encode one sentence. The tokenizer returns a dict-like object with:
#   input_ids       - vocabulary index of every token in the sentence
#   attention_mask  - marks which tokens the model should attend to
#   token_type_ids  - tells which sequence a token belongs to when several
#                     sequences are encoded together
encoded_input = tokenizer("刚出锅的馒头一块钱一个！")
print(encoded_input)

# A list of sentences can be encoded in one call; padding and truncation
# are enabled so the batch comes back as uniform-length PyTorch tensors.
pt_batch = tokenizer(
    ["今天天气不错", "今天天气太好了"],
    return_tensors="pt",
    max_length=16,
    truncation=True,
    padding=True,
)

# Load the sequence-classification model and switch it to evaluation mode.
model = AutoModelForSequenceClassification.from_pretrained("Chinese-Emotion-Small")
model.eval()

# Forward pass without gradient tracking; ** unpacks the encoded batch into
# keyword arguments for the model.
with torch.no_grad():
    pt_outputs = model(**pt_batch)

# The raw scores are exposed through the `logits` attribute of the output.
print(pt_outputs)

# Softmax over the last dimension converts the logits into probabilities.
# NOTE: despite its name, `pt_logits` holds probabilities from here on.
pt_logits = torch.softmax(pt_outputs.logits, dim=-1)
print(pt_logits)

# For every sentence keep the index of the most probable class.
pt_predictions = torch.argmax(pt_logits, dim=-1)
print(pt_predictions)

# Human-readable tone names, keyed by class index (0-7 in listing order).
label_mapping = dict(
    enumerate(
        (
            "平淡语调",
            "关切语调",
            "开心语调",
            "愤怒语调",
            "悲伤语调",
            "疑问语调",
            "惊奇语调",
            "厌恶语调",
        )
    )
)

# Translate each predicted class index into its tone name.
res = [label_mapping[class_id] for class_id in pt_predictions.tolist()]
print(res)

{'input_ids': [1, 260, 42450, 2372, 87901, 494, 241665, 6824, 192441, 10271, 8150, 310, 2], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
SequenceClassifierOutput(loss=None, logits=tensor([[ 5.5736, -1.9918,  1.6168, -2.5054, -1.0547, -0.6202,  0.7615, -1.4453],
        [-0.0126, -1.2123,  5.7007, -1.3890, -1.8177, -1.8296,  0.3167, -0.7844]]), hidden_states=None, attentions=None)
tensor([[9.6867e-01, 5.0182e-04, 1.8524e-02, 3.0026e-04, 1.2809e-03, 1.9780e-03,
         7.8759e-03, 8.6677e-04],
        [3.2615e-03, 9.8265e-04, 9.8783e-01, 8.2346e-04, 5.3637e-04, 5.3003e-04,
         4.5332e-03, 1.5073e-03]])
tensor([0, 2])
['平淡语调', '开心语调']
