In [1]:
from datasets import load_dataset
from transformers import LlamaTokenizer
from torch.utils.data import DataLoader
from transformers import LlamaTokenizer, LlamaForCausalLM, LlamaConfig
import torch

In [2]:
# Location of the Llama checkpoint; the same path is used for the tokenizer
# and for the model weights.
model_path = "llama-3b"

# Load the tokenizer that belongs to the checkpoint.
tokenizer = LlamaTokenizer.from_pretrained(pretrained_model_name_or_path=model_path)

# Reuse the EOS token as the padding token so that `padding=True` works
# (presumably the tokenizer defines no pad token of its own -- TODO confirm).
tokenizer.pad_token = tokenizer.eos_token

class SentimentClassifier(torch.nn.Module):
    """Linear classification head on top of a frozen Llama backbone.

    The backbone is run without gradient tracking; for every sequence the
    hidden state of its last attended token is fed to a linear layer that
    produces the class logits.

    Args:
        model: Object exposing ``model`` (the backbone module, called with
            ``input_ids`` / ``attention_mask`` and returning an object with a
            ``last_hidden_state`` attribute) and ``config.hidden_size``.
        num_labels: Number of output classes.
    """

    def __init__(self, model, num_labels):
        super().__init__()
        # Only the wrapped `.model` submodule is kept (a LlamaModel in this notebook).
        self.llama = model.model
        self.classifier = torch.nn.Linear(model.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None):
        """Return classification logits of shape ``(batch, num_labels)``.

        Args:
            input_ids: Token ids, ``(batch, seq_len)``.
            attention_mask: Optional ``(batch, seq_len)`` mask, non-zero for
                real tokens. When omitted, every position is treated as a
                real token and the final position is pooled.

        Returns:
            The output of the linear head applied to the pooled hidden state.
        """
        # The backbone is only a feature extractor: no autograd graph is built for it.
        with torch.no_grad():
            outputs = self.llama(input_ids=input_ids, attention_mask=attention_mask)
            # Cast to float32 so the pooled features match the dtype of the linear head.
            hidden_states = outputs.last_hidden_state.float()
            batch_size, seq_len = hidden_states.shape[:2]
            device = hidden_states.device

            if attention_mask is None:
                # No mask given: pool the final position of every row.
                last_index = torch.full((batch_size,), seq_len - 1, dtype=torch.long, device=device)
            else:
                # Per-row index of the last attended token. Multiplying the mask by
                # the position ids and taking the argmax is correct for both right-
                # and left-padded batches, and works for any batch size (the
                # previous `.sum(1).item()` only worked for a single sequence).
                positions = torch.arange(seq_len, device=device)
                attended = (attention_mask.to(device) != 0).long()
                last_index = (attended * positions).argmax(dim=1)

            batch_index = torch.arange(batch_size, device=device)
            sequence_output = hidden_states[batch_index, last_index]

        # The head runs outside `no_grad` so it stays trainable.
        logits = self.classifier(sequence_output)
        return logits
    
# Load the pretrained causal-LM checkpoint in half precision; device placement
# is delegated to `device_map='auto'`.
pretrained_model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, device_map='auto')
# Freeze every backbone parameter (equivalent to setting requires_grad=False one by one).
pretrained_model.requires_grad_(False)

# Build the sentiment classifier around the frozen backbone.
model = SentimentClassifier(pretrained_model, num_labels=2).cuda()
# Restore the trained linear head. `weights_only=True` restricts unpickling to
# tensors/state dicts, and `map_location='cpu'` makes loading independent of the
# device the checkpoint was saved from; `load_state_dict` copies the values onto
# the head's own device.
classifier_state = torch.load('classifier_linear_layer.pth', map_location='cpu', weights_only=True)
model.classifier.load_state_dict(classifier_state)
model.eval()

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


SentimentClassifier(
  (llama): LlamaModel(
    (embed_tokens): Embedding(32000, 3200, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (k_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (v_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (o_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3200, out_features=8640, bias=False)
          (up_proj): Linear(in_features=3200, out_features=8640, bias=False)
          (down_proj): Linear(in_features=8640, out_features=3200, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
 

In [3]:
def sentiment_analysis(text):
    """Classify the sentiment of a single piece of text.

    Args:
        text: The text to classify. It is truncated to at most 512 tokens.

    Returns:
        ``"Positive"`` when the predicted class index is 1, otherwise
        ``"Negative"``.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    input_ids = encoded.input_ids.cuda()
    # Use the mask produced by the tokenizer instead of a hand-built all-ones
    # tensor, so it always describes the ids it was generated with.
    attention_mask = encoded.attention_mask.long().cuda()
    # Pure inference: no autograd graph is needed for the classifier head either.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    preds = torch.argmax(outputs, dim=1)
    return "Positive" if preds.item() == 1 else "Negative"


In [20]:
# Example input for a quick manual check of the classifier.
text = "Trump has NEVER lost a debate to you. And Trump has said he will debate you anytime, anywhere. Let’s go, Crooked Joe. Name where and when you’ll debate him.What are you so scared of?"
# Run the classifier and show the predicted label.
predicted_sentiment = sentiment_analysis(text)
print(predicted_sentiment)

Negative
