In [1]:
import torch
import torch.nn as nn


class LoRALayer(nn.Module):
    """Low-rank additive term ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(in_dim, rank)`` and is drawn from a standard normal
    scaled by ``1 / sqrt(rank)``. ``B`` has shape ``(rank, out_dim)`` and
    starts at zero, so a freshly constructed layer outputs all zeros.

    Args:
        in_dim: Size of the last dimension of the input.
        out_dim: Size of the last dimension of the output.
        rank: Inner dimension shared by ``A`` and ``B``.
        alpha: Constant multiplier applied to the low-rank product.
    """

    def __init__(self, in_dim, out_dim, rank, alpha=1.0):
        super().__init__()
        std_v = 1 / torch.sqrt(torch.tensor(rank).float())
        self.A = nn.Parameter(torch.randn(in_dim, rank) * std_v)
        self.B = nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha

    def forward(self, x):
        """Return ``alpha * (x @ A @ B)``."""
        return self.alpha * (x @ self.A @ self.B)

    def extra_repr(self):
        """Summarise the layer so ``print(model)`` shows more than ``LoRALayer()``."""
        # Read the sizes back from the parameters so the summary cannot drift
        # away from the tensors actually held by the module.
        in_dim, rank = self.A.shape
        out_dim = self.B.shape[1]
        return f"in_dim={in_dim}, out_dim={out_dim}, rank={rank}, alpha={self.alpha}"


class LinearWithLoRA(nn.Module):
    """Wrap a linear layer and add a LoRA term to its output.

    Args:
        linear: Layer to wrap; its ``in_features`` and ``out_features``
            attributes size the LoRA matrices.
        rank: Rank handed to the ``LoRALayer``.
        alpha: Scaling factor handed to the ``LoRALayer``.
    """

    def __init__(self, linear, rank, alpha=1.0):
        super().__init__()
        self.linear = linear
        n_in, n_out = linear.in_features, linear.out_features
        self.lora = LoRALayer(n_in, n_out, rank, alpha)

    def forward(self, x):
        """Return the wrapped layer's output plus the LoRA output for ``x``."""
        base_out = self.linear(x)
        lora_out = self.lora(x)
        return base_out + lora_out


In [36]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# The tokenizer and the classifier are both loaded from the same identifier.
finbert_checkpoint = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint)

In [37]:
# Freeze every existing weight; requires_grad_ sets the flag on all
# parameters of the module in one call.
model.requires_grad_(False)
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [38]:
from functools import partial

# LoRA hyperparameters.
lora_rank = 8
lora_alpha = 16
# NOTE: not read anywhere in this cell; LoRALayer takes no dropout argument.
lora_dropout = 0.1

# Which linear layers receive a LoRA adapter.
lora_query = True
lora_key = True
lora_value = True
lora_projection = False
# This flag used to be True but was never read, so the feed-forward layers
# were silently left without adapters. It is now honoured below; it is set to
# False so the cell still wraps exactly the layers it wrapped before.
lora_mlp = False
lora_head = True

layers = []  # not used in this cell

_wrap_with_lora = partial(LinearWithLoRA, rank=lora_rank, alpha=lora_alpha)


def assign_lora(linear, **overrides):
    """Wrap ``linear`` in ``LinearWithLoRA`` unless it is already wrapped.

    The guard makes this cell safe to re-run: wrapping a ``LinearWithLoRA``
    a second time would fail because the wrapper has no ``in_features``.
    Keyword ``overrides`` (e.g. ``rank``) are forwarded to ``LinearWithLoRA``.
    """
    if isinstance(linear, LinearWithLoRA):
        return linear
    return _wrap_with_lora(linear, **overrides)


for layer in model.bert.encoder.layer:
    if lora_query:
        layer.attention.self.query = assign_lora(layer.attention.self.query)
    if lora_key:
        layer.attention.self.key = assign_lora(layer.attention.self.key)
    if lora_value:
        layer.attention.self.value = assign_lora(layer.attention.self.value)
    if lora_projection:
        layer.attention.output.dense = assign_lora(layer.attention.output.dense)
    if lora_mlp:
        # Feed-forward sub-block of a Hugging Face BertLayer.
        layer.intermediate.dense = assign_lora(layer.intermediate.dense)
        layer.output.dense = assign_lora(layer.output.dense)
if lora_head:
    model.classifier = assign_lora(model.classifier)

In [40]:
# Print the module tree again to inspect it after the LoRA wrapping step.
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): LinearWithLoRA(
                (linear): Linear(in_features=768, out_features=768, bias=True)
                (lora): LoRALayer()
              )
              (key): LinearWithLoRA(
                (linear): Linear(in_features=768, out_features=768, bias=True)
                (lora): LoRALayer()
              )
              (value): LinearWithLoRA(
                (linear): Linear(in_features=768, out_features=768, bias=True)
    

In [41]:
from datasets import load_dataset

# Load the dataset; its "train" and "validation" splits are used further down.
ds = load_dataset("zeroshot/twitter-financial-news-sentiment")

In [42]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
print(torch.cuda.is_available())  # True means a GPU is available


tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")


def tokenizer_function(examples):
    """Tokenize the ``text`` field of a batch, padding to max length and truncating."""
    texts = examples["text"]
    return tokenizer(texts, truncation=True, padding="max_length")


tokenized_datasets = ds.map(tokenizer_function, batched=True)

True


In [43]:
# Pull out the two splits handed to the Trainer.
train_dataset, valid_dataset = (
    tokenized_datasets["train"],
    tokenized_datasets["validation"],
)

In [20]:
from transformers import TrainingArguments

# NOTE: `training_args` is reassigned by a later cell in this notebook, so
# this object is only in effect until that cell runs.
training_args = TrainingArguments(
    output_dir="./results",
    # `eval_strategy` is the keyword the later TrainingArguments call in this
    # notebook uses; `evaluation_strategy` is its deprecated spelling.
    eval_strategy="steps",
)


In [21]:
import numpy as np
import evaluate

# Accuracy metric object; compute_metrics calls its `compute` method.
metric = evaluate.load("accuracy")

Downloading builder script: 100%|██████████| 4.20k/4.20k [00:00<00:00, 14.8MB/s]


In [23]:
from transformers import TrainingArguments, Trainer

# Replaces any earlier `training_args`: evaluation strategy is "epoch" and
# only the output directory is set besides that.
training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")

def compute_metrics(eval_pred):
    """Score predictions with the module-level ``metric``.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max class predictions.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted = np.argmax(scores, axis=-1)
    return metric.compute(references=gold, predictions=predicted)

In [30]:
# Use the GPU when one is present; fall back to CPU instead of raising on
# machines without CUDA.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Trainer for the LoRA-wrapped model, evaluated on the validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    compute_metrics=compute_metrics,
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [31]:
# Run training with the arguments and datasets given to `trainer` above.
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.464016,0.813652
2,0.426100,0.384967,0.862228
3,0.426100,0.39146,0.86139




TrainOutput(global_step=897, training_loss=0.37775869380138655, metrics={'train_runtime': 1734.2363, 'train_samples_per_second': 16.508, 'train_steps_per_second': 0.517, 'total_flos': 7572122009441280.0, 'train_loss': 0.37775869380138655, 'epoch': 3.0})

In [44]:
# Evaluate on the validation split and keep the returned metrics in `ans`.
ans = trainer.evaluate(valid_dataset)
print(ans)



In [45]:
# Show the metrics stored by the previous evaluation call.
print(ans)

{'eval_loss': 0.39145952463150024, 'eval_accuracy': 0.8613902847571189, 'eval_runtime': 58.6005, 'eval_samples_per_second': 40.751, 'eval_steps_per_second': 1.28, 'epoch': 3.0}


In [49]:
# Helper for inspecting parameter counts
def count_parameters(model):
    """Print the trainable and total parameter counts of ``model``.

    Args:
        model: Any object whose ``named_parameters()`` yields ``(name, param)``
            pairs where ``param`` has ``numel()`` and ``requires_grad``.

    Returns:
        None. Two lines are printed: the raw counts and the trainable share
        as a percentage (reported as 0.00% when there are no parameters).
    """
    trainable_params = 0
    all_params = 0
    for _, param in model.named_parameters():
        num_param = param.numel()
        all_params += num_param
        if param.requires_grad:
            trainable_params += num_param
    # A model without parameters would otherwise raise ZeroDivisionError.
    ratio = 100 * trainable_params / all_params if all_params else 0.0
    print(f"可训练参数量：{trainable_params}，总参数量：{all_params}")
    print(f"可训练参数占比：{ratio:.2f}%")

# Report how many of the model's parameters are still trainable.
count_parameters(model)

可训练参数量：448536，总参数量：109933083
可训练参数占比：0.41%


In [50]:
# Save the model into the same directory used as the Trainer output_dir.
# NOTE(review): the model now contains LinearWithLoRA wrappers, so the saved
# parameter names presumably differ from the stock architecture's — confirm
# the checkpoint reloads the way you intend before relying on it.
model.save_pretrained('./test_trainer')

In [53]:
# Baseline: a freshly loaded pretrained checkpoint scored on the same
# validation split with the same metric function.
# NOTE(review): this assumes the checkpoint's label ids line up with the
# dataset's label ids — confirm before reading the score as a fair baseline.
base_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

base_trainer = Trainer(
    model=base_model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

print(base_trainer.evaluate(valid_dataset))

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


{'eval_loss': 1.5094717741012573, 'eval_accuracy': 0.5251256281407035, 'eval_runtime': 58.1546, 'eval_samples_per_second': 41.063, 'eval_steps_per_second': 1.29}
