In [1]:
%%capture
!pip install datasets=="2.14.6" transformers=="4.35.0" accelerate=="0.24.1"

In [2]:
import numpy as np
import pandas as pd
from datasets import load_dataset

from transformers import AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from transformers import AutoTokenizer

Загрузим датасет:

In [19]:
dataset = load_dataset('amazon_polarity')

Downloading builder script:   0%|          | 0.00/4.11k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/6.64k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/688M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3600000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/400000 [00:00<?, ? examples/s]

In [20]:
dataset

DatasetDict({
    train: Dataset({
        features: ['label', 'title', 'content'],
        num_rows: 3600000
    })
    test: Dataset({
        features: ['label', 'title', 'content'],
        num_rows: 400000
    })
})

In [21]:
dataset['test'][1]

{'label': 1,
 'title': "One of the best game music soundtracks - for a game I didn't really play",
 'content': "Despite the fact that I have only played a small portion of the game, the music I heard (plus the connection to Chrono Trigger which was great as well) led me to purchase the soundtrack, and it remains one of my favorite albums. There is an incredible mix of fun, epic, and emotional songs. Those sad and beautiful tracks I especially like, as there's not too many of those kinds of songs in my other video game soundtracks. I must admit that one of the songs (Life-A Distant Promise) has brought tears to my eyes on many occasions.My one complaint about this soundtrack is that they use guitar fretting effects in many of the songs, which I find distracting. But even if those weren't included I would still consider the collection worth it."}

Проведем токенизацию:

In [23]:
checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Разобьем датасет:

In [24]:
def encode_batch(data):
    return tokenizer(data['content'], padding='max_length', truncation=True, max_length=256)

In [25]:
train_data = dataset['train'].select(range(10000)).map(encode_batch, batched=True)
test_data = dataset['test'].select(range(2000)).map(encode_batch, batched=True)

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Работа с моделью:

In [26]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)
    f1 = f1_score(labels, pred, average='macro')
    accuracy = np.mean(pred == labels)
    precision = precision_score(labels, pred, average='macro')
    recall = recall_score(labels, pred, average='macro')
    return {
        "accuracy": accuracy,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }

In [28]:
training_args = TrainingArguments(
   output_dir="./results",
   learning_rate=2e-5,
   per_device_train_batch_size=16,
   per_device_eval_batch_size=16,
   num_train_epochs=2,
   weight_decay=0.01,
   do_train=True,
   do_eval=True,
   save_strategy="epoch",
   seed=24,
)

In [30]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    compute_metrics=compute_metrics,
)

trainer.train()

Step,Training Loss
500,0.3093
1000,0.1746


TrainOutput(global_step=1250, training_loss=0.2227818878173828, metrics={'train_runtime': 479.9463, 'train_samples_per_second': 41.671, 'train_steps_per_second': 2.604, 'total_flos': 1324673986560000.0, 'train_loss': 0.2227818878173828, 'epoch': 2.0})

In [32]:
models = {
    'Before fine-tuning': AutoModelForSequenceClassification.from_pretrained("checkpoint", num_labels=2),
    'After fine-tuning': AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-625", num_labels=2)
}

for model_name, model in models.items():
    trainer = Trainer(model=model, compute_metrics=compute_metrics)
    results = trainer.evaluate(eval_dataset=test_data)
    predictions = trainer.predict(test_data)
    pred_labels = np.argmax(predictions.predictions, axis=1)

    print(f"\n{model_name}:")

    metrics_df = pd.DataFrame({
        'Metric': ['Accuracy', 'F1 Score', 'Precision', 'Recall'],
        'Value': [
            results['eval_accuracy'],
            results['eval_f1'],
            results['eval_precision'],
            results['eval_recall']
        ]
    })
    display(metrics_df)

    print("\nConfusion Matrix:")
    display(pd.DataFrame(confusion_matrix(test_data['label'], pred_labels)))
    print("\nClassification Report:")
    display(pd.DataFrame.from_dict(
        classification_report(test_data['label'],
        pred_labels,
        output_dict=True,
        zero_division=0)
    ).T)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Before fine-tuning:


Unnamed: 0,Metric,Value
0,Accuracy,0.5145
1,F1 Score,0.472717
2,Precision,0.545313
3,Recall,0.528567



Confusion Matrix:


Unnamed: 0,0,1
0,796,158
1,813,233



Classification Report:


Unnamed: 0,precision,recall,f1-score,support
0,0.494717,0.834382,0.621147,954.0
1,0.595908,0.222753,0.324287,1046.0
accuracy,0.5145,0.5145,0.5145,0.5145
macro avg,0.545313,0.528567,0.472717,2000.0
weighted avg,0.54764,0.5145,0.465889,2000.0



After fine-tuning:


Unnamed: 0,Metric,Value
0,Accuracy,0.9035
1,F1 Score,0.903109
2,Precision,0.90429
3,Recall,0.902489



Confusion Matrix:


Unnamed: 0,0,1
0,840,114
1,79,967



Classification Report:


Unnamed: 0,precision,recall,f1-score,support
0,0.914037,0.880503,0.896957,954.0
1,0.894542,0.924474,0.909262,1046.0
accuracy,0.9035,0.9035,0.9035,0.9035
macro avg,0.90429,0.902489,0.903109,2000.0
weighted avg,0.903841,0.9035,0.903392,2000.0
