In [1]:
import torch
import os
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from datasets import Dataset
import accelerate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_device():
    if torch.cuda.is_available():
        print("Using GPU:", torch.cuda.get_device_name(0))
        return 0
    else:
        print("Using CPU")
        return -1 

device = get_device()

Using GPU: NVIDIA GeForce RTX 3070 Laptop GPU


In [3]:
model_path = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_path)

id2label = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}
label2id = {"NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2}
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)


classifier = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer, 
    device=device,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


In [4]:
DATASET_PATH = "sentiment.csv"

df = pd.read_csv(DATASET_PATH)

# 2) Drop NA / short texts
df = df[["text", "label"]].dropna()
df = df[df["text"].astype(str).str.strip().str.len() > 3].copy()

# 3) Map labels
label_mapping = {-1: 0, 0: 1, 1: 2}
df["label"] = df["label"].map(label_mapping)

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)


In [5]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

batch_size = 32
all_preds = []

# Use test dataset for evaluation
texts = test_df["text"].tolist()
y_true = test_df["label"].tolist()

for i in range(0, len(texts), batch_size):
    batch = texts[i:i+batch_size]
    preds = classifier(batch)  # pretrained model pipeline
    all_preds.extend(preds)

# Convert Hugging Face labels ("POSITIVE", etc.) to integers
label_str_to_id = {"NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2}
y_pred = [label_str_to_id[p["label"]] for p in all_preds]

# Evaluate
print(classification_report(y_true, y_pred, target_names=["NEG", "NEU", "POS"]))

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


              precision    recall  f1-score   support

         NEG       0.12      0.98      0.21       115
         NEU       0.25      0.00      0.00       569
         POS       0.00      0.00      0.00       285

    accuracy                           0.12       969
   macro avg       0.12      0.33      0.07       969
weighted avg       0.16      0.12      0.03       969



In [6]:
for name, param in model.named_parameters():
    param.requires_grad = False

for name, param in model.named_parameters():
    if "classifier" in name:
        param.requires_grad = True

for name, param in model.named_parameters():
    if "classifier" in name:
        print(name, param.shape)

for i in [-2, -1]:  # second-to-last and last layer
    for param in model.distilbert.transformer.layer[i].parameters():
        param.requires_grad = True


model.classifier.weight.data = torch.randn_like(model.classifier.weight.data)
model.classifier.bias.data = torch.randn_like(model.classifier.bias.data)

pre_classifier.weight torch.Size([768, 768])
pre_classifier.bias torch.Size([768])
classifier.weight torch.Size([3, 768])
classifier.bias torch.Size([3])


In [7]:
def preprocess_data(data):
    return tokenizer(data["text"], padding=True, truncation=True)

# Then tokenize separately
train_dataset = train_dataset.map(preprocess_data, batched=True)
test_dataset = test_dataset.map(preprocess_data, batched=True)

Map: 100%|██████████| 3872/3872 [00:00<00:00, 14283.20 examples/s]
Map: 100%|██████████| 969/969 [00:00<00:00, 12531.51 examples/s]


In [8]:
data_collator = DataCollatorWithPadding(tokenizer)

In [9]:

def compute_metrics(eval_pred):
    """
    Hugging Face Trainer metrics function.
    eval_pred: a tuple (logits, labels)
    """
    logits, labels = eval_pred
    # Get predicted class indices
    preds = logits.argmax(axis=-1)

    # Compute basic metrics
    acc = accuracy_score(labels, preds)
    macro_f1 = f1_score(labels, preds, average="macro")

    # print("\nClassification Report:\n")
    # print(classification_report(labels, preds, target_names=["NEG", "NEU", "POS"]))

    return {
        "accuracy": acc,
        "f1": macro_f1
    }

In [10]:
lr = 2e-4
batch_size = 32
num_epochs = 5

training_args = TrainingArguments(
    output_dir='./distilbert_finetuned',
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    weight_decay=0.01,
    report_to=[]
)

In [11]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# 8) Train
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.6319,0.772553,0.767802,0.723612
2,0.6994,0.621853,0.791538,0.746745
3,0.4672,0.515518,0.793602,0.758682
4,0.3454,0.588529,0.803922,0.769994
5,0.2207,0.716918,0.798762,0.770358


TrainOutput(global_step=605, training_loss=0.6729042841383248, metrics={'train_runtime': 57.4576, 'train_samples_per_second': 336.944, 'train_steps_per_second': 10.529, 'total_flos': 495892271033280.0, 'train_loss': 0.6729042841383248, 'epoch': 5.0})

In [12]:
# %% Evaluate on the test dataset
# This will compute metrics using your compute_metrics function
metrics = trainer.evaluate(eval_dataset=test_dataset)
print("\nValidation metrics:", metrics)

# Detailed classification report
predictions_output = trainer.predict(test_dataset)
logits = predictions_output.predictions
y_true = predictions_output.label_ids
y_pred = logits.argmax(axis=-1)

print("\nDetailed classification report on test set:")
print(classification_report(y_true, y_pred, target_names=["NEG", "NEU", "POS"]))


Validation metrics: {'eval_loss': 0.5155175924301147, 'eval_accuracy': 0.7936016511867905, 'eval_f1': 0.7586821488265652, 'eval_runtime': 1.6262, 'eval_samples_per_second': 595.881, 'eval_steps_per_second': 19.063, 'epoch': 5.0}

Detailed classification report on test set:
              precision    recall  f1-score   support

         NEG       0.71      0.71      0.71       115
         NEU       0.86      0.84      0.85       569
         POS       0.70      0.73      0.72       285

    accuracy                           0.79       969
   macro avg       0.76      0.76      0.76       969
weighted avg       0.80      0.79      0.79       969



In [13]:
# # 10) Save fine-tuned model and tokenizer
# output_dir = './distilbert_finetuned'

# trainer.save_model(output_dir)
# tokenizer.save_pretrained(output_dir)

# print(f"\nSaved fine-tuned model to: {output_dir}")