In [25]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

In [26]:
def get_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

device = get_device()
print("Using device:", device)

Using device: cpu


In [82]:
model_path = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_path)

id2label = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}
label2id = {"NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2}
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)


classifier = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer, 
    device=-1,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu


In [83]:
# Fine-tune DistilBERT on FineTunned_dataset without changing previous cells
import os
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import DatasetDict, Dataset

DATASET_PATH = "sentiment.csv"

df = pd.read_csv(DATASET_PATH)

# 2) Drop NA / short texts
df = df[["text", "label"]].dropna()
df = df[df["text"].astype(str).str.strip().str.len() > 3].copy()

# 3) Map labels
label_mapping = {-1: 0, 0: 1, 1: 2}
df["label"] = df["label"].map(label_mapping)

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)


In [84]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

batch_size = 32
all_preds = []

# Use test dataset for evaluation
texts = test_df["text"].tolist()
y_true = test_df["label"].tolist()

for i in range(0, len(texts), batch_size):
    batch = texts[i:i+batch_size]
    preds = classifier(batch)  # pretrained model pipeline
    all_preds.extend(preds)

# Convert Hugging Face labels ("POSITIVE", etc.) to integers
label_str_to_id = {"NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2}
y_pred = [label_str_to_id[p["label"]] for p in all_preds]

# Evaluate
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Macro F1:", f1_score(y_true, y_pred, average="macro"))
print(classification_report(y_true, y_pred, target_names=["NEG", "NEU", "POS"]))

Accuracy: 0.11867905056759546
Macro F1: 0.07072570725707257
              precision    recall  f1-score   support

         NEG       0.12      1.00      0.21       115
         NEU       0.00      0.00      0.00       569
         POS       0.00      0.00      0.00       285

    accuracy                           0.12       969
   macro avg       0.04      0.33      0.07       969
weighted avg       0.01      0.12      0.03       969



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [85]:
for name, param in model.named_parameters():
    param.requires_grad = False

for name, param in model.named_parameters():
    if "classifier" in name:
        param.requires_grad = True

for name, param in model.named_parameters():
    if "classifier" in name:
        print(name, param.shape)

for i in [-2, -1]:  # second-to-last and last layer
    for param in model.distilbert.transformer.layer[i].parameters():
        param.requires_grad = True


model.classifier.weight.data = torch.randn_like(model.classifier.weight.data)
model.classifier.bias.data = torch.randn_like(model.classifier.bias.data)

pre_classifier.weight torch.Size([768, 768])
pre_classifier.bias torch.Size([768])
classifier.weight torch.Size([3, 768])
classifier.bias torch.Size([3])


In [86]:
def preprocess_data(data):
    return tokenizer(data["text"], padding=True, truncation=True)

# Then tokenize separately
train_dataset = train_dataset.map(preprocess_data, batched=True)
test_dataset = test_dataset.map(preprocess_data, batched=True)

Map: 100%|██████████| 3872/3872 [00:00<00:00, 10543.77 examples/s]
Map: 100%|██████████| 969/969 [00:00<00:00, 9954.18 examples/s]


In [87]:
data_collator = DataCollatorWithPadding(tokenizer)

In [88]:

def compute_metrics(eval_pred):
    """
    Hugging Face Trainer metrics function.
    eval_pred: a tuple (logits, labels)
    """
    logits, labels = eval_pred
    # Get predicted class indices
    preds = logits.argmax(axis=-1)

    # Compute basic metrics
    acc = accuracy_score(labels, preds)
    macro_f1 = f1_score(labels, preds, average="macro")

    # print("\nClassification Report:\n")
    # print(classification_report(labels, preds, target_names=["NEG", "NEU", "POS"]))

    return {
        "accuracy": acc,
        "f1": macro_f1
    }

In [89]:
import accelerate
print(accelerate.__version__)

1.10.0


In [90]:
lr = 2e-4
batch_size = 32
num_epochs = 10

training_args = TrainingArguments(
    output_dir='./distilbert_finetuned',
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [91]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# 8) Train
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.692,0.720578,0.764706,0.722787
2,0.7787,0.733421,0.760578,0.695039
3,0.5454,0.565081,0.775026,0.739697
4,0.4338,0.652322,0.790506,0.74709
5,0.351,0.600224,0.780186,0.737773
6,0.2724,0.714426,0.780186,0.745284
7,0.1783,0.828935,0.781218,0.741835
8,0.1519,1.058288,0.778122,0.74475
9,0.0934,1.441555,0.785346,0.750604
10,0.0643,1.552781,0.793602,0.757195




TrainOutput(global_step=1210, training_loss=0.45614109354570875, metrics={'train_runtime': 488.3214, 'train_samples_per_second': 79.292, 'train_steps_per_second': 2.478, 'total_flos': 991784542066560.0, 'train_loss': 0.45614109354570875, 'epoch': 10.0})

In [92]:
# %% Evaluate on the test dataset
# This will compute metrics using your compute_metrics function
metrics = trainer.evaluate(eval_dataset=test_dataset)
print("\nValidation metrics:", metrics)

# Detailed classification report
predictions_output = trainer.predict(test_dataset)
logits = predictions_output.predictions
y_true = predictions_output.label_ids
y_pred = logits.argmax(axis=-1)

print("\nDetailed classification report on test set:")
print(classification_report(y_true, y_pred, target_names=["NEG", "NEU", "POS"]))




Validation metrics: {'eval_loss': 0.5650809407234192, 'eval_accuracy': 0.7750257997936016, 'eval_f1': 0.7396973243949443, 'eval_runtime': 6.8248, 'eval_samples_per_second': 141.983, 'eval_steps_per_second': 4.542, 'epoch': 10.0}





Detailed classification report on test set:
              precision    recall  f1-score   support

         NEG       0.68      0.70      0.69       115
         NEU       0.87      0.81      0.84       569
         POS       0.66      0.74      0.69       285

    accuracy                           0.78       969
   macro avg       0.73      0.75      0.74       969
weighted avg       0.78      0.78      0.78       969



In [93]:
# metrics = trainer.evaluate()
# print("Validation metrics:", metrics)
# print("\nDetailed classification report on validation set:")
# val_logits = trainer.predict(val_dataset).predictions
# val_preds = val_logits.argmax(axis=-1)
# print(classification_report(val_labels, val_preds))

# # 10) Save fine-tuned model and tokenizer
# trainer.save_model(output_dir)
# tokenizer.save_pretrained(output_dir)

# print(f"\nSaved fine-tuned model to: {output_dir}")