In [14]:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

In [15]:
model_path = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_path)

id2label = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}
label2id = {"NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2}
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)

classifier = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer, 
    device=-1,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu


In [16]:
# Fine-tune DistilBERT on FineTunned_dataset without changing previous cells
import os
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import DatasetDict, Dataset

DATASET_PATH = "sentiment.csv"

df = pd.read_csv(DATASET_PATH)

# 2) Drop NA / short texts
df = df[["text", "label"]].dropna()
df = df[df["text"].astype(str).str.strip().str.len() > 3].copy()

# 3) Map labels
label_mapping = {-1: 0, 0: 1, 1: 2}
df["label"] = df["label"].map(label_mapping)

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)


In [17]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

batch_size = 32
all_preds = []

# Use test dataset for evaluation
texts = test_df["text"].tolist()
y_true = test_df["label"].tolist()

for i in range(0, len(texts), batch_size):
    batch = texts[i:i+batch_size]
    preds = classifier(batch)  # pretrained model pipeline
    all_preds.extend(preds)

# Convert Hugging Face labels ("POSITIVE", etc.) to integers
label_str_to_id = {"NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2}
y_pred = [label_str_to_id[p["label"]] for p in all_preds]

# Evaluate
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Macro F1:", f1_score(y_true, y_pred, average="macro"))
print(classification_report(y_true, y_pred, target_names=["NEG", "NEU", "POS"]))

Accuracy: 0.11867905056759546
Macro F1: 0.07072570725707257
              precision    recall  f1-score   support

         NEG       0.12      1.00      0.21       115
         NEU       0.00      0.00      0.00       569
         POS       0.00      0.00      0.00       285

    accuracy                           0.12       969
   macro avg       0.04      0.33      0.07       969
weighted avg       0.01      0.12      0.03       969



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
# for name, param in model.named_parameters():
#     param.requires_grad = False

# for name, param in model.named_parameters():
#     if "classifier" in name:
#         param.requires_grad = True

# for name, param in model.named_parameters():
#     if "classifier" in name:
#         print(name, param.shape)


# model.classifier.weight.data = torch.randn_like(model.classifier.weight.data)
# model.classifier.bias.data = torch.randn_like(model.classifier.bias.data)

In [19]:
def preprocess_data(data):
    return tokenizer(data["text"], padding=True, truncation=True)

# Then tokenize separately
train_dataset = train_dataset.map(preprocess_data, batched=True)
test_dataset = test_dataset.map(preprocess_data, batched=True)

Map: 100%|██████████| 3872/3872 [00:00<00:00, 11537.30 examples/s]
Map: 100%|██████████| 969/969 [00:00<00:00, 10155.19 examples/s]


In [20]:
data_collator = DataCollatorWithPadding(tokenizer)

In [21]:

def compute_metrics(eval_pred):
    """
    Hugging Face Trainer metrics function.
    eval_pred: a tuple (logits, labels)
    """
    logits, labels = eval_pred
    # Get predicted class indices
    preds = logits.argmax(axis=-1)

    # Compute basic metrics
    acc = accuracy_score(labels, preds)
    macro_f1 = f1_score(labels, preds, average="macro")

    # print("\nClassification Report:\n")
    # print(classification_report(labels, preds, target_names=["NEG", "NEU", "POS"]))

    return {
        "accuracy": acc,
        "f1": macro_f1
    }

In [22]:
import accelerate
print(accelerate.__version__)

1.10.0


In [23]:
lr = 2e-4
batch_size = 32
num_epochs = 10

training_args = TrainingArguments(
    output_dir='./distilbert_finetuned',
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [24]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# 8) Train
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7838,0.691193,0.721362,0.496334
2,0.5279,0.622047,0.776058,0.742479
3,0.3903,0.669256,0.776058,0.73771
4,0.2745,0.685415,0.780186,0.736594
5,0.1869,0.883347,0.762642,0.712681




KeyboardInterrupt: 

In [None]:
# %% Evaluate on the test dataset
# This will compute metrics using your compute_metrics function
metrics = trainer.evaluate(eval_dataset=test_dataset)
print("\nValidation metrics:", metrics)

# Detailed classification report
predictions_output = trainer.predict(test_dataset)
logits = predictions_output.predictions
y_true = predictions_output.label_ids
y_pred = logits.argmax(axis=-1)

print("\nDetailed classification report on test set:")
print(classification_report(y_true, y_pred, target_names=["NEG", "NEU", "POS"]))




Validation metrics: {'eval_loss': 0.619900107383728, 'eval_accuracy': 0.7265221878224974, 'eval_f1': 0.6293701121658111, 'eval_runtime': 5.8527, 'eval_samples_per_second': 165.564, 'eval_steps_per_second': 5.297, 'epoch': 10.0}





Detailed classification report on test set:
              precision    recall  f1-score   support

         NEG       0.61      0.39      0.48       115
         NEU       0.78      0.88      0.83       569
         POS       0.62      0.55      0.58       285

    accuracy                           0.73       969
   macro avg       0.67      0.61      0.63       969
weighted avg       0.71      0.73      0.72       969



In [None]:
# metrics = trainer.evaluate()
# print("Validation metrics:", metrics)
# print("\nDetailed classification report on validation set:")
# val_logits = trainer.predict(val_dataset).predictions
# val_preds = val_logits.argmax(axis=-1)
# print(classification_report(val_labels, val_preds))

# # 10) Save fine-tuned model and tokenizer
# trainer.save_model(output_dir)
# tokenizer.save_pretrained(output_dir)

# print(f"\nSaved fine-tuned model to: {output_dir}")