In [None]:
# !pip install --upgrade transformers==4.44.2


In [1]:
import os
import random
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# ------------------------------
# Device + Seed
# ------------------------------
print("Running on:", "CUDA 🟢" if torch.cuda.is_available() else "CPU 🔴")

SEED = 20
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# ------------------------------
# Robust CSV Loader (handles Sindhi text encodings)
# ------------------------------
# def safe_read_csv(path):
#     encodings = ['utf-8', 'utf-8-sig', 'latin1', 'ISO-8859-1', 'cp1252']
#     for enc in encodings:
#         try:
#             return pd.read_csv(path, encoding=enc, engine='python', on_bad_lines='skip')
#         except Exception as e:
#             print(f"⚠️ Failed with {enc}: {e}")
#     raise ValueError(f"❌ Could not read file: {path}")

train_df = pd.read_csv(r"C:\Users\stdFurqan\Downloads\sst2_english\sst2_70.csv")
test_df = pd.read_csv(r"C:\Users\stdFurqan\Downloads\sst2_english\sst2_30.csv")

print("✅ Data loaded successfully!")
print("Train size:", len(train_df), " Test size:", len(test_df))
print("Columns:", list(train_df.columns))


# ------------------------------
# Label Encoding
# ------------------------------
le = LabelEncoder()
train_df["label"] = le.fit_transform(train_df["label_name"].astype(str))
test_df["label"]  = le.transform(test_df["label_name"].astype(str))
label_names = list(le.classes_)
print("Detected Labels:", label_names)

# ------------------------------
# Convert to HuggingFace Dataset
# ------------------------------
train_dataset = Dataset.from_pandas(train_df)
test_dataset  = Dataset.from_pandas(test_df)

Running on: CUDA 🟢
✅ Data loaded successfully!
Train size: 2800  Test size: 1200
Columns: ['sentence', 'label_name']
Detected Labels: ['negative', 'positive']


In [6]:
# ------------------------------
# Tokenizer & Model
# ------------------------------
model_name = "roberta-large"
tokenizer = RobertaTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    return tokenizer(
        examples["sentence"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )

train_tokenized = train_dataset.map(tokenize_function, batched=True)
test_tokenized  = test_dataset.map(tokenize_function, batched=True)

model = RobertaForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_names)
)

# ------------------------------
# Metrics Function (compatible with v4.28.0)
# ------------------------------
def compute_metrics(pred):
    preds = np.argmax(pred.predictions, axis=1)
    labels = pred.label_ids
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average="weighted", zero_division=0
    )
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

Map: 100%|████████████████████████████████████████████████████████████████| 2800/2800 [00:00<00:00, 4967.45 examples/s]
Map: 100%|████████████████████████████████████████████████████████████████| 1200/1200 [00:00<00:00, 8052.48 examples/s]
Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Robert

In [7]:
# ------------------------------
# Training Setup (optimized for RTX 4090)
# ------------------------------
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,   # Increase to 16 if memory allows
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    seed=SEED,
    fp16=True,                       # ✅ enables mixed precision on RTX 4090
    dataloader_num_workers=8,
    load_best_model_at_end=True,
    logging_dir="./logs",
    report_to=[],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# ------------------------------
# Train the Model
# ------------------------------
trainer.train()

  self.scaler = torch.cuda.amp.GradScaler()
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.336529,0.915,0.916671,0.915,0.914915
2,0.456600,0.496571,0.9125,0.916257,0.9125,0.912302
3,0.289700,0.401666,0.923333,0.923338,0.923333,0.923333
4,0.289700,0.456925,0.929167,0.929311,0.929167,0.929161
5,0.110300,0.506137,0.925,0.925232,0.925,0.92499


  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  state_dict = torch.load(best_model_path, map_location="cpu")


TrainOutput(global_step=1750, training_loss=0.25294852447509764, metrics={'train_runtime': 725.8212, 'train_samples_per_second': 19.288, 'train_steps_per_second': 2.411, 'total_flos': 6523519543296000.0, 'train_loss': 0.25294852447509764, 'epoch': 5.0})

In [8]:
# ------------------------------
# Evaluate on Test Set
# ------------------------------
predictions = trainer.predict(test_tokenized)
y_pred = np.argmax(predictions.predictions, axis=1)
y_true = predictions.label_ids

print("\n📊 Classification Report (4 decimal places):\n")
print(classification_report(y_true, y_pred, target_names=label_names, digits=4))

acc = accuracy_score(y_true, y_pred)
print(f"✅ Overall Accuracy: {acc:.4f}")

  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)



📊 Classification Report (4 decimal places):

              precision    recall  f1-score   support

    negative     0.8903    0.9467    0.9176       600
    positive     0.9431    0.8833    0.9122       600

    accuracy                         0.9150      1200
   macro avg     0.9167    0.9150    0.9149      1200
weighted avg     0.9167    0.9150    0.9149      1200

✅ Overall Accuracy: 0.9150
