In [1]:
import os
import random
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from transformers import (
    XLMRobertaTokenizer,
    XLMRobertaForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from datasets import Dataset
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
)

In [2]:
import torch

if torch.cuda.is_available():
    device = "cuda"
    gpu_name = torch.cuda.get_device_name(0)
    print(f"✅ Using GPU: {gpu_name}")
else:
    device = "cpu"
    print("⚠️ CUDA not available. Using CPU instead.")

print(f"🖥️ Device set to: {device}")

✅ Using GPU: NVIDIA GeForce RTX 4080 SUPER
🖥️ Device set to: cuda


In [3]:
# ------------------------------
# Device + Seed
# ------------------------------
print("Running on:", "CUDA 🟢" if torch.cuda.is_available() else "CPU 🔴")

SEED = 20
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# ------------------------------
# Robust CSV Loader (handles Sindhi text encodings)
# ------------------------------
def safe_read_csv(path):
    encodings = ['utf-8', 'utf-8-sig', 'latin1', 'ISO-8859-1', 'cp1252']
    for enc in encodings:
        try:
            return pd.read_csv(path, encoding=enc, engine='python', on_bad_lines='skip')
        except Exception as e:
            print(f"⚠️ Failed with {enc}: {e}")
    raise ValueError(f"❌ Could not read file: {path}")

train_df = safe_read_csv(r"C:\Users\Stdfurqan\Downloads\urdu_v1\70_urdu_v1.csv")
test_df  = safe_read_csv(r"C:\Users\Stdfurqan\Downloads\urdu_v1\test.csv")

print("✅ Data loaded successfully!")
print("Train size:", len(train_df), " Test size:", len(test_df))
print("Columns:", list(train_df.columns))

# ------------------------------
# Rename columns for consistency
# ------------------------------
train_df = train_df.rename(columns={"Class": "label", "Cleaned_Tweet": "Text"})
test_df  = test_df.rename(columns={"Class": "label", "Cleaned_Tweet": "Text"})

# ------------------------------
# Label Encoding
# ------------------------------
le = LabelEncoder()
train_df["label"] = le.fit_transform(train_df["label"].astype(str))
test_df["label"]  = le.transform(test_df["label"].astype(str))
label_names = list(le.classes_)
print("Detected Labels:", label_names)

# ------------------------------
# Convert to HuggingFace Dataset
# ------------------------------
train_dataset = Dataset.from_pandas(train_df)
test_dataset  = Dataset.from_pandas(test_df)

Running on: CUDA 🟢
✅ Data loaded successfully!
Train size: 685  Test size: 294
Columns: ['Cleaned_Tweet', 'Class']
Detected Labels: ['N', 'P']


In [4]:
# ------------------------------
# Tokenizer & Model
# ------------------------------
model_name = "bert-base-multilingual-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    return tokenizer(
        examples["Text"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )

train_tokenized = train_dataset.map(tokenize_function, batched=True)
test_tokenized  = test_dataset.map(tokenize_function, batched=True)

model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_names)
)

# ------------------------------
# Metrics Function (compatible with v4.28.0)
# ------------------------------
def compute_metrics(pred):
    preds = np.argmax(pred.predictions, axis=1)
    labels = pred.label_ids
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average="weighted", zero_division=0
    )
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}



Map:   0%|          | 0/685 [00:00<?, ? examples/s]

Map:   0%|          | 0/294 [00:00<?, ? examples/s]

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [5]:
# ------------------------------
# Training Setup (optimized for RTX 4090)
# ------------------------------
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,   # Increase to 16 if memory allows
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    seed=SEED,
    fp16=True,                       # ✅ enables mixed precision on RTX 4090
    dataloader_num_workers=8,
    load_best_model_at_end=True,
    logging_dir="./logs",
    report_to=[],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# ------------------------------
# Train the Model
# ------------------------------
trainer.train()

  self.scaler = torch.cuda.amp.GradScaler()
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.691159,0.510204,0.260308,0.510204,0.344732
2,No log,0.631932,0.646259,0.649073,0.646259,0.643281
3,No log,0.705748,0.608844,0.615698,0.608844,0.599894
4,No log,1.160434,0.64966,0.670736,0.64966,0.640933
5,No log,1.173074,0.659864,0.660716,0.659864,0.659801


  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  state_dict = torch.load(best_model_path, map_location="cpu")


TrainOutput(global_step=430, training_loss=0.503225282181141, metrics={'train_runtime': 240.3683, 'train_samples_per_second': 14.249, 'train_steps_per_second': 1.789, 'total_flos': 450577682304000.0, 'train_loss': 0.503225282181141, 'epoch': 5.0})

In [6]:
# ------------------------------
# Evaluate on Test Set
# ------------------------------
predictions = trainer.predict(test_tokenized)
y_pred = np.argmax(predictions.predictions, axis=1)
y_true = predictions.label_ids

print("\n📊 Classification Report (4 decimal places):\n")
print(classification_report(y_true, y_pred, target_names=label_names, digits=4))

acc = accuracy_score(y_true, y_pred)
print(f"✅ Overall Accuracy: {acc:.4f}")

  else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)



📊 Classification Report (4 decimal places):

              precision    recall  f1-score   support

           N     0.6322    0.7333    0.6790       150
           P     0.6667    0.5556    0.6061       144

    accuracy                         0.6463       294
   macro avg     0.6494    0.6444    0.6425       294
weighted avg     0.6491    0.6463    0.6433       294

✅ Overall Accuracy: 0.6463
