In [None]:
import os
import shutil

# Purpose: delete the locally cached copies of the two checkpoints used in
# this notebook so that the next `from_pretrained` call downloads them afresh.

# Home directory, resolved portably. On Windows expanduser("~") resolves to
# %USERPROFILE%, so the previous behaviour is kept there, while Linux/macOS no
# longer fail with KeyError on a missing USERPROFILE variable.
user_profile = os.path.expanduser("~")

# Hub cache location. HF_HUB_CACHE and HF_HOME are honoured when set, because
# the download would land there rather than in the default location.
# Path components are joined individually instead of embedding "\" separators,
# which only form real sub-directories on Windows.
hub_cache = os.environ.get("HF_HUB_CACHE") or os.path.join(
    os.environ.get("HF_HOME") or os.path.join(user_profile, ".cache", "huggingface"),
    "hub",
)

# Paths to Hugging Face cached models
cached_models = [
    os.path.join(hub_cache, "models--bert-base-multilingual-cased"),
    os.path.join(hub_cache, "models--xlm-roberta-base"),
]

# Remove cached models if they exist
for path in cached_models:
    # isdir (not exists): rmtree only works on directories, and a stray file
    # at this path is not a model cache.
    if os.path.isdir(path):
        shutil.rmtree(path)
        print(f"Removed cache: {path}")
    else:
        print(f"No cache found at: {path}")


In [1]:
# Choose the compute device: "cuda" when a CUDA GPU is usable, else "cpu"
import torch
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
# ==============================
# Imports
# ==============================
import torch
import pandas as pd
import random
import numpy as np
from collections import Counter
from transformers import XLMRobertaTokenizer, XLMRobertaForMaskedLM
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from openprompt.data_utils import InputExample
from openprompt.plms import load_plm
from openprompt import PromptForClassification, PromptDataLoader
from torch.optim import AdamW
from sklearn.metrics import classification_report
from collections import defaultdict
from torch.utils.data import DataLoader, Sampler

# ========================================
# Check CUDA
# ========================================
# Select "cuda" only when it is really available. Hard-coding "cuda" makes
# `device` wrong on CPU-only machines and disagrees with the earlier
# device-selection cell.
cuda_available = torch.cuda.is_available()
device = "cuda" if cuda_available else "cpu"
print("CUDA available:", cuda_available)
if cuda_available:
    print("GPU name:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)
    print("GPU count:", torch.cuda.device_count())

# ========================================
# Seeds for reproducibility
# ========================================
seed = 42
random.seed(seed)                 # Python's built-in RNG
np.random.seed(seed)              # NumPy's legacy global RNG
torch.manual_seed(seed)           # PyTorch RNG
torch.cuda.manual_seed_all(seed)  # all GPUs; silently ignored without CUDA
# Ask cuDNN for deterministic kernels and disable its auto-tuner, which can
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
GPU name: NVIDIA GeForce RTX 4080 SUPER
CUDA version: 12.1
GPU count: 1


In [3]:
# ==============================
# Define Classes
# ==============================
classes = ["positive", "negative", "neutral"]
# Label ids are derived from `classes` so the two can never drift apart
# (yields {"positive": 0, "negative": 1, "neutral": 2}).
label_map = {name: idx for idx, name in enumerate(classes)}

# ==============================
# Load Pretrained Language Model (XLM-RoBERTa)
# ==============================
# Step 1: tokenizer wrapper class for the PromptDataLoader.
# This is the wrapper OpenPrompt registers for "roberta". Importing it
# directly avoids calling load_plm("roberta", "roberta-base"), which loads an
# entire roberta-base checkpoint that is never used.
from openprompt.plms import MLMTokenizerWrapper

WrapperClass = MLMTokenizerWrapper

# Step 2: manually load the XLM-RoBERTa model/tokenizer
model_name = "xlm-roberta-base"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
plm = XLMRobertaForMaskedLM.from_pretrained(model_name)

# To evaluate mBERT instead, replace steps 1-2 with:
# plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-multilingual-cased")




In [4]:

# ==============================
# Define Prompt Template (Manual)
# ==============================
# The input sentence fills the "text_a" placeholder; the {"mask"} slot is the
# position whose prediction the verbalizer maps to a class.
_prompt_text = '{"placeholder":"text_a"} €å€Å ÿ¨ŸÖŸÑ€Å {"mask"} €Å€í€î'
template = ManualTemplate(tokenizer=tokenizer, text=_prompt_text)

 

# templates = [
#     ("P1", ManualTemplate(
#         text='{"placeholder":"text_a"} €å€Å ÿ¨ŸÖŸÑ€Å {"mask"} €Å€í€î',  # Keep as is
#         tokenizer=tokenizer,
#     )),
#     ("P2", ManualTemplate(
#         text='ÿØ€å ⁄Øÿ¶€å ÿπÿ®ÿßÿ±ÿ™: {"placeholder":"text_a"} ⁄©ÿß ŸÖŸàÿßÿØ ÿßŸàÿ± Ÿæ€åÿ∫ÿßŸÖ {"mask"} €Å€í€î',  # generalized for neg/pos
#         tokenizer=tokenizer,
#     )),
#     ("P3", ManualTemplate(
#         text='ÿßÿ≥ ÿπÿ®ÿßÿ±ÿ™ ⁄©ÿß ÿ™ÿ¨ÿ≤€å€Å ⁄©ÿ±€å⁄∫: {"placeholder":"text_a"} ÿßÿ≥ ⁄©ÿß ÿßÿ≠ÿ≥ÿßÿ≥ €åÿß ŸÖŸà⁄à {"mask"} €Å€í€î',  # focuses on sentiment/intent
#         tokenizer=tokenizer,
#     )),
#     ("P4", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€Å: {"placeholder":"text_a"} €ÅŸÖ€å⁄∫ ÿ®ÿ™ÿßÿ™ÿß €Å€í ⁄©€Å €å€Å ŸÖŸàÿßÿØ {"mask"} €Å€í€î',  # clear statement aligned with label
#         tokenizer=tokenizer,
#     )),
#     ("P5", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßÿ≥ ÿπÿ®ÿßÿ±ÿ™ ⁄©ÿß ŸÖÿ∑ŸÑÿ® {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),
#     ("P6", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßÿ≥ ŸÖÿπÿßŸÖŸÑ€í ŸÖ€å⁄∫ ÿ≠ÿ™ŸÖ€å ÿ±ÿßÿ¶€í {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P7", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßÿ≥ ŸÖŸàÿßÿØ ⁄©€å ÿ™ÿ¥ÿ±€åÿ≠ {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P8", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßÿ≥ ÿ≠ŸàÿßŸÑ€í ÿ≥€í ŸÅ€åÿµŸÑ€Å {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P9", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßÿ≥ ŸÖÿ™ŸÜ ⁄©€å ÿØÿ±ÿ¨€Å ÿ®ŸÜÿØ€å {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P10", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßÿ≥ ÿßÿ∏€Åÿßÿ± ⁄©ÿß ŸÜÿ™€åÿ¨€Å {"mask"}',
#         tokenizer=tokenizer,
#     )), 
# ]

# ==============================
# Define Verbalizer (Manual)
# ==============================
# Candidate answer words for the mask slot, keyed by class name
_label_words = {
    "positive": ["ŸÖÿ´ÿ®ÿ™", "ÿÆŸàÿ¥⁄ØŸàÿßÿ±"],
    "negative": ["ŸÖŸÜŸÅ€å", "ŸÖÿ≥ÿ¶ŸÑ€Å"],
    "neutral": ["ÿ∫€åÿ±ÿ¨ÿßŸÜÿ®ÿØÿßÿ±", "ŸÖÿπŸÑŸàŸÖÿßÿ™€å"],
}
verbalizer = ManualVerbalizer(
    tokenizer=tokenizer,
    classes=classes,
    label_words=_label_words,
)

In [5]:
# ==============================
# Create Prompt Model
# ==============================
# Bundle the language model, the prompt template and the verbalizer into a
# single classification model.
_prompt_parts = {
    "plm": plm,
    "template": template,
    "verbalizer": verbalizer,
}
prompt_model = PromptForClassification(**_prompt_parts)


In [6]:
# ==============================
# Load Evaluation Dataset
# ==============================
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
df = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\multi\urdu_multidomain_dev_split_no_request.csv")

# One InputExample per row; the DataFrame index is kept as the guid.
# Zipping the columns avoids the per-row Series objects that iterrows() builds.
eval_dataset = [
    InputExample(guid=i, text_a=text, label=label_map[sentiment])
    for i, text, sentiment in zip(df.index, df["text"], df["sentiment"])
]


# ==============================
# PromptDataLoader
# ==============================
eval_loader = PromptDataLoader(
    dataset=eval_dataset,
    tokenizer=tokenizer,
    template=template,              # the ManualTemplate for the Urdu sentiment prompt
    tokenizer_wrapper_class=WrapperClass,
    max_seq_length=128,
    batch_size=8,
    shuffle=False
)

# ==============================
# Evaluate Model
# ==============================
# Use the GPU when one is present. The model and every batch must be on the
# same device, so both are moved here; previously neither was, and the
# evaluation ran on the CPU.
eval_device = "cuda" if torch.cuda.is_available() else "cpu"
prompt_model.to(eval_device)
prompt_model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in eval_loader:
        batch = batch.to(eval_device)
        logits = prompt_model(batch)
        preds = torch.argmax(logits, dim=-1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(batch['label'].cpu().tolist())

# ==============================
# Print Classification Report
# ==============================
print("\nüìä SSTM Urdu Dev Classification Report:")
# `labels` pins the row order to `classes`, so the report is still produced
# when a class never occurs in the labels or predictions (target_names alone
# raises on a length mismatch). zero_division=0 reports 0.0 for a class that
# is never predicted without emitting an undefined-metric warning.
print(classification_report(
    all_labels,
    all_preds,
    labels=list(range(len(classes))),
    target_names=classes,
    digits=4,
    zero_division=0,
))

tokenizing: 6923it [00:01, 5226.38it/s]



üìä SSTM Urdu Dev Classification Report:
              precision    recall  f1-score   support

    positive     0.6812    0.6843    0.6827      2192
    negative     0.4816    0.5850    0.5283      2053
     neutral     0.5357    0.4455    0.4864      2678

    accuracy                         0.5625      6923
   macro avg     0.5662    0.5716    0.5658      6923
weighted avg     0.5657    0.5625    0.5610      6923



In [None]:
# XLM-RoBERTa (xlm-roberta-base), zero-shot results:
# 📊 SSTM Urdu Dev Classification Report:
#               precision    recall  f1-score   support
#
#     positive     0.6812    0.6843    0.6827      2192
#     negative     0.4816    0.5850    0.5283      2053
#      neutral     0.5357    0.4455    0.4864      2678
#
#     accuracy                         0.5625      6923
#    macro avg     0.5662    0.5716    0.5658      6923
# weighted avg     0.5657    0.5625    0.5610      6923

In [None]:
# mBERT (bert-base-multilingual-cased), zero-shot results:
# 📊 SSTM Urdu Dev Classification Report:
#               precision    recall  f1-score   support
#
#     positive     0.0000    0.0000    0.0000      2192
#     negative     0.2974    0.9669    0.4549      2053
#      neutral     0.4476    0.0414    0.0759      2678
#
#     accuracy                         0.3028      6923
#    macro avg     0.2483    0.3361    0.1769      6923
# weighted avg     0.2613    0.3028    0.1642      6923

In [None]:
# # ==============================
# # Load Evaluation Dataset
# # ==============================
# df = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\SST-2\urdu_sentiment_test_labeled.csv")
# eval_dataset = [
#     InputExample(guid=i, text_a=row['text'], label=label_map[row['label']])
#     for i, row in df.iterrows()
# ]

# # ==============================
# # 0-Shot Evaluation with Each Template
# # ==============================
# prompt_model.eval()  # ensure model is in evaluation mode
# batch_size = 8    # eval batch size

# # Optional: store template order and results
# all_pass_patterns = {}

# for pass_idx, (prompt_name, current_template) in enumerate(templates, start=1):
#     print(f"\nüü¶ 0-Shot Evaluation - Template {prompt_name} ({pass_idx}/{len(templates)})")

#     # Create PromptDataLoader with current template
#     eval_loader = PromptDataLoader(
#         dataset=eval_dataset,
#         tokenizer=tokenizer,
#         template=current_template,
#         tokenizer_wrapper_class=WrapperClass,
#         max_seq_length=128,
#         batch_size=batch_size,
#         shuffle=False
#     )

#     pass_preds = []
#     pass_labels = []

#     # Run evaluation
#     with torch.no_grad():
#         for batch in eval_loader:
#             logits = prompt_model(batch)
#             preds = torch.argmax(logits, dim=-1)
#             pass_preds.extend(preds.cpu().tolist())
#             pass_labels.extend(batch['label'].cpu().tolist())

#     # Print report immediately after this template
#     print(f"\nüìä STS_B Urdu Dev Classification Report - Template {prompt_name}")
#     print(classification_report(pass_labels, pass_preds, target_names=classes, digits=4))

#     # Store template name (optional)
#     all_pass_patterns[f"pass_{pass_idx}"] = prompt_name

# # Optional: print template order at the end
# print("\n‚úÖ Templates used per pass:", all_pass_patterns)
