In [None]:
import os
import shutil

# Get user profile path
user_profile = os.environ["USERPROFILE"]

# Paths to Hugging Face cached models
cached_models = [
    os.path.join(user_profile, r".cache\huggingface\hub\models--bert-base-multilingual-cased"),
    os.path.join(user_profile, r".cache\huggingface\hub\models--xlm-roberta-base")
]

# Remove cached models if they exist
for path in cached_models:
    if os.path.exists(path):
        shutil.rmtree(path)
        print(f"Removed cache: {path}")
    else:
        print(f"No cache found at: {path}")


In [1]:
# Set device
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# ==============================
# Imports
# ==============================
import torch
import pandas as pd
import random
import numpy as np
from collections import Counter
from transformers import XLMRobertaTokenizer, XLMRobertaForMaskedLM
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from openprompt.data_utils import InputExample
from openprompt.plms import load_plm
from openprompt import PromptForClassification, PromptDataLoader
from torch.optim import AdamW
from sklearn.metrics import classification_report
from collections import defaultdict
from torch.utils.data import DataLoader, Sampler

# ========================================
# Check CUDA
# ========================================
device = "cuda" #if torch.cuda.is_available() else "cpu"
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)
    print("GPU count:", torch.cuda.device_count())

# ========================================
# Seeds for reproducibility
# ========================================
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
GPU name: NVIDIA GeForce RTX 4080 SUPER
CUDA version: 12.1
GPU count: 1


In [3]:
### Classes ###
classes = ['entailment', 'not_entailment']

### Label Map ###
label_map = {'entailment': 1, 'not_entailment': 0}



# # Step 1: Use load_plm with 'roberta' to get the correct WrapperClass
_, _, _, WrapperClass = load_plm("roberta", "roberta-base")  # Just to get the wrapper

# # # Step 2: Manually load XLM-RoBERTa model/tokenizer
model_name = "xlm-roberta-base"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
plm = XLMRobertaForMaskedLM.from_pretrained(model_name)

# ==============================
# Load Pretrained Language Model (mBERT)
# ==============================
# plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-multilingual-cased")



In [4]:
# ==============================
# Define Prompt Template (Manual)
# ==============================
template = ManualTemplate(
    text = '{"placeholder":"text_a"} ÿßŸàÿ± {"placeholder":"text_b"} ⁄©ÿß ÿ™ÿπŸÑŸÇ {"mask"} €Å€í€î',
    tokenizer=tokenizer,
)




# templates = [
#     ("P1", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßŸàÿ± {"placeholder":"text_b"} ⁄©ÿß ÿ™ÿπŸÑŸÇ {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P2", ManualTemplate(
#         text='Ÿæ€ÅŸÑÿß ÿ®€åÿßŸÜ: {"placeholder":"text_a"} ÿØŸàÿ≥ÿ±ÿß ÿ®€åÿßŸÜ: {"placeholder":"text_b"} ÿßŸÜ ⁄©ÿß ÿ™ÿπŸÑŸÇ {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P3", ManualTemplate(
#         text='{"placeholder":"text_b"} ⁄©€åÿß {"placeholder":"text_a"} ÿ≥€í {"mask"} €ÅŸàÿ™ÿß €Å€íÿü',
#         tokenizer=tokenizer,
#     )),

#     ("P4", ManualTemplate(
#         text='{"placeholder":"text_a"} ⁄©€å ÿ±Ÿàÿ¥ŸÜ€å ŸÖ€å⁄∫ {"placeholder":"text_b"} {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P5", ManualTemplate(
#         text='{"placeholder":"text_b"} ⁄©ÿß ÿ®€åÿßŸÜ {"placeholder":"text_a"} ⁄©€í ŸÖÿ∑ÿßÿ®ŸÇ {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P6", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßŸàÿ± {"placeholder":"text_b"} ⁄©€í ÿØÿ±ŸÖ€åÿßŸÜ ŸÖŸÜÿ∑ŸÇ€å ÿ±ÿ¥ÿ™€Å {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P7", ManualTemplate(
#         text='{"placeholder":"text_b"}ÿå {"placeholder":"text_a"} ÿ≥€í {"mask"} ÿ∑Ÿàÿ± Ÿæÿ± ÿ¨⁄ëÿß €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P8", ManualTemplate(
#         text='ÿß⁄Øÿ± €ÅŸÖ {"placeholder":"text_a"} ⁄©Ÿà ÿØ€å⁄©⁄æ€å⁄∫ ÿ™Ÿà {"placeholder":"text_b"} {"mask"} ÿ®ŸÜÿ™ÿß €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P9", ManualTemplate(
#         text='{"placeholder":"text_a"} ⁄©€í ÿ≠ŸàÿßŸÑ€í ÿ≥€í {"placeholder":"text_b"} {"mask"} ÿ≥ŸÖÿ¨⁄æÿß ÿ¨ÿßÿ™ÿß €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P10", ManualTemplate(
#         text='{"placeholder":"text_a"} ÿßŸàÿ± {"placeholder":"text_b"} ŸÖ€å⁄∫ ÿ™ÿπŸÑŸÇ ⁄©€å ŸÜŸàÿπ€åÿ™ {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),
# ]







verbalizer = ManualVerbalizer(
    classes=classes,
    label_words={
        "entailment": ["ÿØÿ±ÿ≥ÿ™", "ÿ´ÿßÿ®ÿ™"],
        "not_entailment": ["ÿ∫ŸÑÿ∑", "ŸÜÿßŸÖÿ∑ÿßÿ®ŸÇ"]
    },
    tokenizer=tokenizer,
)

In [5]:
# ==============================
# Create Prompt Model
# ==============================
prompt_model = PromptForClassification(
    template=template,
    plm=plm,
    verbalizer=verbalizer
)


In [6]:
# ==============================
# Load Evaluation Dataset
# ==============================
df = pd.read_csv(
    r"C:\Users\stdFurqan\Desktop\paft\WNLI\WNLI_dev_urdu_entailment.csv"
)

# Make InputExamples
eval_dataset = [
    InputExample(
        guid=i,
        text_a=row['Sentence1'],
        text_b=row['Sentence2'],
        label=label_map[row['label_text']]
    )
    for i, row in df.iterrows()
]


# ==============================
# PromptDataLoader
# ==============================
eval_loader = PromptDataLoader(
    dataset=eval_dataset,
    tokenizer=tokenizer,
    template=template,              # your ManualTemplate for XNLI
    tokenizer_wrapper_class=WrapperClass,
    max_seq_length=128,
    batch_size=8,
    shuffle=False
)

# ==============================
# Evaluate Model
# ==============================
prompt_model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in eval_loader:
        logits = prompt_model(batch)
        preds = torch.argmax(logits, dim=-1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(batch['label'].cpu().tolist())

# ==============================
# Print Classification Report
# ==============================
print("\nüìä WNLI Urdu Dev Classification Report:")
print(classification_report(all_labels, all_preds, target_names=classes, digits=4))

tokenizing: 71it [00:00, 2885.70it/s]



üìä WNLI Urdu Dev Classification Report:
                precision    recall  f1-score   support

    entailment     0.5522    0.9250    0.6916        40
not_entailment     0.2500    0.0323    0.0571        31

      accuracy                         0.5352        71
     macro avg     0.4011    0.4786    0.3744        71
  weighted avg     0.4203    0.5352    0.4146        71



In [None]:
roberta
üìä WNLI Urdu Dev Classification Report:
                precision    recall  f1-score   support

    entailment     0.5522    0.9250    0.6916        40
not_entailment     0.2500    0.0323    0.0571        31

      accuracy                         0.5352        71
     macro avg     0.4011    0.4786    0.3744        71
  weighted avg     0.4203    0.5352    0.4146        71

In [None]:
mbert
üìä WNLI Urdu Dev Classification Report:
                precision    recall  f1-score   support

    entailment     0.5556    0.2500    0.3448        40
not_entailment     0.4340    0.7419    0.5476        31

      accuracy                         0.4648        71
     macro avg     0.4948    0.4960    0.4462        71
  weighted avg     0.5025    0.4648    0.4334        71

In [None]:
# # ==============================
# # Load Evaluation Dataset
# # ==============================
# df = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\SST-2\urdu_sentiment_test_labeled.csv")
# eval_dataset = [
#     InputExample(guid=i, text_a=row['text'], label=label_map[row['label']])
#     for i, row in df.iterrows()
# ]

# # ==============================
# # 0-Shot Evaluation with Each Template
# # ==============================
# prompt_model.eval()  # ensure model is in evaluation mode
# batch_size = 8    # eval batch size

# # Optional: store template order and results
# all_pass_patterns = {}

# for pass_idx, (prompt_name, current_template) in enumerate(templates, start=1):
#     print(f"\nüü¶ 0-Shot Evaluation - Template {prompt_name} ({pass_idx}/{len(templates)})")

#     # Create PromptDataLoader with current template
#     eval_loader = PromptDataLoader(
#         dataset=eval_dataset,
#         tokenizer=tokenizer,
#         template=current_template,
#         tokenizer_wrapper_class=WrapperClass,
#         max_seq_length=128,
#         batch_size=batch_size,
#         shuffle=False
#     )

#     pass_preds = []
#     pass_labels = []

#     # Run evaluation
#     with torch.no_grad():
#         for batch in eval_loader:
#             logits = prompt_model(batch)
#             preds = torch.argmax(logits, dim=-1)
#             pass_preds.extend(preds.cpu().tolist())
#             pass_labels.extend(batch['label'].cpu().tolist())

#     # Print report immediately after this template
#     print(f"\nüìä STS_B Urdu Dev Classification Report - Template {prompt_name}")
#     print(classification_report(pass_labels, pass_preds, target_names=classes, digits=4))

#     # Store template name (optional)
#     all_pass_patterns[f"pass_{pass_idx}"] = prompt_name

# # Optional: print template order at the end
# print("\n‚úÖ Templates used per pass:", all_pass_patterns)
