In [None]:
import os
import shutil

# Get user profile path
user_profile = os.environ["USERPROFILE"]

# Paths to Hugging Face cached models
cached_models = [
    os.path.join(user_profile, r".cache\huggingface\hub\models--bert-base-multilingual-cased"),
    os.path.join(user_profile, r".cache\huggingface\hub\models--xlm-roberta-base")
]

# Remove cached models if they exist
for path in cached_models:
    if os.path.exists(path):
        shutil.rmtree(path)
        print(f"Removed cache: {path}")
    else:
        print(f"No cache found at: {path}")


In [1]:
# Set device
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# ==============================
# Imports
# ==============================
import torch
import pandas as pd
import random
import numpy as np
from collections import Counter
from transformers import XLMRobertaTokenizer, XLMRobertaForMaskedLM
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from openprompt.data_utils import InputExample
from openprompt.plms import load_plm
from openprompt import PromptForClassification, PromptDataLoader
from torch.optim import AdamW
from sklearn.metrics import classification_report
from collections import defaultdict
from torch.utils.data import DataLoader, Sampler

# ========================================
# Check CUDA
# ========================================
device = "cuda" #if torch.cuda.is_available() else "cpu"
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)
    print("GPU count:", torch.cuda.device_count())

# ========================================
# Seeds for reproducibility
# ========================================
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
GPU name: NVIDIA GeForce RTX 4080 SUPER
CUDA version: 12.1
GPU count: 1


In [3]:
### Classes ###
classes = ['LOCATION', 'NUMBER', 'DATE', 'PERSON', 'DESIGNATION', 'TIME', 'ORGANIZATION']

### Label Map ###
label_map = {'LOCATION': 0, 'NUMBER': 1, 'DATE': 2, 'PERSON': 3, 'DESIGNATION': 4, 'TIME': 5, 'ORGANIZATION': 6}



# # Step 1: Use load_plm with 'roberta' to get the correct WrapperClass
_, _, _, WrapperClass = load_plm("roberta", "roberta-base")  # Just to get the wrapper

# # # Step 2: Manually load XLM-RoBERTa model/tokenizer
model_name = "xlm-roberta-base"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
plm = XLMRobertaForMaskedLM.from_pretrained(model_name)

# ==============================
# Load Pretrained Language Model (mBERT)
# ==============================
# plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-multilingual-cased")



In [4]:
# ==============================
# Define Prompt Template (Manual)
# ==============================
template = ManualTemplate(
    text = 'ÿ¨ŸÖŸÑ€Å: {"placeholder":"text_a"} ŸÑŸÅÿ∏ {"placeholder":"text_b"} ÿß€å⁄© {"mask"} €Å€í€î',
    tokenizer=tokenizer,
)




# templates = [
#     ("P1", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€Å: {"placeholder":"text_a"} ŸÑŸÅÿ∏ {"placeholder":"text_b"} ÿß€å⁄© {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P2", ManualTemplate(
#         text='{"placeholder":"text_a"} ŸÖ€å⁄∫ ŸÖŸàÿ¨ŸàÿØ ŸÑŸÅÿ∏ {"placeholder":"text_b"} ⁄©€å ŸÇÿ≥ŸÖ {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P3", ManualTemplate(
#         text='ŸÖŸÜÿØÿ±ÿ¨€Å ÿ∞€åŸÑ ÿ¨ŸÖŸÑ€í ŸÖ€å⁄∫: {"placeholder":"text_b"} ÿß€å⁄© {"mask"} ⁄©€í ÿ∑Ÿàÿ± Ÿæÿ± ÿ¢ÿ™ÿß €Å€í: {"placeholder":"text_a"}',
#         tokenizer=tokenizer,
#     )),

#     ("P4", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€í: {"placeholder":"text_a"} ŸÑŸÅÿ∏ {"placeholder":"text_b"} ⁄©ÿß ÿØÿ±ÿ¨€Å {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P5", ManualTemplate(
#         text='{"placeholder":"text_a"} ‚Üí ŸÑŸÅÿ∏ {"placeholder":"text_b"} ⁄©€å ÿ¥ŸÜÿßÿÆÿ™: {"mask"}',
#         tokenizer=tokenizer,
#     )),

#     ("P6", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€Å ŸÖ€å⁄∫ {"placeholder":"text_b"} ÿß€å⁄© {"mask"} ŸÇÿ≥ŸÖ ⁄©ÿß ŸÑŸÅÿ∏ €Å€í: {"placeholder":"text_a"}',
#         tokenizer=tokenizer,
#     )),

#     ("P7", ManualTemplate(
#         text='€å€Å ŸÑŸÅÿ∏ {"placeholder":"text_b"} ÿ¨ŸÖŸÑ€í {"placeholder":"text_a"} ŸÖ€å⁄∫ {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),

#     ("P8", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€í {"placeholder":"text_a"} ŸÖ€å⁄∫ ŸÑŸÅÿ∏ {"placeholder":"text_b"} ⁄©€å ŸÇÿ≥ŸÖ ⁄©€åÿß €Å€íÿü {"mask"}',
#         tokenizer=tokenizer,
#     )),

#     ("P9", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€Å: {"placeholder":"text_a"} ŸÑŸÅÿ∏ {"placeholder":"text_b"} ⁄©Ÿà ÿØÿ±ÿ¨€Å ÿ®ŸÜÿØ€å ⁄©ÿ±€å⁄∫: {"mask"}',
#         tokenizer=tokenizer,
#     )),

#     ("P10", ManualTemplate(
#         text='{"placeholder":"text_a"} ‚Üí {"placeholder":"text_b"} ÿß€å⁄© {"mask"} ⁄©€å ŸÖÿ´ÿßŸÑ €Å€í€î',
#         tokenizer=tokenizer,
#     )),
# ]







verbalizer = ManualVerbalizer(
    classes=classes,
    label_words={
    'LOCATION': ['ÿ¥€Åÿ±', 'ŸÖŸÑ⁄©'],           # City, Country
    'NUMBER': ['ÿπÿØÿØ', 'ŸÜŸÖÿ®ÿ±'],            # Number, Digit
    'DATE': ['ÿ™ÿßÿ±€åÿÆ', 'ÿØŸÜ'],              # Date, Day
    'PERSON': ['ÿ¥ÿÆÿµ', 'ÿßŸÜÿ≥ÿßŸÜ'],           # Person, Human
    'DESIGNATION': ['ÿπ€ÅÿØ€Å', 'ŸÖŸÜÿµÿ®'],      # Designation, Position
    'TIME': ['ŸàŸÇÿ™', '⁄Ø⁄æŸÜŸπ€Å'],             # Time, Hour
    'ORGANIZATION': ['ÿßÿØÿßÿ±€Å', '⁄©ŸÖŸæŸÜ€å'],     # Organization, Company
    },
    tokenizer=tokenizer,
)


In [5]:
# ==============================
# Create Prompt Model
# ==============================
prompt_model = PromptForClassification(
    template=template,
    plm=plm,
    verbalizer=verbalizer
)


In [6]:
# ==============================
# Load NER Test Dataset
# ==============================
df = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\NER\ner_test.csv")

# ==============================
# Convert to InputExamples
# ==============================
# Note: For NER, label is per entity, so sentence is text_a, entity is in meta
eval_dataset = [
    InputExample(
        guid=i,
        text_a=row['sentence'],
        meta={'entity': row['entity']},
        label=label_map[row['label']]
    )
    for i, row in df.iterrows()
]

# ==============================
# PromptDataLoader
# ==============================
eval_loader = PromptDataLoader(
    dataset=eval_dataset,
    tokenizer=tokenizer,
    template=template,              # your ManualTemplate for XNLI
    tokenizer_wrapper_class=WrapperClass,
    max_seq_length=128,
    batch_size=8,
    shuffle=False
)

# ==============================
# Evaluate Model
# ==============================
prompt_model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in eval_loader:
        logits = prompt_model(batch)
        preds = torch.argmax(logits, dim=-1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(batch['label'].cpu().tolist())

# ==============================
# Print Classification Report
# ==============================
print("\nüìä NER Urdu Dev Classification Report:")
print(classification_report(all_labels, all_preds, target_names=classes, digits=4))

tokenizing: 932it [00:00, 1827.55it/s]



üìä NER Urdu Dev Classification Report:
              precision    recall  f1-score   support

    LOCATION     0.0000    0.0000    0.0000       243
      NUMBER     0.4245    0.2261    0.2951       199
        DATE     0.2500    0.0732    0.1132        41
      PERSON     0.2648    0.8678    0.4058       242
 DESIGNATION     0.0000    0.0000    0.0000        57
        TIME     0.0000    0.0000    0.0000        15
ORGANIZATION     0.2500    0.0370    0.0645       135

    accuracy                         0.2822       932
   macro avg     0.1699    0.1720    0.1255       932
weighted avg     0.2066    0.2822    0.1827       932



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
roberta
üìä NER Urdu Dev Classification Report:
              precision    recall  f1-score   support

    LOCATION     0.0000    0.0000    0.0000       243
      NUMBER     0.4245    0.2261    0.2951       199
        DATE     0.2500    0.0732    0.1132        41
      PERSON     0.2648    0.8678    0.4058       242
 DESIGNATION     0.0000    0.0000    0.0000        57
        TIME     0.0000    0.0000    0.0000        15
ORGANIZATION     0.2500    0.0370    0.0645       135

    accuracy                         0.2822       932
   macro avg     0.1699    0.1720    0.1255       932
weighted avg     0.2066    0.2822    0.1827       932

In [None]:
mbert
üìä NER Urdu Dev Classification Report:
              precision    recall  f1-score   support

    LOCATION     0.5000    0.0123    0.0241       243
      NUMBER     0.1667    0.0050    0.0098       199
        DATE     0.0000    0.0000    0.0000        41
      PERSON     0.2567    0.9091    0.4004       242
 DESIGNATION     0.0000    0.0000    0.0000        57
        TIME     0.0000    0.0000    0.0000        15
ORGANIZATION     0.1000    0.0370    0.0541       135

    accuracy                         0.2457       932
   macro avg     0.1462    0.1376    0.0698       932
weighted avg     0.2471    0.2457    0.1202       932

In [None]:
# # ==============================
# # Load Evaluation Dataset
# # ==============================
# df = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\SST-2\urdu_sentiment_test_labeled.csv")
# eval_dataset = [
#     InputExample(guid=i, text_a=row['text'], label=label_map[row['label']])
#     for i, row in df.iterrows()
# ]

# # ==============================
# # 0-Shot Evaluation with Each Template
# # ==============================
# prompt_model.eval()  # ensure model is in evaluation mode
# batch_size = 8    # eval batch size

# # Optional: store template order and results
# all_pass_patterns = {}

# for pass_idx, (prompt_name, current_template) in enumerate(templates, start=1):
#     print(f"\nüü¶ 0-Shot Evaluation - Template {prompt_name} ({pass_idx}/{len(templates)})")

#     # Create PromptDataLoader with current template
#     eval_loader = PromptDataLoader(
#         dataset=eval_dataset,
#         tokenizer=tokenizer,
#         template=current_template,
#         tokenizer_wrapper_class=WrapperClass,
#         max_seq_length=128,
#         batch_size=batch_size,
#         shuffle=False
#     )

#     pass_preds = []
#     pass_labels = []

#     # Run evaluation
#     with torch.no_grad():
#         for batch in eval_loader:
#             logits = prompt_model(batch)
#             preds = torch.argmax(logits, dim=-1)
#             pass_preds.extend(preds.cpu().tolist())
#             pass_labels.extend(batch['label'].cpu().tolist())

#     # Print report immediately after this template
#     print(f"\nüìä STS_B Urdu Dev Classification Report - Template {prompt_name}")
#     print(classification_report(pass_labels, pass_preds, target_names=classes, digits=4))

#     # Store template name (optional)
#     all_pass_patterns[f"pass_{pass_idx}"] = prompt_name

# # Optional: print template order at the end
# print("\n‚úÖ Templates used per pass:", all_pass_patterns)
