In [None]:
import os
import shutil

# Home directory of the current user. os.path.expanduser("~") resolves to
# %USERPROFILE% on Windows and to $HOME elsewhere, so this cell no longer
# raises KeyError on Linux/macOS where USERPROFILE is not defined.
user_profile = os.path.expanduser("~")

# Hugging Face hub cache directory. An explicit HF_HUB_CACHE wins, then
# $HF_HOME/hub, then the default ~/.cache/huggingface/hub.
hub_cache_dir = os.environ.get("HF_HUB_CACHE") or os.path.join(
    os.environ.get("HF_HOME") or os.path.join(user_profile, ".cache", "huggingface"),
    "hub",
)

# Paths to the cached models this notebook may have downloaded. Path components
# are joined individually so the separator is correct on every platform.
cached_models = [
    os.path.join(hub_cache_dir, "models--bert-base-multilingual-cased"),
    os.path.join(hub_cache_dir, "models--xlm-roberta-base"),
]

# Remove each cached model directory if it exists; report what happened either way.
for path in cached_models:
    if os.path.exists(path):
        shutil.rmtree(path)
        print(f"Removed cache: {path}")
    else:
        print(f"No cache found at: {path}")


In [1]:
# Choose the compute device: the GPU when PyTorch can see one, the CPU otherwise.
import torch

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
# ==============================
# Imports
# ==============================
import torch
import pandas as pd
import random
import numpy as np
from collections import Counter
from transformers import XLMRobertaTokenizer, XLMRobertaForMaskedLM
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from openprompt.data_utils import InputExample
from openprompt.plms import load_plm
from openprompt import PromptForClassification, PromptDataLoader
from torch.optim import AdamW
from sklearn.metrics import classification_report
from collections import defaultdict
from torch.utils.data import DataLoader, Sampler

# ========================================
# Check CUDA
# ========================================
# Use the GPU only when PyTorch can actually see one; otherwise fall back to
# the CPU so that `device` is always a usable value on any machine.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    # These queries are only meaningful when a CUDA device is present.
    print("GPU name:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)
    print("GPU count:", torch.cuda.device_count())

# ========================================
# Seeds for reproducibility
# ========================================
seed = 42

# Feed the same seed to every random number generator the notebook touches:
# Python's `random`, NumPy, PyTorch (CPU) and PyTorch (all CUDA devices).
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_rng(seed)

# Turn off cuDNN autotuning and request deterministic cuDNN kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
GPU name: NVIDIA GeForce RTX 4080 SUPER
CUDA version: 12.1
GPU count: 1


In [3]:
# ==============================
# Define Classes
# ==============================
# POS tag inventory. A tag's position in this list is its integer class id,
# so the order must not change.
classes = [
    'PN', 'G', 'NN', 'P', 'U', 'VB', 'SM', 'PM', 'PP', 'CC',
    'ADJ', 'CA', 'RP', 'SC', 'SE', 'ADV', 'EXP', 'I', 'NEG', 'TA',
    'AP', 'Q', 'PD', 'WALA', 'KP', 'GR', 'REP', 'A', 'KD', 'AA',
    'QW', 'KER', 'OR', 'AKP', 'MUL', 'INT', 'AD', 'FR', 'DATE', 'RD',
]

# Tag -> class id, derived from the list order above ('PN' -> 0, ..., 'RD' -> 39).
label_map = {tag: class_id for class_id, tag in enumerate(classes)}

# Alternative backbone (currently disabled): XLM-RoBERTa.
# Step 1: call load_plm with 'roberta' only to obtain the matching WrapperClass.
# _, _, _, WrapperClass = load_plm("roberta", "roberta-base")  # Just to get the wrapper

# Step 2: load the XLM-RoBERTa model and tokenizer manually.
# model_name = "xlm-roberta-base"
# tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
# plm = XLMRobertaForMaskedLM.from_pretrained(model_name)

# ==============================
# Load Pretrained Language Model (mBERT)
# ==============================
# load_plm returns four objects: the language model, its tokenizer, the model
# config, and the tokenizer wrapper class that PromptDataLoader needs later.
plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-multilingual-cased")


Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMaskedLM were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['cls.predictions.decoder.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:

# ==============================
# Define Prompt Template (Manual)
# ==============================
# Single hand-written prompt used for evaluation. {"placeholder":"text_a"} is
# replaced by each example's `text_a`, and {"mask"} marks the position whose
# prediction the verbalizer maps to a class.
template = ManualTemplate(
    text='ŸÑŸÅÿ∏: {"placeholder":"text_a"} ‚Üí ÿ≠ÿµ€Å ⁄©ŸÑÿßŸÖ: {"mask"}',
    tokenizer=tokenizer,
)

# templates = [
#     ("P1", ManualTemplate(
#         text='ŸÑŸÅÿ∏: {"placeholder":"text_a"} ‚Üí ÿ≠ÿµ€Å ⁄©ŸÑÿßŸÖ: {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P2", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€í ŸÖ€å⁄∫ €å€Å ŸÑŸÅÿ∏ {"placeholder":"text_a"} ⁄©ÿ≥ ŸÇÿ≥ŸÖ ⁄©ÿß €Å€í: {"mask"}€î',
#         tokenizer=tokenizer,
#     )),
#     ("P3", ManualTemplate(
#         text='ŸÑŸÅÿ∏ {"placeholder":"text_a"} ⁄©ÿß POS tag ⁄©€åÿß €Å€íÿü {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P4", ManualTemplate(
#         text='€å€Å ŸÑŸÅÿ∏ {"placeholder":"text_a"} ÿ¨ŸÖŸÑ€í ŸÖ€å⁄∫ ⁄©ÿ≥ ÿ≤ŸÖÿ±€í ⁄©ÿß €Å€íÿü {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P5", ManualTemplate(
#         text='{"placeholder":"text_a"} ‚Üí ÿ≠ÿµ€Å ⁄©ŸÑÿßŸÖ: {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P6", ManualTemplate(
#         text='ŸÑŸÅÿ∏: {"placeholder":"text_a"} ÿßÿ≥ ⁄©ÿß ÿ≠ÿµ€Å ⁄©ŸÑÿßŸÖ {"mask"} €Å€í€î',
#         tokenizer=tokenizer,
#     )),
#     ("P7", ManualTemplate(
#         text='POS tag for {"placeholder":"text_a"} is {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P8", ManualTemplate(
#         text='{"placeholder":"text_a"} ⁄©ÿ≥ ŸÇÿ≥ŸÖ ⁄©ÿß ŸÑŸÅÿ∏ €Å€í: {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P9", ManualTemplate(
#         text='ŸÑŸÅÿ∏ {"placeholder":"text_a"} ‚Üí POS: {"mask"}',
#         tokenizer=tokenizer,
#     )),
#     ("P10", ManualTemplate(
#         text='ÿ¨ŸÖŸÑ€í ŸÖ€å⁄∫ ŸÑŸÅÿ∏ {"placeholder":"text_a"} ⁄©ÿ≥ POS category ÿ≥€í ÿ™ÿπŸÑŸÇ ÿ±⁄©⁄æÿ™ÿß €Å€íÿü {"mask"}',
#         tokenizer=tokenizer,
#     )),
# ]

# ==============================
# Define Verbalizer (Manual)
# ==============================
# Maps each class in `classes` to a list of label words; the first word of every
# list is the tag abbreviation itself, followed by up to two example words.
# NOTE(review): several label words are shared between classes:
#   - PN (2nd word) and DATE (2nd word)
#   - EXP (2nd word) and DATE (3rd word)
#   - VB (3rd word) and TA (2nd word)
#   - KP (2nd word) and KD (3rd word)
#   - KP (3rd word) and QW (2nd word)
# Shared words presumably make those classes harder to tell apart — TODO confirm
# this overlap is intended.
verbalizer = ManualVerbalizer(
    classes=classes,
    label_words = {
    "PN": ["PN", "‚Äô", "ÿß€å"],
    "G": ["G", "ŸÖ€åÿ±€í", "ŸÖ€åÿ±ÿß"],
    "NN": ["NN", "ÿ®⁄æÿßÿ¶€å", "ŸÖÿ≠ŸÜÿ™"],
    "P": ["P", "⁄©ÿß", "⁄©€å"],
    "U": ["U", "ŸÖ€åŸÑ", "ŸÖŸÜ"],
    "VB": ["VB", "ÿ¢€åÿß€Å€í", "€Å€å⁄∫"],
    "SM": ["SM", "€î", "!"],
    "PM": ["PM", "ÿå", "\""],
    "PP": ["PP", "€ÅŸÖ", "ÿ¢Ÿæ"],
    "CC": ["CC", "ÿßŸàÿ±", "Ÿà"],
    "ADJ": ["ADJ", "ŸÇÿßÿ¶ŸÑ", "ŸÖ€åÿ±€åÿ¶Ÿπ"],
    "CA": ["CA", "ÿß⁄©€åÿ≥", "ÿØŸà"],
    "RP": ["RP", "ÿÆŸàÿØ", "ÿßŸæŸÜ€íÿ¢Ÿæ"],
    "SC": ["SC", "ÿ¨ÿ®⁄©€Å", "⁄©€Å"],
    "SE": ["SE", "ÿ≥€í"],
    "ADV": ["ADV", "ÿ®ÿ±ÿßÿ¶€í", "ÿ≤€åÿßÿØ€Å"],
    "EXP": ["EXP", "‚Äù", "("],
    "I": ["I", "ÿ™Ÿà", "€Å€å"],
    "NEG": ["NEG", "ŸÜ€Å€å⁄∫", "ŸÜ€Å"],
    "TA": ["TA", "€Å€å⁄∫", "€Å€í"],
    "AP": ["AP", "Ÿà€Åÿß⁄∫", "ÿßÿ®"],
    "Q": ["Q", "⁄©⁄Ü⁄æ", "€Åÿ±"],
    "PD": ["PD", "ÿßÿ≥", "ÿßŸÜ"],
    "WALA": ["WALA", "ŸàÿßŸÑÿß", "ŸàÿßŸÑ€í"],
    "KP": ["KP", "⁄©ÿ≥", "⁄©€åÿß"],
    "GR": ["GR", "ÿßŸæŸÜ€å", "ÿßŸæŸÜÿß"],
    "REP": ["REP", "ÿ¨ÿ≥", "ÿ¨ŸÜ"],
    "A": ["A", "ÿ≥€å", "ÿ≥ÿß"],
    "KD": ["KD", "⁄©ÿ≥€å", "⁄©ÿ≥"],
    "AA": ["AA", "ÿ≥⁄©ÿ™€í", "⁄Ø€åÿß"],
    "QW": ["QW", "⁄©€åÿß", "⁄©€åŸà⁄∫"],
    "KER": ["KER", "⁄©ÿ±"],
    "OR": ["OR", "ÿØŸàŸÜŸà⁄∫", "ÿ≥ÿßÿ™Ÿà€å⁄∫"],
    "AKP": ["AKP", "⁄©€Å€å⁄∫", "⁄©€åÿ≥€í"],
    "MUL": ["MUL", "ÿ¥ÿßŸæŸÜ⁄Ø", "ÿßŸÅÿ∑ÿßÿ±"],
    "INT": ["INT", "Ÿàÿß€Å", "ÿßŸÑŸÇÿßÿØÿ±€å"],
    "AD": ["AD", "ÿß€åÿ≥€å"],
    "FR": ["FR", "ÿ¢ÿØ⁄æ€í", "ÿ≥ÿß⁄ë⁄æ€í"],
    "DATE": ["DATE", "‚Äô", "‚Äù"],
    "RD": ["RD", "ÿ¨ÿ™ŸÜ€í", "js"]
    },
    tokenizer=tokenizer,
)

In [5]:
# ==============================
# Create Prompt Model
# ==============================
# Combine the pretrained language model, the prompt template and the
# verbalizer into a single classifier object.
prompt_model = PromptForClassification(
    plm=plm,
    template=template,
    verbalizer=verbalizer,
)


In [6]:
# ==============================
# Load Evaluation Dataset
# ==============================
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this file exists.
df_eval = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\pos\test.csv")  # columns: 'word', 'tag'

# Fail early with a readable message if the CSV contains a tag that label_map
# does not know, instead of a bare KeyError part-way through the conversion.
unknown_tags = sorted(set(df_eval['tag'].unique()) - set(label_map), key=str)
if unknown_tags:
    raise ValueError(f"Evaluation data contains tags missing from label_map: {unknown_tags}")

# One InputExample per row: guid is the row index as a string, text_a is the
# word, label is the integer class id. Zipping the columns avoids iterrows(),
# which builds a Series object for every row.
eval_dataset = [
    InputExample(guid=str(row_id), text_a=word, label=label_map[tag])
    for row_id, word, tag in zip(df_eval.index, df_eval['word'], df_eval['tag'])
]


# ==============================
# PromptDataLoader
# ==============================
# Wraps every evaluation example in the prompt template, tokenizes it and
# serves the result in fixed-order batches.
eval_loader = PromptDataLoader(
    dataset=eval_dataset,
    template=template,                      # the ManualTemplate defined for this POS task
    tokenizer=tokenizer,
    tokenizer_wrapper_class=WrapperClass,
    batch_size=8,
    max_seq_length=128,
    shuffle=False,                          # keep the dataset order
)

# ==============================
# Evaluate Model
# ==============================
# The model and every batch must live on the same device. Without these
# explicit moves everything stays on the CPU, even when a GPU is available.
# `device` is honoured only if CUDA is really present, so this is safe on
# CPU-only machines too.
eval_device = torch.device(device if torch.cuda.is_available() else "cpu")
prompt_model.to(eval_device)

prompt_model.eval()  # inference mode: disables dropout etc.
all_preds = []   # predicted class ids, in dataset order
all_labels = []  # gold class ids, in dataset order

with torch.no_grad():  # no gradients are needed for evaluation
    for batch in eval_loader:
        batch = batch.to(eval_device)
        logits = prompt_model(batch)
        preds = torch.argmax(logits, dim=-1)
        # Bring results back to the CPU before converting to Python lists.
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(batch['label'].cpu().tolist())

# ==============================
# Print Classification Report
# ==============================
# Every class id is passed explicitly so the report has one row per entry in
# `classes`, including classes that never occur in the data (the recorded run
# shows DATE and RD with support 0).
all_label_ids = [*range(len(classes))]  # [0, 1, 2, ..., 39]

print("\nüìä POS Tagging Classification Report:")
report_text = classification_report(
    all_labels,
    all_preds,
    labels=all_label_ids,
    target_names=classes,
    digits=4,
    zero_division=0,
)
print(report_text)

tokenizing: 86676it [00:20, 4185.18it/s]



üìä POS Tagging Classification Report:
              precision    recall  f1-score   support

          PN     0.0644    0.1870    0.0958      6065
           G     0.0000    0.0000    0.0000       474
          NN     0.0000    0.0000    0.0000     21792
           P     0.9522    0.7051    0.8102     10256
           U     0.8000    0.4000    0.5333        40
          VB     0.0000    0.0000    0.0000     10060
          SM     0.0000    0.0000    0.0000      3464
          PM     1.0000    0.1393    0.2445      1924
          PP     0.0000    0.0000    0.0000      3316
          CC     0.0000    0.0000    0.0000      1938
         ADJ     0.0000    0.0000    0.0000      5342
          CA     0.0000    0.0000    0.0000      1763
          RP     0.0000    0.0000    0.0000        84
          SC     0.0000    0.0000    0.0000      2504
          SE     0.0000    0.0000    0.0000      1440
         ADV     0.0000    0.0000    0.0000      1480
         EXP     0.3524    0.7208    0.4

In [None]:
roberta


In [None]:
mBERT (bert-base-multilingual-cased), zero-shot results:
📊 POS Tagging Classification Report:
              precision    recall  f1-score   support

          PN     0.0644    0.1870    0.0958      6065
           G     0.0000    0.0000    0.0000       474
          NN     0.0000    0.0000    0.0000     21792
           P     0.9522    0.7051    0.8102     10256
           U     0.8000    0.4000    0.5333        40
          VB     0.0000    0.0000    0.0000     10060
          SM     0.0000    0.0000    0.0000      3464
          PM     1.0000    0.1393    0.2445      1924
          PP     0.0000    0.0000    0.0000      3316
          CC     0.0000    0.0000    0.0000      1938
         ADJ     0.0000    0.0000    0.0000      5342
          CA     0.0000    0.0000    0.0000      1763
          RP     0.0000    0.0000    0.0000        84
          SC     0.0000    0.0000    0.0000      2504
          SE     0.0000    0.0000    0.0000      1440
         ADV     0.0000    0.0000    0.0000      1480
         EXP     0.3524    0.7208    0.4733       197
           I     0.6360    0.4872    0.5517      1800
         NEG     0.0000    0.0000    0.0000      1062
          TA     0.0000    0.0000    0.0000      3181
          AP     1.0000    0.2831    0.4413       710
           Q     0.0000    0.0000    0.0000      1219
          PD     0.0000    0.0000    0.0000      1164
        WALA     0.0000    0.0000    0.0000       253
          KP     0.0000    0.0000    0.0000       111
          GR     0.0000    0.0000    0.0000       437
         REP     0.0000    0.0000    0.0000       589
           A     0.0707    0.4078    0.1205       206
          KD     0.7113    0.9718    0.8214       213
          AA     0.0000    0.0000    0.0000      2571
          QW     0.0000    0.0000    0.0000       219
         KER     0.0000    0.0000    0.0000       211
          OR     0.0000    0.0000    0.0000       171
         AKP     0.0000    0.0000    0.0000       257
         MUL     0.0000    0.0000    0.0000        27
         INT     0.0000    0.0000    0.0000        59
          AD     0.0000    0.0000    0.0000        51
          FR     0.0000    0.0000    0.0000        26
        DATE     0.0000    0.0000    0.0000         0
          RD     0.0000    0.0000    0.0000         0

    accuracy                         0.1172     86676
   macro avg     0.1397    0.1076    0.1023     86676
weighted avg     0.1639    0.1172    0.1267     86676


In [None]:
# # ==============================
# # Load Evaluation Dataset
# # ==============================
# df = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\SST-2\urdu_sentiment_test_labeled.csv")
# eval_dataset = [
#     InputExample(guid=i, text_a=row['text'], label=label_map[row['label']])
#     for i, row in df.iterrows()
# ]

# # ==============================
# # 0-Shot Evaluation with Each Template
# # ==============================
# prompt_model.eval()  # ensure model is in evaluation mode
# batch_size = 8    # eval batch size

# # Optional: store template order and results
# all_pass_patterns = {}

# for pass_idx, (prompt_name, current_template) in enumerate(templates, start=1):
#     print(f"\nüü¶ 0-Shot Evaluation - Template {prompt_name} ({pass_idx}/{len(templates)})")

#     # Create PromptDataLoader with current template
#     eval_loader = PromptDataLoader(
#         dataset=eval_dataset,
#         tokenizer=tokenizer,
#         template=current_template,
#         tokenizer_wrapper_class=WrapperClass,
#         max_seq_length=128,
#         batch_size=batch_size,
#         shuffle=False
#     )

#     pass_preds = []
#     pass_labels = []

#     # Run evaluation
#     with torch.no_grad():
#         for batch in eval_loader:
#             logits = prompt_model(batch)
#             preds = torch.argmax(logits, dim=-1)
#             pass_preds.extend(preds.cpu().tolist())
#             pass_labels.extend(batch['label'].cpu().tolist())

#     # Print report immediately after this template
#     print(f"\nüìä STS_B Urdu Dev Classification Report - Template {prompt_name}")
#     print(classification_report(pass_labels, pass_preds, target_names=classes, digits=4))

#     # Store template name (optional)
#     all_pass_patterns[f"pass_{pass_idx}"] = prompt_name

# # Optional: print template order at the end
# print("\n‚úÖ Templates used per pass:", all_pass_patterns)
