In [None]:
# !sed -i 's/transformers.generation_utils/transformers.generation/' /usr/local/lib/python3.12/dist-packages/openprompt/pipeline_base.py

In [None]:
# !pip install transformers==4.24.0

In [1]:
# Set device
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# ==============================
# Imports
# ==============================
import torch
import pandas as pd
import random
import numpy as np
from collections import Counter
from transformers import XLMRobertaTokenizer, XLMRobertaForMaskedLM
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from openprompt.prompts import SoftVerbalizer
from openprompt.prompts import AutomaticVerbalizer
from openprompt.data_utils import InputExample
from openprompt.plms import load_plm
from openprompt import PromptForClassification, PromptDataLoader
from torch.optim import AdamW
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, Sampler

  from .autonotebook import tqdm as notebook_tqdm


In [3]:


# ==============================
# Set random seeds for reproducibility
# ==============================
# üí° Added this block to ensure consistent results across runs
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
# ==============================
# Balanced Batch Sampler
# ==============================
class BalancedBatchSampler(Sampler):
    def __init__(self, dataset, batch_size):
        """
        dataset: list of InputExample
        batch_size: total batch size (must be divisible by number of classes)
        """
        self.dataset = dataset
        self.labels = [ex.label for ex in dataset]
        self.classes = list(sorted(set(self.labels)))
        self.num_classes = len(self.classes)
        assert batch_size % self.num_classes == 0, "Batch size must be divisible by number of classes"
        self.batch_size_per_class = batch_size // self.num_classes

    def __iter__(self):
        class_indices = {c: np.where(np.array(self.labels) == c)[0].tolist() for c in self.classes}
        for c in self.classes:
            np.random.shuffle(class_indices[c])

        num_batches = min(len(class_indices[c]) // self.batch_size_per_class for c in self.classes)

        for i in range(num_batches):
            batch = []
            for c in self.classes:
                start = i * self.batch_size_per_class
                end = start + self.batch_size_per_class
                batch.extend(class_indices[c][start:end])
            np.random.shuffle(batch)
            yield batch

    def __len__(self):
        return min(len(np.where(np.array(self.labels) == c)[0]) // self.batch_size_per_class for c in self.classes)

In [5]:

# Step 1: Training data (16-shot) ha,m and spam
train_dataset = [

    InputExample(guid=0, text_a="Ÿæÿ±ŸàŸÅ€åÿ≥ÿ± ŸÜ€í ÿ®ÿßÿ™ ⁄©€å €ÅŸÖ ÿ≥€í€î", label=0),
    InputExample(guid=1, text_a="Ÿæÿ±ŸàŸÅ€åÿ≥ÿ± ŸÜ€í ÿ®ÿßÿ™ ⁄©€å €ÅŸÖ ÿ≥€í€î", label=0),
    InputExample(guid=2, text_a="ŸÖ ÿÆŸàÿØ ÿ≥€í ⁄Ü€åÿÆ ÿ±€Å€í ÿ™⁄æÿß €î", label=0),
    InputExample(guid=3, text_a="€ÅŸÖ ŸÜ€í €Å€åÿ±€å ⁄©Ÿà ⁄Ø⁄æÿ®ÿ±ÿß ⁄©ÿ± Ÿæ⁄©ÿßÿ±ÿß ÿ™⁄æÿß€î", label=0),
    InputExample(guid=4, text_a="€Å€åÿ±€å ŸÜ€í ÿÆŸàÿØ ⁄©Ÿà ⁄©⁄æÿßŸÜÿ≥€å ÿØ€í ÿØ€åÿß€î", label=0),
    InputExample(guid=5, text_a="€Å€åÿ±€å ŸÜ€í €ÅŸÖ€å⁄∫ ⁄©⁄æÿßŸÜÿ≥€å ÿØ€í ⁄©ÿ± ŸÅŸπ ⁄©ÿ± ÿØ€åÿ™ÿß ÿ™⁄æÿß €î", label=0),
    InputExample(guid=6, text_a="ÿßŸÜ€ÅŸà⁄∫ ŸÜ€í ÿßÿ≥€í ÿ®ŸÜÿß ⁄©ÿ± ÿ∫ÿµ€Å ÿØŸÑÿßÿ™ÿß ÿ™⁄æÿß €î", label=0),
    InputExample(guid=7, text_a="ÿßŸÜ€ÅŸà⁄∫ ŸÜ€í ÿßÿ≥€í ÿµÿØÿ± ÿ®ŸÜÿß ⁄©ÿ± ÿ®ŸÜÿß ÿØ€åÿ™€í ÿ™⁄æÿß €î", label=0),
    InputExample(guid=8, text_a="ÿßŸÜ€ÅŸà⁄∫ ŸÜ€í ÿßÿ≥€í ÿ™⁄æ⁄©ÿß ÿØ€åÿ™ÿß ÿ™⁄æÿß €î", label=0),
    InputExample(guid=9, text_a="⁄Øÿß⁄ë€å ÿ≥⁄ë⁄© Ÿæÿ± €Åÿßÿ±ŸÜ ŸÖÿßÿ±ÿ™€å ÿ™⁄æ€å ⁄©Ÿà€î", label=0),
    InputExample(guid=10, text_a="⁄©ÿ™ÿß ⁄©ŸÖÿ±€í ÿ≥€í ÿ®ÿß€Åÿ± ÿ®⁄æŸàŸÜ⁄©ÿß ÿØ€åÿß€î", label=0),
    InputExample(guid=11, text_a="⁄àÿßÿ¶ŸÜ ÿ∫ÿßÿ¶ÿ® €ÅŸà ⁄©ÿ± ÿ¨ŸÜ⁄ØŸÑ ŸÖ€å⁄∫ ⁄Øÿ¶€å ÿ™⁄æ€å€î", label=0),
    InputExample(guid=12, text_a="ÿπŸÖÿßÿ±ÿ™ ÿßŸàŸÜ⁄Ü€å ÿßŸàÿ± ÿßŸàŸÜ⁄Ü€å €Å€å⁄∫ €î", label=0),
    InputExample(guid=13, text_a="€å€Å ÿπŸÖÿßÿ±ÿ™ ÿßŸàŸÜ⁄Ü€å ÿßŸàÿ± ÿßŸàŸÜ⁄Ü€å €Å€å⁄∫ €î", label=0),
    InputExample(guid=14, text_a="ÿßÿ≥ ÿπŸÖÿßÿ±ÿ™ ⁄©Ÿà ÿßÿ≥ ÿ≥€í ÿ≤€åÿßÿØ€Å ŸÖŸÑ ⁄Ø€åÿß €Å€í ÿ™⁄æ€í €î", label=0),
    InputExample(guid=15, text_a="€å€Å ÿπŸÖÿßÿ±ÿ™ ÿßÿ≥ ÿ≥€í ÿ≤€åÿßÿØ€Å €Å€å⁄∫ €î", label=0),

    
    InputExample(guid=16, text_a="€ÅŸÖÿßÿ±€í ÿØŸàÿ≥ÿ™ ÿßÿ≥ ÿ™ÿ¨ÿ≤€å€í ⁄©Ÿà ŸÜ€Å€å⁄∫ ÿÆÿ±€åÿØ€å⁄∫ ⁄Ø€í - ÿß⁄ØŸÑ€í ÿ™ÿ¨ÿ≤€å€í ⁄©Ÿà ⁄Ü⁄æŸà⁄ë ÿØ€å⁄∫ ÿ¨Ÿà €ÅŸÖ ÿ™ÿ¨Ÿà€åÿ≤ ⁄©ÿ±ÿ™€í €Å€å⁄∫ €î", label=1),
    InputExample(guid=17, text_a="ÿß€å⁄© ÿßŸàÿ± ŸÅÿ±ÿ∂€å ÿπÿßŸÖ ⁄©ÿßÿ±€å ÿßŸàÿ± ŸÖ€å⁄∫ €Åÿßÿ± ŸÖÿßŸÜ ÿ±€Åÿß €ÅŸà⁄∫ €î", label=1),
    InputExample(guid=18, text_a="ÿß€å⁄© ÿßŸàÿ± ŸÅÿ±ÿ∂€å ÿπÿßŸÖ ⁄©ÿßÿ±€å €åÿß ŸÖ€å⁄∫ ÿ™ÿ±⁄© ⁄©ÿ± ÿ±€Åÿß €ÅŸà⁄∫ €î", label=1),
    InputExample(guid=19, text_a="€ÅŸÖ ŸÅÿπŸÑ ⁄©ÿß ÿ¨ÿ™ŸÜÿß ÿ≤€åÿßÿØ€Å ŸÖÿ∑ÿßŸÑÿπ€Å ⁄©ÿ±ÿ™€í €Å€å⁄∫ Ÿà€Å ÿßÿ™ŸÜ€í €Å€å Ÿæÿß⁄ØŸÑ €ÅŸàÿ™€í ÿ¨ÿßÿ™€í €Å€å⁄∫ €î", label=1),
    InputExample(guid=20, text_a="ÿ±Ÿàÿ≤ ÿ±Ÿàÿ≤ ÿ≠ŸÇÿßÿ¶ŸÇ ŸÖÿ®€ÅŸÖ €ÅŸàÿ™€í ÿ¨ÿß ÿ±€Å€í €Å€å⁄∫ €î", label=1),
    InputExample(guid=21, text_a="ŸÖ€å⁄∫ ÿ¢Ÿæ ⁄©Ÿà ÿß€å⁄© ŸÖÿ¥ÿ±Ÿàÿ® Ÿπ⁄æ€å⁄© ⁄©ÿ± ÿØŸà⁄∫ ⁄Øÿß €î", label=1),
    InputExample(guid=22, text_a="ŸÅÿ±€å⁄à ŸÜ€í ŸæŸàÿØŸà⁄∫ ⁄©Ÿà ⁄ÜŸæŸπÿß ŸæÿßŸÜ€å ÿØ€åÿß €î", label=1),
    InputExample(guid=23, text_a="ÿ®ŸÑ ⁄©Ÿà ⁄©⁄æÿßŸÜÿ≥€å ÿ¢ŸÜ€í Ÿæÿ± ÿ±€åÿ≥ÿ™Ÿàÿ±ÿß⁄∫ ÿ≥€í ÿ®ÿß€Åÿ± ŸÜ⁄©ŸÑ ÿ¢€åÿß €î", label=1),
    InputExample(guid=24, text_a="€ÅŸÖ ÿ±ÿßÿ™ ⁄©Ÿà ÿ±ŸÇÿµ ⁄©ÿ± ÿ±€Å€í €Å€å⁄∫ €î", label=1),
    InputExample(guid=25, text_a="€Åÿ±ŸÖŸÜ ŸÜ€í ÿØ⁄æÿßÿ™ ⁄©€í ŸÅŸÑ€åŸπ Ÿæÿ± €Åÿ™⁄æŸà⁄ëÿß ŸÖÿßÿ±ÿß €î", label=1),
    InputExample(guid=26, text_a="ŸÜÿßŸÇÿØ€åŸÜ ŸÜ€í ÿßÿ≥Ÿπ€åÿ¨ ÿ≥€í ÿ®ÿß€Åÿ± ⁄àÿ±ÿßŸÖ€í ⁄©Ÿà €ÅŸÜÿ≥ ÿØ€åÿß €î", label=1),
    InputExample(guid=27, text_a="ÿ™ÿßŸÑÿßÿ® Ÿπ⁄æŸàÿ≥ ÿ∑Ÿàÿ± Ÿæÿ± ÿ¨ŸÖ ⁄Ø€åÿß €î", label=1),
    InputExample(guid=28, text_a="ÿ®ŸÑ ⁄©ŸÖÿ±€í ÿ≥€í ÿ®ÿß€Åÿ± ŸÜ⁄©ŸÑ ÿ¢€åÿß €î", label=1),
    InputExample(guid=29, text_a="ÿ®ÿßÿ∫ÿ®ÿßŸÜ ŸÜ€í Ÿæ⁄æŸàŸÑŸà⁄∫ ⁄©Ÿà ⁄ÜŸæŸπÿß ŸæÿßŸÜ€å ÿØ€åÿß €î", label=1),
    InputExample(guid=30, text_a="ÿ®ÿßÿ∫ÿ®ÿßŸÜ ŸÜ€í Ÿæ⁄æŸàŸÑŸà⁄∫ ⁄©Ÿà ŸæÿßŸÜ€å ÿØ€åÿß €î", label=1),
    InputExample(guid=31, text_a="ÿ®ŸÑ ŸÜ€í ÿ®ÿßÿ™⁄æ Ÿπÿ® ⁄©Ÿà Ÿπ⁄©⁄ëŸà⁄∫ ŸÖ€å⁄∫ ÿ™Ÿà⁄ë ÿØ€åÿß €î", label=1),    
]


In [6]:
# ==============================
# Define Classes
# ==============================
classes = ["unacc", "acc"]
label_map = {"unacc": 0, "acc": 1}

# # Step 1: Use load_plm with 'roberta' to get the correct WrapperClass
_, _, _, WrapperClass = load_plm("roberta", "roberta-base")  # Just to get the wrapper

# # Step 2: Manually load XLM-RoBERTa model/tokenizer
model_name = "xlm-roberta-base"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
plm = XLMRobertaForMaskedLM.from_pretrained(model_name)

# ==============================
# Load Pretrained Language Model (mBERT)
# ==============================
# plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-multilingual-cased")




In [7]:

# ==============================
# Define Prompt Template (Manual)
# ==============================
template = ManualTemplate(
    # text = '{"placeholder":"text_a"} Ÿáÿ∞Ÿá ÿßŸÑÿ¨ŸÖŸÑÿ© {"mask"}.',
    text='{"placeholder":"text_a"} €å€Å ÿ¨ŸÖŸÑ€Å {"mask"} €Å€í€î',
    tokenizer=tokenizer,
)




templates = [
    ("P1", ManualTemplate(
        text='{"placeholder":"text_a"} €å€Å ÿ¨ŸÖŸÑ€Å {"mask"} €Å€í€î',  # Keep as is
        tokenizer=tokenizer,
    )),
    ("P2", ManualTemplate(
        text='ÿØ€å ⁄Øÿ¶€å ÿπÿ®ÿßÿ±ÿ™: {"placeholder":"text_a"} ⁄©ÿß ŸÖŸàÿßÿØ ÿßŸàÿ± Ÿæ€åÿ∫ÿßŸÖ {"mask"} €Å€í€î',  # generalized for neg/pos
        tokenizer=tokenizer,
    )),
    ("P3", ManualTemplate(
        text='ÿßÿ≥ ÿπÿ®ÿßÿ±ÿ™ ⁄©ÿß ÿ™ÿ¨ÿ≤€å€Å ⁄©ÿ±€å⁄∫: {"placeholder":"text_a"} ÿßÿ≥ ⁄©ÿß ÿßÿ≠ÿ≥ÿßÿ≥ €åÿß ŸÖŸà⁄à {"mask"} €Å€í€î',  # focuses on sentiment/intent
        tokenizer=tokenizer,
    )),
    ("P4", ManualTemplate(
        text='ÿ¨ŸÖŸÑ€Å: {"placeholder":"text_a"} €ÅŸÖ€å⁄∫ ÿ®ÿ™ÿßÿ™ÿß €Å€í ⁄©€Å €å€Å ŸÖŸàÿßÿØ {"mask"} €Å€í€î',  # clear statement aligned with label
        tokenizer=tokenizer,
    )),
    ("P5", ManualTemplate(
        text='{"placeholder":"text_a"} ÿßÿ≥ ÿπÿ®ÿßÿ±ÿ™ ⁄©ÿß ŸÖÿ∑ŸÑÿ® {"mask"} €Å€í€î',
        tokenizer=tokenizer,
    )),
    ("P6", ManualTemplate(
        text='{"placeholder":"text_a"} ÿßÿ≥ ŸÖÿπÿßŸÖŸÑ€í ŸÖ€å⁄∫ ÿ≠ÿ™ŸÖ€å ÿ±ÿßÿ¶€í {"mask"}',
        tokenizer=tokenizer,
    )),
    ("P7", ManualTemplate(
        text='{"placeholder":"text_a"} ÿßÿ≥ ŸÖŸàÿßÿØ ⁄©€å ÿ™ÿ¥ÿ±€åÿ≠ {"mask"}',
        tokenizer=tokenizer,
    )),
    ("P8", ManualTemplate(
        text='{"placeholder":"text_a"} ÿßÿ≥ ÿ≠ŸàÿßŸÑ€í ÿ≥€í ŸÅ€åÿµŸÑ€Å {"mask"}',
        tokenizer=tokenizer,
    )),
    ("P9", ManualTemplate(
        text='{"placeholder":"text_a"} ÿßÿ≥ ŸÖÿ™ŸÜ ⁄©€å ÿØÿ±ÿ¨€Å ÿ®ŸÜÿØ€å {"mask"}',
        tokenizer=tokenizer,
    )),
    ("P10", ManualTemplate(
        text='{"placeholder":"text_a"} ÿßÿ≥ ÿßÿ∏€Åÿßÿ± ⁄©ÿß ŸÜÿ™€åÿ¨€Å {"mask"}',
        tokenizer=tokenizer,
    )), 
]



# ==============================
# Define Verbalizer (Manual)
# ==============================
verbalizer = ManualVerbalizer(
    classes=classes,
    label_words = {
        "acc": ["ÿµÿ≠€åÿ≠","ÿØÿ±ÿ≥ÿ™"], 
       "unacc": ["ŸÜÿß ÿØÿ±ÿ≥ÿ™","ÿ∫ŸÑÿ∑"]
    },
    tokenizer=tokenizer,
)

In [8]:
# ==============================
# Create Prompt Model
# ==============================
prompt_model = PromptForClassification(
    template=template,
    plm=plm,
    verbalizer=verbalizer
)

# ==============================
# Training loop with BalancedBatchSampler + random template switching
# ==============================
T = 20   # epochs
K = 1    # steps per prompt
batch_size = 8





prompt_model.train()
optimizer = AdamW(prompt_model.parameters(), lr=1e-5)
all_epoch_patterns = {}

for epoch in range(T):
    print(f"\nüü¶ Epoch {epoch+1}/{T}")

    # Random initial template
    prompt_name, current_template = random.choice(templates)
    epoch_pattern = []

    # Create PromptDataLoader with BalancedBatchSampler
    sampler = BalancedBatchSampler(train_dataset, batch_size=batch_size)
    train_loader = PromptDataLoader(
        dataset=train_dataset,
        tokenizer=tokenizer,
        template=current_template,
        tokenizer_wrapper_class=WrapperClass,
        max_seq_length=128,
        batch_size=batch_size,
        batch_sampler=sampler,
        shuffle=False  # shuffle is ignored when batch_sampler is used
    )

    step_counter = 0
    epoch_loss = 0.0

    for batch in train_loader:
        # Move batch to device
        # batch = {k: v.to(device) for k, v in batch.items()}
    
        optimizer.zero_grad()
        logits = prompt_model(batch)
        loss = torch.nn.CrossEntropyLoss()(logits, batch['label'])
        # loss = criterion(logits, batch["label"])
        
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_pattern.append(prompt_name)

        step_counter += 1

        # Switch template every K steps
        if step_counter % K == 0:
            prompt_name, current_template = random.choice(templates)

            # Rebuild PromptDataLoader with new template but same sampler
            train_loader = PromptDataLoader(
                dataset=train_dataset,
                tokenizer=tokenizer,
                template=current_template,
                tokenizer_wrapper_class=WrapperClass,
                max_seq_length=128,
                batch_size=batch_size,
                batch_sampler=sampler,
                shuffle=False
            )

    all_epoch_patterns[f"epoch_{epoch+1}"] = epoch_pattern
    print(f"Epoch {epoch+1} Loss: {epoch_loss:.4f}")
    print(f"Prompt pattern: {epoch_pattern}")



üü¶ Epoch 1/20


tokenizing: 32it [00:00, 2582.35it/s]
tokenizing: 32it [00:00, 2779.93it/s]
tokenizing: 32it [00:00, 2368.45it/s]
tokenizing: 32it [00:00, 2042.76it/s]
tokenizing: 32it [00:00, 2031.42it/s]


Epoch 1 Loss: 12.0558
Prompt pattern: ['P2', 'P1', 'P5', 'P4']

üü¶ Epoch 2/20


tokenizing: 32it [00:00, 2278.70it/s]
tokenizing: 32it [00:00, 2097.94it/s]
tokenizing: 32it [00:00, 2593.13it/s]
tokenizing: 32it [00:00, 1937.55it/s]
tokenizing: 32it [00:00, 2557.70it/s]


Epoch 2 Loss: 2.8763
Prompt pattern: ['P3', 'P2', 'P9', 'P2']

üü¶ Epoch 3/20


tokenizing: 32it [00:00, 2664.90it/s]
tokenizing: 32it [00:00, 2593.63it/s]
tokenizing: 32it [00:00, 2551.28it/s]
tokenizing: 32it [00:00, 2239.46it/s]
tokenizing: 32it [00:00, 1827.14it/s]


Epoch 3 Loss: 3.4712
Prompt pattern: ['P7', 'P1', 'P1', 'P2']

üü¶ Epoch 4/20


tokenizing: 32it [00:00, 2122.93it/s]
tokenizing: 32it [00:00, 1360.94it/s]
tokenizing: 32it [00:00, 1774.64it/s]
tokenizing: 32it [00:00, 1999.25it/s]
tokenizing: 32it [00:00, 2665.32it/s]


Epoch 4 Loss: 5.4851
Prompt pattern: ['P4', 'P9', 'P10', 'P1']

üü¶ Epoch 5/20


tokenizing: 32it [00:00, 1827.19it/s]
tokenizing: 32it [00:00, 1937.24it/s]
tokenizing: 32it [00:00, 3037.42it/s]
tokenizing: 32it [00:00, 1935.81it/s]
tokenizing: 32it [00:00, 3364.19it/s]


Epoch 5 Loss: 3.6066
Prompt pattern: ['P4', 'P9', 'P7', 'P4']

üü¶ Epoch 6/20


tokenizing: 32it [00:00, 3191.78it/s]
tokenizing: 32it [00:00, 1882.20it/s]
tokenizing: 32it [00:00, 2637.05it/s]
tokenizing: 32it [00:00, 2276.57it/s]
tokenizing: 32it [00:00, 2773.21it/s]


Epoch 6 Loss: 2.8905
Prompt pattern: ['P10', 'P5', 'P1', 'P3']

üü¶ Epoch 7/20


tokenizing: 32it [00:00, 2910.82it/s]
tokenizing: 32it [00:00, 1418.54it/s]
tokenizing: 32it [00:00, 2128.92it/s]
tokenizing: 32it [00:00, 2551.91it/s]
tokenizing: 32it [00:00, 2777.86it/s]


Epoch 7 Loss: 2.8219
Prompt pattern: ['P6', 'P5', 'P3', 'P4']

üü¶ Epoch 8/20


tokenizing: 32it [00:00, 2201.88it/s]
tokenizing: 32it [00:00, 2456.00it/s]
tokenizing: 32it [00:00, 3192.54it/s]
tokenizing: 32it [00:00, 2159.54it/s]
tokenizing: 32it [00:00, 2779.53it/s]


Epoch 8 Loss: 2.9465
Prompt pattern: ['P2', 'P2', 'P7', 'P2']

üü¶ Epoch 9/20


tokenizing: 32it [00:00, 2605.92it/s]
tokenizing: 32it [00:00, 2657.46it/s]
tokenizing: 32it [00:00, 2909.11it/s]
tokenizing: 32it [00:00, 2657.25it/s]
tokenizing: 32it [00:00, 3045.70it/s]


Epoch 9 Loss: 2.4369
Prompt pattern: ['P6', 'P10', 'P5', 'P1']

üü¶ Epoch 10/20


tokenizing: 32it [00:00, 2781.31it/s]
tokenizing: 32it [00:00, 2186.74it/s]
tokenizing: 32it [00:00, 2774.58it/s]
tokenizing: 32it [00:00, 2079.96it/s]
tokenizing: 32it [00:00, 3145.26it/s]


Epoch 10 Loss: 2.4226
Prompt pattern: ['P9', 'P2', 'P7', 'P2']

üü¶ Epoch 11/20


tokenizing: 32it [00:00, 2665.91it/s]
tokenizing: 32it [00:00, 3198.48it/s]
tokenizing: 32it [00:00, 3359.98it/s]
tokenizing: 32it [00:00, 3007.28it/s]
tokenizing: 32it [00:00, 1980.31it/s]


Epoch 11 Loss: 2.5591
Prompt pattern: ['P5', 'P10', 'P6', 'P10']

üü¶ Epoch 12/20


tokenizing: 32it [00:00, 2760.20it/s]
tokenizing: 32it [00:00, 2909.11it/s]
tokenizing: 32it [00:00, 2557.55it/s]
tokenizing: 32it [00:00, 2918.03it/s]
tokenizing: 32it [00:00, 2515.32it/s]


Epoch 12 Loss: 2.0362
Prompt pattern: ['P2', 'P1', 'P4', 'P5']

üü¶ Epoch 13/20


tokenizing: 32it [00:00, 2322.63it/s]
tokenizing: 32it [00:00, 2559.45it/s]
tokenizing: 32it [00:00, 3493.16it/s]
tokenizing: 32it [00:00, 2180.35it/s]
tokenizing: 32it [00:00, 3191.10it/s]


Epoch 13 Loss: 2.0852
Prompt pattern: ['P4', 'P2', 'P7', 'P5']

üü¶ Epoch 14/20


tokenizing: 32it [00:00, 2989.19it/s]
tokenizing: 32it [00:00, 2169.25it/s]
tokenizing: 32it [00:00, 3000.08it/s]
tokenizing: 32it [00:00, 2476.80it/s]
tokenizing: 32it [00:00, 1968.26it/s]


Epoch 14 Loss: 1.9660
Prompt pattern: ['P6', 'P3', 'P6', 'P6']

üü¶ Epoch 15/20


tokenizing: 32it [00:00, 2133.32it/s]
tokenizing: 32it [00:00, 2333.82it/s]
tokenizing: 32it [00:00, 3200.38it/s]
tokenizing: 32it [00:00, 1979.99it/s]
tokenizing: 32it [00:00, 2756.18it/s]


Epoch 15 Loss: 1.9990
Prompt pattern: ['P5', 'P2', 'P10', 'P3']

üü¶ Epoch 16/20


tokenizing: 32it [00:00, 2460.59it/s]
tokenizing: 32it [00:00, 1834.23it/s]
tokenizing: 32it [00:00, 3949.21it/s]
tokenizing: 32it [00:00, 2777.68it/s]
tokenizing: 32it [00:00, 2648.81it/s]


Epoch 16 Loss: 1.5429
Prompt pattern: ['P4', 'P3', 'P8', 'P7']

üü¶ Epoch 17/20


tokenizing: 32it [00:00, 2743.28it/s]
tokenizing: 32it [00:00, 2721.42it/s]
tokenizing: 32it [00:00, 2779.07it/s]
tokenizing: 32it [00:00, 2556.63it/s]
tokenizing: 32it [00:00, 2908.92it/s]


Epoch 17 Loss: 1.4037
Prompt pattern: ['P9', 'P4', 'P6', 'P1']

üü¶ Epoch 18/20


tokenizing: 32it [00:00, 3556.00it/s]
tokenizing: 32it [00:00, 4182.15it/s]
tokenizing: 32it [00:00, 3998.62it/s]
tokenizing: 32it [00:00, 3995.29it/s]
tokenizing: 32it [00:00, 2909.24it/s]


Epoch 18 Loss: 1.0855
Prompt pattern: ['P1', 'P6', 'P7', 'P5']

üü¶ Epoch 19/20


tokenizing: 32it [00:00, 2786.33it/s]
tokenizing: 32it [00:00, 4000.77it/s]
tokenizing: 32it [00:00, 5333.51it/s]
tokenizing: 32it [00:00, 2908.48it/s]
tokenizing: 32it [00:00, 4569.89it/s]


Epoch 19 Loss: 0.9758
Prompt pattern: ['P4', 'P10', 'P6', 'P4']

üü¶ Epoch 20/20


tokenizing: 32it [00:00, 4129.65it/s]
tokenizing: 32it [00:00, 3504.38it/s]
tokenizing: 32it [00:00, 2916.95it/s]
tokenizing: 32it [00:00, 3323.21it/s]
tokenizing: 32it [00:00, 2850.06it/s]

Epoch 20 Loss: 0.8804
Prompt pattern: ['P7', 'P8', 'P3', 'P5']





In [9]:
# ==============================
# Load Evaluation Dataset
# ==============================
df = pd.read_csv(r"C:\Users\stdFurqan\Desktop\paft\cola_dataset\final_ColA_Dev_Urdu_labeled - Sheet1.csv")
eval_dataset = [
    InputExample(guid=i, text_a=row['Urdu Sentence'], label=label_map[row['label']])
    for i, row in df.iterrows()
]


eval_loader = PromptDataLoader(
    dataset=eval_dataset,
    tokenizer=tokenizer,
    template=template,
    tokenizer_wrapper_class=WrapperClass,
    max_seq_length=128,
    batch_size=8,
    shuffle=False
)

# ==============================
# Evaluate Model
# ==============================
prompt_model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in eval_loader:
        logits = prompt_model(batch)
        preds = torch.argmax(logits, dim=-1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(batch['label'].cpu().tolist())

# ==============================
# Print Classification Report
# ==============================
print("\nüìä Fine-tuned Classification Report:")
print(classification_report(all_labels, all_preds, target_names=classes, digits=4))


tokenizing: 1043it [00:00, 4192.74it/s]



üìä Fine-tuned Classification Report:
              precision    recall  f1-score   support

       unacc     0.4502    0.3509    0.3944       322
         acc     0.7361    0.8086    0.7707       721

    accuracy                         0.6673      1043
   macro avg     0.5932    0.5798    0.5825      1043
weighted avg     0.6478    0.6673    0.6545      1043



In [None]:
roberta
üìä Fine-tuned Classification Report:
              precision    recall  f1-score   support

       unacc     0.4502    0.3509    0.3944       322
         acc     0.7361    0.8086    0.7707       721

    accuracy                         0.6673      1043
   macro avg     0.5932    0.5798    0.5825      1043
weighted avg     0.6478    0.6673    0.6545      1043

In [None]:
mbert
üìä Fine-tuned Classification Report:
              precision    recall  f1-score   support

       unacc     0.4346    0.2888    0.3470       322
         acc     0.7238    0.8322    0.7742       721

    accuracy                         0.6644      1043
   macro avg     0.5792    0.5605    0.5606      1043
weighted avg     0.6345    0.6644    0.6423      1043