In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import random
from torch.utils.data import Dataset, DataLoader
import sentencepiece as sp
from tqdm import tqdm
import math
import numpy as np

In [2]:
!pip install rouge_score sacrebleu

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Downloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score

In [3]:
from sacrebleu.metrics import BLEU, CHRF
from rouge_score import rouge_scorer

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [138]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [139]:
tokenizer = sp.SentencePieceProcessor()
tokenizer.Load('/content/drive/MyDrive/NLP_P2/1/bpe_tokenizer.model')


True

In [141]:
VOCAB_SIZE = tokenizer.get_piece_size()
PAD_ID = tokenizer.piece_to_id('<pad>')

print(f"Vocabulary size: {VOCAB_SIZE}")
print(f"PAD ID: {PAD_ID}")


Vocabulary size: 8000
PAD ID: 0


In [175]:
data = torch.load('/content/drive/MyDrive/NLP_P2/1/span_corruption_dataset.pt')
input_ids = data['input_ids']
target_ids = data['target_ids']


In [176]:
split_idx = int(len(input_ids) * 0.7)
train_input = input_ids[:split_idx]
train_target = target_ids[:split_idx]
val_input = input_ids[split_idx:]
val_target = target_ids[split_idx:]

print(f"Training samples: {len(train_input)}")
print(f"Validation samples: {len(val_input)}")


Training samples: 7172
Validation samples: 3075


In [177]:
class UrduDataset(Dataset):
    def __init__(self,input_ids,target_ids):
        super().__init__()
        self.input_ids = input_ids
        self.target_ids = target_ids

    def __len__(self):
        return len(self.input_ids)
    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
            'target_ids': torch.tensor(self.target_ids[idx], dtype=torch.long)
        }

def collate_fn(batch):
    input_ids = [item['input_ids'] for item in batch]
    target_ids = [item['target_ids'] for item in batch]

    input_ids=nn.utils.rnn.pad_sequence(input_ids,batch_first=True,padding_value=PAD_ID)
    target_ids=nn.utils.rnn.pad_sequence(target_ids,batch_first=True,padding_value=PAD_ID)
    return {
        'input_ids': input_ids,
        'target_ids': target_ids
    }

In [178]:
train_dataset = UrduDataset(train_input, train_target)
val_dataset = UrduDataset(val_input, val_target)

train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn
)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    collate_fn=collate_fn
)


# --------------------TRANSFORMER CODE--------------------------

In [179]:


def get_positional_encoding(max_seq_len, dm):
    pos=torch.arange(max_seq_len).unsqueeze(1)
    denom = 10000 ** (2 * torch.arange(0, dm//2)/ dm)
    angles = pos / denom
    PE = torch.zeros(max_seq_len, dm)
    PE[:, 0::2] = torch.sin(angles)
    PE[:, 1::2] = torch.cos(angles)
    return PE

In [180]:

def create_padding_mask(seq):
    """Create padding mask: True for padding positions"""
    return seq == PAD_ID
def create_look_ahead_mask(size):
    """Create look_ahead mask for decoder"""
    mask = torch.triu(torch.ones(size, size,), diagonal=1)
    return mask.bool()
l=create_look_ahead_mask(4)
print(l.shape)
print(l)

torch.Size([4, 4])
tensor([[False,  True,  True,  True],
        [False, False,  True,  True],
        [False, False, False,  True],
        [False, False, False, False]])


In [181]:

def FullyConnected(embedding_dim,fully_connected_dim,dropout=0.3):
    return nn.Sequential(
        nn.Linear(embedding_dim,fully_connected_dim),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(fully_connected_dim,embedding_dim)
    )

<center><img src="img/encoder_layer.png" alt="Encoder" width="400"/></center>

<center><caption><b>Transformer encoder layer</caption></center>


In [182]:
class EncoderLayer(nn.Module):
    def __init__(self,embedding_dim,num_heads,ffn_dim,dropout_rate=0.1,layernorm_eps=1e-6):
        super().__init__()
        self.mha=nn.MultiheadAttention(embed_dim=embedding_dim,num_heads=num_heads,dropout=dropout_rate,batch_first=True)
        self.ffn=FullyConnected(embedding_dim,ffn_dim)
        self.layernorm1=nn.LayerNorm(embedding_dim,eps=layernorm_eps)
        self.layernorm2=nn.LayerNorm(embedding_dim,eps=layernorm_eps)
        self.dropout_attn = nn.Dropout(dropout_rate)
        self.dropout_ffn=nn.Dropout(dropout_rate)

    def forward(self,x, mask):
        """
        Forward pass for the Encoder Layer
        Arguments:
            x -- Tensor of shape (batch_size, input_seq_len, embedding_dim)
            mask -- Boolean mask to ensure that the padding is not
                    treated as part of the input
        Returns:
            encoder_layer_out -- Tensor of shape (batch_size, input_seq_len, embedding_dim)
        """
        attn_output,_=self.mha(query=x,value=x,key=x,key_padding_mask=mask)# (batch_size, input_seq_len, embedding_dim)
        attn_output = self.dropout_attn(attn_output)
        skip_x_attn=self.layernorm1(x+attn_output)

        ffn_output=self.ffn(skip_x_attn)
        ffn_output=self.dropout_ffn(ffn_output)

        out=self.layernorm2(ffn_output+skip_x_attn)
        return out

<center><img src="img/encoder.png" alt="Encoder" width="400"/></center>

<center><caption><b>Full encoder</caption></center>


In [183]:
class Encoder(nn.Module):
    def __init__(self,num_layers, embedding_dim, num_heads, ffn_dim, input_vocab_size,max_seq_len, dropout_rate=0.1, padding_id=0):
        super().__init__()
        self.embedding_dim=embedding_dim
        self.num_layers=num_layers
        self.embedding= nn.Embedding(input_vocab_size,embedding_dim,padding_idx=padding_id)
        self.pos_encoding= get_positional_encoding(max_seq_len,embedding_dim)

        self.enc_layers=nn.ModuleList([
            EncoderLayer(embedding_dim,num_heads,ffn_dim,dropout_rate) for _ in range(num_layers)
        ])
        self.dropout= nn.Dropout(dropout_rate)
        self.scale_dm = torch.sqrt(torch.tensor(self.embedding_dim))
    def forward(self,x,padding_mask):
         """
        Arguments:
            x: Tensor of shape (batch_size, input_seq_len)
            mask: key padding mask of shape (batch_size, input_seq_len)
        Returns:
            output: Tensor of shape (batch_size, input_seq_len, embedding_dim)
        """
         seq_len=x.shape[1]
         x=self.embedding(x)*self.scale_dm
         pos_enc=self.pos_encoding[:seq_len,:].to(x.device)
         x=self.dropout(x+pos_enc)

         for i in range(self.num_layers):
             x=self.enc_layers[i](x,padding_mask)

         return x

<center><img src="img/decoder_layer.png"  width="300"/></center>

<center><caption><b>Transformer decoder layer</caption></center>


In [184]:
class DecoderLayer(nn.Module):
    def __init__(self,embedding_dim,num_heads,ffn_dim,dropout_rate=0.1,layernorm_eps=1e-6):
        super().__init__()
        self.mha1=nn.MultiheadAttention(embedding_dim,num_heads,dropout=dropout_rate,batch_first=True)
        self.mha2=nn.MultiheadAttention(embedding_dim,num_heads,dropout=dropout_rate,batch_first=True)
        self.ffn=FullyConnected(embedding_dim,ffn_dim)

        self.layernorm1=nn.LayerNorm(embedding_dim,eps=layernorm_eps)
        self.layernorm2=nn.LayerNorm(embedding_dim,eps=layernorm_eps)
        self.layernorm3=nn.LayerNorm(embedding_dim,eps=layernorm_eps)

        self.dropout1=nn.Dropout(dropout_rate)
        self.dropout2=nn.Dropout(dropout_rate)
        self.dropout3=nn.Dropout(dropout_rate)
    def forward(self,x,enc_out,look_ahead_mask,padding_mask):

        masked_att,_=self.mha1(query=x,key=x,value=x,attn_mask=look_ahead_mask)
        out1 = self.layernorm1( x + self.dropout1(masked_att))

        cross_att,_=self.mha2(query=out1,key=enc_out,value=enc_out,key_padding_mask=padding_mask)
        out2=self.layernorm2(out1+self.dropout2(cross_att))

        ffn_out=self.ffn(out2)
        out3=self.layernorm3(out2+self.dropout3(ffn_out))

        return out3


<center><img src="img/decoder.png"  width="300"/></center>

<center><caption><b>Full decoder</caption></center>


In [185]:
class Decoder(nn.Module):
    def __init__(self,num_layers,embedding_dim,num_heads,ffn_dim,target_vocab_size,max_seq_len,dropout_rate=0.1, padding_id=0):
        super().__init__()
        self.num_layers=num_layers
        self.embedding_dim=embedding_dim
        self.embedding=nn.Embedding(target_vocab_size,embedding_dim,padding_idx=padding_id)
        self.pos_encoding=get_positional_encoding(max_seq_len,embedding_dim)
        self.dec_layers=nn.ModuleList([
            DecoderLayer(embedding_dim,num_heads,ffn_dim,dropout_rate) for _ in range(num_layers)
        ])
        self.dropout=nn.Dropout(dropout_rate)
        self.scale_dm = torch.sqrt(torch.tensor(self.embedding_dim))
    def forward(self,x, enc_output,look_ahead_mask, padding_mask):
        """
        Forward  pass for the Decoder

        Arguments:
            x -- Tensor of shape (batch_size, target_seq_len)
            enc_output --  Tensor of shape(batch_size, input_seq_len, embedding_dim)
            look_ahead_mask -- Boolean mask for the target_input (batch_size, tgt_seq_len, tgt_seq_len)
            padding_mask -- Boolean mask for the second multihead attention layer (batch_size, src_seq_len)
        Returns:
            x -- Tensor of shape (batch_size, target_seq_len, embedding_dim)
        """
        seq_len=x.shape[1]
        x=self.embedding(x)* self.scale_dm
        pos_enc=self.pos_encoding[:seq_len,:].to(x.device)
        x=self.dropout(x+pos_enc)
        for i in range(self.num_layers):
            x= self.dec_layers[i](x,enc_output,look_ahead_mask,padding_mask)

        return x

<center><img src="img/transformer.png"  width="400"/></center>

<center><caption><b>Full decoder</caption></center>


In [186]:
class Transformer(nn.Module):
    def __init__(self,vocab_size=8000,max_seq_len=100,padding_idx=0):
        super().__init__()
        self.encoder=Encoder(num_layers=2,embedding_dim=256,num_heads=4,ffn_dim=1024,input_vocab_size=vocab_size,max_seq_len=max_seq_len,padding_id=padding_idx)
        self.decoder=Decoder(num_layers=2,embedding_dim=256,num_heads=4,ffn_dim=1024,target_vocab_size=vocab_size,max_seq_len=max_seq_len,padding_id=padding_idx)
        self.final_layer=nn.Linear(in_features=256,out_features=vocab_size)

    def forward(self,src,tgt,padding_mask,teacher_forcing=0):

        batch_len,tgt_len = tgt.shape
        dec_input= tgt[:,0:1]

        enc_output =self.encoder(src,padding_mask)

        outputs=[]
        for i in range(0,tgt_len):

            tgt_look_ahead_mask =create_look_ahead_mask(dec_input.size(1)).to(device)
            dec_out=self.decoder(dec_input,enc_output,tgt_look_ahead_mask,padding_mask)
            pred=self.final_layer(dec_out)

            outputs.append(pred[:,-1:,:])

            if i<tgt_len-1:

                tf=torch.rand(batch_len,1,device=device)<teacher_forcing
                pred_t= pred[:,-1:,:].argmax(dim=-1)
                ground_t=tgt[:,i+1:i+2]
                next_t=torch.where(tf,ground_t,pred_t)

                dec_input= torch.cat([dec_input,next_t],dim=1)

        output=torch.cat(outputs,dim=1)
        return output



# --------------------TRAINING-TEST CODE--------------------------

In [155]:
def train_epoch(model,dataloader,criterian,optimizer,teacher_forcing=1.0):
    model.train()
    total_loss=0

    progress_bar=tqdm(dataloader,desc=f"Training (TF={teacher_forcing:.2f})")

    for batch in progress_bar:
        input_ids = batch['input_ids'].to(device)
        target_ids = batch['target_ids'].to(device)

        decoder_input = target_ids[:, :-1]
        decoder_target = target_ids[:, 1:]

        padding_mask=create_padding_mask(input_ids).to(device)

        optimizer.zero_grad()
        output= model(input_ids,decoder_input,padding_mask,teacher_forcing=teacher_forcing)


        loss = criterian(output.reshape(-1,output.size(-1)),decoder_target.reshape(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),1.0)
        optimizer.step()
        total_loss += loss.item()
        progress_bar.set_postfix({'Train loss': loss.item()})

    return total_loss / len(dataloader)



In [156]:
def calculate_bleu(predictions, references):
    """Calculate BLEU score"""
    bleu = BLEU()
    score = bleu.corpus_score(predictions, [references])
    return score.score

def calculate_rouge_l(predictions, references):
    """Calculate ROUGE-L score"""
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=False)
    scores = []
    for pred, ref in zip(predictions, references):
        score = scorer.score(ref, pred)
        scores.append(score['rougeL'].fmeasure)
    return np.mean(scores)

def calculate_chrf(predictions, references):
    """Calculate chrF score"""
    chrf = CHRF()
    score = chrf.corpus_score(predictions, [references])
    return score.score

def calculate_perplexity(loss):
    """Calculate perplexity from loss"""
    return math.exp(min(loss, 100))

In [157]:
def evaluate(model, dataloader, criterian):
    model.eval()
    total_loss = 0
    predictions = []
    references = []

    with torch.no_grad():
        progress_bar = tqdm(dataloader, desc="Evaluating (no teacher forcing)")
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            target_ids = batch['target_ids'].to(device)

            decoder_input = target_ids[:, :-1]
            decoder_target = target_ids[:, 1:]

            padding_mask = create_padding_mask(input_ids).to(device)

            output = model(input_ids, decoder_input, padding_mask, teacher_forcing=0)
            loss = criterian(output.reshape(-1, output.size(-1)), decoder_target.reshape(-1))

            total_loss += loss.item()
            progress_bar.set_postfix({'val loss': loss.item()})

            # Decode predictions and references for metrics
            pred_ids = output.argmax(dim=-1).cpu().tolist()
            tgt_ids = decoder_target.cpu().tolist()
            for pred, ref in zip(pred_ids, tgt_ids):
                pred_text = tokenizer.decode(pred)
                ref_text = tokenizer.decode(ref)
                predictions.append(pred_text.replace("<mask>", ""))
                references.append(ref_text.replace("<mask>", ""))

    avg_loss = total_loss / len(dataloader)

    bleu_score = calculate_bleu(predictions, references)
    rouge_score = calculate_rouge_l(predictions, references)
    chrf_score = calculate_chrf(predictions, references)
    perplexity = calculate_perplexity(avg_loss)

    print("\nEvaluation Results:")
    print(f"  Val Loss:   {avg_loss:.4f}")
    print(f"  BLEU:       {bleu_score:.4f}")
    print(f"  ROUGE-L:    {rouge_score:.4f}")
    print(f"  chrF:       {chrf_score:.4f}")
    print(f"  Perplexity: {perplexity:.4f}")

    return {
        'loss': avg_loss,
        'bleu': bleu_score,
        'rouge_l': rouge_score,
        'chrf': chrf_score,
        'perplexity': perplexity,
        'predictions': predictions[:10],
        'references': references[:10]
    }

In [187]:
model=Transformer(vocab_size=VOCAB_SIZE,padding_idx=PAD_ID).to(device)
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")


Model parameters: 9,838,400


In [188]:
criterian=nn.CrossEntropyLoss(ignore_index=PAD_ID)
optimizer =torch.optim.Adam(model.parameters(),lr=1e-4)
#scheduler =torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=3,factor=0.5)

In [189]:
output=None
best_bleu = 0


In [194]:
NUM_EPOCHS=10
teacher_forcing=1.0
for epoch in range(NUM_EPOCHS):
        print(f"\nEpoch {epoch + 1}/{NUM_EPOCHS}")
        #teacher_forcing=max(0,teacher_forcing-0.1)

        loss =train_epoch(model,train_loader,criterian,optimizer,teacher_forcing=teacher_forcing)
        #scheduler.step(loss)
        output = evaluate(model, val_loader,criterian)


        if output['bleu'] > best_bleu:
            best_bleu = output['bleu']
            torch.save({
                'model_state_dict': model.state_dict(),
            }, 'best_model.pt')
            print(f"\n  ✓ New best model saved! BLEU: {best_bleu:.2f}")




Epoch 1/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.35it/s, Train loss=2.83]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 49.58it/s, val loss=2.43]



Evaluation Results:
  Val Loss:   4.0509
  BLEU:       1.4908
  ROUGE-L:    0.0000
  chrF:       3.1938
  Perplexity: 57.4483

Epoch 2/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.21it/s, Train loss=3.55]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:02<00:00, 46.88it/s, val loss=2.72]



Evaluation Results:
  Val Loss:   4.0294
  BLEU:       2.0584
  ROUGE-L:    0.0000
  chrF:       3.7613
  Perplexity: 56.2254

Epoch 3/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.37it/s, Train loss=3.28]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 56.28it/s, val loss=2.41]



Evaluation Results:
  Val Loss:   4.1172
  BLEU:       0.0000
  ROUGE-L:    0.0000
  chrF:       3.6183
  Perplexity: 61.3857

Epoch 4/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.40it/s, Train loss=3.38]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 56.62it/s, val loss=2.45]



Evaluation Results:
  Val Loss:   4.1096
  BLEU:       0.0000
  ROUGE-L:    0.0000
  chrF:       3.2953
  Perplexity: 60.9240

Epoch 5/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.63it/s, Train loss=2.6]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 58.59it/s, val loss=2.7]



Evaluation Results:
  Val Loss:   4.0458
  BLEU:       2.1402
  ROUGE-L:    0.0000
  chrF:       3.9573
  Perplexity: 57.1563

Epoch 6/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.66it/s, Train loss=3.32]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 48.89it/s, val loss=2.83]



Evaluation Results:
  Val Loss:   4.0593
  BLEU:       1.6101
  ROUGE-L:    0.0000
  chrF:       3.8634
  Perplexity: 57.9336

Epoch 7/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.59it/s, Train loss=2.93]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:02<00:00, 47.54it/s, val loss=2.5]



Evaluation Results:
  Val Loss:   4.1397
  BLEU:       2.1376
  ROUGE-L:    0.0000
  chrF:       3.7633
  Perplexity: 62.7868

Epoch 8/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 17.10it/s, Train loss=3.64]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 59.16it/s, val loss=2.81]



Evaluation Results:
  Val Loss:   4.1051
  BLEU:       3.0941
  ROUGE-L:    0.0000
  chrF:       3.9361
  Perplexity: 60.6495

  ✓ New best model saved! BLEU: 3.09

Epoch 9/10


Training (TF=1.00): 100%|██████████| 225/225 [00:12<00:00, 17.32it/s, Train loss=2.88]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 58.54it/s, val loss=2.67]



Evaluation Results:
  Val Loss:   4.1538
  BLEU:       2.6441
  ROUGE-L:    0.0000
  chrF:       3.8705
  Perplexity: 63.6731

Epoch 10/10


Training (TF=1.00): 100%|██████████| 225/225 [00:13<00:00, 16.63it/s, Train loss=3.8]
Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:01<00:00, 56.97it/s, val loss=2.89]



Evaluation Results:
  Val Loss:   4.0937
  BLEU:       1.4973
  ROUGE-L:    0.0000
  chrF:       4.2227
  Perplexity: 59.9617


In [195]:
print("Examples:")
for i in range(10):
    print(f"\n  Example {i+1}:")
    print(f"    Prediction: {output['predictions'][i]}")
    print(f"    Reference:  {output['references'][i]}")

Examples:

  Example 1:
    Prediction: ایک
    Reference:  قیمت

  Example 2:
    Prediction: اور ہیں
    Reference:  اور پی

  Example 3:
    Prediction: میں کے
    Reference:  نیپرا دی

  Example 4:
    Prediction: کے کی
    Reference:  اینڈ ڈسپ

  Example 5:
    Prediction: اس
    Reference:  استحکام

  Example 6:
    Prediction: اور
    Reference:  چیز

  Example 7:
    Prediction: میں
    Reference:  ایک

  Example 8:
    Prediction: کی
    Reference:  سے

  Example 9:
    Prediction: کیا
    Reference:  ہمت

  Example 10:
    Prediction: ہوں
    Reference:  ہے


In [196]:
model.load_state_dict(torch.load('best_model.pt')['model_state_dict'])
output = evaluate(model, val_loader,criterian)

Evaluating (no teacher forcing): 100%|██████████| 97/97 [00:02<00:00, 46.37it/s, val loss=2.81]



Evaluation Results:
  Val Loss:   4.1051
  BLEU:       3.0941
  ROUGE-L:    0.0000
  chrF:       3.9361
  Perplexity: 60.6495


In [197]:
print("Examples:")
for i in range(10):
    print(f"\n  Example {i+1}:")
    print(f"    Prediction: {output['predictions'][i]}")
    print(f"    Reference:  {output['references'][i]}")

Examples:

  Example 1:
    Prediction: ہے
    Reference:  قیمت

  Example 2:
    Prediction: اور ہیں
    Reference:  اور پی

  Example 3:
    Prediction: کے کی
    Reference:  نیپرا دی

  Example 4:
    Prediction: کے کی
    Reference:  اینڈ ڈسپ

  Example 5:
    Prediction: اس
    Reference:  استحکام

  Example 6:
    Prediction: اور
    Reference:  چیز

  Example 7:
    Prediction: ہے
    Reference:  ایک

  Example 8:
    Prediction: کے
    Reference:  سے

  Example 9:
    Prediction: بات
    Reference:  ہمت

  Example 10:
    Prediction: ہوں
    Reference:  ہے


In [198]:
save_path = "/content/drive/MyDrive/NLP_P2/1/span_15_3.pth"
# Save model
torch.save({
    'model_state_dict': model.state_dict(),
}, save_path)
print("Model saved as 'span_15_3.pth'")



Model saved as 'span_15_3.pth'
