In [6]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import random
from torch.utils.data import Dataset, DataLoader
import sentencepiece as sp
from tqdm import tqdm
import math
import numpy as np

In [2]:
!pip install rouge_score torch



In [3]:
from sacrebleu.metrics import BLEU, CHRF
from rouge_score import rouge_scorer

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [5]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [8]:
tokenizer = sp.SentencePieceProcessor()
tokenizer.Load('bpe_tokenizer.model')


True

In [9]:
VOCAB_SIZE = tokenizer.get_piece_size()
PAD_ID = tokenizer.piece_to_id('<pad>')
UNK_ID = tokenizer.piece_to_id('<unk>')
SOS_ID = tokenizer.piece_to_id('<s>')
EOS_ID = tokenizer.piece_to_id('</s>')
print(f"Vocabulary size: {VOCAB_SIZE}")
print(f"PAD ID: {PAD_ID}")


Vocabulary size: 768
PAD ID: 0


In [None]:
s=tokenizer.Encode("میرا اس سے کوئی لینا دینا نہیں",add_eos=True,add_bos=True)
print(s)

[520, 31, 35, 135, 90, 51, 121, 51, 52, 3]


In [None]:
[520, 31, 35, 135, 90, 51, 121, 51, 52]
print(tokenizer.decode(s))

میرا اس سے کوئی لینا دینا نہیں


In [8]:

with open('/content/drive/MyDrive/NLP_ASS2/TEST/sentences_cleaned.txt', 'r', encoding='utf-8') as f:
        data = [line.strip() for line in f if line.strip()]


In [9]:
# Split dataset: 80% train, 10% validation, 10% test
print("Splitting dataset...")
random.shuffle(data)
train_size = int(0.8 * len(data))

train_groups = data[:train_size]
val_groups = data[train_size:]

print(f"Train: {len(train_groups)}, Val: {len(val_groups)}")



Splitting dataset...
Train: 8197, Val: 2050


In [None]:

class UrduChatbotDataset(Dataset):
    """Dataset for Urdu chatbot with teacher forcing"""

    def __init__(self, sentence_groups, max_len=50):
        self.sentence_groups = sentence_groups
        self.max_len = max_len

    def __len__(self):
        return len(self.sentence_groups)

    def __getitem__(self, idx):
        # Split based on word count (2/5th for input, 3/5th for target)
        group = self.sentence_groups[idx]
        words = group.split()

        # Calculate split point based on word count
        total_words = len(words)
        split_point = max(1, total_words * 2 // 5)  # At least 1 word for input

        input_text = ' '.join(words[:split_point])
        target_text = ' '.join(words[split_point:])

        # Tokenize input and target
        input_tokens = tokenizer.encode(input_text)
        target_tokens = tokenizer.encode(target_text)

        # Truncate and pad
        input_ids = input_tokens[:self.max_len] + [PAD_ID] * (self.max_len - len(input_tokens))
        target_ids = target_tokens[:self.max_len-1]+[EOS_ID] + [PAD_ID] * (self.max_len - len(target_tokens))

        # Teacher forcing: decoder input is [START] + target[:-1], decoder target is target
        decoder_input_ids = [SOS_ID] + target_ids[:-1]
        decoder_target_ids = target_ids 

        return {
            'encoder_input': torch.tensor(input_ids, dtype=torch.long),
            'decoder_input': torch.tensor(decoder_input_ids, dtype=torch.long),
            'decoder_target': torch.tensor(decoder_target_ids, dtype=torch.long)
        }


In [11]:
train_dataset = UrduChatbotDataset(train_groups)
val_dataset = UrduChatbotDataset(val_groups)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


# --------------------TRANSFORMER CODE--------------------------

In [None]:

class PositionalEncoding(nn.Module):
    """Positional encoding using sine and cosine"""
    def _init_(self, d_model, max_len=5000, dropout=0.1):
        super()._init_()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

class MultiHeadAttention(nn.Module):
    """Multi-Head Attention mechanism"""
    def _init_(self, d_model, num_heads, dropout=0.1):
        super()._init_()
        assert d_model % num_heads == 0
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_k = d_model // num_heads
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
    
    def split_heads(self, x):
        batch_size, seq_len, d_model = x.size()
        x = x.view(batch_size, seq_len, self.num_heads, self.d_k)
        return x.transpose(1, 2)
    
    def scaled_dot_product_attention(self, Q, K, V, mask=None):
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        attention_weights = F.softmax(scores, dim=-1)
        attention_weights = self.dropout(attention_weights)
        output = torch.matmul(attention_weights, V)
        return output, attention_weights
    
    def forward(self, query, key, value, mask=None):
        batch_size = query.size(0)
        Q = self.split_heads(self.W_q(query))
        K = self.split_heads(self.W_k(key))
        V = self.split_heads(self.W_v(value))
        attn_output, _ = self.scaled_dot_product_attention(Q, K, V, mask)
        attn_output = attn_output.transpose(1, 2).contiguous()
        attn_output = attn_output.view(batch_size, -1, self.d_model)
        output = self.W_o(attn_output)
        return output

class FeedForward(nn.Module):
    """Position-wise Feed-Forward Network"""
    def _init_(self, d_model, d_ff, dropout=0.1):
        super()._init_()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.linear2(x)
        return x

class EncoderLayer(nn.Module):
    """Single Transformer Encoder Layer"""
    def _init_(self, d_model, num_heads, d_ff, dropout=0.1):
        super()._init_()
        self.self_attention = MultiHeadAttention(d_model, num_heads, dropout)
        self.feed_forward = FeedForward(d_model, d_ff, dropout)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
    
    def forward(self, x, mask=None):
        attn_output = self.self_attention(x, x, x, mask)
        x = self.norm1(x + self.dropout1(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm2(x + self.dropout2(ff_output))
        return x

class DecoderLayer(nn.Module):
    """Single Transformer Decoder Layer"""
    def _init_(self, d_model, num_heads, d_ff, dropout=0.1):
        super()._init_()
        self.self_attention = MultiHeadAttention(d_model, num_heads, dropout)
        self.cross_attention = MultiHeadAttention(d_model, num_heads, dropout)
        self.feed_forward = FeedForward(d_model, d_ff, dropout)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
    
    def forward(self, x, encoder_output, src_mask=None, tgt_mask=None):
        self_attn_output = self.self_attention(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout1(self_attn_output))
        cross_attn_output = self.cross_attention(x, encoder_output, encoder_output, src_mask)
        x = self.norm2(x + self.dropout2(cross_attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm3(x + self.dropout3(ff_output))
        return x

class Transformer(nn.Module):
    """Complete Transformer Encoder-Decoder Model"""
    def _init_(self, vocab_size, d_model=256, num_heads=2, d_ff=1024,
                 num_encoder_layers=2, num_decoder_layers=2, max_len=512,
                 dropout=0.1, pad_idx=0):
        super()._init_()
        self.d_model = d_model
        self.pad_idx = pad_idx
        
        self.encoder_embedding = nn.Embedding(vocab_size, d_model)
        self.decoder_embedding = nn.Embedding(vocab_size, d_model)
        self.encoder_pos_encoding = PositionalEncoding(d_model, max_len, dropout)
        self.decoder_pos_encoding = PositionalEncoding(d_model, max_len, dropout)
        
        self.encoder_layers = nn.ModuleList([
            EncoderLayer(d_model, num_heads, d_ff, dropout)
            for _ in range(num_encoder_layers)
        ])
        
        self.decoder_layers = nn.ModuleList([
            DecoderLayer(d_model, num_heads, d_ff, dropout)
            for _ in range(num_decoder_layers)
        ])
        
        self.output_projection = nn.Linear(d_model, vocab_size)
        self._init_parameters()
    
    def _init_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
    
    def make_src_mask(self, src):
        src_mask = (src != self.pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask
    
    def make_tgt_mask(self, tgt):
        batch_size, tgt_len = tgt.size()
        tgt_pad_mask = (tgt != self.pad_idx).unsqueeze(1).unsqueeze(2)
        tgt_sub_mask = torch.tril(torch.ones((tgt_len, tgt_len), device=tgt.device)).bool()
        tgt_mask = tgt_pad_mask & tgt_sub_mask
        return tgt_mask
    
    def forward(self, src, tgt):
        src_mask = self.make_src_mask(src)
        tgt_mask = self.make_tgt_mask(tgt)
        
        # Encoder
        x = self.encoder_embedding(src) * math.sqrt(self.d_model)
        x = self.encoder_pos_encoding(x)
        for layer in self.encoder_layers:
            x = layer(x, src_mask)
        encoder_output = x
        
        # Decoder
        x = self.decoder_embedding(tgt) * math.sqrt(self.d_model)
        x = self.decoder_pos_encoding(x)
        for layer in self.decoder_layers:
            x = layer(x, encoder_output, src_mask, tgt_mask)
        
        output = self.output_projection(x)
        return output


# --------------------TRAINING-TEST CODE--------------------------

In [None]:
def train_epoch(model, train_loader, criterion, optimizer):
    """Train for one epoch with teacher forcing"""
    model.train()
    epoch_loss = 0
    pbar = tqdm(train_loader, desc="Training")
    
    for batch in pbar:
        encoder_input = batch['encoder_input'].to(device)
        decoder_input = batch['decoder_input'].to(device)
        decoder_target = batch['decoder_target'].to(device)
        
        optimizer.zero_grad()
        output = model(encoder_input, decoder_input)
        output = output.reshape(-1, output.size(-1))
        target = decoder_target.reshape(-1)
        loss = criterion(output, target)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        epoch_loss += loss.item()
        pbar.set_postfix({'Train loss': f'{loss.item():.4f}'})
    
    return epoch_loss / len(train_loader)

In [None]:
def validate(model, val_loader, criterion, device):
    """Validate the model"""
    model.eval()
    epoch_loss = 0
    
    with torch.no_grad():
        pbar = tqdm(val_loader, desc="Validation")
        for batch in pbar:
            encoder_input = batch['encoder_input'].to(device)
            decoder_input = batch['decoder_input'].to(device)
            decoder_target = batch['decoder_target'].to(device)
            
            output = model(encoder_input, decoder_input)
            output = output.reshape(-1, output.size(-1))
            target = decoder_target.reshape(-1)
            loss = criterion(output, target)
            epoch_loss += loss.item()
            pbar.set_postfix({'loss': f'{loss.item():.4f}'})
    
    return epoch_loss / len(val_loader)

In [21]:
def calculate_bleu(predictions, references):
    """Calculate BLEU score"""
    bleu = BLEU()
    score = bleu.corpus_score(predictions, [references])
    return score.score

def calculate_rouge_l(predictions, references):
    """Calculate ROUGE-L score"""
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=False)
    scores = []
    for pred, ref in zip(predictions, references):
        score = scorer.score(ref, pred)
        scores.append(score['rougeL'].fmeasure)
    return np.mean(scores)

def calculate_chrf(predictions, references):
    """Calculate chrF score"""
    chrf = CHRF()
    score = chrf.corpus_score(predictions, [references])
    return score.score

def calculate_perplexity(loss):
    """Calculate perplexity from loss"""
    return math.exp(min(loss, 100))

In [None]:
def evaluate(model, dataloader, criterian):
    model.eval()
    total_loss = 0
    predictions = []
    references = []

    with torch.no_grad():
        progress_bar = tqdm(dataloader, desc="Evaluating (no teacher forcing)")
        for batch in progress_bar:
            encoder_input = batch['encoder_input'].to(device)
            decoder_input = batch['decoder_input'].to(device)
            decoder_target = batch['decoder_target'].to(device)

            output = model(encoder_input, decoder_input)
            loss = criterian(output.reshape(-1, output.size(-1)), decoder_target.reshape(-1))

            total_loss += loss.item()
            progress_bar.set_postfix({'val loss': loss.item()})

            # Decode predictions and references for metrics
            pred_ids = output.argmax(dim=-1).cpu().tolist()
            tgt_ids = decoder_target.cpu().tolist()
            for pred, ref in zip(pred_ids, tgt_ids):
                pred_text = tokenizer.decode(pred)
                ref_text = tokenizer.decode(ref)
                predictions.append(pred_text)
                references.append(ref_text)

    avg_loss = total_loss / len(dataloader)

    bleu_score = calculate_bleu(predictions, references)
    rouge_score = calculate_rouge_l(predictions, references)
    chrf_score = calculate_chrf(predictions, references)
    perplexity = calculate_perplexity(avg_loss)

    print("\nEvaluation Results:")
    print(f"  Val Loss:   {avg_loss:.4f}")
    print(f"  BLEU:       {bleu_score:.4f}")
    print(f"  ROUGE-L:    {rouge_score:.4f}")
    print(f"  chrF:       {chrf_score:.4f}")
    print(f"  Perplexity: {perplexity:.4f}")

    return {
        'loss': avg_loss,
        'bleu': bleu_score,
        'rouge_l': rouge_score,
        'chrf': chrf_score,
        'perplexity': perplexity,
        'predictions': predictions[:10],
        'references': references[:10]
    }

In [None]:

def generate_text(model, input_text, max_length=30, temperature=0.8, device='cpu'):
    """Generate text continuation using beam search"""
    model.eval()
    
    with torch.no_grad():
        token_ids = tokenizer.encode(input_text)
        encoder_ids = token_ids[:50] + [PAD_ID] * (50 - len(token_ids))
        encoder_input = torch.tensor([encoder_ids], dtype=torch.long).to(device)
        decoder_input = torch.tensor([[SOS_ID]], dtype=torch.long).to(device)
        
        generated_tokens = []
        for _ in range(max_length):
            output = model(encoder_input, decoder_input)
            next_token_logits = output[0, -1, :] / temperature
            probs = F.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            
            if next_token.item() == EOS_ID:
                break
            
            generated_tokens.append(next_token.item())
            decoder_input = torch.cat([decoder_input, next_token.unsqueeze(0)], dim=1)
        
        output_tokens = []
        for idx in generated_tokens:
            if idx not in [PAD_ID,SOS_ID,EOS_ID]:
                token = tokenizer
                output_tokens.append(token)
        
        text = ''.join(output_tokens)
        return text.strip()


In [None]:

model = Transformer(
    vocab_size=768,
    d_model=512,
    num_heads=2,
    d_ff=1024,
    num_encoder_layers=2,
    num_decoder_layers=2,
    max_len=50,
    dropout=0.1,
    pad_idx=PAD_ID
).to(device)


print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")


Model parameters: 4,080,128


In [24]:
criterian=nn.CrossEntropyLoss(ignore_index=PAD_ID)
optimizer =torch.optim.Adam(model.parameters(),lr=5e-4)
#scheduler =torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=3,factor=0.5)

In [25]:
output=None
best_bleu = 0


In [34]:
NUM_EPOCHS=2
teacher_forcing=0.6
for epoch in range(NUM_EPOCHS):
        print(f"\nEpoch {epoch + 1}/{NUM_EPOCHS}")
        #teacher_forcing=max(0,teacher_forcing-0.1)

        loss =train_epoch(model,train_loader,criterian,optimizer,teacher_forcing=teacher_forcing)
        #scheduler.step(loss)
        output = evaluate(model, val_loader,criterian)


        if output['bleu'] > best_bleu:
            best_bleu = output['bleu']
            torch.save({
                'model_state_dict': model.state_dict(),
            }, 'best_model.pt')
            print(f"\n  ✓ New best model saved! BLEU: {best_bleu:.2f}")




Epoch 1/2


Training (TF=0.60): 100%|██████████| 257/257 [02:12<00:00,  1.94it/s, Train loss=4.27]
Evaluating (no teacher forcing): 100%|██████████| 65/65 [00:10<00:00,  5.92it/s, val loss=5.72]



Evaluation Results:
  Val Loss:   5.9951
  BLEU:       0.0170
  ROUGE-L:    0.0000
  chrF:       6.2281
  Perplexity: 401.4465

Epoch 2/2


Training (TF=0.60): 100%|██████████| 257/257 [02:12<00:00,  1.95it/s, Train loss=4.38]
Evaluating (no teacher forcing): 100%|██████████| 65/65 [00:11<00:00,  5.83it/s, val loss=6.03]



Evaluation Results:
  Val Loss:   6.0266
  BLEU:       0.0108
  ROUGE-L:    0.0000
  chrF:       6.2077
  Perplexity: 414.3145


In [35]:
print("Examples:")
for i in range(10):

    print(f"\n  Example {i+1}:")
    print(f"    Prediction: {output['predictions'][i]}")
    print(f"    Reference:  {output['references'][i]}")

Examples:

  Example 1:
    Prediction: کی حقثیت سے ہے؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟ ہیں؟
    Reference:  ڈرامے کا نام نہیں ہے

  Example 2:
    Prediction: سے ہے؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟؟
    Reference:  الوداع کیا

  Example 3:
    Prediction: کےدیدی کے لیےدیدی کے لیےدیدی کے لیےدیدی کے لیےدیدی کے لیےدیدی کے لیےتوں کی ہے ہے ہے ہے ہے ہے ہے ہے ہے ہے ہے ہے ہے ہے ہے ہے
    Reference:  ہمارے پاکستانی سیاستدا نوں کی ایکسپورٹ کردہ ہوگی

  Example 4:
    Prediction: سے زیادہ تو نہیں ہےرہ نہیں ہےا ہےا ہےتی ہےے نہیں ہےتی ہےتی ہےے ہیںا ہےے ہیںے ہیںے نہیں ہےے ہیںے ہیںے ہیںے ہیںے ہیںے نہیں ہےے ہیںے ہیں
    Reference:  الطائی میں منقسم کیا گیا ہے

  Example 5:
    Prediction: کے لیے اس کا حصہ ہے؟ کے لیےابلفت کے لیےابل کے لیےابل کے لیےابل کے لیےابل کے لیےابل کے لیےابل کے لیےابل کے لیےابل کے لیےاب
    Reference:  تک جاری رہ سکتا ہی

  Example 6:
    Prediction: کی تششششششششششششششششششریف کا ہے ہے ہے ہے ہے ہے ہ

In [None]:
test_inputs = ["یہ ایک", "پاکستان میں", "اچھا"]

for input_text in test_inputs:
    generated = generate_text(model, tokenizer, input_text, max_length=20, device=device)
    print(f"Input:  {input_text}")
    print(f"Output: {generated}")
    print()

In [None]:
model.load_state_dict(torch.load('best_model.pt')['model_state_dict'])
output = evaluate(model, val_loader,criterian)

Evaluating (no teacher forcing): 100%|██████████| 49/49 [00:01<00:00, 27.26it/s, val loss=2.27]



Evaluation Results:
  Val Loss:   3.5039
  BLEU:       1.2726
  ROUGE-L:    0.0000
  chrF:       3.2106
  Perplexity: 33.2460


In [31]:
output = evaluate(model, val_loader,criterian)

Evaluating (no teacher forcing): 100%|██████████| 65/65 [00:12<00:00,  5.26it/s, val loss=6.86]



Evaluation Results:
  Val Loss:   7.2425
  BLEU:       0.0141
  ROUGE-L:    0.0000
  chrF:       7.1527
  Perplexity: 1397.5314


In [32]:
print("Examples:")
for i in range(10):
    print(f"\n  Example {i+1}:")
    print(f"    Prediction: {output['predictions'][i]}")
    print(f"    Reference:  {output['references'][i]}")

Examples:

  Example 1:
    Prediction: کی باتیں نہیں ہے؟تا ہے؟ے گا؟ے گا؟ے گا؟ے گا؟ے گا؟ے گا؟ے گا؟ے گا؟ے گاڑیوں کی تشریف کی تشریف کی تشریف کی
    Reference:  ڈرامے کا نام نہیں ہے

  Example 2:
    Prediction: کہیں کہنا چاہیے ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے؟ ہے
    Reference:  الوداع کیا

  Example 3:
    Prediction: ایک بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار بار
    Reference:  ہمارے پاکستانی سیاستدا نوں کی ایکسپورٹ کردہ ہوگی

  Example 4:
    Prediction: سے زیادہ توقعے نہیں ہے؟تا ہے؟ے گا؟ا ہے؟ا ہے؟ا ہے؟ ہے کہ یہ سفر ہے کہ یہ سفر ہے کہ یہ سفر ہے کہ یہ سفر ہے کہ یہ سفر ہے کہ
    Reference:  الطائی میں منقسم کیا گیا ہے

  Example 5:
    Prediction: کے ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ ساتھ

In [36]:
save_path = "/content/drive/MyDrive/NLP_ASS2/span_15_3.pth"
# Save model
torch.save({
    'model_state_dict': model.state_dict(),
}, save_path)
print("Model saved as 'span_15_3.pth'")



Model saved as 'span_15_3.pth'
