In [1]:
import os
import wandb
import torch
import random
import numpy as np
import pandas as pd
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from prettytable import PrettyTable
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
os.environ['WANDB_API_KEY'] = 'paste your wandb.ai key here'
wandb.login()


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malandandoor[0m ([33malandandoor-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
SOW_token = 0
EOW_token = 1

class Lang:
    def __init__(self, name):
        self.name = name
        self.n_letters = 2 # Count SOW and EOW
        self.letter2index = {}
        self.letter2count = {}
        self.index2letter = {0: "0", 1: "1"}

    def addWord(self, word):
        for ch in word:
            self.addLetter(ch)

    def addLetter(self, ch):
        if ch not in self.letter2index:
            self.letter2index[ch] = self.n_letters
            self.letter2count[ch] = 1
            self.index2letter[self.n_letters] = ch
            self.n_letters += 1
        else:
            self.letter2count[ch] += 1

input_lang = Lang('eng')
output_lang = Lang('mal')
x_train = pd.read_csv('/kaggle/input/malayalam/ml/lexicons/ml.translit.sampled.train.tsv', sep='\t', header=None) #, nrows=1000)
x_val = pd.read_csv('/kaggle/input/malayalam/ml/lexicons/ml.translit.sampled.dev.tsv', sep='\t', header=None)
x_test = pd.read_csv('/kaggle/input/malayalam/ml/lexicons/ml.translit.sampled.test.tsv', sep='\t', header=None)


In [4]:
MAX_LENGTH = 50

def indexesFromWord(lang, word):
    return [lang.letter2index[ch] for ch in word]

def tensorFromWord(lang, word):
    indexes = indexesFromWord(lang, word)
    indexes.append(EOW_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(1, -1)

def wordFromTensor(lang, tensor):
    s = ""
    for i in tensor:
        if(i.item()==1):
            break
        s += lang.index2letter[i.item()] 
    return s

def get_dataloader(x, input_lang, output_lang, batch_size):
    pairs = list(zip(x[1].values, x[0].values))  # Get list of (input, target) tuples
    n = len(pairs)
    input_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)
    target_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)

    for i, (inp, tgt) in enumerate(pairs):
        if not isinstance(inp, str) or not isinstance(tgt, str):
            continue  # skip malformed entries

        input_lang.addWord(inp)
        output_lang.addWord(tgt)
        inp_ids = indexesFromWord(input_lang, inp)
        tgt_ids = indexesFromWord(output_lang, tgt)
        inp_ids.append(EOW_token)
        tgt_ids.append(EOW_token)
        input_ids[i, :len(inp_ids)] = inp_ids
        target_ids[i, :len(tgt_ids)] = tgt_ids

    data = TensorDataset(torch.LongTensor(input_ids).to(device),
                         torch.LongTensor(target_ids).to(device))
    sampler = RandomSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader

In [5]:
class EncoderRNN(nn.Module):
    def __init__(self, config, input_size):
        super(EncoderRNN, self).__init__()
        
        self.bidirectional = False
            
        self.embedding = nn.Embedding(input_size, config.inp_embed_size)
        self.algo = algorithms[config.cell_type](config.inp_embed_size, config.hidden_size, config.num_enc, bidirectional = self.bidirectional, batch_first=True) #config.num_layers
        self.dropout = nn.Dropout(config.dropout)
        
    def forward(self, input):
        output, hidden = self.algo(self.dropout(self.embedding(input)))
        return output, hidden

In [6]:
class DecoderRNN(nn.Module):
    def __init__(self, config, output_size):
        super(DecoderRNN, self).__init__()

        self.out = nn.Linear(config.hidden_size, output_size)
        self.config = config
        self.bidirectional = False
           
        self.embedding = nn.Embedding(output_size, config.hidden_size)
        self.algo = algorithms[config.cell_type](config.hidden_size, config.hidden_size, config.num_enc, bidirectional = self.bidirectional, batch_first=True) #config.num_layers
        self.out = nn.Linear(config.hidden_size, output_size)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        batch_size = encoder_outputs.size(0)
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOW_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden  = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_output)

            if target_tensor is not None:
                # Teacher forcing: Feed the target as the next input
                decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing
            else:
                # Without teacher forcing: use its own predictions as the next input
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()  # detach from history as input

        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        return decoder_outputs, decoder_hidden, None # We return `None` for consistency in the training loop

    def forward_step(self, input, hidden):
        output = F.relu(self.embedding(input))
        output, hidden = self.algo(output, hidden)
        output = self.out(output)
        return output, hidden

In [7]:
def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
                decoder_optimizer, criterion, batch_size, teacher_forcing=True):

    total_loss = 0
    correct = 0
    total = 0
    
    for data in dataloader:
        input_tensor, target_tensor = data  # shape: (B, MAX_LENGTH)
        current_batch_size = input_tensor.size(0)

        target_tensor2 = target_tensor if teacher_forcing else None

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        encoder_outputs, encoder_hidden = encoder(input_tensor)
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor2)

        outputs = decoder_outputs.view(-1, decoder_outputs.size(-1))  # shape: (B*MAX_LENGTH, vocab_size)
        labels = target_tensor.view(-1)  # shape: (B*MAX_LENGTH)

        loss = criterion(outputs, labels)
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        total_loss += loss.item()

        _, predicted = torch.max(outputs, 1)  # shape: (B*MAX_LENGTH)

        # Accuracy: Count how many complete sequences match
        predicted = predicted.view(current_batch_size, MAX_LENGTH)
        labels = labels.view(current_batch_size, MAX_LENGTH)

        matches = (predicted == labels).all(dim=1)  # shape: (B,)
        correct += matches.sum().item()
        total += current_batch_size

    # print(correct)

    return total_loss / len(dataloader), (correct*100) / total


In [8]:
def train(train_dataloader, val_dataloader, test_dataloader, encoder, decoder, n_epochs, config):
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=config.lr)
    criterion = nn.NLLLoss()

    for epoch in range(1, n_epochs + 1):
        print("Epoch:",epoch)
        loss, acc = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, config.batch_size)
        print("Train: accuracy:", acc, "loss:", loss)
        if(acc<0.01 and epoch>=15):
            break
        val_loss, val_acc = train_epoch(val_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, config.batch_size, teacher_forcing=False)
        print("Validation: accuracy:", val_acc, "Loss:", val_loss)
        wandb.log({'train_accuracy': acc,'train_loss': loss,'val_accuracy': val_acc,'val_loss': val_loss})
        
    test_loss, test_acc = train_epoch(test_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, config.batch_size, teacher_forcing=False)
    print("Test: accuracy:", test_acc, "Loss:", test_loss, "\n")

In [15]:
num_epochs = 20

algorithms = {'rnn': nn.RNN,'gru': nn.GRU,'lstm': nn.LSTM}

sweep_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'inp_embed_size':{
            'values': [64, 128, 256]
        },
        'num_dec': {
            'values': [1, 2, 3]
        },
        'num_enc': {
            'values': [1, 2, 3]
        },
        'dropout': {
            'values': [0.2, 0.3]
        },
        'lr': {
            'values': [0.001, 0.0001]
        },
        'hidden_size': {
            'values': [256]
        },
        'batch_size': {
            'values': [64, 128, 256]
        },
        'cell_type':{
            'values': ['rnn', 'gru']
        }
    }
}

sweep_id = wandb.sweep(sweep=sweep_config, project='DL_A3')

Create sweep with ID: mtcfnqzi
Sweep URL: https://wandb.ai/alandandoor-iit-madras/DL_A3/sweeps/mtcfnqzi


In [16]:
def test():
    with wandb.init() as run:
        config = wandb.config

        # Run name formatting
        wandb.run.name = (
            f"{config.cell_type}-E_{config.num_enc}-D_{config.num_enc}-"
            f"do_{config.dropout}-bs_{config.batch_size}-lr_{config.lr}-"
            f"hs_{config.hidden_size}-emb_{config.inp_embed_size}-")
        
        train_dataloader = get_dataloader(x_train, input_lang, output_lang, wandb.config.batch_size)
        val_dataloader = get_dataloader(x_val, input_lang, output_lang, wandb.config.batch_size)
        test_dataloader = get_dataloader(x_test, input_lang, output_lang, wandb.config.batch_size)
        encoder = EncoderRNN(wandb.config, input_lang.n_letters).to(device)
        decoder = DecoderRNN(wandb.config, output_lang.n_letters).to(device)
        print(input_lang.n_letters, output_lang.n_letters)
        train(train_dataloader, val_dataloader, test_dataloader, encoder, decoder, num_epochs, wandb.config)
        # encoder.eval()
        # decoder.eval()
        # evaluate(encoder, decoder)
        
wandb.agent(sweep_id, function=test) # calls main function for count number of times. , count=1
wandb.finish() #cojvqj9b sweep_id

[34m[1mwandb[0m: Agent Starting Run: hqyg7s31 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_dec: 1
[34m[1mwandb[0m: 	num_enc: 3


28 72
Epoch: 1
Train: accuracy: 0.0 loss: 0.6650956464035749
Validation: accuracy: 0.0 Loss: 0.8682441028316369
Epoch: 2
Train: accuracy: 0.0 loss: 0.4951935721579674
Validation: accuracy: 0.0 Loss: 0.8517633643043175
Epoch: 3
Train: accuracy: 0.0 loss: 0.45797463295101987
Validation: accuracy: 0.0 Loss: 0.8904221486509516
Epoch: 4
Train: accuracy: 0.0 loss: 0.44154970052469467
Validation: accuracy: 0.0 Loss: 0.8434952530968055
Epoch: 5
Train: accuracy: 0.0 loss: 0.4273442731433735
Validation: accuracy: 0.0 Loss: 0.8179512854372517
Epoch: 6
Train: accuracy: 0.0 loss: 0.4132558388425382
Validation: accuracy: 0.0 Loss: 0.8220115685730838
Epoch: 7
Train: accuracy: 0.0 loss: 0.40286203755345296
Validation: accuracy: 0.0 Loss: 0.8130715241592922
Epoch: 8
Train: accuracy: 0.0 loss: 0.3937718674034566
Validation: accuracy: 0.0 Loss: 0.8098514347933652
Epoch: 9
Train: accuracy: 0.0017128567024082765 loss: 0.3841278578143522
Validation: accuracy: 0.0 Loss: 0.8076106905937195
Epoch: 10
Train: ac

0,1
train_accuracy,▁▁▁▁▁▁▁▁██▁▁▁█
train_loss,█▄▃▃▃▂▂▂▂▂▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▆▅█▅▃▃▂▂▂▁▃▃▂▁

0,1
train_accuracy,0.00171
train_loss,0.3484
val_accuracy,0.0
val_loss,0.79269


[34m[1mwandb[0m: Agent Starting Run: 7tdam4lu with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_dec: 3
[34m[1mwandb[0m: 	num_enc: 1


28 72
Epoch: 1
Train: accuracy: 0.0017128567024082765 loss: 0.8158819832608788
Validation: accuracy: 0.0 Loss: 0.8730654610527886
Epoch: 2
Train: accuracy: 0.0017128567024082765 loss: 0.5451947434334578
Validation: accuracy: 0.0 Loss: 0.9183282163408067
Epoch: 3
Train: accuracy: 0.0 loss: 0.4971040019712511
Validation: accuracy: 0.0 Loss: 0.8193305664592319
Epoch: 4
Train: accuracy: 0.0 loss: 0.45689975145870015
Validation: accuracy: 0.01772735330615139 Loss: 0.7799878597259522
Epoch: 5
Train: accuracy: 0.0 loss: 0.42491134009945575
Validation: accuracy: 0.01772735330615139 Loss: 0.7434302753872342
Epoch: 6
Train: accuracy: 0.008564283512041382 loss: 0.39874835628649336
Validation: accuracy: 0.10636411983690836 Loss: 0.7155773255560133
Epoch: 7
Train: accuracy: 0.056524271179473125 loss: 0.3741072825842926
Validation: accuracy: 0.10636411983690836 Loss: 0.6922286947568258
Epoch: 8
Train: accuracy: 0.11476139906135453 loss: 0.35042334946292236
Validation: accuracy: 0.12409147314305974 L

0,1
train_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▅▆▇█
train_loss,█▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▁▁▁▁▂▂▂▂▂▃▂▃▄▃▅▄▆▆█
val_loss,▇█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁

0,1
train_accuracy,5.28759
train_loss,0.19369
val_accuracy,1.08137
val_loss,0.55854


[34m[1mwandb[0m: Agent Starting Run: 6l8u3ubg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 64
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_dec: 1
[34m[1mwandb[0m: 	num_enc: 3


28 72
Epoch: 1
Train: accuracy: 0.0 loss: 0.7126096830114739
Validation: accuracy: 0.03545470661230278 Loss: 0.7369306763906157
Epoch: 2
Train: accuracy: 0.03425713404816553 loss: 0.45707524642103475
Validation: accuracy: 0.12409147314305974 Loss: 0.5888795226477506
Epoch: 3
Train: accuracy: 0.47274844986468434 loss: 0.3465876151345515
Validation: accuracy: 0.7800035454706612 Loss: 0.528822761238291
Epoch: 4
Train: accuracy: 1.7676681168853414 loss: 0.2676287122878684
Validation: accuracy: 1.3472788512675058 Loss: 0.4778485345036796
Epoch: 5
Train: accuracy: 4.861087321434689 loss: 0.20829999789611925
Validation: accuracy: 2.605920936004255 Loss: 0.44291959418339677
Epoch: 6
Train: accuracy: 9.751293206810319 loss: 0.1671705367319033
Validation: accuracy: 4.502747739762453 Loss: 0.41137926933470736
Epoch: 7
Train: accuracy: 15.674351683738138 loss: 0.13851519297045398
Validation: accuracy: 6.63003013650062 Loss: 0.3841663578922829
Epoch: 8
Train: accuracy: 21.354184508923982 loss: 0.11

0,1
train_accuracy,▁▁▁▁▂▂▃▃▄▅▅▅▆▆▇▇▇▇██
train_loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▂▂▂▃▃▄▄▅▅▆▆▇▇██
val_loss,█▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁

0,1
train_accuracy,62.33257
train_loss,0.03668
val_accuracy,41.87201
val_loss,0.18546


[34m[1mwandb[0m: Agent Starting Run: 5glch16f with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 64
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_dec: 2
[34m[1mwandb[0m: 	num_enc: 3


28 72
Epoch: 1
Train: accuracy: 0.0 loss: 0.7739179126543342
Validation: accuracy: 0.0 Loss: 0.9022456076410081
Epoch: 2
Train: accuracy: 0.0 loss: 0.5510292909468811
Validation: accuracy: 0.0 Loss: 0.9241186287668016
Epoch: 3
Train: accuracy: 0.0 loss: 0.5073811334254184
Validation: accuracy: 0.0 Loss: 0.9486446566051907
Epoch: 4
Train: accuracy: 0.0 loss: 0.48347228374209916
Validation: accuracy: 0.0 Loss: 1.0064421521292792
Epoch: 5
Train: accuracy: 0.0 loss: 0.4787094315240889
Validation: accuracy: 0.0 Loss: 0.8966968403922186
Epoch: 6
Train: accuracy: 0.0 loss: 0.45873728564062055
Validation: accuracy: 0.0 Loss: 0.9178169329961141
Epoch: 7
Train: accuracy: 0.0 loss: 0.45034276917413757
Validation: accuracy: 0.0 Loss: 0.9238175405396356
Epoch: 8
Train: accuracy: 0.0 loss: 0.4419031405344573
Validation: accuracy: 0.0 Loss: 0.9281988024711609
Epoch: 9
Train: accuracy: 0.0 loss: 0.43491515497670913
Validation: accuracy: 0.0 Loss: 0.8627900838851928
Epoch: 10
Train: accuracy: 0.0051385

0,1
train_accuracy,▁▁▁▁▁▁▁▁▁▄█▂▅▃
train_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▄▄▆█▃▄▄▅▂▃▂▃▃▁

0,1
train_accuracy,0.00343
train_loss,0.4073
val_accuracy,0.0
val_loss,0.84408


[34m[1mwandb[0m: Agent Starting Run: fjjvirb2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_dec: 3
[34m[1mwandb[0m: 	num_enc: 3


28 72
Epoch: 1
Train: accuracy: 0.0 loss: 0.7692091762367432
Validation: accuracy: 0.0 Loss: 0.8960955474111769
Epoch: 2
Train: accuracy: 0.0 loss: 0.5537206602957369
Validation: accuracy: 0.0 Loss: 0.9247069517771404
Epoch: 3
Train: accuracy: 0.0 loss: 0.5089050406830576
Validation: accuracy: 0.0 Loss: 0.9992077045970493
Epoch: 4
Train: accuracy: 0.0 loss: 0.492232779248799
Validation: accuracy: 0.0 Loss: 0.8654321047994825
Epoch: 5
Train: accuracy: 0.0 loss: 0.471723957969532
Validation: accuracy: 0.0 Loss: 0.8948398921224806
Epoch: 6
Train: accuracy: 0.0 loss: 0.46002776845800536
Validation: accuracy: 0.0 Loss: 0.964135558075375
Epoch: 7
Train: accuracy: 0.00513857010722483 loss: 0.45094982278686124
Validation: accuracy: 0.0 Loss: 0.8933986518118117
Epoch: 8
Train: accuracy: 0.0017128567024082765 loss: 0.44035606017780515
Validation: accuracy: 0.0 Loss: 0.9123984098434448
Epoch: 9
Train: accuracy: 0.0 loss: 0.43206893810818964
Validation: accuracy: 0.0 Loss: 0.8997966355747646
Epoch

0,1
train_accuracy,▁▁▁▁▁▁█▃▁▁▁▁▁▃
train_loss,█▄▃▃▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▃▄█▂▃▆▃▄▃▂▁▁▃▁

0,1
train_accuracy,0.00171
train_loss,0.40737
val_accuracy,0.0
val_loss,0.85495


[34m[1mwandb[0m: Agent Starting Run: ghw4uggg with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_dec: 2
[34m[1mwandb[0m: 	num_enc: 2


28 72
Epoch: 1
Train: accuracy: 0.0 loss: 0.9771175837412672
Validation: accuracy: 0.0 Loss: 0.8357292646947114
Epoch: 2
Train: accuracy: 0.0 loss: 0.6409975852508212
Validation: accuracy: 0.0 Loss: 0.8738661278849063
Epoch: 3
Train: accuracy: 0.0 loss: 0.5769263628268346
Validation: accuracy: 0.0 Loss: 0.8817227379135464
Epoch: 4
Train: accuracy: 0.0 loss: 0.529188605375165
Validation: accuracy: 0.0 Loss: 0.7942237076552018
Epoch: 5
Train: accuracy: 0.013702853619266212 loss: 0.47011234887822745
Validation: accuracy: 0.03545470661230278 Loss: 0.6820898652076721
Epoch: 6
Train: accuracy: 0.10619711554931315 loss: 0.4112136544879347
Validation: accuracy: 0.407729126041482 Loss: 0.6293107621047808
Epoch: 7
Train: accuracy: 0.3066013497310815 loss: 0.3648667776688738
Validation: accuracy: 0.7622761921645098 Loss: 0.617484564366548
Epoch: 8
Train: accuracy: 0.6680141139392278 loss: 0.32764819407567186
Validation: accuracy: 1.0990959049813862 Loss: 0.5738127944262131
Epoch: 9
Train: accurac

0,1
train_accuracy,▁▁▁▁▁▁▁▁▁▂▂▂▃▄▄▅▆▇▇█
train_loss,█▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▂▂▂▂▃▃▄▅▅▆▇▇█
val_loss,▇██▇▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁▁

0,1
train_accuracy,21.5426
train_loss,0.12293
val_accuracy,15.28098
val_loss,0.37997


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [12]:
def evaluate_and_save_predictions(encoder, decoder):
    results = []
    correct_preds = []
    incorrect_preds = []
    output_file = 'test_predictions.tsv'
    table = PrettyTable()
    table.field_names = ["Sample #", "Input", "Ground Truth", "Prediction", "Correct?"]

    with torch.no_grad():
        for i in range(len(x_test[0])):
            input_seq = x_test[1][i]
            true_output = x_test[0][i]

            input_tensor = tensorFromWord(input_lang, input_seq)
            encoder_outputs, encoder_hidden = encoder(input_tensor)
            decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)

            _, topi = decoder_outputs.topk(1)
            decoded_ids = topi.squeeze()

            predicted_output = ''
            for idx in decoded_ids:
                if idx.item() == EOW_token:
                    break
                predicted_output += output_lang.index2letter.get(idx.item(), '?')

            results.append((input_seq, true_output, predicted_output))

            if predicted_output == true_output:
                correct_preds.append((input_seq, true_output, predicted_output))
            else:
                incorrect_preds.append((input_seq, true_output, predicted_output))

    # Randomly sample 10 from each category
    sample_correct = random.sample(correct_preds, min(10, len(correct_preds)))
    sample_incorrect = random.sample(incorrect_preds, min(10, len(incorrect_preds)))

    # Add rows to table
    for i, (inp, true, pred) in enumerate(sample_correct + sample_incorrect):
        table.add_row([
            i + 1,
            inp,
            true,
            pred,
            "✅" if pred == true else "❌"
        ])

    # Print table
    print("\n📊 Sample Predictions (10 Correct + 10 Incorrect):\n")
    print(table)

    # Save all predictions as TSV with UTF-8 encoding
    df = pd.DataFrame(results, columns=["Input", "Ground Truth", "Prediction"])
    df.to_csv(output_file, index=False, sep='\t', encoding='utf-8-sig')
    print(f"\n📁 All test predictions saved to: {output_file}")


In [13]:
# Best Model
num_epochs = 20

algorithms = {'rnn': nn.RNN,'gru': nn.GRU,'lstm': nn.LSTM}

best_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'inp_embed_size':{
            'values': [256]
        },
        'num_dec': {
            'values': [3]
        },
        'num_enc': {
            'values': [3]
        },
        'dropout': {
            'values': [0.3]
        },
        'lr': {
            'values': [0.001]
        },
        'hidden_size': {
            'values': [256]
        },
        'batch_size': {
            'values': [64]
        },
        'cell_type':{
            'values': ['lstm']
        }
    }
}

sweep_id = wandb.sweep(sweep=best_config, project='DL_A3')


Create sweep with ID: ga83y5dh
Sweep URL: https://wandb.ai/alandandoor-iit-madras/DL_A3/sweeps/ga83y5dh


In [14]:
def test():
    with wandb.init() as run:
        config = wandb.config

        # Run name formatting
        wandb.run.name = (
            f"{config.cell_type}-E_{config.num_enc}-D_{config.num_enc}-"
            f"do_{config.dropout}-bs_{config.batch_size}-lr_{config.lr}-"
            f"hs_{config.hidden_size}-emb_{config.inp_embed_size}-")
        
        train_dataloader = get_dataloader(x_train, input_lang, output_lang, wandb.config.batch_size)
        val_dataloader = get_dataloader(x_val, input_lang, output_lang, wandb.config.batch_size)
        test_dataloader = get_dataloader(x_test, input_lang, output_lang, wandb.config.batch_size)
        encoder = EncoderRNN(wandb.config, input_lang.n_letters).to(device)
        decoder = DecoderRNN(wandb.config, output_lang.n_letters).to(device)
        print(input_lang.n_letters, output_lang.n_letters)
        train(train_dataloader, val_dataloader, test_dataloader, encoder, decoder, num_epochs, wandb.config)
        encoder.eval()
        decoder.eval()
        evaluate_and_save_predictions(encoder, decoder)
        
wandb.agent(sweep_id, function=test, count=1) # calls main function for count number of times. , count=1
wandb.finish() #cojvqj9b sweep_id

[34m[1mwandb[0m: Agent Starting Run: czdklr55 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_dec: 3
[34m[1mwandb[0m: 	num_enc: 3


28 72
Epoch: 1
Train: accuracy: 0.006851426809633106 loss: 0.5371539197119538
Validation: accuracy: 0.01772735330615139 Loss: 0.5985575479068114
Epoch: 2
Train: accuracy: 6.731526840464527 loss: 0.22551415269013173
Validation: accuracy: 5.495479524906932 Loss: 0.3696252606557996
Epoch: 3
Train: accuracy: 27.90586139563564 loss: 0.10346452793083995
Validation: accuracy: 15.511434142882468 Loss: 0.2913090188181802
Epoch: 4
Train: accuracy: 41.58130930766332 loss: 0.06927306238900531
Validation: accuracy: 24.8360219819181 Loss: 0.24873447635870302
Epoch: 5
Train: accuracy: 51.16988112774485 loss: 0.05186797272483334
Validation: accuracy: 30.384683566743487 Loss: 0.2210860520266415
Epoch: 6
Train: accuracy: 58.362166421157205 loss: 0.04085318874751359
Validation: accuracy: 38.8406310937777 Loss: 0.18889590143487694
Epoch: 7
Train: accuracy: 63.81418930492275 loss: 0.03344602544607364
Validation: accuracy: 45.346569757135256 Loss: 0.17045104059945332
Epoch: 8
Train: accuracy: 68.84313658319

0,1
train_accuracy,▁▂▃▄▅▆▆▇▇▇▇▇████████
train_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▂▃▄▄▅▅▆▆▆▇▇▇▇█████
val_loss,█▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
train_accuracy,87.04566
train_loss,0.00858
val_accuracy,83.63765
val_loss,0.04788
