# Vanilla Seq2Seq Report Tester Notebook
This notebook takes the best model configuration from the sweep, retrains it with appropriate callbacks (best-checkpoint saving and early stopping), predicts on the test set, saves the predictions, and creates some visualizations if required. Without further ado, let's get into the assignment.

In [1]:
# Importing the necessary libraries #
# Importing the necessary libraries needed
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms.functional as Fn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset, Dataset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import os
from tqdm import tqdm
import wandb
# Never commit credentials to a notebook: the W&B API key is read from the WANDB_API_KEY
# environment variable (e.g. populated from the platform's secret store).
# When the variable is unset, key=None lets wandb fall back to its own credential lookup.
# NOTE(review): the key that used to be hard-coded here has been exposed and should be rotated.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mae21b105[0m ([33mRough[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
# Data preparation
# Loading the dataset: tab-separated lexicon files with columns (native word, latin transliteration, count).
DATA_DIR = '/kaggle/input/dl-a3-tamil/ta_lexicons'


def _load_split(split):
    """Read one lexicon split ("train", "dev" or "test") into a DataFrame.

    The word columns are forced to ``str`` and pandas' default NA markers are disabled:
    otherwise words that happen to be spelled like a missing-value marker
    ("null", "NA", "nan", "None", ...) are parsed as NaN and later stringified to "nan".
    """
    return pd.read_csv(
        f"{DATA_DIR}/ta.translit.sampled.{split}.tsv",
        sep='\t',
        header=None,
        names=["native", "latin", "count"],
        dtype={"native": str, "latin": str},
        keep_default_na=False,
    )


df_train = _load_split("train")
df_test = _load_split("test")
df_val = _load_split("dev")

In [None]:
# Preparing the dataset for the model to fit #
class Dataset_Tamil(Dataset):
    """Character-level transliteration dataset (latin -> native) with one-hot encoding.

    Every item is a triple of float32 tensors:
      * encoder_input  -- (max_enc_seq_len, total_encoder_tokens), the latin word
      * decoder_input  -- (max_dec_seq_len, total_decoder_tokens), "\\t" + native word + "\\n"
      * decoder_output -- same shape as decoder_input, the decoder input shifted left by one step

    Positions after the end of a word are filled with the space character, which acts as padding.

    Args:
        dataframe: frame with at least the columns "latin" and "native".
        build_vocab: when True the vocabularies and maximum lengths are derived from
            ``dataframe``; when False they must be supplied through the remaining arguments
            (typically taken from the training dataset).
        input_token_index: char -> index mapping for the latin side (used when build_vocab=False).
        output_token_index: char -> index mapping for the native side (used when build_vocab=False).
        max_enc_seq_len: number of encoder time steps (used when build_vocab=False).
        max_dec_seq_len: number of decoder time steps (used when build_vocab=False).

    Raises:
        ValueError: if build_vocab is False and a token index is missing.
    """

    def __init__(self, dataframe, build_vocab=True, input_token_index=None, output_token_index=None,
                 max_enc_seq_len=0, max_dec_seq_len=0):

        # Input variables
        self.input_df = dataframe
        # Characters of the language (replaced by sorted lists in build_vocab)
        self.input_characters = set()
        self.output_characters = set()

        # Column-wise iteration instead of DataFrame.iterrows(): same values, far less overhead.
        # "\t" marks the start and "\n" the end of every target word.
        self.input_words = [str(word) for word in dataframe["latin"]]
        self.output_words = ["\t" + str(word) + "\n" for word in dataframe["native"]]

        if build_vocab:
            self.build_vocab()
        else:
            if input_token_index is None or output_token_index is None:
                raise ValueError(
                    "input_token_index and output_token_index must be provided when build_vocab=False"
                )
            # Token index for sequence building
            self.input_token_index = input_token_index
            self.output_token_index = output_token_index
            # Reverse lookups, so every dataset exposes the same attributes as a vocab-building one
            self.input_token_index_reversed = {i: char for char, i in input_token_index.items()}
            self.output_token_index_reversed = {i: char for char, i in output_token_index.items()}
            # Sequence lengths for the encoder / decoder
            self.max_enc_seq_len = max_enc_seq_len
            self.max_dec_seq_len = max_dec_seq_len

        # Vocabulary sizes = width of the one-hot vectors
        self.total_encoder_tokens = len(self.input_token_index)
        self.total_decoder_tokens = len(self.output_token_index)

    def build_vocab(self):
        """Derive vocabularies, reverse lookups and maximum sequence lengths from the stored words."""
        # Joining with " " guarantees the padding character is part of the character set
        # whenever there are at least two words; the explicit checks below cover the rest.
        self.input_characters = sorted(set(" ".join(self.input_words)))
        self.output_characters = sorted(set(" ".join(self.output_words)))
        # Adding the padding character if not present
        if " " not in self.input_characters:
            self.input_characters.append(" ")
        if " " not in self.output_characters:
            self.output_characters.append(" ")

        # Fitting/Finding the necessary values from training data
        self.input_token_index = {char: i for i, char in enumerate(self.input_characters)}
        self.output_token_index = {char: i for i, char in enumerate(self.output_characters)}

        self.input_token_index_reversed = {i: char for i, char in enumerate(self.input_characters)}
        self.output_token_index_reversed = {i: char for i, char in enumerate(self.output_characters)}

        # default=0 keeps an empty dataframe from raising inside max()
        self.max_enc_seq_len = max((len(txt) for txt in self.input_words), default=0)
        self.max_dec_seq_len = max((len(txt) for txt in self.output_words), default=0)

    def __len__(self):
        return len(self.input_words)

    def __getitem__(self, index):
        """Return (encoder_input, decoder_input, decoder_output) one-hot tensors for one word pair.

        Characters missing from the vocabulary leave their time step as an all-zero row.
        Words longer than the configured maximum lengths are truncated instead of raising
        IndexError (this can happen for validation/test words longer than any training word).
        """
        input_word = self.input_words[index]
        output_word = self.output_words[index]

        # One-hot buffers for each stage of the network
        encoder_input = np.zeros((self.max_enc_seq_len, self.total_encoder_tokens), dtype=np.float32)
        decoder_input = np.zeros((self.max_dec_seq_len, self.total_decoder_tokens), dtype=np.float32)
        decoder_output = np.zeros((self.max_dec_seq_len, self.total_decoder_tokens), dtype=np.float32)

        enc_pad = self.input_token_index[" "]
        dec_pad = self.output_token_index[" "]

        # Encoder side: characters first, then padding up to max_enc_seq_len
        for t, char in enumerate(input_word[:self.max_enc_seq_len]):
            token = self.input_token_index.get(char)
            if token is not None:
                encoder_input[t, token] = 1.0
        for t in range(len(input_word), self.max_enc_seq_len):
            encoder_input[t, enc_pad] = 1.0

        # Decoder side: the target at step t-1 is the decoder input at step t
        for t, char in enumerate(output_word):
            token = self.output_token_index.get(char)
            if token is None:
                continue
            if t < self.max_dec_seq_len:
                decoder_input[t, token] = 1.0
            if 0 < t <= self.max_dec_seq_len:
                decoder_output[t - 1, token] = 1.0

        # Fill remaining positions with the space (padding) character
        for t in range(len(output_word), self.max_dec_seq_len):
            decoder_input[t, dec_pad] = 1.0
        for t in range(len(output_word) - 1, self.max_dec_seq_len):
            decoder_output[t, dec_pad] = 1.0

        return (
            torch.from_numpy(encoder_input),
            torch.from_numpy(decoder_input),
            torch.from_numpy(decoder_output)
        )

In [None]:
# Model classes definitions #
class Encoder(nn.Module):
    """Recurrent encoder built around a batch-first LSTM, GRU or plain RNN.

    ``cell_type`` is matched case-insensitively; anything other than "LSTM" or "GRU"
    selects ``nn.RNN``. The recurrent layer is stored as ``self.enc``.
    """

    def __init__(self, input_size, hidden_size, dropout=0.3, cell_type="RNN", num_layers=1, bi_directional=False):
        super().__init__()
        self.hidden_size = hidden_size
        self.cell_type = cell_type.upper()
        self.dropout = dropout
        self.num_layers = num_layers

        # Pick the recurrent layer class; unknown names fall back to the vanilla RNN.
        recurrent_cls = {"LSTM": nn.LSTM, "GRU": nn.GRU}.get(self.cell_type, nn.RNN)
        self.enc = recurrent_cls(
            input_size,
            hidden_size,
            batch_first=True,
            dropout=self.dropout,
            num_layers=self.num_layers,
            bidirectional=bi_directional,
        )

    def forward(self, x):
        """Run the recurrent layer on ``x`` and return ``(outputs, final_state)``.

        For an LSTM ``final_state`` is the ``(h_n, c_n)`` tuple, otherwise it is ``h_n``.
        """
        sequence_out, final_state = self.enc(x)
        return sequence_out, final_state
        

class Decoder(nn.Module):
    """Unidirectional recurrent decoder built around a batch-first LSTM, GRU or plain RNN.

    ``cell_type`` is matched case-insensitively; anything other than "LSTM" or "GRU"
    selects ``nn.RNN``. The recurrent layer is stored as ``self.dec``.
    """

    def __init__(self, input_size, hidden_size, dropout=0.3, cell_type='RNN', num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.cell_type = cell_type.upper()
        self.dropout = dropout
        self.num_layers = num_layers

        # Pick the recurrent layer class; unknown names fall back to the vanilla RNN.
        recurrent_cls = {"LSTM": nn.LSTM, "GRU": nn.GRU}.get(self.cell_type, nn.RNN)
        self.dec = recurrent_cls(
            input_size,
            hidden_size,
            batch_first=True,
            dropout=self.dropout,
            num_layers=self.num_layers,
        )

    def forward(self, x, states):
        """Advance the decoder over ``x`` starting from ``states``.

        ``states`` is passed straight to the recurrent layer (an ``(h, c)`` tuple for an
        LSTM, a single tensor otherwise). Returns ``(outputs, new_states)`` with
        ``new_states`` in the layer's own state format.
        """
        sequence_out, new_states = self.dec(x, states)
        return sequence_out, new_states
        
# Helper function
def combine_directions(hidden):
    """Concatenate consecutive pairs of slices of ``hidden`` along the last dimension.

    Slices ``2k`` and ``2k + 1`` along dim 0 are joined on their last dimension and the
    results are stacked, so dim 0 is halved and the last dimension is doubled. It is
    called on the final states of a bidirectional encoder to merge the two directions
    of each layer.
    """
    return torch.stack([
        torch.cat((hidden[idx], hidden[idx + 1]), dim=-1)
        for idx in range(0, hidden.size(0), 2)
    ])


class Seq2Seq(nn.Module):
    """Vanilla (attention-free) encoder-decoder for character transliteration.

    One-hot input characters go through a linear "embedding" followed by tanh, then
    through the recurrent encoder. The encoder's final state initialises the decoder,
    whose outputs are projected to output-vocabulary logits by ``self.fc``.

    Args:
        input_token_index: char -> index mapping of the source vocabulary.
        output_token_index: char -> index mapping of the target vocabulary.
        max_dec_seq_len: number of steps generated by ``predict_greedy``.
        embedding_dim: size of the linear input embedding.
        hidden_size_enc: encoder hidden size (per direction).
        bi_directional: whether the encoder is bidirectional; the decoder hidden size
            is doubled in that case so the concatenated directions fit.
        nature: stored on the instance; not used by the methods of this class.
        enc_cell / dec_cell: "LSTM", "GRU" or anything else for a plain RNN.
        num_layers: number of recurrent layers (shared by encoder and decoder).
        dropout: dropout passed to both recurrent stacks.
        device: device that inputs are moved to and new tensors are created on.
    """

    def __init__(self, input_token_index, output_token_index, max_dec_seq_len, embedding_dim,hidden_size_enc, bi_directional,
            nature="train", enc_cell="LSTM", dec_cell="LSTM", num_layers=1,dropout=0.2, device="cpu"):
        super(Seq2Seq, self).__init__()
        # NOTE: attribute names are kept as in the original code (they hold the char -> index maps).
        self.input_index_token = input_token_index
        self.output_index_token = output_token_index
        self.max_dec_seq_len = max_dec_seq_len
        self.nature = nature
        self.enc_cell_type = enc_cell.upper()
        self.dec_cell_type = dec_cell.upper()
        self.num_layers= num_layers
        self.bi_directional = bi_directional
        self.hidden_size_enc = hidden_size_enc
        # A bidirectional encoder hands over forward+backward states concatenated.
        self.hidden_size_dec = (2 if bi_directional else 1) * hidden_size_enc
        # Index of the start-of-word marker fed to the decoder at step 0 of greedy decoding.
        # Looked up from the vocabulary ("\t" is what the dataset prepends); falls back to
        # the previously hard-coded index 0 when the marker cannot be looked up.
        if isinstance(output_token_index, dict):
            self.start_index = output_token_index.get("\t", 0)
        else:
            self.start_index = 0

        # Layer creation order is unchanged so seeded initialisation and state_dict keys stay identical.
        self.embedding = nn.Linear(in_features=len(self.input_index_token), out_features=embedding_dim)
        self.embedding_act = nn.Tanh()
        self.encoder = Encoder(input_size=embedding_dim, hidden_size=hidden_size_enc, dropout=dropout, cell_type=enc_cell, num_layers=num_layers, bi_directional=self.bi_directional).to(device)
        self.decoder = Decoder(input_size=len(self.output_index_token), hidden_size=self.hidden_size_dec, dropout=dropout, cell_type=dec_cell, num_layers=num_layers).to(device)
        self.device = device
        self.loss_fn = nn.CrossEntropyLoss()
        self.fc = nn.Linear(in_features=self.hidden_size_dec, out_features=len(output_token_index))

    def _initial_decoder_state(self, ENC_IN):
        """Encode ``ENC_IN`` and return the state the decoder should start from.

        Shared by ``forward`` and ``predict_greedy`` (previously duplicated in both).
        The state bridging rules are exactly those of the original code, so existing
        checkpoints behave the same:
          * bidirectional encoder: the two directions of each layer are concatenated;
          * LSTM decoder + non-LSTM encoder: the encoder state becomes the *cell* state
            and the hidden state starts at zero;
          * non-LSTM decoder + LSTM encoder: the encoder's *cell* state is used.
        """
        batch_size = ENC_IN.size(0)
        input_embedding = self.embedding_act(self.embedding(ENC_IN))
        _, states_enc = self.encoder(input_embedding)

        if self.bi_directional:
            if self.enc_cell_type == "LSTM":
                h, c = states_enc
                states_enc = (combine_directions(h), combine_directions(c))
            else:
                states_enc = combine_directions(states_enc)

        encoder_is_lstm = isinstance(states_enc, tuple)
        if self.dec_cell_type == "LSTM":
            if encoder_is_lstm:
                return states_enc
            h = torch.zeros(self.num_layers, batch_size, self.decoder.hidden_size, device=self.device)
            return (h, states_enc)
        if encoder_is_lstm:
            return states_enc[1]
        return states_enc

    def forward(self, batch):
        """Teacher-forced pass.

        Args:
            batch: ``(ENC_IN, DEC_IN, DEC_OUT)`` one-hot tensors; only the first two are used.

        Returns:
            Logits with one vocabulary-sized vector per decoder input step.
        """
        ENC_IN, DEC_IN, _ = batch
        ENC_IN = ENC_IN.to(self.device)
        DEC_IN = DEC_IN.to(self.device)

        states_dec = self._initial_decoder_state(ENC_IN)

        # The decoder consumes the whole ground-truth input sequence at once.
        decoder_outputs, _ = self.decoder(DEC_IN, states_dec)
        return self.fc(decoder_outputs)

    def predict_greedy(self, batch):
        """Greedy decoding: each step's argmax is fed back as the next decoder input.

        Args:
            batch: ``(ENC_IN, DEC_IN, DEC_OUT)``; only ``ENC_IN`` is used.

        Returns:
            Logits of shape (batch, max_dec_seq_len, output vocabulary size).
        """
        ENC_IN, _, _ = batch
        ENC_IN = ENC_IN.to(self.device)

        batch_size = ENC_IN.size(0)
        vocab_size = len(self.output_index_token)
        states_dec = self._initial_decoder_state(ENC_IN)

        # Final matrix
        final_out = torch.zeros(batch_size, self.max_dec_seq_len, vocab_size, device=self.device)

        # Initial decoder input: one-hot start token for every sequence in the batch
        in_ = torch.zeros(batch_size, 1, vocab_size, device=self.device)
        in_[:, 0, self.start_index] = 1.0

        batch_index = torch.arange(batch_size, device=self.device)
        for t in range(self.max_dec_seq_len):
            out_step, states_dec = self.decoder(in_, states_dec)
            logits_step = self.fc(out_step.squeeze(1))
            final_out[:, t, :] = logits_step

            # Greedy argmax becomes the (one-hot) input of the next step
            top1 = torch.argmax(logits_step, dim=1)
            in_ = torch.zeros(batch_size, 1, vocab_size, device=self.device)
            in_[batch_index, 0, top1] = 1.0

        return final_out


In [None]:
# Function for validation of the model #
def validate_seq2seq(model, val_loader, device, val_type = "greedy", beam_width=None):
    """Evaluate ``model`` on ``val_loader``.

    The loss is always computed from the teacher-forced forward pass. Accuracies are
    computed from ``model.predict_greedy`` when ``val_type == "greedy"`` and from the
    teacher-forced logits for any other ``val_type`` (the original code flattened the
    logits in place, so that second path crashed on ``argmax(dim=2)``).

    Args:
        model: object providing ``eval()``, ``__call__(batch)`` and ``predict_greedy(batch)``.
        val_loader: iterable of ``(ENC_IN, DEC_IN, DEC_OUT)`` one-hot batches.
        device: device the targets are moved to for comparison with the predictions.
        val_type: "greedy" for free-running decoding, anything else for teacher forcing.
        beam_width: accepted for interface compatibility; not used.

    Returns:
        (avg_loss, char_accuracy, word_accuracy) where avg_loss is the mean of the
        per-batch losses, char_accuracy counts non-padding positions, and a word counts
        as correct only when all of its non-padding positions match.
    """
    model.eval()

    # Padding index: looked up from the model's output vocabulary (space is the padding
    # character); falls back to the previously hard-coded value 2.
    token_index = getattr(model, "output_index_token", None)
    pad_index = token_index.get(" ", 2) if isinstance(token_index, dict) else 2
    loss_fn = nn.CrossEntropyLoss(ignore_index=pad_index)

    total_loss = 0.0
    num_batches = 0
    correct_chars = 0
    total_chars = 0
    correct_words = 0
    total_words = 0

    with torch.no_grad():
        tqdm_progress = tqdm(val_loader, desc="Predicting ...")
        for batch in tqdm_progress:
            _, _, DEC_OUT = batch
            true_tokens = DEC_OUT.to(device).argmax(dim=-1)

            # Teacher-forced pass for the loss (the model moves its own inputs to its device)
            logits = model(batch)
            loss = loss_fn(logits.reshape(-1, logits.size(-1)), true_tokens.reshape(-1))
            total_loss += loss.item()
            num_batches += 1

            # Predictions used for the accuracies; `logits` is kept un-flattened for this purpose
            if val_type == "greedy":
                pred_logits = model.predict_greedy(batch)
            else:
                pred_logits = logits
            pred_tokens = pred_logits.argmax(dim=-1)

            mask = true_tokens != pad_index  # Ignore PAD tokens
            matches = pred_tokens == true_tokens

            # Character-wise accuracy
            correct_chars += (matches & mask).sum().item()
            total_chars += mask.sum().item()

            # Word-wise accuracy: every non-padding position has to match
            total_words += true_tokens.size(0)
            correct_words += (matches | ~mask).all(dim=1).sum().item()

    avg_loss = total_loss / num_batches if num_batches > 0 else 0.0
    accuracy = correct_chars / total_chars if total_chars > 0 else 0.0
    word_acc = correct_words / total_words if total_words > 0 else 0.0
    return avg_loss, accuracy, word_acc

In [None]:
# Trainloop
def train_seq2seq(model, train_loader, val_loader, optimizer, num_epochs, device, beam_sizes = [3,5], run=None,
                  patience=5, checkpoint_path="Vanilla_Best_model.pth"):
    """Train ``model`` with teacher forcing, validating and checkpointing after every epoch.

    After each epoch the model is evaluated with ``validate_seq2seq``; whenever the
    validation word accuracy improves, the weights are saved to ``checkpoint_path``.
    Training stops early once more than ``patience`` consecutive epochs bring no improvement.

    Args:
        model: the Seq2Seq model; called as ``model(batch)`` and expected to return logits.
        train_loader / val_loader: loaders yielding ``(ENC_IN, DEC_IN, DEC_OUT)`` one-hot batches.
        optimizer: optimizer over the model parameters.
        num_epochs: maximum number of epochs.
        device: device the targets are moved to.
        beam_sizes: accepted for interface compatibility; not used.
        run: optional wandb run; per-epoch metrics and best accuracies are logged to it.
        patience: early-stopping patience (previously hard-coded to 5).
        checkpoint_path: where the best weights are written (previously hard-coded).
    """
    # Padding index: looked up from the model's output vocabulary (space is the padding
    # character); falls back to the previously hard-coded value 2.
    token_index = getattr(model, "output_index_token", None)
    pad_index = token_index.get(" ", 2) if isinstance(token_index, dict) else 2
    loss_fn = nn.CrossEntropyLoss(ignore_index=pad_index)

    max_val_char_acc = 0
    max_val_word_acc = 0
    # Guarantees that a checkpoint from THIS run exists even if word accuracy never exceeds 0;
    # otherwise a later load would fail or silently pick up a stale file.
    checkpoint_saved = False
    counter = 0
    print("Training of the model has started...")

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        tqdm_loader = tqdm(train_loader, desc=f"Epoch : {epoch + 1} ", ncols=100)

        for batch in tqdm_loader:
            _, _, DEC_OUT = batch
            DEC_OUT = DEC_OUT.to(device)

            # Teacher-forced forward pass (the model moves its own inputs to its device)
            decoder_output = model(batch)

            # Flatten (batch, time) for the loss; targets are the argmax of the one-hot rows
            decoder_output = decoder_output.reshape(-1, decoder_output.size(-1))
            decoder_target_indices = DEC_OUT.argmax(dim=-1).reshape(-1)
            loss = loss_fn(decoder_output, decoder_target_indices)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            tqdm_loader.set_postfix({"Train Loss": loss.item()})

        avg_loss = epoch_loss / max(len(train_loader), 1)
        print(f"Epoch [{epoch+1}/{num_epochs}] | Train Loss: {avg_loss:.4f}")

        val_loss, val_acc, val_word_acc = validate_seq2seq(model, val_loader, device)
        print(f"Epoch [{epoch+1}/{num_epochs}] | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | Val Word Acc: {val_word_acc:.4f}")

        if run is not None:
            run.log({"train_loss_epoch" : avg_loss, "val_loss_epoch" : val_loss, "val_char_acc" : val_acc, "val_word_acc" : val_word_acc})

        if val_word_acc > max_val_word_acc or not checkpoint_saved:
            max_val_char_acc = val_acc
            max_val_word_acc = val_word_acc
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
            counter = 0
        else:
            counter += 1

        # Early stopping: give up after more than `patience` epochs without improvement
        if counter > patience:
            break

    if run is not None:
        run.summary["max_val_char_acc"] = max_val_char_acc
        run.summary["max_val_word_acc"] = max_val_word_acc

In [7]:
torch.cuda.empty_cache()

# Fix the RNG seeds so weight initialisation, dropout and batch shuffling are repeatable
# across "Restart & Run All" executions (torch.manual_seed also seeds the CUDA generators).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Best hyper-parameters found by the sweep
config = {
        "learning_rate" : 0.001,
        "dropout_rnn" : 0.4, 
        "batch_size" :  256,
        "epochs" : 15,
        "embedding_dim" : 64,
        "num_layers" : 5,
        "hidden_size_enc" : 256,
        "enc_cell_type" : "GRU",
        "dec_cell_type" : "LSTM",
        "bi_directional" : True,
        "seed" : SEED,
    }
run = wandb.init(entity="A3_DA6401_DL", project="Vanilla_RNN", name="Best Model Vanilla S2S", config=config)

# Loading the datasets and dataloaders
# The vocabulary and sequence lengths are fitted on the training split only and reused
# for validation and test so that all three share the same one-hot layout.
train_dataset = Dataset_Tamil(df_train)
shared_vocab_kwargs = dict(
    build_vocab=False,
    input_token_index=train_dataset.input_token_index,
    output_token_index=train_dataset.output_token_index,
    max_enc_seq_len=train_dataset.max_enc_seq_len,
    max_dec_seq_len=train_dataset.max_dec_seq_len,
)
val_dataset = Dataset_Tamil(df_val, **shared_vocab_kwargs)
test_dataset = Dataset_Tamil(df_test, **shared_vocab_kwargs)

train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Seq2Seq(input_token_index=train_dataset.input_token_index, output_token_index=train_dataset.output_token_index, max_dec_seq_len=train_dataset.max_dec_seq_len,
                embedding_dim=config["embedding_dim"], hidden_size_enc=config["hidden_size_enc"], bi_directional=config["bi_directional"], enc_cell=config["enc_cell_type"], dec_cell=config["dec_cell_type"], 
                num_layers=config["num_layers"], dropout=config["dropout_rnn"], device=device).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])

train_seq2seq(model, train_loader, val_loader, optimizer, num_epochs=config["epochs"], device=device, run = run)

Training of the model has started...


Epoch : 1 : 100%|████████████████████████████████| 267/267 [00:38<00:00,  6.88it/s, Train Loss=2.05]


Epoch [1/15] | Train Loss: 2.5555


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.19it/s]


Epoch [1/15] | Val Loss: 2.0390 | Val Acc: 0.1724 | Val Word Acc: 0.0001


Epoch : 2 : 100%|███████████████████████████████| 267/267 [00:38<00:00,  6.92it/s, Train Loss=0.936]


Epoch [2/15] | Train Loss: 1.4576


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.57it/s]


Epoch [2/15] | Val Loss: 0.9106 | Val Acc: 0.5602 | Val Word Acc: 0.1017


Epoch : 3 : 100%|███████████████████████████████| 267/267 [00:38<00:00,  6.91it/s, Train Loss=0.431]


Epoch [3/15] | Train Loss: 0.6390


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.55it/s]


Epoch [3/15] | Val Loss: 0.4643 | Val Acc: 0.7740 | Val Word Acc: 0.4116


Epoch : 4 : 100%|███████████████████████████████| 267/267 [00:38<00:00,  6.93it/s, Train Loss=0.324]


Epoch [4/15] | Train Loss: 0.3652


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.54it/s]


Epoch [4/15] | Val Loss: 0.3615 | Val Acc: 0.8226 | Val Word Acc: 0.5103


Epoch : 5 : 100%|████████████████████████████████| 267/267 [00:38<00:00,  6.92it/s, Train Loss=0.22]


Epoch [5/15] | Train Loss: 0.2669


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.59it/s]


Epoch [5/15] | Val Loss: 0.3292 | Val Acc: 0.8327 | Val Word Acc: 0.5443


Epoch : 6 : 100%|███████████████████████████████| 267/267 [00:38<00:00,  6.90it/s, Train Loss=0.236]


Epoch [6/15] | Train Loss: 0.2123


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.54it/s]


Epoch [6/15] | Val Loss: 0.3262 | Val Acc: 0.8354 | Val Word Acc: 0.5554


Epoch : 7 : 100%|███████████████████████████████| 267/267 [00:39<00:00,  6.81it/s, Train Loss=0.155]


Epoch [7/15] | Train Loss: 0.1775


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.38it/s]


Epoch [7/15] | Val Loss: 0.3224 | Val Acc: 0.8443 | Val Word Acc: 0.5727


Epoch : 8 : 100%|███████████████████████████████| 267/267 [00:39<00:00,  6.83it/s, Train Loss=0.134]


Epoch [8/15] | Train Loss: 0.1514


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.53it/s]


Epoch [8/15] | Val Loss: 0.3228 | Val Acc: 0.8427 | Val Word Acc: 0.5783


Epoch : 9 : 100%|███████████████████████████████| 267/267 [00:38<00:00,  6.89it/s, Train Loss=0.133]


Epoch [9/15] | Train Loss: 0.1316


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.59it/s]


Epoch [9/15] | Val Loss: 0.3266 | Val Acc: 0.8430 | Val Word Acc: 0.5771


Epoch : 10 : 100%|██████████████████████████████| 267/267 [00:38<00:00,  6.91it/s, Train Loss=0.132]


Epoch [10/15] | Train Loss: 0.1160


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.61it/s]


Epoch [10/15] | Val Loss: 0.3258 | Val Acc: 0.8435 | Val Word Acc: 0.5784


Epoch : 11 : 100%|██████████████████████████████| 267/267 [00:38<00:00,  6.92it/s, Train Loss=0.136]


Epoch [11/15] | Train Loss: 0.1043


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.59it/s]


Epoch [11/15] | Val Loss: 0.3478 | Val Acc: 0.8440 | Val Word Acc: 0.5793


Epoch : 12 : 100%|█████████████████████████████| 267/267 [00:38<00:00,  6.93it/s, Train Loss=0.0851]


Epoch [12/15] | Train Loss: 0.0972


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.63it/s]


Epoch [12/15] | Val Loss: 0.3546 | Val Acc: 0.8386 | Val Word Acc: 0.5694


Epoch : 13 : 100%|█████████████████████████████| 267/267 [00:38<00:00,  6.89it/s, Train Loss=0.0707]


Epoch [13/15] | Train Loss: 0.0877


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.62it/s]


Epoch [13/15] | Val Loss: 0.3464 | Val Acc: 0.8481 | Val Word Acc: 0.5902


Epoch : 14 : 100%|█████████████████████████████| 267/267 [00:38<00:00,  6.91it/s, Train Loss=0.0585]


Epoch [14/15] | Train Loss: 0.0800


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.61it/s]


Epoch [14/15] | Val Loss: 0.3583 | Val Acc: 0.8447 | Val Word Acc: 0.5818


Epoch : 15 : 100%|█████████████████████████████| 267/267 [00:38<00:00,  6.91it/s, Train Loss=0.0958]


Epoch [15/15] | Train Loss: 0.0764


Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.56it/s]

Epoch [15/15] | Val Loss: 0.3670 | Val Acc: 0.8447 | Val Word Acc: 0.5850





In [8]:
# Rebuild the architecture with the same hyper-parameters and restore the best checkpoint
# written during training.
model_best = Seq2Seq(input_token_index=train_dataset.input_token_index, output_token_index=train_dataset.output_token_index, max_dec_seq_len=train_dataset.max_dec_seq_len,
                embedding_dim=config["embedding_dim"], hidden_size_enc=config["hidden_size_enc"], bi_directional=config["bi_directional"], enc_cell=config["enc_cell_type"], dec_cell=config["dec_cell_type"], 
                num_layers=config["num_layers"], dropout=config["dropout_rnn"], device=device).to(device)

# Load from the same relative path the training loop saves to (the previous absolute
# /kaggle/working path only matched when that was the current working directory).
# map_location lets a checkpoint saved on GPU be restored in a CPU-only session.
model_best.load_state_dict(torch.load("Vanilla_Best_model.pth", map_location=device, weights_only=True))

<All keys matched successfully>

In [13]:
# Sanity check: re-evaluate the restored checkpoint on the validation split.
# The displayed tuple is (average loss, character accuracy, word accuracy).
validate_seq2seq(model_best, val_loader, device)

Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.34it/s]


(0.34643515089043864, 0.8480805578056257, 0.5901567306283872)

In [15]:
# Test data: evaluate the restored best checkpoint on the held-out split
test_loader = DataLoader(test_dataset, batch_size=config["batch_size"], shuffle=False)
test_metrics = validate_seq2seq(model_best, test_loader, device)
# validate_seq2seq returns (loss, char accuracy, word accuracy); the loss is not reported here
test_char_acc, test_word_acc = test_metrics[1], test_metrics[2]

# Record the final test accuracies in the wandb run summary
if run is not None:
    for summary_key, summary_value in (("test_char_acc", test_char_acc), ("test_word_acc", test_word_acc)):
        run.summary[summary_key] = summary_value

Predicting ...: 100%|██████████| 27/27 [00:03<00:00,  8.32it/s]


In [18]:
def _decode_until(token_ids, index_to_char, stop_char):
    """Map token ids to characters and join them, stopping before the first ``stop_char``."""
    chars = []
    for token_id in token_ids:
        char = index_to_char[token_id]
        if char == stop_char:
            break
        chars.append(char)
    return "".join(chars)


def write_outputs(model, data_loader, filename_csv, dataset=None):
    """Greedy-decode every batch of ``data_loader`` and write inputs, targets and predictions to CSV.

    Args:
        model: model exposing ``eval()`` and ``predict_greedy(batch)``.
        data_loader: loader yielding ``(ENC_IN, DEC_IN, DEC_OUT)`` one-hot batches.
        filename_csv: destination path of the CSV file.
        dataset: dataset providing ``input_token_index_reversed`` and
            ``output_token_index_reversed``; defaults to the global ``train_dataset``
            (the behaviour of the original implementation).

    The CSV has the columns "english", "truth tamil" and "pred tamil". Target and
    predicted words are cut at the end-of-word marker "\\n"; input words are cut at the
    first padding space.
    """
    if dataset is None:
        dataset = train_dataset
    input_chars = dataset.input_token_index_reversed
    output_chars = dataset.output_token_index_reversed

    list_in, list_out, list_pred = [], [], []

    # Inference only: make sure dropout is disabled and skip autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        tqdm_progress = tqdm(data_loader, desc="Writing...")
        for batch in tqdm_progress:
            ENC_IN, DEC_IN, DEC_OUT = batch
            out_pred = model.predict_greedy(batch)

            # One argmax + host transfer per batch instead of one .item() call per character
            input_ids = ENC_IN.argmax(dim=-1).tolist()
            target_ids = DEC_OUT.argmax(dim=-1).tolist()
            pred_ids = out_pred.argmax(dim=-1).tolist()

            for in_row, target_row, pred_row in zip(input_ids, target_ids, pred_ids):
                list_in.append(_decode_until(in_row, input_chars, " "))
                list_out.append(_decode_until(target_row, output_chars, "\n"))
                list_pred.append(_decode_until(pred_row, output_chars, "\n"))

    pds = pd.DataFrame(data = {"english" : list_in, "truth tamil" : list_out, "pred tamil" : list_pred})
    pds.to_csv(filename_csv, encoding="utf-8", index=False)


In [19]:
# Export predictions from the restored best checkpoint (`model_best`), i.e. the same weights
# the reported test accuracies were computed with. `model` still holds the last-epoch weights,
# which are not the best ones according to the validation word accuracy.
write_outputs(model_best, test_loader, "Vanilla_predictions_test.csv")

Writing...: 100%|██████████| 27/27 [00:03<00:00,  8.00it/s]
