<a href="https://www.kaggle.com/code/rutujkhare/fdl-a3-1?scriptVersionId=130561112" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np
import pandas as pd
import torch
import random
import wandb
import torch.nn as nn
import csv

In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
device_name = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using {}.".format(device_name))

Using cuda.


In [3]:
def preprocessingData(df, max_len, eng_to_int, hin_to_int, device=None):
    """Encode the word pairs in ``df`` as two fixed-width index tensors.

    Every English word is right-padded with ``'*'`` to ``max_len + 3``
    symbols.  Every Hindi word is prefixed with the start symbol ``'#'`` and
    then right-padded with ``'*'`` to the same width.

    Args:
        df: DataFrame with string columns ``'eng'`` and ``'hin'``.  Rows are
            read in positional order, so any index is accepted.
        max_len: reference word length; the encoded width is ``max_len + 3``.
        eng_to_int: character -> index map for English.  It has no fallback
            entry, so an unseen character raises ``KeyError``.
        hin_to_int: character -> index map for Hindi.  Characters missing
            from the map are encoded as ``hin_to_int['_']``.
        device: device for the returned tensors.  Defaults to the
            notebook-wide ``device_name``.

    Returns:
        ``(tensor_eng, tensor_hin)``: integer tensors with one row per
        DataFrame row and ``max_len + 3`` columns.

    Raises:
        ValueError: if a word does not fit into ``max_len + 3`` symbols
            (the rows would otherwise be ragged and fail inside torch).
        KeyError: if an English character is not in ``eng_to_int``.
    """
    if device is None:
        device = device_name
    padded_len = max_len + 3

    def pad(text, column):
        # Right-pad with '*'; refuse entries that cannot fit the fixed width.
        if len(text) > padded_len:
            raise ValueError(
                "'{}' entry {!r} has {} symbols but at most {} fit".format(
                    column, text, len(text), padded_len
                )
            )
        return text + "*" * (padded_len - len(text))

    index_eng_words = [
        [eng_to_int[ch] for ch in pad(word, 'eng')]
        for word in df['eng']
    ]
    # The unknown-symbol lookup stays lazy so that a map without '_' still
    # works as long as every character is known.
    index_hin_words = [
        [hin_to_int[ch] if ch in hin_to_int else hin_to_int['_']
         for ch in pad("#" + word, 'hin')]
        for word in df['hin']
    ]

    tensor_eng = torch.tensor(index_eng_words, dtype=torch.long).to(device)
    tensor_hin = torch.tensor(index_hin_words, dtype=torch.long).to(device)
    return tensor_eng, tensor_hin


In [4]:
def calculateAccuracy(trained_pred, y_true):
    """Return the fraction of predicted sequences that match exactly.

    Args:
        trained_pred: list with one entry per batch.  Each entry is a list
            of decoding steps, and each step holds one predicted index per
            sample (nested lists shaped ``(steps, batch)``).  A step may
            also be a bare int, which is treated as a batch of one.  Batches
            may differ in size.
        y_true: integer tensor with one row per sample and one column per
            decoding step.

    Returns:
        ``matches / number_of_predicted_rows`` as a float; ``0.0`` when
        there are no predictions.  Rows are paired in order, and surplus
        rows on either side are ignored for matching.
    """
    rows = []
    for batch in trained_pred:
        steps = torch.tensor(batch, device=y_true.device)
        if steps.numel() == 0:
            continue
        if steps.dim() == 1:
            # One sample per step: restore the missing batch axis.
            steps = steps.unsqueeze(1)
        # (steps, batch) -> (batch, steps) so that each row is one sequence.
        rows.append(steps.T)
    if not rows:
        return 0.0

    y_pred = torch.cat(rows, dim=0)
    paired = min(len(y_pred), len(y_true))
    exact = (y_pred[:paired] == y_true[:paired]).all(dim=1)
    return exact.sum().item() / len(y_pred)

In [5]:
class GRU_Encoder(nn.Module):
    """Embedding layer feeding a multi-layer, optionally bidirectional GRU."""

    def __init__(self, input_size, hid_size, num_of_enc_layers, emb_size, batch_size, dropout, bi_direct):
        super().__init__()
        # Hyper-parameters are kept because initialiseHidden() reads them.
        self.input_size, self.hid_size = input_size, hid_size
        self.num_of_enc_layers, self.emb_size = num_of_enc_layers, emb_size
        self.batch_size, self.bi_direct = batch_size, bi_direct
        self.dropout = dropout
        self.embedding = nn.Embedding(input_size, emb_size)
        self.gru = nn.GRU(
            input_size=emb_size,
            hidden_size=hid_size,
            num_layers=num_of_enc_layers,
            dropout=dropout,
            bidirectional=bi_direct,
        )

    def forward(self, input_data, hidden):
        """Embed the transposed ``input_data`` and run it through the GRU.

        The input is transposed before embedding; the GRU is built with the
        default ``batch_first=False``, so the first axis after the transpose
        is the one it iterates over.  Returns ``(output, hidden)``.
        """
        embedded = self.embedding(input_data.T).to(device_name)
        return self.gru(embedded, hidden)

    def initialiseHidden(self):
        """Return an all-zero initial hidden state on ``device_name``."""
        directions = 2 if self.bi_direct else 1
        return torch.zeros(
            directions * self.num_of_enc_layers,
            self.batch_size,
            self.hid_size,
            device=device_name,
        )


In [6]:
class GRU_Decoder(nn.Module):
    """GRU decoder that emits log-probabilities over the output vocabulary."""

    def __init__(self, op_size, num_of_dec_layers, hid_size, batch_size, emb_size, dropout, bi_direct):
        super().__init__()
        self.op_size, self.hid_size = op_size, hid_size
        self.num_of_dec_layers, self.emb_size = num_of_dec_layers, emb_size
        self.batch_size, self.bi_direct = batch_size, bi_direct
        # A bidirectional GRU concatenates both directions in its output.
        directions = 2 if bi_direct else 1
        self.embedding = nn.Embedding(op_size, emb_size)
        self.op = nn.Linear(directions * hid_size, op_size)
        self.softmax = nn.LogSoftmax(dim=2)
        self.gru = nn.GRU(
            input_size=emb_size,
            hidden_size=hid_size,
            num_layers=num_of_dec_layers,
            dropout=dropout,
            bidirectional=bi_direct,
        )

    def forward(self, input_data, hidden):
        """Decode ``input_data`` starting from ``hidden``.

        The embedded input is reshaped to ``(-1, batch_size, emb_size)``
        before it reaches the GRU.  Returns ``(log_probs, hidden)`` where
        ``log_probs`` is normalised over the last axis.
        """
        steps = self.embedding(input_data).view(-1, self.batch_size, self.emb_size)
        features, hidden = self.gru(steps, hidden)
        return self.softmax(self.op(features)), hidden

In [7]:
class RNN_Encoder(nn.Module):
    """Embedding layer feeding a multi-layer, optionally bidirectional RNN."""

    def __init__(self, input_size, hid_size, num_of_enc_layers, emb_size, batch_size, dropout, bi_direct):
        super().__init__()
        # Hyper-parameters are kept because initialiseHidden() reads them.
        self.input_size, self.hid_size = input_size, hid_size
        self.num_of_enc_layers, self.emb_size = num_of_enc_layers, emb_size
        self.batch_size, self.bi_direct = batch_size, bi_direct
        self.dropout = dropout
        self.embedding = nn.Embedding(input_size, emb_size)
        self.rnn = nn.RNN(
            input_size=emb_size,
            hidden_size=hid_size,
            num_layers=num_of_enc_layers,
            dropout=dropout,
            bidirectional=bi_direct,
        )

    def forward(self, input_data, hidden):
        """Embed the transposed ``input_data`` and run it through the RNN.

        Returns the ``(output, hidden)`` pair produced by ``nn.RNN``.
        """
        embedded = self.embedding(input_data.T).to(device_name)
        return self.rnn(embedded, hidden)

    def initialiseHidden(self):
        """Return an all-zero initial hidden state on ``device_name``."""
        state_layers = self.num_of_enc_layers
        if self.bi_direct:
            # One state per direction and layer.
            state_layers *= 2
        return torch.zeros(state_layers, self.batch_size, self.hid_size, device=device_name)
  

In [8]:
class RNN_Decoder(nn.Module):
    """RNN decoder that emits log-probabilities over the output vocabulary."""

    def __init__(self, op_size, num_of_dec_layers, hid_size, batch_size, emb_size, dropout, bi_direct):
        super().__init__()
        self.op_size, self.hid_size = op_size, hid_size
        self.num_of_dec_layers, self.emb_size = num_of_dec_layers, emb_size
        self.batch_size, self.bi_direct = batch_size, bi_direct
        self.embedding = nn.Embedding(op_size, emb_size)
        # Both directions are concatenated in the output of a bidirectional RNN.
        projection_in = 2 * hid_size if bi_direct else hid_size
        self.op = nn.Linear(projection_in, op_size)
        self.softmax = nn.LogSoftmax(dim=2)
        self.rnn = nn.RNN(
            input_size=emb_size,
            hidden_size=hid_size,
            num_layers=num_of_dec_layers,
            dropout=dropout,
            bidirectional=bi_direct,
        )

    def forward(self, input_data, hidden):
        """Decode ``input_data`` starting from ``hidden``.

        Returns ``(log_probs, hidden)``; ``log_probs`` is normalised over
        the last axis.
        """
        steps = self.embedding(input_data).view(-1, self.batch_size, self.emb_size)
        features, hidden = self.rnn(steps, hidden)
        scores = self.op(features)
        return self.softmax(scores), hidden

In [9]:
class LSTM_Encoder(nn.Module):
    """Embedding layer feeding a multi-layer, optionally bidirectional LSTM."""

    def __init__(self, input_size, hid_size, num_of_enc_layers, emb_size, batch_size, dropout, bi_direct):
        super().__init__()
        # Hyper-parameters are kept because initialiseHidden() reads them.
        self.input_size, self.hid_size = input_size, hid_size
        self.num_of_enc_layers, self.emb_size = num_of_enc_layers, emb_size
        self.batch_size, self.bi_direct = batch_size, bi_direct
        self.dropout = dropout
        self.embedding = nn.Embedding(input_size, emb_size)
        self.lstm = nn.LSTM(
            input_size=emb_size,
            hidden_size=hid_size,
            num_layers=num_of_enc_layers,
            dropout=dropout,
            bidirectional=bi_direct,
        )

    def forward(self, input_data, hidden, state):
        """Embed the transposed ``input_data`` and run it through the LSTM.

        ``hidden`` and ``state`` are passed to the LSTM as its initial
        ``(h, c)`` pair.  Returns ``(output, hidden, state)``.
        """
        embedded = self.embedding(input_data.T).to(device_name)
        output, (hidden, state) = self.lstm(embedded, (hidden, state))
        return output, hidden, state

    def initialiseHidden(self):
        """Return an all-zero tensor on ``device_name``.

        Callers in this file use it for both the hidden and the cell state.
        """
        directions = 2 if self.bi_direct else 1
        shape = (directions * self.num_of_enc_layers, self.batch_size, self.hid_size)
        return torch.zeros(*shape, device=device_name)
  

In [10]:
class LSTM_Decoder(nn.Module):
    """LSTM decoder that emits log-probabilities over the output vocabulary."""

    def __init__(self, op_size, num_of_dec_layers, hid_size, batch_size, emb_size, dropout, bi_direct):
        super().__init__()
        self.op_size, self.hid_size = op_size, hid_size
        self.num_of_dec_layers, self.emb_size = num_of_dec_layers, emb_size
        self.batch_size, self.bi_direct = batch_size, bi_direct
        # A bidirectional LSTM concatenates both directions in its output.
        directions = 2 if bi_direct else 1
        self.embedding = nn.Embedding(op_size, emb_size)
        self.op = nn.Linear(directions * hid_size, op_size)
        self.softmax = nn.LogSoftmax(dim=2)
        self.lstm = nn.LSTM(
            input_size=emb_size,
            hidden_size=hid_size,
            num_layers=num_of_dec_layers,
            dropout=dropout,
            bidirectional=bi_direct,
        )

    def forward(self, input_data, hidden, state):
        """Decode ``input_data`` starting from the ``(hidden, state)`` pair.

        Returns ``(log_probs, hidden, state)``; ``log_probs`` is normalised
        over the last axis.
        """
        steps = self.embedding(input_data).view(-1, self.batch_size, self.emb_size)
        features, (hidden, state) = self.lstm(steps, (hidden, state))
        return self.softmax(self.op(features)), hidden, state

In [11]:
class Atten_decoder(nn.Module):
    """Single-step decoder with learned attention over the encoder outputs.

    At every step the embedded input token and the first layer of the
    decoder hidden state are scored against ``max_input_size`` source
    positions.  The resulting weights mix the encoder outputs into a context
    vector, which is combined with the embedding and fed to the recurrent
    cell.

    Args:
        input_size: unused; kept for interface compatibility.
        embedding_size: width of the target-token embedding.
        hidden_size: width of the recurrent state (also the cell input width).
        output_size: number of target symbols.
        dec_layers: number of stacked recurrent layers.
        p: dropout probability (embedding dropout and inter-layer dropout).
        max_input_size: number of source positions that attention scores.
            The encoder output passed to ``forward`` must have exactly this
            many steps.
        cell_type: ``"GRU"``, ``"RNN"`` or ``"LSTM"``.
        bidirectional: whether the *encoder* is bidirectional, i.e. whether
            its outputs are ``2 * hidden_size`` wide.  The decoder cell
            itself is always unidirectional.

    Raises:
        ValueError: if ``cell_type`` is not one of the supported names.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, dec_layers, p, max_input_size, cell_type, bidirectional):
        super(Atten_decoder, self).__init__()
        if cell_type not in ("GRU", "RNN", "LSTM"):
            raise ValueError("Unsupported cell_type: {!r}".format(cell_type))
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.max_length = max_input_size
        self.dec_layers = dec_layers
        self.dropout = nn.Dropout(p)
        self.cell_type = cell_type
        # Attention weights are a distribution over source positions (dim 1
        # of the (batch, max_length) score matrix).
        self.softmax = nn.Softmax(dim=1)
        self.embedding = nn.Embedding(output_size, embedding_size)
        if cell_type == "GRU":
            self.gru = nn.GRU(hidden_size, hidden_size, dec_layers, dropout=p)
        elif cell_type == "RNN":
            self.rnn = nn.RNN(hidden_size, hidden_size, dec_layers, dropout=p)
        else:
            self.lstm = nn.LSTM(hidden_size, hidden_size, dec_layers, dropout=p)
        self.fc = nn.Linear(hidden_size, output_size)  # fully connected.
        self.attn = nn.Linear(hidden_size + embedding_size, self.max_length)
        context_size = hidden_size * 2 if bidirectional else hidden_size
        self.attn_combine = nn.Linear(context_size + embedding_size, hidden_size)

    def forward(self, x, output, hidden, cell=0):
        """Run one decoding step.

        Args:
            x: 1-D tensor with one input token index per sample.
            output: encoder outputs shaped (source steps, batch, features);
                permuted internally to batch-first for ``bmm``.
            hidden: decoder hidden state, (dec_layers, batch, hidden_size).
            cell: LSTM cell state; only read when ``cell_type == "LSTM"``.

        Returns:
            ``(predictions, hidden, attn_weights, attn_applied)`` and, for
            LSTM only, ``cell`` as a fifth element.  ``predictions`` are
            log-probabilities of shape (batch, output_size), matching the
            other decoders in this file and what ``NLLLoss`` expects.
            ``attn_weights`` is (batch, max_length) and each row sums to 1.
        """
        # (batch,) -> (1, batch, emb) -> (batch, emb)
        embedded = self.dropout(self.embedding(x.unsqueeze(0)))[0]
        enc_states = output.permute(1, 0, 2)

        # Both operands are 2-D, so features are joined along dim 1.
        scores = self.attn(torch.cat((embedded, hidden[0]), 1))
        attn_weights = self.softmax(scores)
        # (batch, 1, steps) x (batch, steps, features) -> (batch, features)
        attn_applied = torch.bmm(attn_weights.unsqueeze(1), enc_states).squeeze(1)

        op = self.attn_combine(torch.cat((embedded, attn_applied), 1)).unsqueeze(0)
        op = nn.functional.relu(op)
        if self.cell_type == "LSTM":
            outputs, (hidden, cell) = self.lstm(op, (hidden, cell))
        elif self.cell_type == "GRU":
            outputs, hidden = self.gru(op, hidden)
        else:
            outputs, hidden = self.rnn(op, hidden)

        # (1, batch, vocab) -> (batch, vocab)
        predictions = nn.functional.log_softmax(self.fc(outputs).squeeze(0), dim=1)
        if self.cell_type == "LSTM":
            return predictions, hidden, attn_weights, attn_applied, cell
        return predictions, hidden, attn_weights, attn_applied

In [12]:
def trainWithoutAttention(input_data, target_data, loss_fn, enc_optimizer, dec_optimizer, encoder, decoder, num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type):
    """Train the plain (attention-free) encoder/decoder pair for one epoch.

    For every batch the encoder's last state slice is copied into each
    decoder layer (twice as many copies when ``bi_direct``).  The decoder is
    then unrolled over every target position.  With probability 0.5 the
    whole batch is teacher-forced; otherwise the decoder's own top
    prediction is fed back.

    Args:
        input_data, target_data: tensors sliced along dim 0 into batches.
        loss_fn: loss applied to (batch, vocab) log-probabilities and the
            (batch,) targets of one step.
        enc_optimizer, dec_optimizer: optimizers stepped once per batch.
        encoder, decoder: modules with the interfaces of the encoder/decoder
            classes in this file.
        num_of_enc_layers: unused; kept for interface compatibility.
        num_of_dec_layers: number of decoder layers to initialise.
        batch_size: number of rows per batch.
        bi_direct: whether the decoder is bidirectional.
        cell_type: ``'GRU'``, ``'RNN'`` or ``'LSTM'``.

    Returns:
        ``(loss, encoder, decoder)`` where ``loss`` is the summed batch loss
        divided by ``len(target_data) * target_data.shape[1]``
        (``0.0`` for empty data).

    Raises:
        ValueError: if ``cell_type`` is not supported.
    """
    if cell_type not in ('GRU', 'RNN', 'LSTM'):
        raise ValueError("Unsupported cell_type: {!r}".format(cell_type))
    is_lstm = cell_type == 'LSTM'
    teacher_forcing = 0.5
    dec_state_layers = num_of_dec_layers * (2 if bi_direct else 1)
    total_loss = 0.0

    for b in range(0, len(input_data), batch_size):
        x = input_data[b:b + batch_size]
        y = target_data[b:b + batch_size].T
        enc_optimizer.zero_grad()
        dec_optimizer.zero_grad()

        enc_hidden = encoder.initialiseHidden()
        if is_lstm:
            _, enc_hidden, enc_cell = encoder(x, enc_hidden, encoder.initialiseHidden())
            enc_final = (enc_hidden, enc_cell)
        else:
            _, enc_hidden = encoder(x, enc_hidden)
            enc_final = (enc_hidden,)
        # Seed every decoder layer/direction with the encoder's last slice.
        dec_state = [s[-1].repeat(dec_state_layers, 1, 1) for s in enc_final]

        use_teacher = random.random() <= teacher_forcing
        dec_input = y[0]
        batch_loss = 0
        for target in y:
            dec_output, *dec_state = decoder(dec_input, *dec_state)
            # squeeze(0) drops only the step axis, so a batch of one sample
            # keeps its batch axis.
            batch_loss = batch_loss + loss_fn(dec_output.squeeze(0), target)
            if use_teacher:
                dec_input = target
            else:
                dec_input = dec_output.topk(1)[1].reshape(-1).detach()

        batch_loss.backward()
        enc_optimizer.step()
        dec_optimizer.step()
        # Accumulate a detached value so that earlier batches' autograd
        # graphs are not kept alive through the running total.
        total_loss = total_loss + batch_loss.detach()

    if len(target_data) == 0:
        return 0.0, encoder, decoder
    return float(total_loss) / (len(target_data) * target_data.shape[1]), encoder, decoder



In [13]:
def evalWithoutAttention(input_data, target_data, encoder, decoder, num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type):
    """Greedy-decode every batch with the plain encoder/decoder pair.

    Both modules are switched to eval mode (and left there).  Decoding runs
    under ``torch.no_grad()`` because no gradients are needed.  The target
    tensor is used only for its first position, which is fed to the decoder
    as the initial input, and for the number of steps to generate.

    Args:
        input_data, target_data: tensors sliced along dim 0 into batches.
        encoder, decoder: modules with the interfaces of the encoder/decoder
            classes in this file.
        num_of_enc_layers: unused; kept for interface compatibility.
        num_of_dec_layers: number of decoder layers to initialise.
        batch_size: number of rows per batch.
        bi_direct: whether the decoder is bidirectional.
        cell_type: ``'GRU'``, ``'RNN'`` or ``'LSTM'``.

    Returns:
        A list with one entry per batch; each entry is a list of decoding
        steps, each step a list with one predicted index per sample.

    Raises:
        ValueError: if ``cell_type`` is not supported.
    """
    if cell_type not in ('GRU', 'RNN', 'LSTM'):
        raise ValueError("Unsupported cell_type: {!r}".format(cell_type))
    is_lstm = cell_type == 'LSTM'
    dec_state_layers = num_of_dec_layers * (2 if bi_direct else 1)

    encoder.eval()
    decoder.eval()
    out = []
    with torch.no_grad():
        for b in range(0, len(input_data), batch_size):
            x = input_data[b:b + batch_size]
            y = target_data[b:b + batch_size].T

            enc_hidden = encoder.initialiseHidden()
            if is_lstm:
                _, enc_hidden, enc_cell = encoder(x, enc_hidden, encoder.initialiseHidden())
                enc_final = (enc_hidden, enc_cell)
            else:
                _, enc_hidden = encoder(x, enc_hidden)
                enc_final = (enc_hidden,)
            # Seed every decoder layer/direction with the encoder's last slice.
            dec_state = [s[-1].repeat(dec_state_layers, 1, 1) for s in enc_final]

            dec_input = y[0]
            predicted_data = []
            for _ in range(len(y)):
                dec_output, *dec_state = decoder(dec_input, *dec_state)
                # reshape(-1) keeps a 1-D result even for a batch of one.
                dec_input = dec_output.topk(1)[1].reshape(-1)
                predicted_data.append(dec_input.tolist())
            out.append(predicted_data)
    return out

In [14]:
def trainWithAttention(input_data, target_data, loss_fn, enc_optimizer, dec_optimizer, encoder, decoder, num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type):
    """Train the encoder and the attention decoder for one epoch.

    For every batch the encoder's last state slice is copied into each
    decoder layer.  The decoder is then unrolled over every target position
    while attending over the encoder outputs.  With probability 0.5 the
    whole batch is teacher-forced; otherwise the decoder's own top
    prediction is fed back.

    Args:
        input_data, target_data: tensors sliced along dim 0 into batches.
        loss_fn: loss applied to (batch, vocab) log-probabilities and the
            (batch,) targets of one step.
        enc_optimizer, dec_optimizer: optimizers stepped once per batch.
        encoder: module with the interface of the encoder classes in this
            file.
        decoder: module called as ``decoder(token, enc_output, hidden[, cell])``
            returning ``(scores, hidden, attn_weights, attn_applied[, cell])``.
        num_of_enc_layers: unused; kept for interface compatibility.
        num_of_dec_layers: number of decoder layers to initialise.
        batch_size: number of rows per batch.
        bi_direct: unused here.  The attention decoder's recurrent cell is
            unidirectional, so its state is never doubled.
        cell_type: ``'GRU'``, ``'RNN'`` or ``'LSTM'``.

    Returns:
        ``(loss, encoder, decoder, attn_weights, attn_applied)``.  ``loss``
        is the summed batch loss divided by
        ``len(target_data) * target_data.shape[1]`` (``0.0`` for empty data).
        The attention tensors come from the last decoding step of the last
        batch, or are ``None`` if no batch ran.

    Raises:
        ValueError: if ``cell_type`` is not supported.
    """
    if cell_type not in ('GRU', 'RNN', 'LSTM'):
        raise ValueError("Unsupported cell_type: {!r}".format(cell_type))
    is_lstm = cell_type == 'LSTM'
    teacher_forcing = 0.5
    total_loss = 0.0
    attn_weights = attn_applied = None

    for b in range(0, len(input_data), batch_size):
        x = input_data[b:b + batch_size]
        y = target_data[b:b + batch_size].T
        enc_optimizer.zero_grad()
        dec_optimizer.zero_grad()

        enc_hidden = encoder.initialiseHidden()
        if is_lstm:
            enc_output, enc_hidden, enc_cell = encoder(x, enc_hidden, encoder.initialiseHidden())
            enc_final = (enc_hidden, enc_cell)
        else:
            enc_output, enc_hidden = encoder(x, enc_hidden)
            enc_final = (enc_hidden,)
        # One copy of the encoder's last slice per decoder layer.
        dec_state = tuple(s[-1].repeat(num_of_dec_layers, 1, 1) for s in enc_final)

        use_teacher = random.random() <= teacher_forcing
        dec_input = y[0]
        batch_loss = 0
        for target in y:
            step = decoder(dec_input, enc_output, *dec_state)
            dec_output, attn_weights, attn_applied = step[0], step[2], step[3]
            dec_state = (step[1], step[4]) if is_lstm else (step[1],)
            # Normalise explicitly so loss_fn always sees log-probabilities.
            # This is a no-op if the decoder already returns them, and it
            # keeps both feeding modes on the same loss scale.
            log_probs = nn.functional.log_softmax(dec_output, dim=1)
            batch_loss = batch_loss + loss_fn(log_probs, target)
            if use_teacher:
                dec_input = target
            else:
                dec_input = dec_output.topk(1)[1].reshape(-1).detach()

        batch_loss.backward()
        enc_optimizer.step()
        dec_optimizer.step()
        # Accumulate a detached value so that earlier batches' autograd
        # graphs are not kept alive through the running total.
        total_loss = total_loss + batch_loss.detach()

    if len(target_data) == 0:
        return 0.0, encoder, decoder, attn_weights, attn_applied
    mean_loss = float(total_loss) / (len(target_data) * target_data.shape[1])
    return mean_loss, encoder, decoder, attn_weights, attn_applied

In [15]:
def evalWithAttention(input_data, target_data, encoder, decoder, num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type):
    """Greedy-decode every batch with the encoder and the attention decoder.

    Both modules are switched to eval mode (and left there).  Decoding runs
    under ``torch.no_grad()`` because no gradients are needed.  The target
    tensor is used only for its first position, which is fed to the decoder
    as the initial input, and for the number of steps to generate.

    Args:
        input_data, target_data: tensors sliced along dim 0 into batches.
        encoder: module with the interface of the encoder classes in this
            file.
        decoder: module called as ``decoder(token, enc_output, hidden[, cell])``
            returning ``(scores, hidden, attn_weights, attn_applied[, cell])``.
        num_of_enc_layers: unused; kept for interface compatibility.
        num_of_dec_layers: number of decoder layers to initialise.
        batch_size: number of rows per batch.
        bi_direct: unused here.  The attention decoder's recurrent cell is
            unidirectional, so its state is never doubled.
        cell_type: ``'GRU'``, ``'RNN'`` or ``'LSTM'``.

    Returns:
        A list with one entry per batch; each entry is a list of decoding
        steps, each step a list with one predicted index per sample.

    Raises:
        ValueError: if ``cell_type`` is not supported.
    """
    if cell_type not in ('GRU', 'RNN', 'LSTM'):
        raise ValueError("Unsupported cell_type: {!r}".format(cell_type))
    is_lstm = cell_type == 'LSTM'

    encoder.eval()
    decoder.eval()
    out = []
    with torch.no_grad():
        for b in range(0, len(input_data), batch_size):
            x = input_data[b:b + batch_size]
            y = target_data[b:b + batch_size].T

            enc_hidden = encoder.initialiseHidden()
            if is_lstm:
                enc_output, enc_hidden, enc_cell = encoder(x, enc_hidden, encoder.initialiseHidden())
                enc_final = (enc_hidden, enc_cell)
            else:
                enc_output, enc_hidden = encoder(x, enc_hidden)
                enc_final = (enc_hidden,)
            # One copy of the encoder's last slice per decoder layer.
            dec_state = tuple(s[-1].repeat(num_of_dec_layers, 1, 1) for s in enc_final)

            dec_input = y[0]
            predicted_data = []
            for _ in range(len(y)):
                step = decoder(dec_input, enc_output, *dec_state)
                dec_state = (step[1], step[4]) if is_lstm else (step[1],)
                # reshape(-1) keeps a 1-D result even for a batch of one.
                dec_input = step[0].topk(1)[1].reshape(-1)
                predicted_data.append(dec_input.tolist())
            out.append(predicted_data)
    return out

In [16]:
def training(input_data, input_size, target_data, target_size, max_input_size, epochs, batch_size, emb_size, num_of_enc_layers, num_of_dec_layers, hid_size, cell_type, bi_direct, enc_dropout, dec_dropout, use_attention, beam_size, val_input=None, val_target=None):
    """Build an encoder/decoder pair and train it for ``epochs`` epochs.

    Both modules are optimised with Adam (learning rate 0.001) against a
    summed ``NLLLoss``.  After every epoch the exact-match accuracy is
    measured on the training data passed in and on the validation data.

    Args:
        input_data, target_data: training tensors.
        input_size, target_size: source / target vocabulary sizes.
        max_input_size: number of source positions the attention decoder
            scores; only used when ``use_attention`` is true.
        epochs, batch_size, emb_size, hid_size: usual hyper-parameters.
        num_of_enc_layers, num_of_dec_layers: recurrent layer counts.
        cell_type: ``"GRU"``, ``"RNN"`` or ``"LSTM"``.
        bi_direct: whether the recurrent layers are bidirectional.
        enc_dropout, dec_dropout: dropout probabilities.
        use_attention: use ``Atten_decoder`` instead of the plain decoder.
        beam_size: unused; kept for interface compatibility.
        val_input, val_target: validation tensors.  They default to the
            notebook globals ``tensor_eng_val`` / ``tensor_hin_val``.

    Returns:
        ``(encoder, decoder, train_loss, train_acc, val_acc, temp1, temp2)``.
        The three lists hold one value per epoch.  ``temp1`` / ``temp2`` are
        the attention weights and attention-applied context returned by the
        last training epoch, or ``0`` when attention is off or no epoch ran.

    Raises:
        ValueError: if ``cell_type`` is not supported.
    """
    encoder_classes = {"GRU": GRU_Encoder, "RNN": RNN_Encoder, "LSTM": LSTM_Encoder}
    decoder_classes = {"GRU": GRU_Decoder, "RNN": RNN_Decoder, "LSTM": LSTM_Decoder}
    if cell_type not in encoder_classes:
        raise ValueError("Unsupported cell_type: {!r}".format(cell_type))
    if val_input is None:
        val_input = tensor_eng_val
    if val_target is None:
        val_target = tensor_hin_val

    learning_rate = 0.001
    encoder = encoder_classes[cell_type](input_size, hid_size, num_of_enc_layers, emb_size, batch_size, enc_dropout, bi_direct).to(device_name)
    if use_attention:
        decoder = Atten_decoder(input_size, emb_size, hid_size, target_size, num_of_dec_layers, dec_dropout, max_input_size, cell_type, bi_direct).to(device_name)
    else:
        decoder = decoder_classes[cell_type](target_size, num_of_dec_layers, hid_size, batch_size, emb_size, dec_dropout, bi_direct).to(device_name)

    enc_optimizer = torch.optim.Adam(encoder.parameters(), learning_rate)
    dec_optimizer = torch.optim.Adam(decoder.parameters(), learning_rate)
    loss_fn = nn.NLLLoss(reduction = 'sum')

    train_step = trainWithAttention if use_attention else trainWithoutAttention
    evaluate = evalWithAttention if use_attention else evalWithoutAttention

    train_loss = []
    train_acc = []
    val_acc = []
    temp1 = 0
    temp2 = 0
    for i in range(epochs):
        # The evaluation helpers leave the modules in eval mode, so training
        # mode has to be restored at the start of every epoch.
        encoder.train()
        decoder.train()
        result = train_step(input_data, target_data, loss_fn, enc_optimizer, dec_optimizer, encoder, decoder, num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type)
        loss, encoder, decoder = result[:3]
        if use_attention:
            temp1, temp2 = result[3], result[4]
        train_loss.append(loss)

        # Training accuracy is measured on the data this call was given.
        trained_pred = evaluate(input_data, target_data, encoder, decoder, num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type)
        train_acc.append(calculateAccuracy(trained_pred, target_data))

        trained_pred_val = evaluate(val_input, val_target, encoder, decoder, num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type)
        val_acc.append(calculateAccuracy(trained_pred_val, val_target))
        print("Epoch : {} \tLoss : {}".format(i, loss))

    return encoder, decoder, train_loss, train_acc, val_acc, temp1, temp2


In [17]:
#Loading the dataset and preprocessing

# Column names are supplied explicitly for all three splits.
df = pd.read_csv('/kaggle/input/fdl-a3/hin_train.csv', names=['eng','hin'])
df_test = pd.read_csv('/kaggle/input/fdl-a3/hin_test.csv', names=['eng','hin'])
df_valid = pd.read_csv('/kaggle/input/fdl-a3/hin_valid.csv', names=['eng','hin'])

# The longest training word on either side fixes the padded width.
eng_maxlen = max(len(word) for word in df['eng'])
hin_maxlen = max(len(word) for word in df['hin'])
max_len = max(eng_maxlen, hin_maxlen)
eng_words = df['eng'].copy()
hin_words = df['hin'].copy()

# Vocabularies come from the training split only.
# '*' is the padding symbol, '#' the start-of-word symbol.
unique_eng_letters = set(''.join(eng_words))
unique_eng_letters.add('*')

unique_hin_letters = set(''.join(hin_words))
unique_hin_letters.add('#')
unique_hin_letters.add('*')

# Set iteration order for strings can differ between interpreter runs;
# sorting makes the character <-> index mapping reproducible.
int_to_eng = dict(enumerate(sorted(unique_eng_letters)))
eng_to_int = {char: ind for ind, char in int_to_eng.items()}

int_to_hin = dict(enumerate(sorted(unique_hin_letters)))
hin_to_int = {char: ind for ind, char in int_to_hin.items()}
# '_' stands in for Hindi characters that do not occur in the training split.
# Keep an existing '_' entry intact, and keep both maps inverse to each other.
if '_' not in hin_to_int:
    hin_to_int['_'] = len(hin_to_int)
    int_to_hin[hin_to_int['_']] = '_'

tensor_eng, tensor_hin = preprocessingData(df, max_len, eng_to_int, hin_to_int)
tensor_eng_test, tensor_hin_test = preprocessingData(df_test, max_len, eng_to_int, hin_to_int)
tensor_eng_val, tensor_hin_val = preprocessingData(df_valid, max_len, eng_to_int, hin_to_int)

In [18]:
#Configuration of the model

# Data and vocabulary sizes
input_data, target_data = tensor_eng, tensor_hin
input_size = len(unique_eng_letters)
target_size = len(unique_hin_letters)
max_input_size = tensor_eng.shape[1]

# Architecture
cell_type = "LSTM"
bi_direct = True
use_attention = False
emb_size = 512
hid_size = 512
num_of_enc_layers = 3
num_of_dec_layers = 3
enc_dropout = 0.3
dec_dropout = 0.1

# Optimisation / decoding
epochs = 10
batch_size = 256
beam_size = 1

In [19]:
#Block of code to train the model

encoder, decoder, train_loss, train_acc, val_acc, temp1, temp2 = training(
    input_data, input_size, target_data, target_size, max_input_size,
    epochs, batch_size, emb_size, num_of_enc_layers, num_of_dec_layers,
    hid_size, cell_type, bi_direct, enc_dropout, dec_dropout,
    use_attention, beam_size,
)
# Re-evaluate on the training split with the matching evaluation routine.
trained_pred = (evalWithAttention if use_attention else evalWithoutAttention)(
    tensor_eng, tensor_hin, encoder, decoder,
    num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type,
)
calculateAccuracy(trained_pred, tensor_hin)

Epoch : 0 	Loss : 0.9295275426793982
Epoch : 1 	Loss : 0.39543628833912037
Epoch : 2 	Loss : 0.31561272515190975
Epoch : 3 	Loss : 0.25897040473090277
Epoch : 4 	Loss : 0.22920066550925927
Epoch : 5 	Loss : 0.21472488968460648
Epoch : 6 	Loss : 0.19993019386574074
Epoch : 7 	Loss : 0.18572965268735533
Epoch : 8 	Loss : 0.1757969495985243
Epoch : 9 	Loss : 0.1607646009657118


0.58232421875

In [20]:
#Block of code to see the test accuracy

# Decode the test split with the evaluation routine matching the model.
trained_pred = (evalWithAttention if use_attention else evalWithoutAttention)(
    tensor_eng_test, tensor_hin_test, encoder, decoder,
    num_of_enc_layers, num_of_dec_layers, batch_size, bi_direct, cell_type,
)
calculateAccuracy(trained_pred, tensor_hin_test)

0.3408203125

In [None]:
#Block of code to generate csv file

hin_words = []
for temp in trained_pred:
    # Each batch is stored as (decoding step, sample); transpose it so that
    # every row is the index sequence predicted for one sample.
    for sample in torch.tensor(temp).T.tolist():
        chars = []
        # Position 0 is skipped, as in the decoding loop it corresponds to
        # the start symbol; the word ends at the first padding symbol '*'.
        for idx in sample[1:]:
            ch = int_to_hin[idx]
            if ch == '*':
                break
            chars.append(ch)
        # Always record a word, even when no '*' was predicted, so the list
        # stays aligned with the rows of df_test.
        hin_words.append(''.join(chars))

res = df_test.copy()
res['predicted'] = hin_words
res.to_csv('predicted_hin_words.csv')

# SWEEP CONFIG

In [None]:
# Credentials must not be committed with the notebook: with no explicit key,
# wandb.login() takes the API key from the WANDB_API_KEY environment variable
# or from a previously stored login, and otherwise prompts for it.
# NOTE(review): the key that used to be hard-coded here has been published and
# should be revoked in the W&B account settings.
wandb.login()

In [None]:
# Bayesian hyper-parameter search that maximises the logged
# 'validation_accuracy'; every hyper-parameter is a discrete choice.
sweep_configuration = {
    'method': 'bayes',
    'name': 'fdl_a31',
    'metric': {'goal': 'maximize', 'name': 'validation_accuracy'},
    'parameters': {
        hyperparameter: {'values': candidates}
        for hyperparameter, candidates in (
            ('batch_size', [16, 32, 64, 256, 512]),
            ('epochs', [10]),
            ('num_of_enc_layers', [1, 2, 3, 4]),
            ('num_of_dec_layers', [1, 2, 3, 4]),
            ('hid_size', [16, 32, 64, 256, 512]),
            ('emb_size', [16, 32, 64, 256, 512]),
            ('cell_type', ['GRU', 'LSTM', 'RNN']),
            ('bi_direct', [True, False]),
            ('use_attention', [False]),
            ('enc_dropout', [0.1, 0.2, 0.3]),
            ('dec_dropout', [0.1, 0.2, 0.3]),
            ('beam_size', [1]),
        )
    },
}

In [None]:
def sweepTrain():
    """Run one sweep trial: train with the sampled config and log the curves.

    Hyper-parameters are read from ``wandb.config``.  The data tensors and
    vocabulary sizes come from the notebook globals.  After training, one
    ``wandb.log`` call is made per epoch with the training loss, training
    accuracy and validation accuracy.
    """
    wandb.init()
    config = wandb.config

    epochs = config.epochs
    batch_size = config.batch_size
    emb_size = config.emb_size
    num_of_enc_layers = config.num_of_enc_layers
    num_of_dec_layers = config.num_of_dec_layers
    hid_size = config.hid_size
    cell_type = config.cell_type
    bi_direct = config.bi_direct
    enc_dropout = config.enc_dropout
    dec_dropout = config.dec_dropout
    # Read the beam width from its own sweep parameter, not from batch_size.
    beam_size = config.beam_size
    use_attention = config.use_attention

    wandb.run.name = f'inp_emb_{emb_size}_encl_{num_of_enc_layers}_decl_{num_of_dec_layers}_hid_{hid_size}_cel_{cell_type}_dp_{enc_dropout}'
    encoder, decoder, train_loss, train_acc, val_acc, temp1, temp2 = training(input_data, input_size, target_data, target_size, max_input_size, epochs, batch_size, emb_size, num_of_enc_layers, num_of_dec_layers, hid_size, cell_type, bi_direct, enc_dropout, dec_dropout, use_attention, beam_size)
    for epoch_loss, epoch_train_acc, epoch_val_acc in zip(train_loss, train_acc, val_acc):
        wandb.log({'training_loss': epoch_loss,
                   'training_accuracy': epoch_train_acc,
                   'validation_accuracy': epoch_val_acc
                   })

In [None]:
# Register the sweep under project 'fdl_a32', let an agent run sweepTrain
# for it, then close the active wandb run.
sweep_id = wandb.sweep(sweep=sweep_configuration, project='fdl_a32')
wandb.agent(sweep_id, function=sweepTrain)
wandb.finish()