<a href="https://www.kaggle.com/code/jaswanth431/dl-assignment-3-atten?scriptVersionId=177668108" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import torch
from torch import nn
import pandas as pd
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import copy
from torch.utils.data import Dataset, DataLoader
import gc
import random
import wandb

In [2]:
wandb.login(key="62cfafb7157dfba7fdd6132ac9d757ccd913aaaf")

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
END_TOKEN = '>'
START_TOKEN = '<'
PAD_TOKEN = '_'
TEACHER_FORCING_RATIO = 0.5

train_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_train.csv"
test_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_test.csv"
val_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_valid.csv"

train_df = pd.read_csv(train_csv, header=None)
test_df = pd.read_csv(test_csv, header=None)
val_df = pd.read_csv(val_csv, header=None)
train_source, train_target = train_df[0].to_numpy(), train_df[1].to_numpy();
val_source, val_target = val_df[0].to_numpy(), val_df[1].to_numpy();

cuda


In [4]:
def add_padding(source_data, MAX_LENGTH):
    padded_source_strings = []
    for i in range(len(source_data)):
        source_str =START_TOKEN+ source_data[i] + END_TOKEN
        # Truncate or pad source sequence
        source_str = source_str[:MAX_LENGTH]
        source_str += PAD_TOKEN * (MAX_LENGTH - len(source_str))

        padded_source_strings.append(source_str)
        
    return padded_source_strings


def generate_string_to_sequence(source_data, source_char_index_dict):
    source_sequences = []
    for i in range(len(source_data)):
        source_sequences.append(get_chars(source_data[i], source_char_index_dict))
    source_sequences = pad_sequence(source_sequences, batch_first=True, padding_value=2)
    return source_sequences


def get_chars(str, char_index_dict):
    chars_indexes = []
    for ch in str:
        chars_indexes.append(char_index_dict[ch])
    return torch.tensor(chars_indexes, device=device)


def preprocess_data(source_data, target_data):
    data = {
        "source_chars": [START_TOKEN, END_TOKEN, PAD_TOKEN],
        "target_chars": [START_TOKEN, END_TOKEN, PAD_TOKEN],
        "source_char_index": {START_TOKEN: 0, END_TOKEN:1, PAD_TOKEN:2},
        "source_index_char": {0:START_TOKEN, 1: END_TOKEN, 2:PAD_TOKEN},
        "target_char_index": {START_TOKEN: 0, END_TOKEN:1, PAD_TOKEN:2},
        "target_index_char": {0:START_TOKEN, 1: END_TOKEN, 2:PAD_TOKEN},
        "source_len": 3,
        "target_len": 3,
        "source_data": source_data,
        "target_data": target_data,
        "source_data_seq": [],
        "target_data_seq": []
    }
    
    data["INPUT_MAX_LENGTH"] = max(len(string) for string in source_data) +2
    data["OUTPUT_MAX_LENGTH"] = max(len(string) for string in target_data)+2

    
    padded_source_strings=add_padding(source_data, data["INPUT_MAX_LENGTH"])
    padded_target_strings = add_padding(target_data, data["OUTPUT_MAX_LENGTH"])
    
    for i in range(len(padded_source_strings)):
        for c in padded_source_strings[i]:
            if data["source_char_index"].get(c) is None:
                data["source_chars"].append(c)
                idx = len(data["source_chars"]) - 1
                data["source_char_index"][c] = idx
                data["source_index_char"][idx] = c
        for c in padded_target_strings[i]:
            if data["target_char_index"].get(c) is None:
                data["target_chars"].append(c)
                idx = len(data["target_chars"]) - 1
                data["target_char_index"][c] = idx
                data["target_index_char"][idx] = c

    data['source_data_seq'] = generate_string_to_sequence(padded_source_strings,  data['source_char_index'])
    data['target_data_seq'] = generate_string_to_sequence(padded_target_strings,  data['target_char_index'])
#     print(data["source_data"][0])
#     print(data["source_data_seq"][0])
#     print(data["target_data"][0])
#     print(data["target_data_seq"][0])

    
    data["source_len"] = len(data["source_chars"])
    data["target_len"] = len(data["target_chars"])
    
    return data

# data = preprocess_data(copy.copy(train_source), copy.copy(train_target))

In [5]:
def get_cell_type(cell_type):
    if(cell_type == "RNN"):
        return nn.RNN
    elif(cell_type == "LSTM"):
        return nn.LSTM
    elif(cell_type == "GRU"):
        return nn.GRU
    else:
        print("Specify correct cell type")
        
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))
        scores = scores.squeeze().unsqueeze(1)
        weights = F.softmax(scores, dim=0)
        weights = weights.permute(2,1,0)
        keys = keys.permute(1,0,2)
        context = torch.bmm(weights, keys)
        return context, weights


class Encoder(nn.Module):
    def __init__(self, h_params, data, device ):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(data["source_len"], h_params["char_embd_dim"])
        self.cell = get_cell_type(h_params["cell_type"])(h_params["char_embd_dim"], h_params["hidden_layer_neurons"],num_layers=h_params["number_of_layers"], batch_first=True)
        self.device=device
        self.h_params = h_params
        self.data = data
    def forward(self, input , encoder_curr_state):
        input_length = self.data["INPUT_MAX_LENGTH"]
        batch_size = self.h_params["batch_size"]
        hidden_neurons = self.h_params["hidden_layer_neurons"]
        layers = self.h_params["number_of_layers"]
        encoder_states  = torch.zeros(input_length, layers, batch_size, hidden_neurons, device=device )
        for i in range(input_length):
            current_input = input[:, i].view(batch_size,1)
            _, encoder_curr_state = self.forward_step(current_input, encoder_curr_state)
            encoder_states[i] = encoder_curr_state
        return encoder_states
    
    def forward_step(self, current_input, prev_state):
        embd_input = self.embedding(current_input)
        output, prev_state = self.cell(embd_input, prev_state)
        return output, prev_state
        
    def getInitialState(self):
        return torch.zeros(self.h_params["number_of_layers"],self.h_params["batch_size"],self.h_params["hidden_layer_neurons"], device=self.device)

    
class Decoder(nn.Module):
    def __init__(self, h_params, data,device):
        super(Decoder, self).__init__()
        self.attention = Attention(h_params["hidden_layer_neurons"]).to(device)
        self.embedding = nn.Embedding(data["target_len"], h_params["char_embd_dim"])
        self.cell = get_cell_type(h_params["cell_type"])(h_params["hidden_layer_neurons"] +h_params["char_embd_dim"], h_params["hidden_layer_neurons"],num_layers=h_params["number_of_layers"], batch_first=True)
        self.fc = nn.Linear(h_params["hidden_layer_neurons"], data["target_len"])
        self.softmax = nn.LogSoftmax(dim=2)
        self.h_params = h_params
        self.data = data
        self.device = device

    def forward(self, decoder_current_state, encoder_final_layers, target_batch, loss_fn, teacher_forcing_enabled=True):
#         print("Teacher forcing:", teacher_forcing_enabled)
        batch_size = self.h_params["batch_size"]
        decoder_current_input = torch.full((batch_size,1),self.data["target_char_index"][START_TOKEN], device=self.device)
        embd_input = self.embedding(decoder_current_input)
        curr_embd = F.relu(embd_input)
        decoder_actual_output = []
        attentions = []
        loss = 0
        
        use_teacher_forcing = False
        if(teacher_forcing_enabled):
            use_teacher_forcing = True if random.random() < TEACHER_FORCING_RATIO else False
        for i in range(self.data["OUTPUT_MAX_LENGTH"]):
            decoder_output, decoder_current_state, attn_weights = self.forward_step(decoder_current_input, decoder_current_state, encoder_final_layers)
            attentions.append(attn_weights)
            topv, topi = decoder_output.topk(1)
            decoder_current_input = topi.squeeze().detach()
            decoder_actual_output.append(decoder_current_input)

            if(target_batch==None):
                decoder_current_input = decoder_current_input.view(self.h_params["batch_size"], 1)
            else:
                curr_target_chars = target_batch[:, i]
                if(i<self.data["OUTPUT_MAX_LENGTH"]-1):
                    if use_teacher_forcing:
                        decoder_current_input = target_batch[:, i+1].view(self.h_params["batch_size"], 1)
                    else:
                        decoder_current_input = decoder_current_input.view(self.h_params["batch_size"], 1)
                decoder_output = decoder_output[:, -1, :]
                loss+=(loss_fn(decoder_output, curr_target_chars))

        decoder_actual_output = torch.cat(decoder_actual_output,dim=0).view(self.data["OUTPUT_MAX_LENGTH"], self.h_params["batch_size"]).transpose(0,1)

        correct = (decoder_actual_output == target_batch).all(dim=1).sum().item()
        return decoder_actual_output, attentions, loss, correct
    
    def forward_step(self, current_input, prev_state, encoder_final_layers):
        embd_input = self.embedding(current_input)
        context , attn_weights = self.attention(prev_state[-1,:,:], encoder_final_layers)
        curr_embd = F.relu(embd_input)
        input_gru = torch.cat((curr_embd, context), dim=2)
        output, prev_state = self.cell(input_gru, prev_state)
        output = self.softmax(self.fc(output))
        return output, prev_state, attn_weights  

In [6]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.source_data_seq = data[0]
        self.target_data_seq = data[1]
    
    def __len__(self):
        return len(self.source_data_seq)
    
    def __getitem__(self, idx):
        source_data = self.source_data_seq[idx]
        target_data = self.target_data_seq[idx]
        return source_data, target_data


In [7]:
   
def evaluate(encoder, decoder, data, dataloader, device, h_params, loss_fn, use_teacher_forcing = False):
    correct_predictions = 0
    total_loss = 0
    total_predictions = len(dataloader.dataset)
    number_of_batches = len(dataloader)
    for batch_num, (source_batch, target_batch) in enumerate(dataloader):
        encoder_initial_state = encoder.getInitialState()
        encoder_states = encoder(source_batch,encoder_initial_state)

        decoder_current_state = encoder_states[-1, :, :, :]
        encoder_final_layer_states = encoder_states[:, -1, :, :]

        loss = 0
        correct = 0

        decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, use_teacher_forcing)
        if(batch_num == 0):
                for j in range(20):
                    print(make_strings(data,source_batch[j],target_batch[j],decoder_output[j]))
      
        correct_predictions+=correct
        total_loss +=loss
    
    accuracy = correct_predictions / total_predictions
    total_loss /= number_of_batches
    
    return accuracy, total_loss


In [8]:
def make_strings(data, source, target, output):
    source_string = ""
    target_string = ""
    output_string = ""
#     print(output)
    for i in source:
#         print(i.item())
        source_string+=(data['source_index_char'][i.item()])
    for i in target:
        target_string+=(data['target_index_char'][i.item()])
    for i in output:
        output_string+=(data['target_index_char'][i.item()])
    return source_string, target_string, output_string
                        

def train_loop(encoder, decoder,h_params, data, data_loader, device, val_dataloader, use_teacher_forcing=True):
    
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=h_params["learning_rate"])
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=h_params["learning_rate"])
    
    loss_fn = nn.NLLLoss()
    
    total_predictions = len(data_loader.dataset)
    total_batches = len(data_loader)
    
    for ep in range(h_params["epochs"]):
        total_correct = 0
        total_loss = 0
        for batch_num, (source_batch, target_batch) in enumerate(data_loader):
#             if(batch_num>0):
#                 break
            encoder_initial_state = encoder.getInitialState()
            encoder_states = encoder(source_batch,encoder_initial_state)
            
            decoder_current_state = encoder_states[-1, :, :, :]
            encoder_final_layer_states = encoder_states[:, -1, :, :]
            
            
            loss = 0
            correct = 0
            
            decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, use_teacher_forcing)
            total_correct +=correct
            total_loss += loss.item()/data["OUTPUT_MAX_LENGTH"]
            if(batch_num == 0):
                    for j in range(20):
                        print(make_strings(data,source_batch[j],target_batch[j],decoder_output[j]))
            if(batch_num%20 == 0):
                print("ep:", ep, " bt:", batch_num, " loss:", loss.item()/data["OUTPUT_MAX_LENGTH"], " acc: ", correct/h_params["batch_size"])
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            
        train_acc = total_correct/total_predictions
        train_loss = total_loss/total_batches
        val_acc, val_loss = evaluate(encoder, decoder, data, val_dataloader,device, h_params, loss_fn, False)
        print("ep: ", ep, " train acc:", train_acc, " train loss:", train_loss, " val acc:", val_acc, " val loss:", val_loss.item()/data["OUTPUT_MAX_LENGTH"])
        wandb.log({"train_accuracy":train_acc, "train_loss":train_loss, "val_accuracy":val_acc, "val_loss":val_loss, "epoch":ep})



In [9]:
# h_params={
#     "char_embd_dim" : 256, 
#     "hidden_layer_neurons":256,
#     "batch_size":32,
#     "number_of_layers":1,
#     "learning_rate":0.0001,
#     "epochs":20,
#     "cell_type":"GRU",
#     "dropout":0,
#     "optimizer":"adam"
# }

def prepare_dataloaders(train_source, train_target, val_source, val_target, h_params):
    data = preprocess_data(copy.copy(train_source), copy.copy(train_target))
    training_data = [data["source_data_seq"], data['target_data_seq']]
    train_dataset = MyDataset(training_data)
    train_dataloader = DataLoader(train_dataset, batch_size=h_params["batch_size"], shuffle=True)

    #prepare validation data
    val_padded_source_strings=add_padding(val_source, data["INPUT_MAX_LENGTH"])
    val_padded_target_strings = add_padding(val_target, data["OUTPUT_MAX_LENGTH"])
    val_source_sequences = generate_string_to_sequence(val_padded_source_strings,  data['source_char_index'])
    val_target_sequences = generate_string_to_sequence(val_padded_target_strings,  data['target_char_index'])
    validation_data = [val_source_sequences, val_target_sequences]
    val_dataset = MyDataset(validation_data)
    val_dataloader = DataLoader(val_dataset, batch_size=h_params["batch_size"], shuffle=True)
    return train_dataloader, val_dataloader, data


In [10]:
def train(h_params, data, device, data_loader, val_dataloader, use_teacher_forcing=True):
    encoder = Encoder(h_params, data, device).to(device)
    decoder = Decoder(h_params, data, device).to(device)
    train_loop(encoder, decoder,h_params, data, data_loader,device, val_dataloader, use_teacher_forcing)
    encoder=None
    decoder=None
    gc.collect()
    torch.cuda.empty_cache() 


In [11]:
# config = h_params
# # run = wandb.init(project="DL Assignment 3 With Attention", name=f"{config['cell_type']}_{config['optimizer']}_ep_{config['epochs']}_lr_{config['learning_rate']}_embd_{config['char_embd_dim']}_hid_lyr_neur_{config['hidden_layer_neurons']}_bs_{config['batch_size']}_enc_layers_{config['number_of_layers']}_dec_layers_{config['number_of_layers']}_dropout_{config['dropout']}", config=config)
# train_dataloader, val_dataloader, data = prepare_dataloaders(train_source, train_target, val_source, val_target, h_params)
# train(h_params, data, device, train_dataloader, val_dataloader, True)

In [12]:
#Run this cell to run a sweep with appropriate parameters
sweep_params = {
    'method' : 'bayes',
    'name'   : 'DL Assignment 3 With Attention',
    'metric' : {
        'goal' : 'maximize',
        'name' : 'val_accuracy',
    },
    'parameters' : {
        'epochs':{'values' : [15, 20]},
        'learning_rate':{'values' : [0.001, 0.0001]},
        'batch_size':{'values':[32,64, 128]},
        'char_embd_dim':{'values' : [64, 128, 256] } ,
        'number_of_layers':{'values' : [1,2,3,4]},
        'optimizer':{'values':['nadam','adam']},
        'cell_type':{'values' : ["RNN","LSTM", "GRU"]},
        'hidden_layer_neurons':{'values': [ 128, 256, 512]},
        'dropout':{'values': [0,0.2, 0.3]}
    }
}

sweep_id = wandb.sweep(sweep=sweep_params, project="DL Assignment 3 With Attention")
def main():
    wandb.init(project="DL Assignment 3" )
    config = wandb.config
    with wandb.init(project="DL Assignment 3", name=f"{config['cell_type']}_{config['optimizer']}_ep_{config['epochs']}_lr_{config['learning_rate']}_embd_{config['char_embd_dim']}_hid_lyr_neur_{config['hidden_layer_neurons']}_bs_{config['batch_size']}_enc_layers_{config['number_of_layers']}_dec_layers_{config['number_of_layers']}_dropout_{config['dropout']}", config=config):
        train_dataloader, val_dataloader, data = prepare_dataloaders(train_source, train_target, val_source, val_target, config)
        train(config, data, device, train_dataloader, val_dataloader, True)


Create sweep with ID: nvimt6qw
Sweep URL: https://wandb.ai/jaswanth431/DL%20Assignment%203%20With%20Attention/sweeps/nvimt6qw


In [13]:
wandb.agent("f4esgkqv", function=main, count=100)

[34m[1mwandb[0m: Agent Starting Run: 94mujjqa with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	char_embd_dim: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_layer_neurons: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_layers: 1
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: Currently logged in as: [33mjaswanth431[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.17.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240514_183515-94mujjqa[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mfine-sweep-31[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[

('<doddiloo>____________________', '<దొడ్డిలో>_____________', 'ధధధధధధధధధధధధధధధధధధధధధధధ')
('<cranbroke>___________________', '<క్రాన్బ్రోక్>_________', 'ధధధధధధధధధధధధధధధధధధధధధధధ')
('<vaarinicchataku>_____________', '<వారినిచ్చటకు>_________', 'వధధధధధధధధధధధధధధధధధధధధధధ')
('<kungadame>___________________', '<కుంగడమే>______________', 'ధధధధధధధధధధధధధధధధధధధధధధధ')
('<kannayyagadu>________________', '<కన్నయ్యగాడు>__________', 'జగఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞ')
('<panichesthegaani>____________', '<పనిచేస్తేగాని>________', 'వధధధధధధధధధధధధధధధధధధధధధధ')
('<dukaanamku>__________________', '<దుకాణంకు>_____________', 'జగఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞ')
('<ismaily>_____________________', '<ఇస్మాయిలీ>____________', 'జీీీీీీీీీీీీీీీీీీీీీీ')
('<aritaakuluu>_________________', '<అరిటాకులూ>____________', 'జీీీహహచఱఞఞఞఞఞఞఞఞఞఞఞఞఞఞఞ')
('<naalgusaarlu>________________', '<నాల్గుసార్లు>_________', 'ధధధధధధధధధధధధధధధధధధధధధధధ')
('<pyachlugaa>__________________', '<ప్యాచ్లుగా>___________', 'వధధధధధధధధధధధధధధధధధధధధధధ')
('<prastaavimstuu>___

[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
[34m[1mwandb[0m: train_accuracy ▁▄▅▆▇▇▇▇▇▇▇███▇█████
[34m[1mwandb[0m:     train_loss █▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▄▆▆▇▇▇▇█▇▇▆▇█▇▆▇█▇█
[34m[1mwandb[0m:       val_loss █▃▃▂▂▂▂▁▁▁▁▂▂▁▁▃▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 19
[34m[1mwandb[0m: train_accuracy 0.73141
[34m[1mwandb[0m:     train_loss 0.12179
[34m[1mwandb[0m:   val_accuracy 0.41211
[34m[1mwandb[0m:       val_loss 6.37252
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mfine-sweep-31[0m at: [34m[4mhttps://wandb.ai/jaswanth431/DL%20Assignment%203%20With%20Attention/runs/94mujjqa[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/jaswanth431/DL%20Assignment%203%20With%20Attention[0m
[34m[

('<parvathaarogya>______________', '<పర్వతారోగ్య>__________', 'గగగగగగగగగగకఫగగగబబబబబబబబ')
('<maaldeevulugaa>______________', '<మాల్దీవులుగా>_________', 'గగగగఉగగగగగఢఢగగగగృృబబబబబ')
('<vinakapoyina>________________', '<వినకపోయినా>___________', 'గగగగగగగగగగగగగగృృృృృృబబబ')
('<chesinavarinigurchi>_________', '<చేసినవారినిగూర్చి>____', 'గగభఖగఛగగగగఛగగగగగబగగగగృృ')
('<tangallapallini>_____________', '<తంగళ్లపల్లిని>________', 'గగగఓఘకగగగబగగఝగగగగబబబబబబ')
('<daaninani>___________________', '<దానినని>______________', 'గగగగగగఝగగగగగగగగబబబబబబబబ')
('<harshistunnadani>____________', '<హర్షిస్తున్నదని>______', 'గగగగఘగగగగగఛగఛగగగగగగగృృృ')
('<sambandheekulato>____________', '<సంబంధీకులతో>__________', 'గగగఛఈగగగగగగగగగగృృృృృృృఫ')
('<gabbilalake>_________________', '<గబ్బిలాలకే>___________', 'గగఓకఛగగగగగైగగగృృబబబబబబబ')
('<cheppalanukunte>_____________', '<చెప్పాలనుకుంటే>_______', 'గగుగగగగగగగగోగగైగగగగృృృఫ')
('<samaachaaramadi>_____________', '<సమాచారమది>____________', 'గగగగగగగగగగగగగగబబబబబబబబబ')
('<ardhaalankaaram>__

[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
[34m[1mwandb[0m: train_accuracy ▁▅▆▇▇▇▇▇▇▇▇▇█▇██████
[34m[1mwandb[0m:     train_loss █▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▆▇▇▇▇█▇██▇▇█▇██████
[34m[1mwandb[0m:       val_loss █▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 19
[34m[1mwandb[0m: train_accuracy 0.7399
[34m[1mwandb[0m:     train_loss 0.11891
[34m[1mwandb[0m:   val_accuracy 0.42407
[34m[1mwandb[0m:       val_loss 6.19776
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mcharmed-sweep-32[0m at: [34m[4mhttps://wandb.ai/jaswanth431/DL%20Assignment%203%20With%20Attention/runs/rb8hmn48[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/jaswanth431/DL%20Assignment%203%20With%20Attention[0m
[34m

('<mudiram>_____________________', '<ముదిరామ్>_____________', 'ూఓఓఓఓఓఓఓఓఉఉఓఓఓఓఓఓఓఓఓఓఓఓ')
('<vaastavaanaki>_______________', '<వాస్తవానకి>___________', 'ూచఔఇఇఇఇఇఇఉఓఓఓఓఓఓఓఓఓఓఓఓఓ')
('<siggupadadaniki>_____________', '<సిగ్గుపడడానికి>_______', 'ూలలసతఉతళళళఉఉఔఉబఔఉఓఓఓఓఓఓ')
('<oteyavaa>____________________', '<ఓటేయవా>_______________', 'ూలలఓతతఉఉఉఓఓఓఓఓఓఓఓఓఓఓఓఓఓ')
('<poyedemunda>_________________', '<పోయేదేముంద>___________', 'ూఔఔఓాఓఓఓతతఓఓఓఓఓఓఓఓఓఓఓఓఓ')
('<nirmaanayatnaalaloo>_________', '<నిర్మాణయత్నాలలో>______', 'చఔఔఔఔఉఉఉఏఏళఱఉఔఔఔఔఉఉఉఓఓఓ')
('<durber>______________________', '<డర్బర్>_______________', 'ూఔఓఓఓఉఉఉఉఓఓఓఓఓఓఓఓఓఓఓఓఓఓ')
('<tirunavukkarsar>_____________', '<తిరునావుక్కరసర్>______', 'ూఔఔఔఔఔఔఔఔళళఉఉఉఉఉఉఉఉఓఓఓఓ')
('<viphalamayyevaaru>___________', '<విఫలమయ్యేవారు>________', 'ూఔఔఔఔఔఔళాాతఉఉఉఉఉఉఓఓఓఓథథ')
('<antukuntadi>_________________', '<అంటుకుంటది>___________', 'ూఓఓలలఓళళలఓఓఓఓఓఓఓఓఓఓఓఓఓఓ')
('<aavaasapraantaalu>___________', '<ఆవాసప్రాంతాలు>________', 'చలలలఈధఇఉఉఉఉఉోఔఔఔఓఓఓఓఓఓథ')
('<ahalaku>__________

Traceback (most recent call last):
  File "/tmp/ipykernel_25/3376784685.py", line 28, in main
    train(config, data, device, train_dataloader, val_dataloader, True)
  File "/tmp/ipykernel_25/2918762232.py", line 4, in train
    train_loop(encoder, decoder,h_params, data, data_loader,device, val_dataloader, use_teacher_forcing)
  File "/tmp/ipykernel_25/970494238.py", line 58, in train_loop
    val_acc, val_loss = evaluate(encoder, decoder, data, val_dataloader,device, h_params, loss_fn, False)
  File "/tmp/ipykernel_25/168279160.py", line 16, in evaluate
    decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, use_teacher_forcing)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, 

In [14]:
import torch
torch.cuda.empty_cache()
import gc
gc.collect()

15860