In [94]:
from __future__ import unicode_literals, print_function, division
import unicodedata
import numpy as np
import pandas as pd
from io import open
import random
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import random
import wandb
import re

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [95]:
wandb.login()

True

## DATA PROCESSING

In [96]:
SOS_token = 0
EOS_token = 1

class Lang:
    def __init__(self, name):
        self.name = name
        self.chr2index = {}
        self.chr2count = {}
        self.index2chr = {0: "$", 1: "#"}
        self.n_chrs = 2 

    def addword(self, word):
        for ch in word:
            self.addchr(ch)

    def addchr(self, ch):
        if ch not in self.chr2index:
            self.chr2index[ch] = self.n_chrs
            self.chr2count[ch] = 1
            self.index2chr[self.n_chrs] = ch
            self.n_chrs += 1
        else:
            self.chr2count[ch] += 1

In [97]:
MAX_LENGTH = 25
# def unicodeToAscii(s):
#     return ''.join(
#         c for c in unicodedata.normalize('NFD', s)
#         if unicodedata.category(c) != 'Mn'
#     )


# def normalizeString(s):
#     s = unicodeToAscii(s.strip())
#     s = re.sub(r"([.!?])", r" \1", s)
#     s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
#     return s

In [98]:

def readLangs(lang1,lang2):
    pairs = ()
    for i in ['train','valid','test']:
        fd = open('aksharantar_sampled/'+lang2+'/'+lang2+'_'+i+'.csv')
        lines = fd.read().strip().split('\n')
        pairs += ([[s for s in l.split(',')] for l in lines],)
    return pairs
    

In [99]:
input_lang = Lang("eng")
output_lang = Lang("mni")
train_pairs,valid_pairs,test_pairs = readLangs("eng","mni")

for pair in train_pairs:
    input_lang.addword(pair[0])
    output_lang.addword(pair[1])
    

In [100]:
def indexesFromWord(lang, word):
    return [lang.chr2index[ch] for ch in word]


def tensorFromWord(lang,word):
    indexes = indexesFromWord(lang, word)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromWord(input_lang, pair[0])
    target_tensor = tensorFromWord(output_lang, pair[1])
    return (input_tensor, target_tensor)

## ENCODER

In [101]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_layer_size, num_encoder_layers, cell_type, dropout_prob, bidirectional):
      super(EncoderRNN, self).__init__()
      self.input_size = input_size
      self.hidden_layer_size = hidden_layer_size
      self.num_encoder_layers = num_encoder_layers
      self.cell_type = cell_type

      self.embedding = nn.Embedding(self.input_size, embedding_size)

      if cell_type == 'RNN':
        self.rnn = nn.RNN(input_size = embedding_size, hidden_size = hidden_layer_size, num_layers = num_encoder_layers, dropout = dropout_prob, bidirectional = bidirectional)
      elif cell_type == 'LSTM':
        self.rnn = nn.LSTM(input_size = embedding_size, hidden_size = hidden_layer_size, num_layers = num_encoder_layers, dropout = dropout_prob, bidirectional = bidirectional)
      elif cell_type == 'GRU':
        self.rnn = nn.GRU(input_size = embedding_size, hidden_size = hidden_layer_size, num_layers = num_encoder_layers, dropout = dropout_prob, bidirectional = bidirectional)
      self.dropout = nn.Dropout(dropout_prob)
      self.D = 1
      if bidirectional == True :
        self.D = 2

    def forward(self, input_tensor, prev_hidden, prev_cell = None):
      embedded = self.embedding(input_tensor).view(1,1,-1)
      embedded = self.dropout(embedded)
      if self.cell_type == 'RNN':
        output, hidden = self.rnn(embedded,prev_hidden)
      elif self.cell_type == 'LSTM':
        output, (hidden,cell) = self.rnn(embedded,(prev_hidden,prev_cell))
        return output, (hidden,cell)
      elif self.cell_type == 'GRU':
        output, hidden = self.rnn(embedded,prev_hidden)
        
      return output,hidden

    def initHidden(self):
      if self.cell_type == 'LSTM':
        hidden = torch.zeros(self.D*self.num_encoder_layers,1,self.hidden_layer_size,device = device)
        cell = torch.zeros(self.D*self.num_encoder_layers,1,self.hidden_layer_size,device = device)
        return hidden,cell
      else :
        hidden = torch.zeros(self.D*self.num_encoder_layers,1,self.hidden_layer_size,device = device)
      return hidden


## DECODER

In [102]:
# class Decoder

class DecoderRNN(nn.Module):
  def __init__(self, output_size, embedding_size, hidden_layer_size, num_decoder_layers, cell_type, dropout_prob, bidirectional):
    super(DecoderRNN, self).__init__()
    self.output_size = output_size
    self.hidden_layer_size = hidden_layer_size
    self.num_decoder_layers = num_decoder_layers
    self.cell_type = cell_type
    self.embedding_size = embedding_size
    self.embedding = nn.Embedding(output_size, embedding_size)
   
    if cell_type == 'RNN':
      self.rnn = nn.RNN(input_size = embedding_size, hidden_size = hidden_layer_size, num_layers = num_decoder_layers, dropout = dropout_prob, bidirectional = bidirectional)
    elif cell_type == 'LSTM':
      self.rnn = nn.LSTM(input_size = embedding_size, hidden_size = hidden_layer_size, num_layers = num_decoder_layers, dropout = dropout_prob, bidirectional = bidirectional)
    elif cell_type == 'GRU':
      self.rnn = nn.GRU(input_size = embedding_size, hidden_size = hidden_layer_size, num_layers = num_decoder_layers, dropout = dropout_prob, bidirectional = bidirectional)
      
    self.D = 1
    if bidirectional == True :
      self.D = 2
      
    self.dropout = nn.Dropout(dropout_prob)
    self.out = nn.Linear(self.D*hidden_layer_size, output_size)
    self.softmax = nn.LogSoftmax(dim = 1)
      
  def forward(self, input_tensor, prev_hidden, prev_cell = None):
    embedded = self.embedding(input_tensor).view(1,1,-1)
    embedded = F.relu(embedded)
    embedded = self.dropout(embedded)
    
    if self.cell_type == 'RNN':
      output, hidden = self.rnn(embedded,prev_hidden)
      
    elif self.cell_type == 'LSTM':
      output,(hidden,cell) = self.rnn(embedded,(prev_hidden,prev_cell))
    
    elif self.cell_type == 'GRU':
      output, hidden = self.rnn(embedded,prev_hidden)

    output = self.softmax(self.out(output[0]))

    if self.cell_type == 'LSTM':
      return output,(hidden,cell)
    
    return output, hidden

In [103]:
def train(input_tensor,target_tensor,encoder,decoder,encoder_optimizer,decoder_optimizer,criterion,max_length = MAX_LENGTH,teacher_forcing_ratio = 0.5):
    
    if encoder.cell_type == 'LSTM':
      encoder_hidden,encoder_cell = encoder.initHidden()
    else :
      encoder_hidden = encoder.initHidden()
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    encoder_outputs = torch.zeros(max_length, encoder.D*encoder.hidden_layer_size, device=device)
    
    loss = 0
    
    for ei in range(input_length):
      if encoder.cell_type == 'LSTM':
        encoder_output,(encoder_hidden,encoder_cell) = encoder(input_tensor = input_tensor[ei],prev_hidden = encoder_hidden,prev_cell = encoder_cell)
      else :
        encoder_output, encoder_hidden = encoder(input_tensor = input_tensor[ei], prev_hidden = encoder_hidden)
      # print(encoder_output.size())
      encoder_outputs[ei] = encoder_output[0][0]
      
    decoder_input = torch.tensor([[SOS_token]],device=device)
    # decoder_outputs = torch.zeros(target_length)
    decoder_hidden = encoder_hidden
    if decoder.cell_type == 'LSTM':
      decoder_cell = encoder_cell
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing :
      for di in range(target_length):
        if decoder.cell_type == 'LSTM':
          decoder_output,(decoder_hidden,decoder_cell) = decoder(input_tensor = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
        else:
          decoder_output,decoder_hidden = decoder(decoder_input,decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])
        decoder_input = target_tensor[di]

    else :
      for di in range(target_length):
        if decoder.cell_type == 'LSTM':
          decoder_output,(decoder_hidden,decoder_cell) = decoder(input_tensor = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
        else:
          decoder_output,decoder_hidden = decoder(decoder_input,decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()
        loss += criterion(decoder_output, target_tensor[di])
        if decoder_input.item() == EOS_token:
          break
    
    loss.backward()
    
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
        
      

In [104]:
training_pairs = [tensorsFromPair(pair) for pair in train_pairs]
def trainIters(encoder,decoder,encoder_optimizer,decoder_optimizer,criterion):
    
    # training_pairs = [tensorsFromPair(pair) for pair in train_pairs]
    training_loss = 0
    it = 0
    for pair in training_pairs:
        input_tensor = pair[0]
        target_tensor = pair[1]
        loss = train(input_tensor,target_tensor,encoder,decoder,encoder_optimizer,decoder_optimizer,criterion)
        training_loss += loss
        if it%200==0:
            print(it,"done")
        it += 1
    return training_loss/len(training_pairs)

In [105]:
def evaluate(encoder,decoder,word,target = None,criterion = None,max_length = MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromWord(input_lang,word)
        input_length = input_tensor.size()[0]
        target_length = 0
        if target != None:
            target_tensor = tensorFromWord(output_lang,target)
            target_length = target_tensor.size(0)
        
        loss = 0
        
        if encoder.cell_type == 'LSTM':
            encoder_hidden,encoder_cell = encoder.initHidden()
        else :
            encoder_hidden = encoder.initHidden()
            
        encoder_outputs = torch.zeros(max_length,encoder.D*encoder.hidden_layer_size,device=device)
        
        for ei in range(input_length):
            if encoder.cell_type == 'LSTM':
                encoder_output,(encoder_hidden,encoder_cell) = encoder(input_tensor = input_tensor[ei],prev_hidden = encoder_hidden,prev_cell = encoder_cell)
            else :
                encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]
        
        decoder_input = torch.tensor([[SOS_token]],device=device)
        decoder_hidden = encoder_hidden
        if decoder.cell_type == 'LSTM':
            decoder_cell = encoder_cell
            
        decoded_word = ''
        

        for di in range(max_length):
            if decoder.cell_type == 'LSTM':
                decoder_output,(decoder_hidden,decoder_cell) = decoder(input_tensor = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
            else:
                decoder_output,decoder_hidden = decoder(decoder_input,decoder_hidden)
            topv, topi = decoder_output.topk(1)
            
            if di<target_length:
                loss += criterion(decoder_output,target_tensor[di])
                
            if topi.item() == EOS_token:
                decoded_word += '#'
                break
            else :
                decoded_word += output_lang.index2chr[topi.item()]
                
            decoder_input = topi.squeeze().detach()
        
        return decoded_word,loss
            
def evaluateRandomly(encoder,decoder,n=10):
    for i in range(n):
        pair = random.choice(train_pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_word = evaluate(encoder, decoder, pair[0])
        print('<', output_word)
        print('')

def eval_acc(encoder,decoder,pairs,criterion = None):
    count = 0
    tot_loss = 0
    for pair in pairs:
        pred_word,loss = evaluate(encoder,decoder,pair[0],target=pair[1],criterion=criterion)
        tot_loss += loss
        if pred_word[:-1] == pair[1] : count += 1
    return float(count/len(pairs)),tot_loss

def run(encoder,decoder,encoder_optimizer,decoder_optimizer,criterion,epochs):
    for epoch in range(epochs): 
        train_loss = trainIters(encoder=encoder,decoder=decoder,encoder_optimizer=encoder_optimizer,decoder_optimizer=decoder_optimizer,criterion = criterion)
        train_acc,_ = eval_acc(encoder,decoder,random.sample(train_pairs,1000),criterion)
        valid_acc,valid_loss = eval_acc(encoder,decoder,valid_pairs,criterion)
        print("Epoch : %d, Training Loss : %f, Training Accuracy : %f, Validation Loss = %f, Validation Accuracy : %f" % (epoch+1,train_loss,train_acc,valid_loss,valid_acc)) 
        wandb.log({ "training_accuracy" : train_acc,
                    "validation_accuracy" : valid_acc,
                    "training_loss" : train_loss,
                    "validation_loss" : valid_loss,
                    "epoch" : epoch+1})
        

## Training 

In [106]:
# input_size = input_lang.n_chrs
# output_size = output_lang.n_chrs
# embedding_size = 64
# hidden_layer_size = 64
# num_layers = 2
# cell_type = 'LSTM'
# dropout_prob = 0.05
# learning_rate = 0.01
# bidirectional = False
# epochs = 15

# encoder = EncoderRNN(input_size=input_size,embedding_size=embedding_size,hidden_layer_size=hidden_layer_size,num_encoder_layers=num_layers,cell_type=cell_type,dropout_prob=dropout_prob,bidirectional=bidirectional).to(device)
# decoder = DecoderRNN(output_size=output_size,embedding_size=embedding_size,hidden_layer_size=hidden_layer_size,num_decoder_layers=num_layers,cell_type=cell_type,dropout_prob=dropout_prob,bidirectional=bidirectional).to(device)
# encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
# decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
# criterion = nn.NLLLoss()
# for epoch in range(epochs): 
#     train_loss = trainIters(encoder=encoder,decoder=decoder,encoder_optimizer=encoder_optimizer,decoder_optimizer=decoder_optimizer,criterion = criterion)
#     train_acc,_ = eval_acc(encoder,decoder,train_pairs[0:10],criterion)
#     valid_acc,valid_loss = eval_acc(encoder,decoder,valid_pairs[0:10],criterion)
#     print("Epoch : %d, Training Loss : %f, Trainging Accuracy : %f, Validation Loss = %f, Validation Accuracy : %f" % (epoch+1,train_loss,train_acc,valid_loss,valid_acc)) 

In [107]:
def train_model():
    config_defaults = {
        "embedding_size" : 128,
        "hidden_layer_size" : 128,
        "num_layers" : 2,
        "cell_type" : 'GRU',
        "dropout_prob" : 0.1,
        "learning_rate" : 0.001,
        "bidirectional" : False,
        "epochs" : 10,
    }
    
    wandb.init(config=config_defaults,dir='/home/arunesh/.local/lib/python3.8/site-packages')
    config = wandb.config
    input_size = input_lang.n_chrs
    output_size = output_lang.n_chrs
    embedding_size = config.embedding_size
    hidden_layer_size = config.hidden_layer_size
    num_layers = config.num_layers
    cell_type = config.cell_type
    dropout_prob = config.dropout_prob
    learning_rate = config.learning_rate
    bidirectional = config.bidirectional
    epochs = config.epochs
    hidden_layer_size = 128
    bidirectional = False
    num_layers = 2
    encoder = EncoderRNN(input_size=input_size,embedding_size=embedding_size,hidden_layer_size=hidden_layer_size,num_encoder_layers=num_layers,cell_type=cell_type,dropout_prob=dropout_prob,bidirectional=bidirectional).to(device)
    decoder = DecoderRNN(output_size=output_size,embedding_size=embedding_size,hidden_layer_size=hidden_layer_size,num_decoder_layers=num_layers,cell_type=cell_type,dropout_prob=dropout_prob,bidirectional=bidirectional).to(device)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()
    
    # num_layers = 3
    
    print("started")
    for epoch in range(epochs): 
        # train_loss = trainIters(encoder=encoder,decoder=decoder,encoder_optimizer=encoder_optimizer,decoder_optimizer=decoder_optimizer,criterion = criterion)
        training_loss = 0
        it = 0
        for pair in training_pairs:
            input_tensor = pair[0]
            target_tensor = pair[1]
            loss = train(input_tensor,target_tensor,encoder,decoder,encoder_optimizer,decoder_optimizer,criterion)
            training_loss += loss
            if it%200==0:
                print(it,"done")
            it += 1
        train_loss = training_loss/len(training_pairs)
        train_acc,_ = eval_acc(encoder,decoder,random.sample(train_pairs,1000),criterion)
        valid_acc,valid_loss = eval_acc(encoder,decoder,valid_pairs,criterion)
        print("Epoch : %d, Training Loss : %f, Training Accuracy : %f, Validation Loss = %f, Validation Accuracy : %f" % (epoch+1,train_loss,train_acc,valid_loss,valid_acc)) 
        wandb.log({ "training_accuracy" : train_acc,
                    "validation_accuracy" : valid_acc,
                    "training_loss" : train_loss,
                    "validation_loss" : valid_loss,
                    "epoch" : epoch+1})
        
    run_name = "es_{}_hl_{}_nl_{}_ct_{}_dp_{}_lr_{}_bi_{}_ep_{}".format(embedding_size,hidden_layer_size,num_layers,cell_type,dropout_prob,learning_rate,bidirectional,epochs)
    wandb.run.name = run_name
    wandb.run.save()
    

In [108]:
sweep_config1 = {
    # "name" : "assignment_sweeps",
    "method" : "bayes",
    "metric" :{
        "name" : "validation_accuracy",
        "goal" : "maximize"
    },
    "parameters" : {
        "embedding_size" : {
            "values" : [128,256]
        },
        "num_layers" : {
            "values" : [2,3,4]
        },
        "hidden_layer_size" : {
            "values" : [128,256]
        },
        "learning_rate" : {
            "values" : [5e-3,1e-3]
        },
        "cell_type" : {
           "values" : ['LSTM', 'GRU'] 
        },
        "dropout" : {
            "values" : [0.1,0.2]
        },
        "bidirectional": {
            "values" : [True,False]
        },
        "epochs": {
            "values" : [10]
        }
    }
}

sweep_config2 = {
    # "name" : "assignment_sweeps",
    "method" : "bayes",
    "metric" :{
        "name" : "validation_accuracy",
        "goal" : "maximize"
    },
    "parameters" : {
        "embedding_size" : {
            "values" : [128,256]
        },
        "num_layers" : {
            "values" : [2,3,4]
        },
        "hidden_layer_size" : {
            "values" : [128,256]
        },
        "learning_rate" : {
            "values" : [5e-3,1e-3]
        },
        "cell_type" : {
           "values" : ['RNN'] 
        },
        "dropout" : {
            "values" : [0.1,0.2]
        },
        "bidirectional": {
            "values" : [True,False]
        },
        "epochs": {
            "values" : [10]
        }
    }
}


In [45]:
sweep_id = wandb.sweep(sweep_config2,project="Assignment_3")
wandb.agent(sweep_id = sweep_id,function = train_model,count = 1)
wandb.finish()

Create sweep with ID: xgtu9y0v
Sweep URL: https://wandb.ai/assignment3_team/Assignment_3/sweeps/xgtu9y0v


wandb: Agent Starting Run: 087fiw8a with config:
wandb: 	bidirectional: False
wandb: 	cell_type: RNN
wandb: 	dropout: 0.1
wandb: 	embedding_size: 256
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 128
wandb: 	learning_rate: 0.001
wandb: 	num_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-18 23:43:27.857111: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-18 23:43:28.037454: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-18 23:43:28.037

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 2.560870, Training Accuracy : 0.000000, Validation Loss = 50897.324219, Validation Accuracy : 0.000000
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 



Epoch : 10, Training Loss : 2.352442, Training Accuracy : 0.000000, Validation Loss = 51101.117188, Validation Accuracy : 0.000000


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▁▁▁▁▁▁▁▁
training_loss,█▄▃▄▄▄▅▃▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁
validation_loss,▁▁█▁▃█▆▅▄▁

0,1
epoch,10.0
training_accuracy,0.0
training_loss,2.35244
validation_accuracy,0.0
validation_loss,51101.11719


In [62]:
sweep_id = wandb.sweep(sweep_config1,project="Assignment_3")
wandb.agent(sweep_id = sweep_id,function = train_model,count = 25)
wandb.finish()

Create sweep with ID: acvnhrek
Sweep URL: https://wandb.ai/assignment3_team/Assignment_3/sweeps/acvnhrek


wandb: Agent Starting Run: 4vqhvswf with config:
wandb: 	bidirectional: False
wandb: 	cell_type: GRU
wandb: 	dropout: 0.1
wandb: 	embedding_size: 256
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 128
wandb: 	learning_rate: 0.001
wandb: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 02:28:13.676471: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 02:28:13.858674: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 02:28:13.858

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 1.287608, Training Accuracy : 0.298000, Validation Loss = 26059.335938, Validation Accuracy : 0.294172
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▅▄▅▆▆█▆▇
training_loss,█▃▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▇▅▇▇▇███
validation_loss,█▅▂▄▂▃▂▁▂▂

0,1
epoch,10.0
training_accuracy,0.496
training_loss,0.56585
validation_accuracy,0.44724
validation_loss,22018.49414


wandb: Agent Starting Run: vlwag8af with config:
wandb: 	bidirectional: True
wandb: 	cell_type: GRU
wandb: 	dropout: 0.2
wandb: 	embedding_size: 256
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 256
wandb: 	learning_rate: 0.001
wandb: 	num_layers: 4
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 04:01:18.176588: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 04:01:18.329029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 04:01:18.3290

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 1.619131, Training Accuracy : 0.131000, Validation Loss = 35600.574219, Validation Accuracy : 0.127914
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▂▃▄▂▃▇▆█
training_loss,█▃▂▂▂▂▂▁▁▁
validation_accuracy,▁▁▃▃▄▃▅███
validation_loss,█▅▅▄▅▄▄▁▂▂

0,1
epoch,10.0
training_accuracy,0.212
training_loss,0.99608
validation_accuracy,0.20675
validation_loss,28076.38672


wandb: Agent Starting Run: cwlqfdnr with config:
wandb: 	bidirectional: False
wandb: 	cell_type: GRU
wandb: 	dropout: 0.2
wandb: 	embedding_size: 128
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 128
wandb: 	learning_rate: 0.001
wandb: 	num_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 13:55:29.129415: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 13:55:29.271795: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 13:55:29.271

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 1.252422, Training Accuracy : 0.417000, Validation Loss = 22727.511719, Validation Accuracy : 0.376074
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▇█▇▇▅█▆
training_loss,█▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▄▆███▇▇▇▇
validation_loss,█▅▄▁▁▁▂▃▂▂

0,1
epoch,10.0
training_accuracy,0.522
training_loss,0.50695
validation_accuracy,0.49724
validation_loss,18564.84375


wandb: Agent Starting Run: 6kdk6x1l with config:
wandb: 	bidirectional: False
wandb: 	cell_type: GRU
wandb: 	dropout: 0.2
wandb: 	embedding_size: 256
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 256
wandb: 	learning_rate: 0.001
wandb: 	num_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 15:45:48.401763: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 15:45:48.548208: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 15:45:48.548

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done


wandb: Ctrl + C detected. Stopping sweep.
Exception in thread Thread-63:
Traceback (most recent call last):
  File "/home/arunesh/.local/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 300, in _run_job
    self._function()
  File "/tmp/ipykernel_86670/1242583174.py", line 42, in train_model
  File "/tmp/ipykernel_86670/457545680.py", line 55, in train
  File "/home/arunesh/.local/lib/python3.8/site-packages/torch/_tensor.py", line 487, in backward
    torch.autograd.backward(
  File "/home/arunesh/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 200, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Exception

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kw

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [109]:
sweep_id = wandb.sweep(sweep_config1,project="Assignment_3")
wandb.agent(sweep_id = sweep_id,function = train_model,count = 25)
wandb.finish()

Create sweep with ID: a6f99thc
Sweep URL: https://wandb.ai/assignment3_team/Assignment_3/sweeps/a6f99thc


wandb: Agent Starting Run: u2dwdu0c with config:
wandb: 	bidirectional: False
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	embedding_size: 128
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 128
wandb: 	learning_rate: 0.005
wandb: 	num_layers: 4
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 15:53:48.749444: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 15:53:48.903188: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 15:53:48.90

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 1.733602, Training Accuracy : 0.101000, Validation Loss = 37212.050781, Validation Accuracy : 0.107055
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▄▆▆▇▇▇▇█
training_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▄▅▆▆▇█▇▇█
validation_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
training_accuracy,0.346
training_loss,0.86898
validation_accuracy,0.29356
validation_loss,27911.61719


wandb: Agent Starting Run: 3vyy66e5 with config:
wandb: 	bidirectional: True
wandb: 	cell_type: GRU
wandb: 	dropout: 0.1
wandb: 	embedding_size: 128
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 128
wandb: 	learning_rate: 0.005
wandb: 	num_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 18:01:12.021389: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 18:01:12.184748: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 18:01:12.1848

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 1.880113, Training Accuracy : 0.031000, Validation Loss = 40242.289062, Validation Accuracy : 0.034969
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▄▄▅▆▆██▆
training_loss,█▄▃▃▂▂▁▁▁▁
validation_accuracy,▁▃▃▅▅▇▅▆██
validation_loss,▇█▅▄▁▂▂▂▂▁

0,1
epoch,10.0
training_accuracy,0.065
training_loss,1.30297
validation_accuracy,0.08742
validation_loss,35829.41797


wandb: Agent Starting Run: bx9wsuoi with config:
wandb: 	bidirectional: False
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	embedding_size: 128
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 128
wandb: 	learning_rate: 0.005
wandb: 	num_layers: 4
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 19:22:00.880856: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 19:22:01.018749: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 19:22:01.01

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 1.772974, Training Accuracy : 0.103000, Validation Loss = 36868.648438, Validation Accuracy : 0.115031
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▆▆▆▆▇█▇█
training_loss,█▄▂▂▂▁▁▁▁▁
validation_accuracy,▁▄▆▆▇▇████
validation_loss,█▅▃▃▂▁▁▁▁▁

0,1
epoch,10.0
training_accuracy,0.392
training_loss,0.77714
validation_accuracy,0.33804
validation_loss,25689.64453


wandb: Agent Starting Run: 75dxvk4m with config:
wandb: 	bidirectional: False
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	embedding_size: 128
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 256
wandb: 	learning_rate: 0.005
wandb: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 21:24:35.193487: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 21:24:35.341419: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 21:24:35.34

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 done
7400 done
7600 done
7800 done
8000 done
8200 done
8400 done
8600 done
8800 done
9000 done
9200 done
9400 done
9600 done
9800 done
10000 done
Epoch : 1, Training Loss : 1.716178, Training Accuracy : 0.112000, Validation Loss = 35634.609375, Validation Accuracy : 0.122086
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done
6000 done
6200 done
6400 done
6600 done
6800 done
7000 done
7200 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▄▆▆█▇▇▇█
training_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▅▅▆▇▇▇█▇█
validation_loss,█▃▄▂▂▁▂▂▂▁

0,1
epoch,10.0
training_accuracy,0.362
training_loss,0.77854
validation_accuracy,0.31994
validation_loss,26587.95117


wandb: Agent Starting Run: lh2qnvpv with config:
wandb: 	bidirectional: False
wandb: 	cell_type: GRU
wandb: 	dropout: 0.2
wandb: 	embedding_size: 256
wandb: 	epochs: 10
wandb: 	hidden_layer_size: 256
wandb: 	learning_rate: 0.001
wandb: 	num_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2023-05-19 23:25:55.037489: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 23:25:55.189316: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-19 23:25:55.189

started
0 done
200 done
400 done
600 done
800 done
1000 done
1200 done
1400 done
1600 done
1800 done
2000 done
2200 done
2400 done
2600 done
2800 done
3000 done
3200 done
3400 done
3600 done
3800 done
4000 done
4200 done
4400 done
4600 done
4800 done
5000 done
5200 done
5400 done
5600 done
5800 done


wandb: Ctrl + C detected. Stopping sweep.
Exception in thread Thread-119:
Traceback (most recent call last):
  File "/home/arunesh/.local/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 300, in _run_job
    self._function()
  File "/tmp/ipykernel_86670/4099483575.py", line 44, in train_model
  File "/tmp/ipykernel_86670/457545680.py", line 55, in train
  File "/home/arunesh/.local/lib/python3.8/site-packages/torch/_tensor.py", line 487, in backward
    torch.autograd.backward(
  File "/home/arunesh/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 200, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Exception

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._k

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…