In [568]:
import numpy as np
import pandas as pd
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import wandb

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# Tensors built later in this notebook are created on this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [569]:
# from google.colab import drive
# drive.mount('/content/gdrive')

In [570]:
# !unzip /content/gdrive/MyDrive/aksharantar_sampled.zip > /dev/null

In [571]:
# USE_CUDA = False

In [572]:
# Target-language code: selects the dataset sub-directory / file prefix and names the output vocabulary.
OUTPUT_LANGUAGE = 'mni'
# Word-length limit used by the sequence code (e.g. the default number of decoding steps in evaluate()).
MAX_WORD_LENGTH = 25

## READING DATA

In [573]:
# Load the train / validation / test splits.
# Every CSV is read without a header row; column 0 becomes the X_* list and
# column 1 the Y_* list (presumably plain strings, one word per row).
_SPLIT_PREFIX = 'aksharantar_sampled/' + OUTPUT_LANGUAGE + '/' + OUTPUT_LANGUAGE

train_data = pd.read_csv(_SPLIT_PREFIX + '_train.csv', header=None)
X_train, Y_train = list(train_data[0]), list(train_data[1])

validation_data = pd.read_csv(_SPLIT_PREFIX + '_valid.csv', header=None)
X_valid, Y_valid = list(validation_data[0]), list(validation_data[1])

test_data = pd.read_csv(_SPLIT_PREFIX + '_test.csv', header=None)
X_test, Y_test = list(test_data[0]), list(test_data[1])

In [574]:
# Reserved vocabulary indices for the start-of-sequence and end-of-sequence markers.
SOS_token = 0
EOS_token = 1


class Lang:
    """Character-level vocabulary for one language.

    Indices 0 and 1 are reserved up front and map to the marker characters
    "$" and "*". Every other character receives the next free index the
    first time it is seen. There is no unknown-character entry.
    """

    def __init__(self, name):
        self.name = name
        self.chr2index = {}                   # character -> index
        self.chr2count = {}                   # character -> occurrences seen so far
        self.index2chr = {0: "$", 1: "*"}     # index -> character (markers pre-filled)
        self.n_chrs = 2                       # next free index; counts the two markers

    def addchr(self, chr_):
        """Register one character, or bump its count if it is already known."""
        if chr_ in self.chr2index:
            self.chr2count[chr_] += 1
            return
        new_index = self.n_chrs
        self.chr2index[chr_] = new_index
        self.chr2count[chr_] = 1
        self.index2chr[new_index] = chr_
        self.n_chrs = new_index + 1

    def addword(self, word):
        """Register every character of `word`, in order."""
        for symbol in word:
            self.addchr(symbol)

    def add_word_list(self, word_lst):
        """Register every word of `word_lst`, in order."""
        for entry in word_lst:
            self.addword(entry)

In [575]:
# Input vocabulary, built from the training inputs only.
# NOTE: characters that occur only in the validation/test inputs will be absent from chr2index.
inp_lang = Lang("eng")
inp_lang.add_word_list(X_train)

# Output vocabulary, built from the training targets only.
out_lang = Lang(OUTPUT_LANGUAGE)
out_lang.add_word_list(Y_train)


## DATA PROCESSING

In [576]:

def get_one_hot(char_dict,word,len_alphabets):
  """Encode `word` as a 1-D tensor of character indices followed by EOS_token.

  Despite the name, the result holds integer indices rather than one-hot
  vectors. `len_alphabets` is accepted but not used. A character that is
  missing from `char_dict` raises KeyError. The tensor is created on the
  notebook-level `device`.
  """
  indices = [*(char_dict[symbol] for symbol in word), EOS_token]
  return torch.tensor(indices, device = device)

def data_processing(data,char_dict,len_chrs):
  """Encode every word of `data` with get_one_hot and return the results as a list, in input order."""
  return [get_one_hot(char_dict, item, len_chrs) for item in data]


In [577]:
# Vocabulary sizes; each count includes the two reserved SOS/EOS indices.
INP_LANG_SIZE = inp_lang.n_chrs
OUT_LANG_SIZE = out_lang.n_chrs
# print(INP_LANG_SIZE)
# print(OUT_LANG_SIZE)

In [578]:
# Number of training pairs to encode and train on. The notebook was developed
# on the first 200 pairs only; set this to None to use the whole training split.
N_TRAIN_SAMPLES = 200

# Each name below is a list holding one EOS-terminated index tensor per word.
# The training split is sliced *before* encoding so that words which would be
# discarded anyway are never converted to tensors.
inp_train = data_processing(X_train[:N_TRAIN_SAMPLES],inp_lang.chr2index,len(inp_lang.chr2index))
tgt_train = data_processing(Y_train[:N_TRAIN_SAMPLES],out_lang.chr2index,len(out_lang.chr2index))
inp_valid = data_processing(X_valid,inp_lang.chr2index,len(inp_lang.chr2index))
tgt_valid = data_processing(Y_valid,out_lang.chr2index,len(out_lang.chr2index))
inp_test = data_processing(X_test,inp_lang.chr2index,len(inp_lang.chr2index))
tgt_test = data_processing(Y_test,out_lang.chr2index,len(out_lang.chr2index))

In [579]:
# print(max([len(X_train[i]) for i in range(len(X_train))]))
# print(max([len(X_test[i]) for i in range(len(X_test))]))
# print(max([len(X_valid[i]) for i in range(len(X_valid))]))
# print(max([len(Y_train[i]) for i in range(len(Y_train))]))
# print(max([len(Y_test[i]) for i in range(len(Y_test))]))
# print(max([len(Y_valid[i]) for i in range(len(Y_valid))]))

## ENCODER

In [580]:
class Encoder(nn.Module):
    """Character-level recurrent encoder that consumes one input index per call.

    Args:
        input_size: number of rows in the embedding table (input vocabulary size).
        embedding_size: width of each character embedding.
        hidden_layer_size: hidden-state width of the recurrent cell.
        num_encoder_layers: number of stacked recurrent layers.
        cell_type: one of 'RNN', 'LSTM' or 'GRU'.
        dropout_prob: dropout probability for the embedding and, when there is
            more than one layer, between the stacked recurrent layers.
        bidirectional: whether the recurrent cell runs in both directions.

    Raises:
        ValueError: if `cell_type` is not one of the supported names.
    """

    _CELL_CLASSES = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}

    def __init__(self, input_size, embedding_size, hidden_layer_size, num_encoder_layers, cell_type, dropout_prob, bidirectional):
        super(Encoder, self).__init__()
        # Fail at construction time instead of building a module without `rnn`
        # that only breaks later inside forward().
        if cell_type not in self._CELL_CLASSES:
            raise ValueError("cell_type must be one of %s, got %r" % (sorted(self._CELL_CLASSES), cell_type))

        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.num_encoder_layers = num_encoder_layers
        self.cell_type = cell_type

        self.embedding = nn.Embedding(self.input_size, embedding_size)
        self.rnn = self._CELL_CLASSES[cell_type](
            input_size = embedding_size,
            hidden_size = hidden_layer_size,
            num_layers = num_encoder_layers,
            # Recurrent-layer dropout only acts between stacked layers; PyTorch
            # warns when it is requested for a single layer, so pass 0 there.
            dropout = dropout_prob if num_encoder_layers > 1 else 0.0,
            bidirectional = bidirectional)
        self.dropout = nn.Dropout(dropout_prob)
        # Number of directions; scales the first dimension of the hidden state.
        self.D = 2 if bidirectional else 1

    def forward(self, input, prev_hidden, prev_cell = None):
        """Run one time step.

        `input` is reshaped so that its embedding forms a (1, 1, -1) tensor,
        i.e. a single step for a single sequence. Returns `(output, hidden)`,
        or `(output, (hidden, cell))` for an LSTM.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        if self.cell_type == 'LSTM':
            output, (hidden, cell) = self.rnn(embedded, (prev_hidden, prev_cell))
            return output, (hidden, cell)
        output, hidden = self.rnn(embedded, prev_hidden)
        return output, hidden

    def init_hidden(self):
        """Return an all-zero initial state: `hidden`, or `(hidden, cell)` for an LSTM.

        The state is allocated on the same device as the module's own
        parameters, so it always matches wherever the encoder has been moved.
        """
        state_shape = (self.D*self.num_encoder_layers, 1, self.hidden_layer_size)
        state_device = self.embedding.weight.device
        hidden = torch.zeros(state_shape, device = state_device)
        if self.cell_type == 'LSTM':
            cell = torch.zeros(state_shape, device = state_device)
            return hidden, cell
        return hidden


## DECODER

In [581]:
# class Decoder

class Decoder(nn.Module):
    """Character-level recurrent decoder that emits one output distribution per call.

    Args:
        output_size: output vocabulary size; sizes both the embedding table
            and the final projection.
        embedding_size: width of each character embedding.
        hidden_layer_size: hidden-state width of the recurrent cell.
        num_layers: number of stacked recurrent layers.
        cell_type: one of 'RNN', 'LSTM' or 'GRU'.
        dropout_prob: dropout probability for the embedding and, when there is
            more than one layer, between the stacked recurrent layers.
        bidirectional: whether the recurrent cell runs in both directions.

    Raises:
        ValueError: if `cell_type` is not one of the supported names.
    """

    _CELL_CLASSES = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}

    def __init__(self, output_size, embedding_size, hidden_layer_size, num_layers, cell_type, dropout_prob, bidirectional):
        super(Decoder, self).__init__()
        # Fail at construction time instead of building a module without `rnn`.
        if cell_type not in self._CELL_CLASSES:
            raise ValueError("cell_type must be one of %s, got %r" % (sorted(self._CELL_CLASSES), cell_type))

        self.output_size = output_size
        self.hidden_layer_size = hidden_layer_size
        self.num_layers = num_layers
        self.cell_type = cell_type
        self.embedding_size = embedding_size
        # Size the table from the constructor argument rather than from a
        # notebook-level global, so the class works for any vocabulary.
        self.embedding = nn.Embedding(output_size, embedding_size)
        # Number of directions; scales the width of the recurrent output.
        self.D = 2 if bidirectional else 1

        self.rnn = self._CELL_CLASSES[cell_type](
            input_size = embedding_size,
            hidden_size = hidden_layer_size,
            num_layers = num_layers,
            # Recurrent-layer dropout only acts between stacked layers; PyTorch
            # warns when it is requested for a single layer, so pass 0 there.
            dropout = dropout_prob if num_layers > 1 else 0.0,
            bidirectional = bidirectional)

        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(self.D*hidden_layer_size, output_size)
        # forward() feeds a 1-D score vector, hence the softmax over dim 0.
        self.softmax = nn.LogSoftmax(dim = 0)

    def forward(self, input, prev_hidden, prev_cell = None):
        """Run one decoding step.

        `input` is reshaped so that its embedding forms a (1, 1, -1) tensor.
        Returns `(y_pred, hidden)`, or `(y_pred, (hidden, cell))` for an LSTM,
        where `y_pred` is a 1-D vector of `output_size` log-probabilities.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = F.relu(embedded)
        embedded = self.dropout(embedded)

        if self.cell_type == 'LSTM':
            output, (hidden, cell) = self.rnn(embedded, (prev_hidden, prev_cell))
        else:
            output, hidden = self.rnn(embedded, prev_hidden)

        # output is (1, 1, D*hidden); project the single step to vocabulary scores.
        y_pred = self.softmax(self.fc(output[0][0]))

        if self.cell_type == 'LSTM':
            return y_pred, (hidden, cell)
        return y_pred, hidden

## SEQ2SEQ

In [582]:


# class Seq2Seq(nn.Module):
#   def __init__(self, encoder, decoder, learning_rate, clip = 5.0, teacher_forcing_ratio = 0.5):
#     super(Seq2Seq, self).__init__()
#     self.encoder = encoder
#     self.decoder = decoder
#     self.teacher_forcing_ratio = teacher_forcing_ratio
#     self.encoder_optimizer = optim.Adam(self.encoder.parameters(), lr = learning_rate)
#     self.decoder_optimizer = optim.Adam(self.decoder.parameters(), lr = learning_rate)
#     self.encoder_optimizer.zero_grad()
#     self.decoder_optimizer.zero_grad()
#     self.criterion = nn.NLLLoss()
#     self.clip = clip
#     self.teacher_forcing_ratio = teacher_forcing_ratio
#     # self.batch_size = batch_size
      
#   def forward(self, input, target):
#     self.encoder_optimizer.zero_grad()
#     self.decoder_optimizer.zero_grad()
#     inp_len = input.size()[0]
#     tgt_len = target.size()[0]
#     loss = 0

#     # Encoder
#     encoder_outputs = torch.zeros(MAX_WORD_LENGTH,self.encoder.D*self.encoder.hidden_layer_size)
#     if self.encoder.cell_type == 'LSTM':
#       encoder_hidden,encoder_cell = self.encoder.init_hidden()
#     else :
#       encoder_hidden = self.encoder.init_hidden()
    
#     for ei in range(inp_len):
#       if self.encoder.cell_type == 'LSTM':
#         encoder_output,(encoder_hidden,encoder_cell) = self.encoder(input = input[i],prev_hidden = encoder_hidden,prev_cell = encoder_cell)
#       else :
#         encoder_output, encoder_hidden = self.encoder(input[ei],encoder_hidden)
#       encoder_outputs[ei] = encoder_output[0][0]

#     # Decoder
#     decoder_input = torch.LongTensor([[SOS_token]])
#     decoder_hidden = encoder_hidden
    
#     if self.decoder.cell_type == 'LSTM':
#       decoder_cell = encoder_cell
      
#     # With Teacher Forcing
#     use_teacher_forcing = random.random() < self.teacher_forcing_ratio
#     # use_teacher_forcing = True
#     if use_teacher_forcing:
#       for i in range(tgt_len):
#         # print(decoder_input.size(),decoder_hidden.size())
#         if self.decoder.cell_type == 'LSTM':
#           decoder_output,(decoder_hidden,decoder_cell) = self.decoder(input = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
#         else:
#           decoder_output,decoder_hidden = self.decoder(decoder_input,decoder_hidden)
#         # print(decoder_output.size(),target[i].size())
#         loss += self.criterion(decoder_output[0],target[i])
#         decoder_input = target[i]
#     else :
#       for i in range(tgt_len):
#         # print(decoder_input.size(),decoder_hidden.size())
#         if self.decoder.cell_type == 'LSTM':
#           decoder_output,(decoder_hidden,decoder_cell) = self.decoder(input = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
#         else:
#           decoder_output,decoder_hidden = self.decoder(decoder_input,decoder_hidden)
#         # print(decoder_output.size(),target[i].size())
#         loss += self.criterion(decoder_output[0],target[i])
#         topv,topi = decoder_output[0].topk(1)
#         ni = topi[0]
#         decoder_input = torch.LongTensor([[ni]])
#         if ni == EOS_token : 
#           break
#     loss.backward()
#     # torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), self.clip)
#     # torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), self.clip)
#     self.encoder_optimizer.step()
#     self.decoder_optimizer.step()
#     return loss/tgt_len

#   def predict(self,input):
#     with torch.no_grad():
#       inp_len = input.size()[0]
      
#       # Encoder
#       encoder_outputs = torch.zeros(MAX_WORD_LENGTH,self.encoder.D*self.encoder.hidden_layer_size)
#       if self.encoder.cell_type == 'LSTM':
#         encoder_hidden,encoder_cell = self.encoder.init_hidden()
#       else :
#         encoder_hidden = self.encoder.init_hidden()
        
#       for ei in range(inp_len):
#         if self.encoder.cell_type == 'LSTM':
#           encoder_output,(encoder_hidden,encoder_cell) = self.encoder(input = input[i],prev_hidden = encoder_hidden,prev_cell = encoder_cell)
#         else :
#           encoder_output, encoder_hidden = self.encoder(input[ei],encoder_hidden)
#         encoder_outputs[ei] += encoder_output[0,0]
        
#       decoder_input = torch.LongTensor([[SOS_token]])
#       decoder_hidden = encoder_hidden
#       if self.decoder.cell_type == 'LSTM':
#         decoder_cell = encoder_cell
      
#       decoded_words = []
#       # decoder_outputs = []
#       pred_word = ''
#       for i in range(MAX_WORD_LENGTH):
#         if self.decoder.cell_type == 'LSTM':
#           decoder_output,(decoder_hidden,decoder_cell) = self.decoder(input = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
#         else:
#           decoder_output,decoder_hidden = self.decoder(decoder_input,decoder_hidden)
#         topv,topi = decoder_output[0].topk(1)
#         ni = topi[0].item()
#         decoder_input = torch.LongTensor([[ni]])
#         decoded_words.append(ni)
#         pred_word += out_lang.index2chr[ni]
#         if ni == EOS_token : 
#           break
        
#       return (torch.LongTensor(decoded_words),pred_word)


## Train

In [583]:
def train(input_tensor,target_tensor,encoder,decoder,encoder_optimizer,decoder_optimizer,criterion,teacher_forcing_ratio = 0.5):
    """Run one optimisation step on a single (input word, target word) pair.

    Args:
        input_tensor: 1-D tensor of input character indices.
        target_tensor: 1-D tensor of target character indices.
        encoder, decoder: modules exposing `cell_type`; the encoder also
            provides `init_hidden()`. The decoder starts from the encoder's
            final state, so an LSTM decoder requires an LSTM encoder.
        encoder_optimizer, decoder_optimizer: optimizers for the two modules.
        criterion: loss applied to each decoder output and its target index.
        teacher_forcing_ratio: probability of feeding the ground-truth
            character (instead of the model's own prediction) as the next
            decoder input for this word.

    Returns:
        The summed step loss divided by the target length, as a tensor that is
        detached from the autograd graph.
    """
    if encoder.cell_type == 'LSTM':
        encoder_hidden,encoder_cell = encoder.init_hidden()
    else:
        encoder_hidden = encoder.init_hidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    loss = 0

    # Encoder: only the final state is needed. The per-step outputs are not
    # stored, so words of any length can be encoded.
    for ei in range(input_length):
        if encoder.cell_type == 'LSTM':
            _,(encoder_hidden,encoder_cell) = encoder(input = input_tensor[ei],prev_hidden = encoder_hidden,prev_cell = encoder_cell)
        else:
            _,encoder_hidden = encoder(input_tensor[ei],encoder_hidden)

    # Decoder: start from SOS on the same device as the data.
    decoder_input = torch.tensor([[SOS_token]],device = input_tensor.device)
    decoder_hidden = encoder_hidden
    if decoder.cell_type == 'LSTM':
        decoder_cell = encoder_cell

    # The teacher-forcing decision is made once per word.
    use_teacher_forcing = random.random() < teacher_forcing_ratio
    for i in range(target_length):
        if decoder.cell_type == 'LSTM':
            decoder_output,(decoder_hidden,decoder_cell) = decoder(input = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
        else:
            decoder_output,decoder_hidden = decoder(decoder_input,decoder_hidden)
        loss += criterion(decoder_output,target_tensor[i])

        if use_teacher_forcing:
            decoder_input = target_tensor[i]
        else:
            # Feed back the most likely character; stop once it is EOS.
            topv,topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    # Detach so callers that accumulate the value do not keep the graph alive.
    return loss.detach()/target_length
  
def evaluate(encoder,decoder,input_word,max_length = MAX_WORD_LENGTH):
    """Greedily decode the model's output for one input word.

    Args:
        encoder, decoder: trained modules (see `train`).
        input_word: raw input string; it is encoded with the input vocabulary,
            so a character unknown to that vocabulary raises KeyError.
        max_length: maximum number of decoding steps.

    Returns:
        The decoded string. When the decoder emits EOS within `max_length`
        steps, the EOS marker character is the last character of the result.

    Both modules are switched to eval mode for the duration of the call, so
    dropout is disabled and the prediction is deterministic; their previous
    training/eval mode is restored afterwards.
    """
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = get_one_hot(inp_lang.chr2index,input_word,None)
            input_length = input_tensor.size(0)

            # Encoder: only the final state is needed, so inputs longer than
            # MAX_WORD_LENGTH are fine.
            if encoder.cell_type == 'LSTM':
                encoder_hidden,encoder_cell = encoder.init_hidden()
            else:
                encoder_hidden = encoder.init_hidden()

            for ei in range(input_length):
                if encoder.cell_type == 'LSTM':
                    _,(encoder_hidden,encoder_cell) = encoder(input = input_tensor[ei],prev_hidden = encoder_hidden,prev_cell = encoder_cell)
                else:
                    _,encoder_hidden = encoder(input_tensor[ei],encoder_hidden)

            # Decoder: start from SOS and feed back the most likely character.
            decoded_word = ''
            decoder_input = torch.tensor([[SOS_token]],device = input_tensor.device)
            decoder_hidden = encoder_hidden
            if decoder.cell_type == 'LSTM':
                decoder_cell = encoder_cell

            for _ in range(max_length):
                if decoder.cell_type == 'LSTM':
                    decoder_output,(decoder_hidden,decoder_cell) = decoder(input = decoder_input,prev_hidden = decoder_hidden,prev_cell = decoder_cell)
                else:
                    decoder_output,decoder_hidden = decoder(decoder_input,decoder_hidden)
                topv,topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()
                predicted_index = decoder_input.item()
                decoded_word += out_lang.index2chr[predicted_index]
                if predicted_index == EOS_token:
                    break
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

    return decoded_word

In [584]:

def train_once(encoder,decoder,encoder_optimizer,decoder_optimizer,criterion):
    """Train for one epoch over `inp_train` / `tgt_train`, one word pair at a time.

    Returns:
        The sum of the per-word losses returned by `train`, as a Python float.
    """
    # Accumulate a plain float: summing the loss tensors themselves would keep
    # every word's autograd history referenced until the end of the epoch.
    training_loss = 0.0
    for i, (input_tensor, target_tensor) in enumerate(zip(inp_train, tgt_train)):
        loss = train(input_tensor,target_tensor,encoder,decoder,encoder_optimizer,decoder_optimizer,criterion)
        training_loss += float(loss)
        # Lightweight progress indicator.
        if i%200 == 0:
            print(i)
    return training_loss

# def accuracy_score(pred,actual):
#   # pred,actual : list of strings/words
#   return np.sum(np.array(pred) == np.array(actual))/len(pred)

# def eval_acc_loss(model):
#   train_pred_ = [model.predict(i) for i in inp_train]
#   valid_pred_ = [model.predict(i) for i in inp_valid]
  
#   train_pred = list(map(list, zip(*train_pred_)))[1]
#   valid_pred = list(map(list, zip(*valid_pred_)))[1]
  
#   train_accuracy = accuracy_score(train_pred,Y_train)
#   valid_accuracy = accuracy_score(valid_pred,Y_valid)
#   return train_accuracy,valid_accuracy

def train_all(encoder,decoder,epochs,learning_rate):
    """Train both modules for `epochs` passes over the training data.

    A separate Adam optimizer is created for each module, both using
    `learning_rate`, and NLLLoss is used as the criterion. After every epoch
    the epoch number (1-based) and the value returned by `train_once` are
    printed. Nothing is returned.
    """
    criterion = nn.NLLLoss()
    encoder_optimizer = optim.Adam(encoder.parameters(), lr = learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr = learning_rate)
    for epoch_number in range(1, epochs + 1):
        epoch_loss = train_once(encoder,decoder,encoder_optimizer,decoder_optimizer,criterion)
        print("Epoch : %d, Training Loss : %f" % (epoch_number,epoch_loss))
    
#     # wandb.log({ "training_accuracy" : train_accuracy,
#     #           "validation_accuracy" : valid_accuracy,
#     #           "training_loss" : train_loss,
#     #           "validation_loss" : valid_loss,
#     #           "epoch" : curr_epoch+1})
    
def evaluateRandomly(encoder, decoder, n=10):
    """Spot-check the model on the first `n` training words.

    For each word, the decoded output (with a trailing EOS marker removed if
    one was produced) is compared with the reference in `Y_train`; "yes" is
    printed for every exact match. Despite the name, the words are taken in
    order rather than sampled at random.

    Args:
        encoder, decoder: trained modules passed on to `evaluate`.
        n: number of training words to check; capped at the size of `X_train`.
    """
    eos_char = out_lang.index2chr[EOS_token]
    sample_count = min(n, len(X_train))
    for index in range(sample_count):
        output_word = evaluate(encoder, decoder, X_train[index])
        # Only strip the last character when it really is the EOS marker;
        # a word cut off at the length limit has no marker to remove.
        if output_word.endswith(eos_char):
            output_word = output_word[:-1]
        if output_word == Y_train[index]:
            print("yes")


## Run

In [585]:
# Hyperparameters for a single stand-alone training run.
input_size = inp_lang.n_chrs
embedding_size = 128
output_size = out_lang.n_chrs
hidden_layer_size = 32
num_layers = 2
cell_type = 'GRU'
dropout_prob = 0.1
learning_rate = 0.001
bidirectional = False
epochs = 10

# The training tensors are created on `device`, so both models must be moved
# there as well; otherwise the first forward pass fails on a CUDA machine.
encoder = Encoder(input_size, embedding_size, hidden_layer_size, num_layers, cell_type, dropout_prob, bidirectional).to(device)
decoder = Decoder(output_size, embedding_size, hidden_layer_size, num_layers, cell_type, dropout_prob, bidirectional).to(device)
train_all(encoder,decoder,epochs,learning_rate)




0
Epoch : 1, Training Loss : 583.357056
0
Epoch : 2, Training Loss : 567.511108
0
Epoch : 3, Training Loss : 554.176697
0
Epoch : 4, Training Loss : 511.480286
0
Epoch : 5, Training Loss : 499.416748
0
Epoch : 6, Training Loss : 490.108032
0
Epoch : 7, Training Loss : 473.805695
0
Epoch : 8, Training Loss : 464.582581
0
Epoch : 9, Training Loss : 447.580261
0
Epoch : 10, Training Loss : 442.710083


In [348]:
# Spot-check the trained model on training words; "yes" is printed for every exact match.
evaluateRandomly(encoder,decoder)

In [None]:
def train_model():
    """Entry point for one W&B sweep run.

    Reads the hyperparameters from `wandb.config` (falling back to the
    defaults below), trains a fresh encoder/decoder pair with `train_all`,
    and logs the exact-match accuracy on the validation split under
    "validation_accuracy".
    """
    # Defaults mirror the stand-alone run; a sweep overrides matching keys.
    config_defaults = {
        "embedding_size" : 128,
        "hidden_layer_size" : 32,
        "num_layers" : 2,
        "cell_type" : 'GRU',
        "dropout_prob" : 0.1,
        "learning_rate" : 0.001,
        "bidirectional" : False,
        "epochs" : 10,
    }

    wandb.init(config=config_defaults)
    config = wandb.config
    input_size = inp_lang.n_chrs
    embedding_size = config.embedding_size
    output_size = out_lang.n_chrs
    hidden_layer_size = config.hidden_layer_size
    num_layers = config.num_layers
    cell_type = config.cell_type
    dropout_prob = config.dropout_prob
    learning_rate = config.learning_rate
    bidirectional = config.bidirectional
    epochs = config.epochs

    run_name = "es_{}_hl_{}_nl_{}_ct_{}_dp_{}_lr_{}_bi_{}_ep_{}".format(embedding_size,hidden_layer_size,num_layers,cell_type,dropout_prob,learning_rate,bidirectional,epochs)
    wandb.run.name = run_name
    wandb.run.save()

    # Models must live on the same device as the training tensors.
    encoder = Encoder(input_size, embedding_size, hidden_layer_size, num_layers, cell_type, dropout_prob, bidirectional).to(device)
    decoder = Decoder(output_size, embedding_size, hidden_layer_size, num_layers, cell_type, dropout_prob, bidirectional).to(device)
    train_all(encoder,decoder,epochs,learning_rate)

    # Exact-match accuracy on the validation split.
    eos_char = out_lang.index2chr[EOS_token]
    correct = 0
    for source_word, target_word in zip(X_valid, Y_valid):
        try:
            predicted = evaluate(encoder, decoder, source_word)
        except KeyError:
            # The word contains a character absent from the training
            # vocabulary; it cannot be encoded, so count it as a miss.
            continue
        if predicted.endswith(eos_char):
            predicted = predicted[:-1]
        if predicted == target_word:
            correct += 1
    validation_accuracy = correct / len(X_valid) if len(X_valid) else 0.0
    wandb.log({"validation_accuracy" : validation_accuracy})
    
    
    
    

In [None]:
# Search space for the W&B sweep. The parameter names must match, character
# for character, the attributes that train_model reads from wandb.config.
sweep_config = {
    # "name" : "assignment_sweeps",
    "method" : "bayes",
    "metric" :{
        "name" : "validation_accuracy",
        "goal" : "maximize"
    },
    "parameters" : {
        "embedding_size" : {
            "values" : [16,32,64,128]
        },
        "num_layers" : {
            "values" : [1,2,3,4]
        },
        "hidden_layer_size" : {
            "values" : [32,64,128]
        },
        "learning_rate" : {
            "values" : [0.001,0.0001]
        },
        "cell_type" : {
           "values" : ['RNN', 'LSTM', 'GRU']
        },
        # Was "dropout", which train_model never reads (it uses dropout_prob).
        "dropout_prob" : {
            "values" : [0.1,0.2]
        },
        # Was "bidirectional " with a trailing space, so it never matched.
        "bidirectional" : {
            "values" : [True,False]
        }
    }
}



In [None]:
# Register the sweep under the given W&B project, then start an agent that
# calls train_model once per trial (count = 25 caps the number of trials).
sweep_id = wandb.sweep(sweep_config,project="CS6910_Assignment_2")
wandb.agent(sweep_id = sweep_id,function = train_model,count = 25)