Imports

In [1]:
'''
Imports
'''
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import copy

Downloading the Dakshina Dataset

In [2]:
'''
Downloading the data
'''
!curl https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar --output daksh.tar

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1915M  100 1915M    0     0   147M      0  0:00:12  0:00:12 --:--:--  135M


In [3]:
'''
Capturing the data and saving as the Tar file
'''
!tar -xvf  'daksh.tar' 

dakshina_dataset_v1.0/bn/
dakshina_dataset_v1.0/bn/lexicons/
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv
dakshina_dataset_v1.0/bn/native_script_wikipedia/
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.valid.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.nonblock.sections.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.omit_pages.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/na

In [50]:
def tokenizeTensor(texts, tokenizer=None):
    """Encode strings as a post-padded matrix of character indices.

    Input -
      texts: list of strings to encode
      tokenizer: tokenizer to reuse; when None a new character-level Keras
        Tokenizer (no filtered characters) is created and fitted on `texts`
    Output - (padded index matrix, tokenizer that produced it)
    """
    preprocessing = tf.keras.preprocessing
    if tokenizer is None:
        tokenizer = preprocessing.text.Tokenizer(filters='', char_level=True)
        tokenizer.fit_on_texts(texts)
    # one list of character indices per input string
    sequences = tokenizer.texts_to_sequences(texts)
    # pad at the end so every row has the length of the longest sequence
    padded = preprocessing.sequence.pad_sequences(sequences, padding='post')
    return padded, tokenizer

In [51]:
def data(path,input_tokenizer=None,output_tokenizer=None,input_length=None,output_length=None):
  '''
  Function to read a tab-separated lexicon file and tokenize it.
  Input -
    path: TSV file with three columns; column 1 is used as the target text,
      column 2 as the input text, column 3 is not used
    input_tokenizer / output_tokenizer: tokenizers to reuse; when None a new
      one is fitted on the data by tokenizeTensor
    input_length / output_length: when BOTH are given, the tensors are
      right-padded with zeros up to these widths
  Output - input text, input tensor, input tokenizer,
           target text, target tensor, target tokenizer
  '''
  # dtype=str + keep_default_na=False: every cell is a word, so strings such
  # as "null", "NA" or "nan" must stay as they are instead of being parsed
  # as missing values (which astype(str) would then turn into "nan").
  df = pd.read_csv(path,sep="\t",names=["1", "2","3"],dtype=str,keep_default_na=False).astype(str)
  # No tokenizer supplied (the notebook does this for the training split):
  # shuffle the rows.
  if input_tokenizer is None:
      df=df.sample(frac=1)

  inpText = []  #list of input text
  outText = []  #list of output/target text
  for output_text, input_text in zip(df["1"], df["2"]):
      # skip rows that only carry the '</s>' marker
      if output_text =='</s>' or input_text=='</s>':
        continue
      inpText.append(input_text)
      # the target is wrapped in a start character (tab) and an end character (newline)
      outText.append("\t" + output_text + "\n")

  input_tensor,input_tokenizer = tokenizeTensor(inpText,input_tokenizer)
  output_tensor,output_tokenizer = tokenizeTensor(outText,output_tokenizer)

  if input_length is not None and output_length is not None:
      # Widen to the requested lengths by appending zero columns.
      # NOTE(review): a requested length shorter than the current width gives
      # a negative padding size and makes tf.zeros fail.
      input_tensor=tf.concat([input_tensor,tf.zeros((input_tensor.shape[0],input_length-input_tensor.shape[1]))],axis=1)
      output_tensor=tf.concat([output_tensor,tf.zeros((output_tensor.shape[0],output_length-output_tensor.shape[1]))],axis=1)
  #returning the input and output tokenizer, text and the tensors.
  return inpText,input_tensor,input_tokenizer,outText,output_tensor,output_tokenizer

In [52]:
# Preprocessing and reading the training data
%%capture
input_texts,input_tensor,input_tokenizer,target_texts,target_tensor,target_tokenizer=data("/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv")

In [49]:
# Preprocessing and reading the validation data
%%capture
val_input_texts,val_input_tensor,val_input_tokenizer,val_target_texts,val_target_tensor,val_target_tokenizer=data("/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv",input_tokenizer,target_tokenizer,input_tensor.shape[1],target_tensor.shape[1])

In [45]:
# Preprocessing and reading the testing data
%%capture
test_input_texts,test_input_tensor,test_input_tokenizer,test_target_texts,test_target_tensor,test_target_tokenizer=data("/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv",input_tokenizer,target_tokenizer,input_tensor.shape[1],target_tensor.shape[1])

Finding the encoder/decoder vocabulary sizes and sequence lengths

In [22]:
# Vocabulary sizes: one more than the number of known characters, which
# leaves room for index 0 (treated as "no character" by do_predictions).
num_encoder_tokens = 1 + len(input_tokenizer.word_index)
num_decoder_tokens = 1 + len(target_tokenizer.word_index)
# Sequence lengths are the widths of the padded training tensors.
max_encoder_seq_length = input_tensor.shape[1]
max_decoder_seq_length = target_tensor.shape[1]
# Inverse lookups: token index -> character.
index_to_char_input = {index: char for char, index in input_tokenizer.word_index.items()}
index_to_char_target = {index: char for char, index in target_tokenizer.word_index.items()}

In [31]:
#Building the model
def build_model(rnn_type,embedding_dim,encoder_layers,decoder_layers,dropout):
  '''
  Function - Build Model
  Builds the encoder-decoder training model.
  Input -
    rnn_type: the RNN cell type, one of "LSTM", "GRU" or "RNN" (SimpleRNN)
    embedding_dim: embedding dimensions (used for both encoder and decoder)
    encoder_layers: no of encoder layers (>= 1)
    decoder_layers: no of decoder layers (>= 1)
    dropout: dropout passed to every recurrent layer
  Reads the globals latent_dim, num_encoder_tokens, num_decoder_tokens,
  max_encoder_seq_length and max_decoder_seq_length.
  Output - It returns the model object taking [encoder_inputs, decoder_inputs]
    and producing a softmax over the decoder tokens at every decoder position.
  Raises - ValueError for an unknown rnn_type or a layer count below 1.
  '''
  recurrent_classes = {
      "LSTM": keras.layers.LSTM,
      "GRU": keras.layers.GRU,
      "RNN": keras.layers.SimpleRNN,
  }
  if rnn_type not in recurrent_classes:
    raise ValueError("rnn_type must be one of 'LSTM', 'GRU' or 'RNN', got " + repr(rnn_type))
  if encoder_layers < 1 or decoder_layers < 1:
    raise ValueError("encoder_layers and decoder_layers must both be at least 1")
  # The same cell type is used for every recurrent layer, including the last
  # encoder layer.
  recurrent_layer = recurrent_classes[rnn_type]

  ####################################### Encoder #######################################
  encoder_inputs = keras.Input(shape=(max_encoder_seq_length,))
  encoder_features = keras.layers.Embedding(num_encoder_tokens, embedding_dim)(encoder_inputs)
  # All encoder layers but the last pass their full output sequence on.
  for _ in range(encoder_layers-1):
    encoder_features = recurrent_layer(latent_dim, return_sequences=True,dropout=dropout)(encoder_features)
  # The last encoder layer returns its final state(s): (h, c) for LSTM and a
  # single state for GRU / SimpleRNN. The output itself is not used.
  encoder_results = recurrent_layer(latent_dim, return_state=True,dropout=dropout)(encoder_features)
  encoder_states = list(encoder_results[1:])

  ####################################### Decoder #######################################
  decoder_inputs = keras.Input(shape=(max_decoder_seq_length,))
  decoder_features = keras.layers.Embedding(num_decoder_tokens, embedding_dim)(decoder_inputs)
  # Every decoder layer starts from the final encoder state(s).
  for _ in range(decoder_layers):
    decoder_results = recurrent_layer(latent_dim, return_sequences=True, return_state=True,dropout=dropout)(
        decoder_features, initial_state=encoder_states
    )
    decoder_features = decoder_results[0]

  # The dense softmax layer is named 'final' so inferencing() can look it up.
  decoder_dense = keras.layers.Dense(num_decoder_tokens, activation="softmax",name='final')
  decoder_outputs = decoder_dense(decoder_features)

  #creating the model using the encoder inputs, decoder inputs and the decoder outputs
  return keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [32]:
'''
Function - inferencing
Inputs - 
  model
  encoder_layers
  decoder_layers
Output - encoder model and the deocder model separately
'''
def inferencing(model,encoder_layers,decoder_layers):
    """Split a trained model into separate encoder and decoder models.

    Layers are picked from `model.layers` by position (offsets from
    `encoder_layers`), and the dense output layer by its name 'final'.
    The size of the decoder state inputs comes from the global `latent_dim`.

    Input -
      model: trained model
      encoder_layers: number of encoder layers the model was built with
      decoder_layers: number of decoder layers the model was built with
    Output - (encoder_model, decoder_model)
      encoder_model maps the first model input to the encoder state(s);
      decoder_model takes a length-1 input plus one state input per decoder
      layer (two per layer for LSTM) and returns the dense output followed by
      the new state of every decoder layer.
    """

    ######################################################################### Encoder Model ###################################################################
    # The encoder input is the first input of the model
    e_inputs = model.input[0]  
    # model.layers[encoder_layers+3] is unpacked as (output, state...), i.e. it
    # is assumed to be the last encoder recurrent layer - TODO confirm that this
    # index holds for every layer configuration.
    if isinstance(model.layers[encoder_layers+3], keras.layers.LSTM):
      e_outputs, state_h_enc, state_c_enc = model.layers[encoder_layers+3].output #output plus the two LSTM states
      e_states = [state_h_enc, state_c_enc] #keep both states
    
    # Any other recurrent layer: a single state
    elif isinstance(model.layers[encoder_layers+3], keras.layers.GRU) or isinstance(model.layers[encoder_layers+3], keras.layers.RNN):
      e_outputs, state = model.layers[encoder_layers+3].output #output plus the single state
      e_states = [state] #keep the state
    
    #Generating the encoder model
    encoder_model = keras.Model(e_inputs, e_states)

    ########################################################################### Decoder Model ####################################################################
    # The decoder consumes one token per call
    d_inputs =  keras.Input(shape=( 1))  
    # LSTM case: every decoder layer needs two state inputs
    if isinstance(model.layers[encoder_layers+3], keras.layers.LSTM):
      decoder_states_inputs=[]
      d_states=[]
      last=None
      for i in range(decoder_layers):
        #every layer must have an input through which we can supply its hidden state
        decoder_state_input_h = keras.Input(shape=(latent_dim,),name='inp3_'+str(i)) #decoder state H
        decoder_state_input_c = keras.Input(shape=(latent_dim,),name='inp4_'+str(i)) #decoder state C
        init = [decoder_state_input_h, decoder_state_input_c] #state containing both H and C
        # presumably the i-th decoder recurrent layer - verify against model.layers
        decoder_lstm = model.layers[i+encoder_layers+4]
        #If it is the first decoder layer
        if i==0:
          # model.layers[encoder_layers+2] is applied to the raw token input,
          # so it is presumably the decoder embedding layer - TODO confirm
          d_outputs, state_h, state_c = decoder_lstm(
              model.layers[i+encoder_layers+2](d_inputs), initial_state=init
          )
        # Later layers consume the previous layer's output
        else:
          d_outputs, state_h, state_c = decoder_lstm(
              last, initial_state=init 
          )
        #remember this layer's output for the next layer / the dense layer
        last=d_outputs
        #collect the state inputs and the new states of every layer
        decoder_states_inputs.append (decoder_state_input_h)
        decoder_states_inputs.append (decoder_state_input_c)
        d_states.append (state_h)
        d_states.append (state_c)

    # Any other recurrent layer: one state input per decoder layer
    elif isinstance(model.layers[encoder_layers+3], keras.layers.GRU) or isinstance(model.layers[encoder_layers+3], keras.layers.RNN):
      decoder_states_inputs=[] 
      d_states=[] 
      last=None
      #every layer must have an input through which we can supply its hidden state
      for i in range(decoder_layers):
        decoder_state_input = keras.Input(shape=(latent_dim,),name='inp3_'+str(i)) #decoder state
        init = [decoder_state_input] #state
        # presumably the i-th decoder recurrent layer - verify against model.layers
        decoder_lstm = model.layers[i+encoder_layers+4]
        #If it is the first decoder layer
        if i==0:
          # presumably the decoder embedding layer applied to the token input - TODO confirm
          d_outputs, state = decoder_lstm(
              model.layers[i+encoder_layers+2](d_inputs), initial_state=init
          )
        # Later layers consume the previous layer's output
        else:
          d_outputs, state = decoder_lstm(
              last, initial_state=init 
          )
        #remember this layer's output for the next layer / the dense layer
        last=d_outputs
        #collect the state input and the new state of every layer
        decoder_states_inputs.append (decoder_state_input)
        d_states.append (state)

    '''
    Geting ther dense final layer from the model objective
    '''
    # Reuse the trained dense layer (looked up by its name 'final')
    decoder_dense = model.get_layer('final')
    d_outputs = decoder_dense(last) #outputs of the decoder dense layer
    #Inputs: token + all state inputs; outputs: dense output + all new states
    decoder_model = keras.Model(
        [d_inputs] + decoder_states_inputs, [d_outputs] + d_states
    )
    #returning the encoder and the decoder model for inferencing during validation of the model
    return encoder_model,decoder_model

In [33]:
def do_predictions(input_seq,encoder_model,decoder_model,batch_size,encoder_layers,decoder_layers):
    '''
    Function - do_predictions (greedy decoding of an entire batch)
    Input -
      input_seq: encoder input for the whole batch
      encoder_model: model returning the encoder state(s)
      decoder_model: model taking (previous token, *states) and returning
        [probabilities, *new states]
      batch_size: number of words in input_seq
      encoder_layers: not used here, kept for interface compatibility
      decoder_layers: number of decoder layers; the encoder state(s) are
        supplied once per decoder layer
    Reads the globals target_tokenizer, index_to_char_target and
    max_decoder_seq_length.
    Output -
      Predicted words (list of strings, one per batch element)
    '''
    # The encoder returns a list of states for LSTM ([h, c]) but may return a
    # bare array when there is a single state. Normalise to a flat list by
    # looking at the value itself instead of a global rnn_type flag.
    encoder_states = encoder_model.predict(input_seq)
    if isinstance(encoder_states, (list, tuple)):
      encoder_states = list(encoder_states)
    else:
      encoder_states = [encoder_states]
    # every decoder layer starts from the same encoder state(s)
    sv = encoder_states * max(decoder_layers, 1)

    #contains previously predicted character's index for every word in batch.
    prev_index = np.zeros((batch_size, 1))
    # every word starts with the start character \t
    prev_index[:, 0] = target_tokenizer.word_index['\t']

    #predicted words list
    word_predictions = [ "" for _ in range(batch_size)]
    #True once the end character was produced for that word
    check=[False for _ in range(batch_size)]

    for _ in range(max_decoder_seq_length):
        out = decoder_model.predict(tuple([prev_index] + sv)) #one decoding step for the whole batch
        out_prob=out[0]     #softmax probabilities
        sv = list(out[1:])  #new decoder states, fed back in at the next step
        for j in range(batch_size):
          #word already finished
          if check[j]:
            continue

          sampled_char_index = np.argmax(out_prob[j, -1, :]) #most likely token
          #index 0 has no character assigned; treat it like the end character
          if sampled_char_index == 0:
            sampled_char='\n'
          # otherwise convert index to the respective character
          else:
            sampled_char = index_to_char_target[sampled_char_index]
          #end character: the word is complete
          if sampled_char == '\n':
            check[j]=True
            continue
          word_predictions[j] += sampled_char
          #the predicted character is the decoder input of the next step
          prev_index[j,0]=target_tokenizer.word_index[sampled_char]
        # nothing left to decode: skip the remaining decoder calls
        if all(check):
          break
    #return the predicted words.
    return word_predictions

In [34]:
'''
Function - test_accuracy (calculate the word level accuracy (Testing accuracy))
Input - 
  encoder_model
  decoder_model
  encoder_layers
  decoder_layers
Output - Testing accuracy 
'''
def test_accuracy(encoder_model,decoder_model,encoder_layers,decoder_layers):
  #count the number of words that are predicted correctly
  success=0
  #Get all the predicted words
  pred=do_predictions(test_input_tensor,encoder_model,decoder_model,test_input_tensor.shape[0],encoder_layers,decoder_layers)

  for seq_index in range(test_input_tensor.shape[0]):
      predicted_word = pred[seq_index] #predicted_Word
      target_word=test_target_texts[seq_index][1:-1] #target_word_ground_truth
      #test the word one by one and write to files
      #success word
      if target_word == predicted_word:
        success+=1
        f = open("success_predictions.txt", "a")
        f.write(test_input_texts[seq_index]+' '+target_word+' '+predicted_word+'\n')
        f.close()
      #failure word (if it is not correct predictions)
      else:
        f = open("failure_predictions.txt", "a")
        f.write(test_input_texts[seq_index]+' '+target_word+' '+predicted_word+'\n')
        f.close()
  return float(success)/float(test_input_tensor.shape[0])

In [35]:
def batch_validate(encoder_model,decoder_model,encoder_layers,decoder_layers):
  '''
  Function - batch_validate (word level accuracy on the validation data)
  Input -
    encoder_model
    decoder_model
    encoder_layers
    decoder_layers
  Reads the globals val_input_tensor and val_target_texts.
  Output -
    Return validation accuracy (fraction of exactly matching words)
  '''
  total = val_input_tensor.shape[0]
  # decode the whole validation set as a single batch
  predictions = do_predictions(val_input_tensor,encoder_model,decoder_model,total,encoder_layers,decoder_layers)
  # the ground truth carries a leading \t and a trailing \n, hence [1:-1]
  matches = sum(
      1 for idx in range(total)
      if predictions[idx] == val_target_texts[idx][1:-1]
  )
  return float(matches)/float(total)

Training with wandb hyperparameter sweeps

In [None]:
#defining globals (train() assigns them; build_model/inferencing read latent_dim)
rnn_type=None
embedding_dim=None
model= None
latent_dim = None
enc_layer=None
dec_layer=None

def train():
  '''
  Function- train()
  Performs one training run of a wandb sweep: reads the hyper-parameters from
  wandb.config, builds and trains the model, logs the training loss after
  every epoch and the validation accuracy once after the last epoch.
  The model is saved to vanilla.keras after every epoch.
  '''
  global rnn_type
  global embedding_dim
  global model
  global latent_dim
  global enc_layer
  global dec_layer
  #intializing wandb
  wandb.init()
  #initializing the wandb configurations
  rnn_type=wandb.config.rnn_type              #RNN cell type
  embedding_dim=wandb.config.embedding_dim    #embedding dimensions
  latent_dim=wandb.config.latent_dim          #latent dimensions
  enc_layer=wandb.config.enc_layer            #encoder_layer
  dec_layer=wandb.config.dec_layer            #decoder layers
  dropout=wandb.config.dropout                #dropout
  epochs=wandb.config.epochs                  #epochs
  bs=wandb.config.bs                          #batch size

  #wandb run name initialization
  wandb.run.name = 'epochs_'+str(epochs)+'_bs_'+str(bs)+'_rnn_type_'+str(rnn_type)+'_em_'+str(embedding_dim)+'_latd_'+str(latent_dim)+'_encs_'+str(enc_layer)+'_decs_'+str(dec_layer)+'_dr_'+str(dropout)

  #building the model
  model=build_model(rnn_type=rnn_type,embedding_dim=embedding_dim,encoder_layers=enc_layer,decoder_layers=dec_layer,dropout=dropout)

  #model compilation
  model.compile(
      optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(
                                                              reduction='none'), metrics=["accuracy"]
  )

  # One path for saving and loading, so the reload also works when the
  # working directory is not /content.
  checkpoint_path = "vanilla.keras"
  # Decoder targets = decoder inputs shifted left by one step, with a zero
  # column appended. They do not change between epochs, so build them once.
  decoder_targets = tf.concat([target_tensor[:,1:],tf.zeros((target_tensor[:,:].shape[0],1))], axis=1)

  ############################################################################### Training the model ######################################################################
  for i in range(epochs):
    hist=model.fit(
        [input_tensor, target_tensor],
        decoder_targets,
        batch_size=bs,
        epochs=1,shuffle=True
    )
    # Save model
    model.save(checkpoint_path)
    #log train loss to wandb
    wandb.log({"train_loss": hist.history['loss'][0]})

  ############################################################################## Inferencing ##############################################################################
  # The inference models are only needed for the final validation, so the
  # saved model is restored once, after the last epoch.
  inf = keras.models.load_model(checkpoint_path)
  encoder_model,decoder_model=inferencing(inf,encoder_layers=enc_layer,decoder_layers=dec_layer)
  #calculate the validation accuracy
  val_acc=batch_validate(encoder_model,decoder_model,enc_layer,dec_layer)
  #logging the validation accuracy
  wandb.log({"val_acc":val_acc})

Manual Training

In [29]:
#defining globals (manual_train() assigns them; build_model/inferencing read latent_dim)
rnn_type=None
embedding_dim=None
model= None
latent_dim = None
enc_layer=None
dec_layer=None

def manual_train(config):
  '''
  Function - Manual Train
  Trains one model for the given configuration, printing the validation
  accuracy after every epoch and the test accuracy at the end.
  Input -
    config: object with the attributes rnn_type, embedding_dim, latent_dim,
      enc_layer, dec_layer, dropout, epochs and bs
  Side effects - writes model.png (model plot) and vanilla.keras (model saved
    after every epoch).
  '''
  global rnn_type
  global embedding_dim
  global model
  global latent_dim
  global enc_layer
  global dec_layer
  #initializing the configured hyper-parameter values
  rnn_type=config.rnn_type            #RNN cell type
  embedding_dim=config.embedding_dim  #embedding dim
  latent_dim=config.latent_dim        #latent dim
  enc_layer=config.enc_layer          #encoder layer
  dec_layer=config.dec_layer          #decoder layer
  dropout=config.dropout              #dropout
  epochs=config.epochs                #epochs
  bs=config.bs                        #batch size
  
  #building the model
  model=build_model(rnn_type=rnn_type,embedding_dim=embedding_dim,encoder_layers=enc_layer,decoder_layers=dec_layer,dropout=dropout)

  #model compilation
  model.compile(
      optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(
                                                              reduction='none'), metrics=["accuracy"]
  )
  #ploting the best model
  tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_dtype=True,show_layer_names=True, dpi=96 )

  # One path for saving and loading, so the reload also works when the
  # working directory is not /content.
  checkpoint_path = "vanilla.keras"
  # Decoder targets = decoder inputs shifted left by one step, with a zero
  # column appended. They do not change between epochs, so build them once.
  decoder_targets = tf.concat([target_tensor[:,1:],tf.zeros((target_tensor[:,:].shape[0],1))], axis=1)

  ##################################################################### Training #############################################################################################
  for i in range(epochs):
    hist=model.fit(
        [input_tensor, target_tensor],
        decoder_targets,
        batch_size=bs,
        epochs=1,shuffle=True
    )
    #save model
    model.save(checkpoint_path)

    #restore the saved model and build the inference models from it
    inf = keras.models.load_model(checkpoint_path)
    encoder_model,decoder_model=inferencing(inf,encoder_layers=enc_layer,decoder_layers=dec_layer)
    #calculating the validation accuracy
    val_acc=batch_validate(encoder_model,decoder_model,enc_layer,dec_layer)
    print("Validation Accuracy",val_acc)
  #calculating the testing accuracy with the inference models of the last epoch
  print("Test Accuracy",test_accuracy(encoder_model,decoder_model,enc_layer,dec_layer))    

In [None]:
'''
Wandb details importing and log in
'''
import wandb
!pip install wandb
wb=True

if wb:
  wandb.login()

In [None]:
# generating the wandb sweep configuration
if wb:
  # hyper-parameter name -> candidate values explored by the sweep
  search_space = {
      "rnn_type": ["GRU","LSTM"],
      "embedding_dim": [128,256,512],
      "latent_dim": [128,256,512,1024],
      "enc_layer": [1, 2, 3],
      "dec_layer": [1, 2, 3],
      "dropout": [0.1, 0.2, 0.3],
      "epochs": [20],
      "bs": [64],
  }
  sweep_config = dict(
      name="Bayesian Sweep without attention",
      method="bayes",                                 # bayesian search
      metric=dict(name="val_acc", goal="maximize"),   # maximizing the validation accuracy
      parameters={key: {"values": choices} for key, choices in search_space.items()},
  )
  #creating the wandb sweep
  sweep_id = wandb.sweep(sweep_config, project="CS6910_Assignment3_vanilla", entity="cs21m007_cs21m013")
  #running 30 sweep runs; each run calls train()
  wandb.agent(sweep_id, train, count = 30)

Create sweep with ID: tvxoi4py
Sweep URL: https://wandb.ai/cs21m007_cs21m013/CS6910_Assignment3_vanilla/sweeps/tvxoi4py


[34m[1mwandb[0m: Agent Starting Run: q1272pke with config:
[34m[1mwandb[0m: 	bs: 64
[34m[1mwandb[0m: 	dec_layer: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 512
[34m[1mwandb[0m: 	enc_layer: 1
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	latent_dim: 1024
[34m[1mwandb[0m: 	rnn_type: GRU




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁

0,1
train_loss,0.03644
val_acc,0.34052


[34m[1mwandb[0m: Agent Starting Run: rcodkkdi with config:
[34m[1mwandb[0m: 	bs: 64
[34m[1mwandb[0m: 	dec_layer: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 512
[34m[1mwandb[0m: 	enc_layer: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	latent_dim: 1024
[34m[1mwandb[0m: 	rnn_type: LSTM




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁

0,1
train_loss,0.02189
val_acc,0.3832


[34m[1mwandb[0m: Agent Starting Run: 44eqyngk with config:
[34m[1mwandb[0m: 	bs: 64
[34m[1mwandb[0m: 	dec_layer: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 512
[34m[1mwandb[0m: 	enc_layer: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	latent_dim: 1024
[34m[1mwandb[0m: 	rnn_type: GRU




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁

0,1
train_loss,0.05259
val_acc,0.32308


[34m[1mwandb[0m: Agent Starting Run: 2asgyvfu with config:
[34m[1mwandb[0m: 	bs: 64
[34m[1mwandb[0m: 	dec_layer: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 512
[34m[1mwandb[0m: 	enc_layer: 2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	latent_dim: 1024
[34m[1mwandb[0m: 	rnn_type: GRU


 20/691 [..............................] - ETA: 2:02 - loss: 0.0591 - accuracy: 0.9803

In [15]:
class best_configuration:
  '''
  Plain container for the hyper-parameters of one training run.
  Every constructor argument is stored under an attribute of the same name.
  '''
  def __init__(self, rnn_type, embedding_dim,latent_dim,enc_layer,dec_layer,dropout,epochs,bs):
    hyper_parameters = (
        ("rnn_type", rnn_type),
        ("embedding_dim", embedding_dim),
        ("latent_dim", latent_dim),
        ("enc_layer", enc_layer),
        ("dec_layer", dec_layer),
        ("dropout", dropout),
        ("epochs", epochs),
        ("bs", bs),
    )
    for attribute, value in hyper_parameters:
      setattr(self, attribute, value)

In [None]:
#Training the best model and reporting its validation and test accuracy
wb=False
if not wb:
  # same values as before, spelled out by name
  config=best_configuration(
      rnn_type='LSTM',
      embedding_dim=512,
      latent_dim=1024,
      enc_layer=2,
      dec_layer=3,
      dropout=.1,
      epochs=25,
      bs=64,
  )
  manual_train(config) #trains the configured model and runs the test evaluation

Validation Accuracy 0.0016062413951353832
Validation Accuracy 0.17714547957778798
Validation Accuracy 0.3031206975676916
Validation Accuracy 0.3476365305185865
Validation Accuracy 0.3506195502524094
Validation Accuracy 0.36209270307480496
Validation Accuracy 0.3513079394217531
Validation Accuracy 0.3680587425424507
Validation Accuracy 0.36920605782469024
Validation Accuracy 0.354061496099128
Validation Accuracy 0.3609453877925654
Validation Accuracy 0.3703533731069298
Validation Accuracy 0.3526847177604406
Validation Accuracy 0.35589720055071133
Validation Accuracy 0.38480954566314823
Validation Accuracy 0.37976135842129416
Validation Accuracy 0.3682882055988986
Validation Accuracy 0.3822854520422212
Validation Accuracy 0.38251491509866914
Validation Accuracy 0.3703533731069298
Validation Accuracy 0.36966498393758607
Validation Accuracy 0.3767783386874713
Validation Accuracy 0.3760899495181276
Validation Accuracy 0.37333639284075265
Validation Accuracy 0.36232216613125284
Test Accuracy