In [3]:
#Necessary Libraries

import numpy as np
from math import log,log1p
from numpy import array
from numpy import argmax
import keras
from keras.layers import Input, LSTM, Dense, Embedding, GRU, Dropout, SimpleRNN
from keras.models import Model,load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.vis_utils import plot_model
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam



In [4]:
#Install WandB

%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

[K     |████████████████████████████████| 1.8 MB 14.4 MB/s 
[K     |████████████████████████████████| 181 kB 73.9 MB/s 
[K     |████████████████████████████████| 145 kB 63.9 MB/s 
[K     |████████████████████████████████| 63 kB 2.1 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [5]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [6]:
wandb.init(project="CS6910_DL_Assignment_3", entity="nomads")

[34m[1mwandb[0m: Currently logged in as: [33mtalksick[0m ([33mnomads[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
#Loading the dakshina dataset

!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xf dakshina_dataset_v1.0.tar

--2022-05-14 15:39:32--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.143.128, 172.217.218.128, 142.251.18.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.143.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2022-05-14 15:39:48 (124 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]



In [8]:
#Selecting the Hindi language

!ls dakshina_dataset_v1.0/hi/lexicons

hi.translit.sampled.dev.tsv   hi.translit.sampled.train.tsv
hi.translit.sampled.test.tsv


In [9]:
#Directory for Training,Validation and Testing
train_dir = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
val_dir = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
test_dir = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"

In [10]:
# Reading the raw corpus
#returns the native(Hindi) and romanized(English) versions of the words in the corpus

import io
def raw_corpus(crp):
  Eng = []
  Hindi= []

  with io.open(crp, encoding ='utf-8') as f:
    for line in f:
      if '\t' not in line:
        continue
      tokens = line.rstrip().split("\t")
      Eng.append(tokens[1])
      Hindi.append(tokens[0])
      
  return Eng, Hindi                                                             


In [11]:
train_src, train_tgt = raw_corpus(train_dir)
val_src, val_tgt = raw_corpus(val_dir)
test_src, test_tgt = raw_corpus(test_dir)

print("Training examples: ", len(train_src))
print("Validation examples: ", len(val_src))
print("Testing examples: ", len(test_src))

Training examples:  44204
Validation examples:  4358
Testing examples:  4502


In [12]:
#Shuffling the Training and Validation dataset

train_arr = np.arange(len(train_src))
np.random.shuffle(train_arr)
val_arr = np.arange(len(val_src))
np.random.shuffle(val_arr)

In [13]:
ip_txt_ns = []
tgt_txt_ns = []

val_ip_txt_ns = []
val_tgt_txt_ns = []

ip_char = set()
tgt_char = set()

for (txt_ip, txt_tgt) in zip(train_src, train_tgt):

    txt_tgt = "B" + txt_tgt + "E"

    ip_txt_ns.append(txt_ip)
    
    tgt_txt_ns.append(txt_tgt)

    for char in txt_ip:

        if char not in ip_char:

            ip_char.add(char)

    for char in txt_tgt:

        if char not in tgt_char:

            tgt_char.add(char)

for (txt_ip, txt_tgt) in zip(val_src, val_tgt):

    txt_tgt = "B" + txt_tgt + "E"

    val_ip_txt_ns.append(txt_ip)

    val_tgt_txt_ns.append(txt_tgt)

    for char in txt_ip:

        if char not in ip_char:

            ip_char.add(char)
    for char in txt_tgt:

        if char not in tgt_char:

            tgt_char.add(char)



In [14]:
ip_txt= []
tgt_txt= []

for i in range(len(train_src)):

    ip_txt.append(ip_txt_ns[train_arr[i]])

    tgt_txt.append(tgt_txt_ns[train_arr[i]])

val_ip_txt= []
val_tgt_txt= []

for i in range(len(val_src)):

    val_ip_txt.append(val_ip_txt_ns[val_arr[i]])
    
    val_tgt_txt.append(val_tgt_txt_ns[val_arr[i]])

ip_char.add(" ")
tgt_char.add(" ")

ip_char = sorted(list(ip_char))
tgt_char = sorted(list(tgt_char))

In [15]:
enc_tokens = len(ip_char)
dec_tokens = len(tgt_char)

max_enc_seq_length = max([len(txt) for txt in ip_txt])
max_dec_seq_length = max([len(txt) for txt in tgt_txt])

val_max_enc_seq_length = max([len(txt) for txt in val_ip_txt])
val_max_dec_seq_length = max([len(txt) for txt in val_tgt_txt])

print("Number of samples:", len(ip_txt))
print("Number of unique input tokens:", enc_tokens)
print("Number of unique output tokens:", dec_tokens)
print("Max sequence length for inputs:", max_enc_seq_length)
print("Max sequence length for outputs:", max_dec_seq_length)
print("Max sequence length for val inputs:", val_max_enc_seq_length)
print("Max sequence length for val outputs:", val_max_dec_seq_length)


Number of samples: 44204
Number of unique input tokens: 27
Number of unique output tokens: 66
Max sequence length for inputs: 20
Max sequence length for outputs: 21
Max sequence length for val inputs: 18
Max sequence length for val outputs: 16


In [16]:

print(ip_char)
print(tgt_char)

[' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
[' ', 'B', 'E', 'ँ', 'ं', 'ः', 'अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ऑ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', '़', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'ॅ', 'े', 'ै', 'ॉ', 'ो', 'ौ', '्', 'ॐ']


In [17]:
print(ip_txt[10:20])
print(tgt_txt[10:20])

['asamanya', 'tekne', 'husali', 'mangen', 'smarakon', 'palace', 'gair', 'peel', 'moortipujak', 'naidoo']
['Bअसामान्यE', 'BटेकनेE', 'BहुलसीE', 'BमागेंE', 'Bस्मारकोंE', 'BपैलेसE', 'BगैरE', 'BपिलE', 'Bमूर्तिपूजकE', 'BनायडूE']


In [18]:
ip_idx = dict([(char, i) for i, char in enumerate(ip_char)])
tgt_idx = dict([(char, i) for i, char in enumerate(tgt_char)])

print(ip_idx)
print(tgt_idx)

{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{' ': 0, 'B': 1, 'E': 2, 'ँ': 3, 'ं': 4, 'ः': 5, 'अ': 6, 'आ': 7, 'इ': 8, 'ई': 9, 'उ': 10, 'ऊ': 11, 'ऋ': 12, 'ए': 13, 'ऐ': 14, 'ऑ': 15, 'ओ': 16, 'औ': 17, 'क': 18, 'ख': 19, 'ग': 20, 'घ': 21, 'ङ': 22, 'च': 23, 'छ': 24, 'ज': 25, 'झ': 26, 'ञ': 27, 'ट': 28, 'ठ': 29, 'ड': 30, 'ढ': 31, 'ण': 32, 'त': 33, 'थ': 34, 'द': 35, 'ध': 36, 'न': 37, 'प': 38, 'फ': 39, 'ब': 40, 'भ': 41, 'म': 42, 'य': 43, 'र': 44, 'ल': 45, 'व': 46, 'श': 47, 'ष': 48, 'स': 49, 'ह': 50, '़': 51, 'ा': 52, 'ि': 53, 'ी': 54, 'ु': 55, 'ू': 56, 'ृ': 57, 'ॅ': 58, 'े': 59, 'ै': 60, 'ॉ': 61, 'ो': 62, 'ौ': 63, '्': 64, 'ॐ': 65}


In [19]:
enc_ip = np.zeros((len(ip_txt), max_enc_seq_length), dtype="float32")

dec_ip = np.zeros((len(ip_txt), max_dec_seq_length), dtype="float32")

dec_tgt = np.zeros((len(ip_txt), max_dec_seq_length, dec_tokens), dtype="float32")

for i, (txt_ip, txt_tgt) in enumerate(zip(ip_txt, tgt_txt)):

    for t, char in enumerate(txt_ip):

        enc_ip[i, t] = ip_idx[char]

    enc_ip[i, t + 1 :] = ip_idx[" "]

    for t, char in enumerate(txt_tgt):

        dec_ip[i, t] = tgt_idx[char]

        if t > 0:

            dec_tgt[i, t - 1, tgt_idx[char]] = 1.0

    dec_ip[i, t + 1: ] = tgt_idx[" "]
    dec_tgt[i, t:, tgt_idx[" "]] = 1.0

val_enc_ip = np.zeros((len(ip_txt), val_max_enc_seq_length), dtype="float32")

val_dec_ip = np.zeros((len(ip_txt), val_max_dec_seq_length), dtype="float32")

val_dec_tgt = np.zeros((len(ip_txt), val_max_dec_seq_length, dec_tokens), dtype="float32")

for i, (txt_ip, txt_tgt) in enumerate(zip(val_ip_txt, val_tgt_txt)):

    for t, char in enumerate(txt_ip):

        val_enc_ip[i, t] = ip_idx[char]

    val_enc_ip[i, t + 1 :] = ip_idx[" "]

    for t, char in enumerate(txt_tgt):

        val_dec_ip[i, t] = tgt_idx[char]

        if t > 0:

            val_dec_tgt[i, t - 1, tgt_idx[char]] = 1.0
    
    val_dec_ip[i, t + 1: ] = tgt_idx[" "]
    
    val_dec_tgt[i, t:, tgt_idx[" "]] = 1.0

In [20]:
rev_ip_char_idx = dict((i, char) for char, i in ip_idx.items())

rev_tgt_char_idx = dict((i, char) for char, i in tgt_idx.items())

print(rev_tgt_char_idx)

{0: ' ', 1: 'B', 2: 'E', 3: 'ँ', 4: 'ं', 5: 'ः', 6: 'अ', 7: 'आ', 8: 'इ', 9: 'ई', 10: 'उ', 11: 'ऊ', 12: 'ऋ', 13: 'ए', 14: 'ऐ', 15: 'ऑ', 16: 'ओ', 17: 'औ', 18: 'क', 19: 'ख', 20: 'ग', 21: 'घ', 22: 'ङ', 23: 'च', 24: 'छ', 25: 'ज', 26: 'झ', 27: 'ञ', 28: 'ट', 29: 'ठ', 30: 'ड', 31: 'ढ', 32: 'ण', 33: 'त', 34: 'थ', 35: 'द', 36: 'ध', 37: 'न', 38: 'प', 39: 'फ', 40: 'ब', 41: 'भ', 42: 'म', 43: 'य', 44: 'र', 45: 'ल', 46: 'व', 47: 'श', 48: 'ष', 49: 'स', 50: 'ह', 51: '़', 52: 'ा', 53: 'ि', 54: 'ी', 55: 'ु', 56: 'ू', 57: 'ृ', 58: 'ॅ', 59: 'े', 60: 'ै', 61: 'ॉ', 62: 'ो', 63: 'ौ', 64: '्', 65: 'ॐ'}


In [21]:
x_test = val_enc_ip
y_test = val_tgt_txt

In [22]:
class Seq_to_Seq(object):

  def __init__(self,cell = 'RNN',ip_emb = 32,epochs = 10, hidden_layer=32,batch_size = 32, learning_rate= 1e-3, 
               dropout=0.4,pred ='greedy',beam_width = 5,num_enc = 1,num_dec = 1):
    
        self.cell = cell
        self.ip_emb = ip_emb
        self.hidden_layer = hidden_layer
        self.learning_rate = learning_rate
        self.dropout = dropout
        self.pred = pred
        self.epochs = epochs
        self.batch_size = batch_size
        self.beam_width = beam_width
        self.num_enc = num_enc
        self.num_dec = num_dec

  def fit_model(self,enc_ip,dec_ip,dec_tgt,x_test, y_test):

        # Define an input sequence and process it.
        enc_ips = Input(shape=(None, ),name = 'Enc_ips')

        enc_emb =  Embedding(enc_tokens, self.ip_emb , mask_zero = True,name = 'Enc_emb')(enc_ips)

        enc_ops = enc_emb

        if self.cell == 'LSTM':

            enc_lstm = LSTM(self.hidden_layer, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_hidden_1")

            enc_ops, hs, cs = enc_lstm(enc_ops)

            enc_states = [hs, cs]

            # Add a LSTM layer with hidden_layer internal units.

            for i in range( 2, self.num_enc +1):

                layer_name = ('Enc_hidden_%d') %i

                enc_lstm = LSTM(self.hidden_layer, return_state=True,dropout = self.dropout, return_sequences=True, name=layer_name)

                enc_ops, hs, cs = enc_lstm(enc_ops,initial_state = enc_states)

                enc_states = [hs, cs]

        elif self.cell == 'GRU':

            enc_gru = GRU(self.hidden_layer, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_hidden_1")

            enc_ops, hs = enc_gru(enc_ops)

            enc_states = [hs]

            for i in range(2, self.num_enc +1):

                layer_name = ('Enc_hidden_%d') %i

                enc_gru = GRU(self.hidden_layer, return_state=True,dropout = self.dropout, return_sequences=True, name=layer_name)

                enc_ops, hs = enc_gru(enc_ops, initial_state = enc_states)

                enc_states = [hs]  

        elif self.cell == 'RNN':

            enc_rnn = SimpleRNN(self.hidden_layer, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_hidden_1")

            enc_ops, hs = enc_rnn(enc_ops)

            enc_states = [hs]

            for i in range(2, self.num_enc +1):

                layer_name = ('Enc_hidden_%d') %i

                enc_rnn = SimpleRNN(self.hidden_layer, return_state=True,dropout = self.dropout, return_sequences=True, name=layer_name)

                enc_ops, hs = enc_rnn(enc_ops, initial_state = enc_states)

                enc_states = [hs]  

        # Set up the dec, using `enc_states` as initial state.
        dec_ips = Input(shape=(None,), name = 'Dec_ips')

        dec_emb_layer = Embedding(dec_tokens, self.hidden_layer, mask_zero = True, name = 'Dec_emb')

        dec_emb = dec_emb_layer(dec_ips)

        dec_ops = dec_emb

        if self.cell == 'LSTM':

            dec_lstm = LSTM(self.hidden_layer, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_hidden_1")

            dec_ops, _, _ = dec_lstm(dec_ops, initial_state = enc_states)
          
            for i in range(2, self.num_dec +1):

              layer_name = ('Dec_hidden_%d') %i

              dec_lstm = LSTM(self.hidden_layer, return_sequences=True, return_state=True,dropout = self.dropout, name=layer_name)

              dec_ops, _, _ = dec_lstm(dec_ops, initial_state = enc_states)

        elif self.cell == 'GRU':
            dec_gru = GRU(self.hidden_layer, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_hidden_1")

            dec_ops, _ = dec_gru(dec_ops, initial_state = enc_states)

            for i in range(2, self.num_dec+1):

              layer_name = ('Dec_hidden_%d') %i

              dec_gru = GRU(self.hidden_layer, return_sequences=True, return_state=True,dropout = self.dropout, name=layer_name)

              dec_ops, _ = dec_gru(dec_ops, initial_state = enc_states)

        elif self.cell == 'RNN':
            dec_rnn = SimpleRNN(self.hidden_layer, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_hidden_1")

            dec_ops, _ = dec_rnn(dec_ops, initial_state = enc_states)

            for i in range(2, self.num_dec+1):

              layer_name = ('Dec_hidden_%d') %i

              dec_rnn = SimpleRNN(self.hidden_layer, return_sequences=True, return_state=True,dropout = self.dropout, name=layer_name)

              dec_ops, _ = dec_rnn(dec_ops, initial_state = enc_states)

        dec_dense = Dense(dec_tokens, activation='softmax', name = 'dense')

        dec_ops = dec_dense(dec_ops)

        # Define the model that takes enc and dec input 
        # to output dec_ops
        model = Model([enc_ips, dec_ips], dec_ops)
        model.summary()
        
        # Define the optimizer
        optimizer = Adam(lr=self.learning_rate, beta_1=0.9, beta_2=0.999)
        model.compile(loss = "categorical_crossentropy", optimizer = optimizer, metrics=['accuracy'])
      
        model.fit(
            [enc_ip, dec_ip],
            dec_tgt,
            batch_size=self.batch_size,
            epochs=self.epochs,
            callbacks = [WandbCallback()]
            )
        
        enc_model,dec_model = self.inference_model(model)
    
        total = 0
        right = 0
        for i in range(len(val_src)):
          input_seq = x_test[i : i + 1]
          result = self.decode_sequence(enc_model,dec_model,input_seq)

          target = y_test[i]
          target = target[1:len(target)-1]
          result = result[0:len(result)-1]

          if result.strip() == target.strip():
            right = right + 1
          
          total = total + 1
          accuracy_epoch = right/total

          if total % 50 == 0:
            wandb.log({'epoch_accuracy' : accuracy_epoch})
        
        val_accuracy = right/total

        wandb.log({'val_accuracy' : val_accuracy})
    
  def inference_model(self,model):
        enc_ips = model.input[0]  

        if self.cell == 'RNN' or self.cell == 'GRU':

          enc_ops, hs_enc = model.get_layer('Enc_hidden_'+ str(self.num_enc)).output

          enc_states = [hs_enc]

          enc_model = Model(enc_ips, enc_states)

          dec_ips = model.input[1]  

          dec_ops = model.get_layer('Dec_emb')(dec_ips)

          dec_states_ips = []

          dec_states = []

          for i in range(1,self.num_dec +1):

            dec_state_input_h = keras.Input(shape=(self.hidden_layer,))

            curr_states_ips = [dec_state_input_h]

            dec = model.get_layer('Dec_hidden_'+ str(i))

            dec_ops, hs_dec = dec(dec_ops, initial_state=curr_states_ips)

            dec_states += [hs_dec]

            dec_states_ips += curr_states_ips

        elif self.cell == 'LSTM':

          enc_ops, hs_enc, cs_enc = model.get_layer('Enc_hidden_'+ str(self.num_enc)).output 

          enc_states = [hs_enc, cs_enc]

          enc_model = Model(enc_ips, enc_states)

          dec_ips = model.input[1]  

          dec_ops = model.get_layer('Dec_emb')(dec_ips)

          dec_states_ips = []

          dec_states = []

          for i in range(1,self.num_dec +1):
            dec_state_input_h = keras.Input(shape=(self.hidden_layer,))

            dec_state_input_c = keras.Input(shape=(self.hidden_layer,))

            curr_states_ips = [dec_state_input_h, dec_state_input_c]

            dec = model.get_layer('Dec_hidden_'+ str(i))

            dec_ops, hs_dec, cs_dec = dec(dec_ops, initial_state=curr_states_ips)

            dec_states += [hs_dec, cs_dec]

            dec_states_ips += curr_states_ips


        dec_dense = model.get_layer('dense')

        dec_ops = dec_dense(dec_ops)

        dec_model = Model([dec_ips] + dec_states_ips, [dec_ops] + dec_states)

        return enc_model,dec_model

  def decode_sequence(self,enc_model,dec_model,input_seq):

        # Encode the input as state vectors.
        states_value = [enc_model.predict(input_seq)] * self.num_dec
        
        # Generate empty target sequence of length 1.
        target_seq = np.zeros((1, 1))

        # Populate the first character of target sequence with the start character.
        target_seq[0, 0] = tgt_idx['B']

        # Sampling loop for a batch of sequences
        # (to simplify, here we assume a batch of size 1).
        stop_condition = False
        decoded_sentence = ""

        while not stop_condition:

            if self.cell == 'RNN' or self.cell == 'GRU':

              dummy = dec_model.predict([target_seq] + [states_value])

              output_tokens, states_value = dummy[0],dummy[1:]
              
            elif self.cell == 'LSTM':  

              dummy = dec_model.predict([target_seq] + states_value)

              output_tokens, states_value = dummy[0],dummy[1:]

            if self.pred == 'greedy':

              beam_w = 1
            elif self.pred == 'beam_search':

              beam_w = self.beam_width

            sampled_token_index = self.beam_search_dec(output_tokens[0,:,:], beam_w)
            sampled_token_index = sampled_token_index[beam_w-1][0]

            # Sample a token
            sampled_token_index = np.argmax(output_tokens[0, -1, :])

            sampled_char = rev_tgt_char_idx[sampled_token_index]

            decoded_sentence += sampled_char

            # Exit condition: either hit max length
            # or find stop character.
            if sampled_char == 'E' or len(decoded_sentence) > max_dec_seq_length:
                stop_condition = True

            # Update the target sequence (of length 1).
            target_seq = np.zeros((1, 1))
            target_seq[0, 0] = sampled_token_index


        return decoded_sentence
  
  def beam_search_dec(self,data, k):
    
        sequences = [[list(), 0.0]]
        # walk over each step in sequence
        for row in data:
          all_candidates = list()
          # expand each current candidate
          for i in range(len(sequences)):
            seq, score = sequences[i]
            for j in range(len(row)):
              candidate = [seq + [j], score - log(row[j])]
              #candidate = [seq + [j], score - log1p(row[j])]
              all_candidates.append(candidate)
          # order all candidates by score
          ordered = sorted(all_candidates, key=lambda tup:tup[1])
          # select k best
          sequences = ordered[:k]
        return sequences

In [23]:
sweep_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {

        'dropout': {
            'values': [0.0, 0.1, 0.2]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'batch_size': {
            'values': [32,64, 128]
        },
        'ip_emb': {
            'values': [32, 64, 128, 256]
        },
        'num_enc': {
            'values': [1, 2, 3]
        },
        'num_dec': {
            'values': [1, 2, 3]
        },
        'hidden_layer':{
            'values': [32, 64, 128]
        },
        'cell': {
            'values': ['RNN', 'GRU', 'LSTM']
        },
        'dec_search': {
            'values': ['beam_search', 'greedy']
        },
        'beam_width':{
            'values': [3,5]
        }
    }
}

In [24]:
# Initialize a new sweep
sweep_id = wandb.sweep(sweep_config, entity="nomads", project="CS6910_DL_Assignment_3")

Create sweep with ID: v2382own
Sweep URL: https://wandb.ai/nomads/CS6910_DL_Assignment_3/sweeps/v2382own


In [25]:
def train_sweep():
  config_defaults = {
        'dropout': 0.4,
        'learning_rate': 1e-3,
        'batch_size': 32,
        'epochs' : 10,
        'ip_emb': 32,
        'num_enc': 2,
        'num_dec': 2,
        'hidden_layer': 32,
        'cell': 'RNN',
        'dec_search': 'beam_search',
        'beam_width': 5
        }

  # Initialize a new wandb run
  wandb.init(config = config_defaults)
  
  # Config is a variable that holds and saves hyperparameters and ip
  config = wandb.config

  wandb.run.name = str(config.cell)+ '_' + config.dec_search+'_bs_'+str(config.batch_size)
  
  rnn_model = Seq_to_Seq(config.cell, ip_emb = config.ip_emb, hidden_layer=config.hidden_layer,
                learning_rate= config.learning_rate, dropout=config.dropout,pred= config.dec_search,epochs = config.epochs,
                batch_size = config.batch_size, beam_width = config.beam_width, num_enc = config.num_enc,num_dec = config.num_dec)
  
  rnn_model.fit_model(enc_ip,dec_ip,dec_tgt,x_test, y_test)

In [None]:
wandb.agent("b6t0jg8m", entity="nomads",project="CS6910_DL_Assignment_3", function =train_sweep,count=20)


[34m[1mwandb[0m: Agent Starting Run: m55ilzko with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell: LSTM
[34m[1mwandb[0m: 	dec_search: beam_search
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer: 128
[34m[1mwandb[0m: 	ip_emb: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_dec: 3
[34m[1mwandb[0m: 	num_enc: 3


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Enc_ips (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 Enc_emb (Embedding)            (None, None, 128)    3456        ['Enc_ips[0][0]']                
                                                                                                  
 Enc_hidden_1 (LSTM)            [(None, None, 128),  131584      ['Enc_emb[0][0]']                
                                 (None, 128),                                                     
                                 (None, 128)]                                                     
                                                                                              

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
epoch_accuracy,▅█▇▄▂▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂
loss,█▅▄▃▂▂▁▁▁▁
val_accuracy,▁

0,1
accuracy,0.85672
epoch,9.0
epoch_accuracy,0.29862
loss,0.17837
val_accuracy,0.2983


[34m[1mwandb[0m: Agent Starting Run: iizjtglf with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell: LSTM
[34m[1mwandb[0m: 	dec_search: greedy
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer: 128
[34m[1mwandb[0m: 	ip_emb: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_dec: 3
[34m[1mwandb[0m: 	num_enc: 2


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Enc_ips (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 Enc_emb (Embedding)            (None, None, 256)    6912        ['Enc_ips[0][0]']                
                                                                                                  
 Dec_ips (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 Enc_hidden_1 (LSTM)            [(None, None, 128),  197120      ['Enc_emb[0][0]']                
                                 (None, 128),                                                 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▆▇▇▇████
epoch,▁▂▃▃▄▅▆▆▇█
epoch_accuracy,▄█▇▄▃▄▂▂▂▁▂▁▂▂▂▁▂▂▂▂▂▁▁▂▁▂▂▂▁▂▂▂▃▂▂▂▂▁▁▁
loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁

0,1
accuracy,0.87554
epoch,9.0
epoch_accuracy,0.31839
loss,0.15372
val_accuracy,0.31941


[34m[1mwandb[0m: Agent Starting Run: 1rerm94i with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	cell: LSTM
[34m[1mwandb[0m: 	dec_search: greedy
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer: 64
[34m[1mwandb[0m: 	ip_emb: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_dec: 3
[34m[1mwandb[0m: 	num_enc: 3


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Enc_ips (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 Enc_emb (Embedding)            (None, None, 128)    3456        ['Enc_ips[0][0]']                
                                                                                                  
 Enc_hidden_1 (LSTM)            [(None, None, 64),   49408       ['Enc_emb[0][0]']                
                                 (None, 64),                                                      
                                 (None, 64)]                                                      
                                                                                              