In [1]:
#Necessary Libraries

import numpy as np
import tensorflow as tf
from random import randint
import matplotlib.pyplot as plt
from numpy import array,argmax,array_equal
import keras.backend as K
from tensorflow.keras import models,Input
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Bidirectional, SimpleRNN, GRU,Lambda,Dense, Flatten
from tensorflow.keras.optimizers import Adam
import matplotlib.ticker as ticker
tf.keras.backend.set_floatx('float64')

In [2]:
#Install WandB

%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

[K     |████████████████████████████████| 1.8 MB 5.4 MB/s 
[K     |████████████████████████████████| 145 kB 43.0 MB/s 
[K     |████████████████████████████████| 181 kB 53.3 MB/s 
[K     |████████████████████████████████| 63 kB 1.6 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [3]:
wandb.init(project="CS6910_DL_Assignment_3", entity="nomads")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
#Loading the dakshina dataset

!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xf dakshina_dataset_v1.0.tar

--2022-05-15 11:32:32--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.204.128, 172.253.123.128, 142.250.98.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.204.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2022-05-15 11:32:40 (242 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]



In [5]:
#Selecting the Hindi language

!ls dakshina_dataset_v1.0/hi/lexicons

hi.translit.sampled.dev.tsv   hi.translit.sampled.train.tsv
hi.translit.sampled.test.tsv


In [6]:
#Directory for Training,Validation and Testing
train_dir = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
val_dir = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
test_dir = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"

In [7]:
# Reading the raw corpus
#returns the native(Hindi) and romanized(English) versions of the words in the corpus

import io
def raw_corpus(crp):
  Eng = []
  Hindi= []
  with io.open(crp, encoding ='utf-8') as f:
    for line in f:
      if '\t' not in line:
        continue
      tokens = line.rstrip().split("\t")
      Eng.append(tokens[1])
      Hindi.append(tokens[0])
  return Eng, Hindi 

In [8]:
train_src, train_tgt = raw_corpus(train_dir)
val_src, val_tgt = raw_corpus(val_dir)
test_src, test_tgt = raw_corpus(test_dir)

print("Training examples: ", len(train_src))
print("Validation examples: ", len(val_src))
print("Testing examples: ", len(test_src))

Training examples:  44204
Validation examples:  4358
Testing examples:  4502


In [10]:
ip_txt_ns = []
tgt_txt_ns = []
val_ip_txt_ns = []
val_tgt_txt_ns = []
ip_char = set()
tgt_char = set()

for (txt_ip, txt_tgt) in zip(train_src, train_tgt):
    # tab : "start sequence" character
    # \n  : "end sequence" character
    txt_tgt = "B" + txt_tgt + "E"
    ip_txt_ns.append(txt_ip)
    tgt_txt_ns.append(txt_tgt)

    for char in txt_ip:
        if char not in ip_char:
            ip_char.add(char)

    for char in txt_tgt:
        if char not in tgt_char:
            tgt_char.add(char)


for (txt_ip, txt_tgt) in zip(val_src, val_tgt):
    # tab : "start sequence" character
    # \n  : "end sequence" character
    txt_tgt = "B" + txt_tgt + "E"
    val_ip_txt_ns.append(txt_ip)
    val_tgt_txt_ns.append(txt_tgt)
    for char in txt_ip:
        if char not in ip_char:
            ip_char.add(char)
    for char in txt_tgt:
        if char not in tgt_char:
            tgt_char.add(char)

In [11]:
#Shuffling the Training and Validation dataset

train_arr = np.arange(len(train_src))
np.random.shuffle(train_arr)
val_arr = np.arange(len(val_src))
np.random.shuffle(val_arr)


In [13]:
ips_txt = []
tgts_txt = []

for i in range(len(train_src)):
    ips_txt.append(ip_txt_ns[train_arr[i]])
    tgts_txt.append(tgt_txt_ns[train_arr[i]])

val_ip_txt = []
val_tgt_txt = []

for i in range(len(val_src)):
    val_ip_txt.append(val_ip_txt_ns[val_arr[i]])
    val_tgt_txt.append(val_tgt_txt_ns[val_arr[i]])

ip_char.add(" ")
tgt_char.add(" ")

ip_char = sorted(list(ip_char))
tgt_char = sorted(list(tgt_char))

enc_tokens = len(ip_char)
dec_tokens= len(tgt_char)

max_enc_seq_length = max([len(txt) for txt in ips_txt])
max_dec_seq_length = max([len(txt) for txt in tgts_txt])

val_max_enc_seq_length = max([len(txt) for txt in val_ip_txt])
val_max_dec_seq_length = max([len(txt) for txt in val_tgt_txt])

ip_tk_idx= dict([(j, k) for k, j in enumerate(ip_char)])
tgt_tk_idx= dict([(j, k) for k, j in enumerate(tgt_char)])

In [18]:
print(ip_tk_idx)
print(tgt_tk_idx)

{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{' ': 0, 'B': 1, 'E': 2, 'ँ': 3, 'ं': 4, 'ः': 5, 'अ': 6, 'आ': 7, 'इ': 8, 'ई': 9, 'उ': 10, 'ऊ': 11, 'ऋ': 12, 'ए': 13, 'ऐ': 14, 'ऑ': 15, 'ओ': 16, 'औ': 17, 'क': 18, 'ख': 19, 'ग': 20, 'घ': 21, 'ङ': 22, 'च': 23, 'छ': 24, 'ज': 25, 'झ': 26, 'ञ': 27, 'ट': 28, 'ठ': 29, 'ड': 30, 'ढ': 31, 'ण': 32, 'त': 33, 'थ': 34, 'द': 35, 'ध': 36, 'न': 37, 'प': 38, 'फ': 39, 'ब': 40, 'भ': 41, 'म': 42, 'य': 43, 'र': 44, 'ल': 45, 'व': 46, 'श': 47, 'ष': 48, 'स': 49, 'ह': 50, '़': 51, 'ा': 52, 'ि': 53, 'ी': 54, 'ु': 55, 'ू': 56, 'ृ': 57, 'ॅ': 58, 'े': 59, 'ै': 60, 'ॉ': 61, 'ो': 62, 'ौ': 63, '्': 64, 'ॐ': 65}


In [19]:
trc_ip_txt = ips_txt[:44000]
trc_tgt_txt = tgts_txt[:44000]

In [21]:
ip_encd = np.zeros((len(trc_ip_txt), max_enc_seq_length, enc_tokens), dtype="float64")

tgt_decd = np.zeros((len(trc_ip_txt), max_dec_seq_length, dec_tokens), dtype="float64")

for i, (txt_ip, txt_tgt) in enumerate(zip(trc_ip_txt, trc_tgt_txt)):
    for m, n in enumerate(txt_ip):
        ip_encd[i, m, ip_tk_idx[n]] = 1.0
    ip_encd[i, m + 1 :, ip_tk_idx[" "]] = 1.0
    for m, n in enumerate(txt_tgt):
        tgt_decd[i, m, tgt_tk_idx[n]] = 1.0
    tgt_decd[i, m + 1 :, tgt_tk_idx[" "]] = 1.0

val_ip_encd= np.zeros((len(val_ip_txt), max_enc_seq_length, enc_tokens), dtype="float64")
val_tgt_decd = np.zeros((len(val_tgt_txt), max_dec_seq_length, dec_tokens), dtype="float64")

for i, (txt_ip, txt_tgt) in enumerate(zip(val_ip_txt, val_tgt_txt)):
    
    for t, n in enumerate(txt_ip):
        val_ip_encd[i, t, ip_tk_idx[n]] = 1.0
    val_ip_encd[i, t + 1 :, ip_tk_idx[" "]] = 1.0

    for t, n in enumerate(txt_tgt):
        val_tgt_decd[i, t, tgt_tk_idx[n]] = 1.0
    val_tgt_decd[i, t + 1: , tgt_tk_idx[" "]] = 1.0

In [24]:
class Bahdanau(tf.keras.layers.Layer):
  def __init__(self, units):
    super(Bahdanau, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)
    
  def call(self, query, value):
    
    query_with_time_axis = tf.expand_dims(query, 1)
    
    score = self.V(tf.nn.tanh(self.W1(query_with_time_axis) + self.W2(value)))
    
    aw = tf.nn.softmax(score, axis=1)
    vc = aw * value
    vc = tf.reduce_sum(vc, axis=1)

    return vc, aw

In [25]:
class Seq_to_Seq_with_attention(object):

  def __init__(self, cell = 'RNN', hidden_layer=32, learning_rate= 1e-3, drop_out = 0.3,
               epochs = 10, batch_size = 32, attention = 'bahdanau'):
    
    self.cell = cell
    self.hidden_layer = hidden_layer
    self.learning_rate = learning_rate
    self.drop_out = drop_out
    self.epochs = epochs
    self.batch_size = batch_size
    self.attention = attention

  def fit_model(self, ip_encd, tgt_decd):

    ip_encds = Input(shape=(max_enc_seq_length, enc_tokens), name='encoder_inputs')

    if self.cell == 'LSTM':

      enc_lstm = LSTM(self.hidden_layer,return_sequences=True, return_state=True, dropout = self.drop_out, name='encoder_lstm')
      enc_ops, enc_hs, enc_cs = enc_lstm(ip_encds)
      states_enc = [enc_hs, enc_cs]

    elif self.cell == 'RNN':

      enc_rnn = SimpleRNN(self.hidden_layer,return_sequences=True, return_state=True, dropout = self.drop_out, name='encoder_rnn')
      enc_ops, enc_hs = enc_rnn(ip_encds)
      states_enc = [enc_hs]

    elif self.cell == 'GRU':

      enc_gru = GRU(self.hidden_layer,return_sequences=True, return_state=True, dropout = self.drop_out, name='encoder_gru')
      enc_ops, enc_hs = enc_gru(ip_encds)
      states_enc = [enc_hs]

    

    # Attention Layer
    if self.attention == 'bahdanau':
      attention= Bahdanau(self.hidden_layer)

    # Decoder Layers
    inps_deco = Input(shape=(1, (dec_tokens + self.hidden_layer)),name='decoder_inputs')

    if self.cell == 'LSTM':

      dec_lstm = LSTM(self.hidden_layer, dropout = self.drop_out, return_state=True, name='decoder_lstm')
    
    elif self.cell == 'GRU':

      dec_gru = GRU(self.hidden_layer, dropout = self.drop_out, return_state=True, name='decoder_gru')
    
    elif self.cell == 'RNN':

      dec_rnn = SimpleRNN(self.hidden_layer, dropout = self.drop_out, return_state=True, name='decoder_rnn')  
    
    
    dec_dense = Dense(dec_tokens, activation='softmax',  name='decoder_dense')
    all_ops = []

    ips = np.zeros((self.batch_size, 1, dec_tokens))
    ips[:, 0, 0] = 1 

    dec_ops = enc_hs
    states = states_enc

    for _ in range(max_dec_seq_length):

      vc, aw = attention(dec_ops, enc_ops)
      vc = tf.expand_dims(vc, 1)
      
      ips = tf.concat([vc, ips], axis=-1)

      if self.cell == 'LSTM':

        dec_ops, hs, cs = dec_lstm(ips, initial_state=states)

      if self.cell == 'GRU':

        dec_ops, hs = dec_gru(ips, initial_state=states)

      if self.cell == 'RNN':

        dec_ops, hs = dec_rnn(ips, initial_state=states)
      
      ops = dec_dense(dec_ops)
      ops = tf.expand_dims(ops, 1)
      all_ops.append(ops)
      ips = ops
      if self.cell == 'LSTM':

        states = [hs, cs]

      if self.cell == 'GRU' or self.cell == 'RNN':
        
        states = [hs]


    dec_ops = Lambda(lambda x: K.concatenate(x, axis=1))(all_ops)
    model = Model(ip_encds, dec_ops, name='model_encoder_decoder')
    
    optimizer = Adam(lr=self.learning_rate, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(ip_encd, tgt_decd,
              batch_size=self.batch_size, 
              epochs=self.epochs,
              #callbacks = [WandbCallback()]
              )

    pred = model.predict(val_ip_encd[:4352], batch_size=self.batch_size)

    total = 0
    right = 0
    v_t = 4352

    for i in range(v_t):
      
      ohv = pred[i]
      ohv1 = val_tgt_decd[i]
      id2 = tf.argmax(ohv, axis=1)
      id1 = tf.argmax(ohv1, axis=1)
      
      if (id2.numpy() == id1.numpy()).all():
        right = right + 1
        
      total = total + 1
      accuracy_epoch = right/total

      if total % 50 == 0:
        wandb.log({'epoch_accuracy' : accuracy_epoch})
    
    val_accuracy = right/total
    
    wandb.log({'val_accuracy' : val_accuracy})

In [26]:
sweep_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {

        'drop_out': {
            'values': [0.0, 0.1, 0.2]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'batch_size': {
            'values': [64, 128]
        },
        'hidden_layer':{
            'values': [32, 64, 128]
        },
        'cell': {
            'values': ['RNN', 'GRU', 'LSTM']
        },
        'attention': {
            'values': ['bahdanau']    
        }
    }
}

In [27]:
sweep_id = wandb.sweep(sweep_config, entity="nomads", project="CS6910_DL_Assignment_3")

Create sweep with ID: qnkre2iq
Sweep URL: https://wandb.ai/nomads/CS6910_DL_Assignment_3/sweeps/qnkre2iq


In [28]:
def train_sweep():

  config_defaults = {
        'drop_out': 0.3,
        'learning_rate': 1e-3,
        'batch_size': 128,
        'epochs' : 10,
        'hidden_layer': 128,
        'cell': 'LSTM',
        'attention': 'bahdanau'
        }

  wandb.init(config = config_defaults)
  
  config = wandb.config

  wandb.run.name = str(config.cell)+ '_' + config.attention +'_bs_'+str(config.batch_size)
  
  rnn_model = Seq_to_Seq_with_attention(cell = config.cell, hidden_layer=config.hidden_layer, learning_rate= config.learning_rate, drop_out=config.drop_out,epochs = config.epochs, batch_size = config.batch_size, attention = config.attention)
  
  rnn_model.fit_model(ip_encd, tgt_decd)

In [29]:
wandb.agent("tzlnj4ue", entity="nomads", project="CS6910_DL_Assignment_3", function =train_sweep, count=20)

[34m[1mwandb[0m: Agent Starting Run: 4wfx2ypm with config:
[34m[1mwandb[0m: 	attention: bahdanau
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell: GRU
[34m[1mwandb[0m: 	drop_out: 0.1
[34m[1mwandb[0m: 	hidden_layer: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001


Epoch 1/2


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/2


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁

0,1
epoch_accuracy,0.0
val_accuracy,0.0
