In [None]:
import os
import glob
import tensorflow as tf
from tensorflow import keras

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# by this notebook point into MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Ask TensorFlow to enable memory growth on the first GPU.
# This is best effort: it is skipped when no GPU is visible, and a failure is
# reported instead of being silently swallowed.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except (ValueError, RuntimeError) as err:
        # set_memory_growth rejects invalid devices (ValueError) and devices
        # that were already initialized (RuntimeError).
        print(f"Could not enable GPU memory growth: {err}")

In [None]:
# Locations of the Hindi lexicon splits (train / test / dev .tsv files).
_LEXICON_DIR = "drive/MyDrive/RNN_data_set/dakshina_dataset_v1.0/hi/lexicons"
train_path = _LEXICON_DIR + "/hi.translit.sampled.train.tsv"
test_path = _LEXICON_DIR + "/hi.translit.sampled.test.tsv"
valid_path = _LEXICON_DIR + "/hi.translit.sampled.dev.tsv"


In [None]:
import pandas as pd
import numpy as np
def read_csv(path):
  """Load a tab-separated lexicon file into a pandas DataFrame.

  The columns are named, in file order, ``output_lang``, ``input_lang`` and
  ``attestation_count``.

  Args:
    path: location of the tab-separated file.

  Returns:
    The DataFrame produced by ``pd.read_csv``.
  """
  column_names = ["output_lang", "input_lang", "attestation_count"]
  return pd.read_csv(path, sep="\t", names=column_names)


In [None]:
def count_tokens(src_characters,trgt_characters):
  """Return ``(len(src_characters), len(trgt_characters))``.

  The first value is used as the encoder vocabulary size and the second as
  the decoder vocabulary size.
  """
  return len(src_characters), len(trgt_characters)


In [None]:
def dictionary(src_characters,trgt_characters):
  """Map every character to its position in the given sequence.

  Args:
    src_characters: iterable of source-side characters.
    trgt_characters: iterable of target-side characters.

  Returns:
    ``(source_token_index, target_token_index)``: two dicts of
    character -> index. If a character occurs more than once, its last
    position wins.
  """
  source_token_index = {symbol: position for position, symbol in enumerate(src_characters)}
  target_token_index = {symbol: position for position, symbol in enumerate(trgt_characters)}
  return source_token_index, target_token_index

In [None]:
def data_process(data_path):
  """Read a transliteration lexicon and collect its texts and alphabets.

  Every non-empty line must hold three tab-separated fields: the target
  text, the source text and a third field that is ignored. Each target text
  is wrapped with a leading tab and a trailing newline character.

  Unlike a plain ``read().split("\\n")[:-1]``, the last record is kept even
  when the file does not end with a newline, and blank lines are skipped.

  Args:
    data_path: path of the UTF-8 encoded, tab-separated file.

  Returns:
    ``(source_texts, target_texts, source_characters, target_characters)``.
    The texts are lists in file order. The character collections are sorted
    lists of the distinct characters seen, always including ``" "``.

  Raises:
    ValueError: if a non-empty line does not have exactly three fields.
  """
  source_texts = []
  target_texts = []
  # " " is always part of both alphabets; encode_decode_data uses it as the
  # padding symbol, so it must exist even for an empty file.
  source_characters = {" "}
  target_characters = {" "}
  with open(data_path, "r", encoding="utf-8") as f:
    for line_number, raw_line in enumerate(f, start=1):
      line = raw_line.rstrip("\n")
      if not line:
        continue
      fields = line.split("\t")
      if len(fields) != 3:
        raise ValueError(
            f"{data_path}: line {line_number} has {len(fields)} "
            "tab-separated fields, expected 3"
        )
      target_text, source_text, _ = fields
      target_text = "\t" + target_text + "\n"
      source_texts.append(source_text)
      target_texts.append(target_text)
      source_characters.update(source_text)
      target_characters.update(target_text)
  return source_texts, target_texts, sorted(source_characters), sorted(target_characters)



In [None]:
def encode_decode_data(source_texts,target_texts,encoder_seq_length,
                       decoder_seq_length,encoder_tokens,decoder_tokens
                            ,source_token_index,target_token_index):
  """Turn text pairs into the arrays fed to the encoder-decoder models.

  Args:
    source_texts: source strings, one per sample.
    target_texts: target strings, one per sample (same order).
    encoder_seq_length: number of time steps of the encoder array.
    decoder_seq_length: number of time steps of the decoder arrays.
    encoder_tokens: unused here; kept so existing callers keep working.
    decoder_tokens: size of the one-hot axis of the decoder arrays.
    source_token_index: source character -> integer index.
    target_token_index: target character -> one-hot position.

  Returns:
    ``(encoder_input_data, decoder_input_data, decoder_target_data)``, all
    float32 arrays:
      * encoder_input_data, shape (samples, encoder_seq_length): the index
        of each source character, padded with the index of ``" "``.
      * decoder_input_data, shape (samples, decoder_seq_length,
        decoder_tokens): one-hot target characters, padded with ``" "``.
      * decoder_target_data, same shape: decoder_input_data shifted one step
        to the left, padded with ``" "``.

  Raises:
    KeyError: if a character (or ``" "``) is missing from an index dict.
    IndexError: if a text is longer than the corresponding sequence length.
  """
  sample_count = len(source_texts)
  encoder_input_data = np.zeros(
      (sample_count, encoder_seq_length), dtype="float32"
  )
  decoder_input_data = np.zeros(
      (sample_count, decoder_seq_length, decoder_tokens), dtype="float32"
  )
  decoder_target_data = np.zeros(
      (sample_count, decoder_seq_length, decoder_tokens), dtype="float32"
  )

  for i,(x, y) in enumerate(zip(source_texts, target_texts)):
    for t, char in enumerate(x):
      encoder_input_data[i,t] = source_token_index[char]
    # Pad from len(x), not from the loop variable: for an empty text the loop
    # variable would be undefined or left over from the previous sample.
    encoder_input_data[i, len(x):] = source_token_index[" "]

    for t, char in enumerate(y):
      decoder_input_data[i, t, target_token_index[char]] = 1.0
      if t > 0:
        # The target is one step ahead of the input and skips its first char.
        decoder_target_data[i, t - 1, target_token_index[char]] = 1.0

    decoder_input_data[i, len(y):, target_token_index[" "]] = 1.0
    # The target holds len(y) - 1 real steps; clamp at 0 for an empty text.
    decoder_target_data[i, max(len(y) - 1, 0):, target_token_index[" "]] = 1.0
  return encoder_input_data,decoder_input_data,decoder_target_data

In [None]:
import numpy as np
# Training split: it defines the vocabularies, the token counts and the
# maximum sequence lengths used for every split below.
source_texts, target_texts, src_characters, trgt_characters = data_process(train_path)
source_token_index, target_token_index = dictionary(src_characters, trgt_characters)
encoder_tokens, decoder_tokens = count_tokens(src_characters, trgt_characters)
encoder_seq_length = max(map(len, source_texts))
decoder_seq_length = max(map(len, target_texts))
train_encoder_input_data, train_decoder_input_data, train_decoder_target_data = encode_decode_data(
    source_texts=source_texts,
    target_texts=target_texts,
    encoder_seq_length=encoder_seq_length,
    decoder_seq_length=decoder_seq_length,
    encoder_tokens=encoder_tokens,
    decoder_tokens=decoder_tokens,
    source_token_index=source_token_index,
    target_token_index=target_token_index,
)

# Validation split, encoded with the training vocabulary and lengths.
val_source_texts, val_target_texts, val_src_characters, val_trgt_characters = data_process(valid_path)
val_encoder_input_data, val_decoder_input_data, val_decoder_target_data = encode_decode_data(
    source_texts=val_source_texts,
    target_texts=val_target_texts,
    encoder_seq_length=encoder_seq_length,
    decoder_seq_length=decoder_seq_length,
    encoder_tokens=encoder_tokens,
    decoder_tokens=decoder_tokens,
    source_token_index=source_token_index,
    target_token_index=target_token_index,
)

# Test split, encoded the same way.
test_source_texts, test_target_texts, test_src_characters, test_trgt_characters = data_process(test_path)
test_encoder_input_data, test_decoder_input_data, test_decoder_target_data = encode_decode_data(
    source_texts=test_source_texts,
    target_texts=test_target_texts,
    encoder_seq_length=encoder_seq_length,
    decoder_seq_length=decoder_seq_length,
    encoder_tokens=encoder_tokens,
    decoder_tokens=decoder_tokens,
    source_token_index=source_token_index,
    target_token_index=target_token_index,
)

print(train_encoder_input_data[0].shape)

(20,)


# Build the LSTM network

In [None]:
from tensorflow.keras.layers import Dense, Input, InputLayer, Flatten, Activation, LSTM, SimpleRNN, GRU, TimeDistributed
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model, Sequential,  Model

In [None]:
class LSTM_network(object):
  """Encoder-decoder model built from stacked LSTM layers.

  The constructor stores the hyperparameters on the instance and immediately
  builds the Keras model, which is exposed as ``self.model``.
  """

  def __init__(self,input_embedding,no_encoder_layers,no_decoder_layers,
               hidden_layers_size,drop_out,epochs,batch_size,
               decoder_tokens,encoder_tokens):
    # Keep every hyperparameter on the instance.
    self.input_embedding = input_embedding
    self.no_encoder_layers = no_encoder_layers
    self.no_decoder_layers = no_decoder_layers
    self.hidden_layers_size = hidden_layers_size
    self.drop_out = drop_out
    self.batch_size = batch_size
    self.epochs = epochs
    self.encoder_tokens = encoder_tokens
    self.decoder_tokens = decoder_tokens
    self.initialize(input_embedding, no_encoder_layers, no_decoder_layers,
                    hidden_layers_size, drop_out, epochs, batch_size,
                    decoder_tokens, encoder_tokens)

  def initialize(self,input_embedding,no_encoder_layers,no_decoder_layers,hidden_layers_size,drop_out,epochs,batch_size,decoder_tokens,encoder_tokens):
    """Assemble the Keras graph and store it in ``self.model``.

    ``epochs`` and ``batch_size`` are accepted but not used while building.
    """
    # Encoder: token ids -> embedding -> stack of LSTM layers.
    encoder_input = keras.Input(shape=(None,))
    embedding_layer = tf.keras.layers.Embedding(
        input_dim=encoder_tokens, output_dim=input_embedding)
    encoder_output = embedding_layer(encoder_input)
    for _ in range(no_encoder_layers):
      encoder_layer = keras.layers.LSTM(
          hidden_layers_size,
          return_state=True,
          return_sequences=True,
          dropout=drop_out,
      )
      encoder_output, hidden_state, cell_state = encoder_layer(encoder_output)
      # Overwritten on every pass: only the last encoder layer's states remain.
      encoder_state = [hidden_state, cell_state]

    # Decoder: every LSTM layer starts from the final encoder states.
    decoder_input = keras.Input(shape=(None, decoder_tokens))
    decoder_output = decoder_input
    for _ in range(no_decoder_layers):
      decoder_layer = keras.layers.LSTM(
          hidden_layers_size,
          return_state=True,
          return_sequences=True,
          dropout=drop_out,
      )
      decoder_output, _, _ = decoder_layer(decoder_output, initial_state=encoder_state)

    # Per-time-step softmax over the decoder tokens.
    output_layer = keras.layers.Dense(decoder_tokens, activation="softmax")
    decoder_output = output_layer(decoder_output)
    self.model = keras.Model([encoder_input, decoder_input], decoder_output)

# Build the RNN network

In [None]:
class RNN_network(object):
  """Encoder-decoder model built from stacked SimpleRNN layers.

  The constructor stores the hyperparameters on the instance and immediately
  builds the Keras model, which is exposed as ``self.model``.
  """

  def __init__(self,input_embedding,no_encoder_layers,no_decoder_layers,
               hidden_layers_size,drop_out,epochs,batch_size,
               decoder_tokens,encoder_tokens):
    # Keep every hyperparameter on the instance.
    self.input_embedding = input_embedding
    self.no_encoder_layers = no_encoder_layers
    self.no_decoder_layers = no_decoder_layers
    self.hidden_layers_size = hidden_layers_size
    self.drop_out = drop_out
    self.batch_size = batch_size
    self.epochs = epochs
    self.encoder_tokens = encoder_tokens
    self.decoder_tokens = decoder_tokens
    self.initialize(input_embedding, no_encoder_layers, no_decoder_layers,
                    hidden_layers_size, drop_out, epochs, batch_size,
                    decoder_tokens, encoder_tokens)

  def initialize(self,input_embedding,no_encoder_layers,no_decoder_layers,hidden_layers_size,drop_out,epochs,batch_size,decoder_tokens,encoder_tokens):
    """Assemble the Keras graph and store it in ``self.model``.

    ``epochs`` and ``batch_size`` are accepted but not used while building.
    """
    # Encoder: token ids -> embedding -> stack of SimpleRNN layers.
    encoder_input = keras.Input(shape=(None,))
    embedding_layer = tf.keras.layers.Embedding(
        input_dim=encoder_tokens, output_dim=input_embedding)
    encoder_output = embedding_layer(encoder_input)
    for _ in range(no_encoder_layers):
      encoder_layer = keras.layers.SimpleRNN(
          hidden_layers_size,
          return_state=True,
          return_sequences=True,
          dropout=drop_out,
      )
      encoder_output, final_state = encoder_layer(encoder_output)
      # Overwritten on every pass: only the last encoder layer's state remains.
      encoder_state = [final_state]

    # Decoder: every SimpleRNN layer starts from the final encoder state.
    decoder_input = keras.Input(shape=(None, decoder_tokens))
    decoder_output = decoder_input
    for _ in range(no_decoder_layers):
      decoder_layer = keras.layers.SimpleRNN(
          hidden_layers_size,
          return_state=True,
          return_sequences=True,
          dropout=drop_out,
      )
      decoder_output, _ = decoder_layer(decoder_output, initial_state=encoder_state)

    # Per-time-step softmax over the decoder tokens.
    output_layer = keras.layers.Dense(decoder_tokens, activation="softmax")
    decoder_output = output_layer(decoder_output)
    self.model = keras.Model([encoder_input, decoder_input], decoder_output)

# Build the GRU network

In [None]:
class GRU_network(object):
  """Encoder-decoder model built from stacked GRU layers.

  The constructor stores the hyperparameters on the instance and immediately
  builds the Keras model, which is exposed as ``self.model``.
  """

  def __init__(self,input_embedding,no_encoder_layers,no_decoder_layers,
               hidden_layers_size,drop_out,epochs,batch_size,
               decoder_tokens,encoder_tokens):
    # Keep every hyperparameter on the instance.
    self.input_embedding = input_embedding
    self.no_encoder_layers = no_encoder_layers
    self.no_decoder_layers = no_decoder_layers
    self.hidden_layers_size = hidden_layers_size
    self.drop_out = drop_out
    self.batch_size = batch_size
    self.epochs = epochs
    self.encoder_tokens = encoder_tokens
    self.decoder_tokens = decoder_tokens
    self.initialize(input_embedding, no_encoder_layers, no_decoder_layers,
                    hidden_layers_size, drop_out, epochs, batch_size,
                    decoder_tokens, encoder_tokens)

  def initialize(self,input_embedding,no_encoder_layers,no_decoder_layers,hidden_layers_size,drop_out,epochs,batch_size,decoder_tokens,encoder_tokens):
    """Assemble the Keras graph and store it in ``self.model``.

    ``epochs`` and ``batch_size`` are accepted but not used while building.
    """
    # Encoder: token ids -> embedding -> stack of GRU layers.
    encoder_input = keras.Input(shape=(None,))
    embedding_layer = tf.keras.layers.Embedding(
        input_dim=encoder_tokens, output_dim=input_embedding)
    encoder_output = embedding_layer(encoder_input)
    for _ in range(no_encoder_layers):
      encoder_layer = keras.layers.GRU(
          hidden_layers_size,
          return_state=True,
          return_sequences=True,
          dropout=drop_out,
      )
      encoder_output, final_state = encoder_layer(encoder_output)
      # Overwritten on every pass: only the last encoder layer's state remains.
      encoder_state = [final_state]

    # Decoder: every GRU layer starts from the final encoder state.
    decoder_input = keras.Input(shape=(None, decoder_tokens))
    decoder_output = decoder_input
    for _ in range(no_decoder_layers):
      decoder_layer = keras.layers.GRU(
          hidden_layers_size,
          return_state=True,
          return_sequences=True,
          dropout=drop_out,
      )
      decoder_output, _ = decoder_layer(decoder_output, initial_state=encoder_state)

    # Per-time-step softmax over the decoder tokens.
    output_layer = keras.layers.Dense(decoder_tokens, activation="softmax")
    decoder_output = output_layer(decoder_output)
    self.model = keras.Model([encoder_input, decoder_input], decoder_output)

In [None]:
# Quick end-to-end check: build an LSTM model with 2 encoder and 2 decoder
# layers, train it for one epoch and evaluate it on the test split.
model=LSTM_network(256,2,2,256,0.2,5,32,decoder_tokens,encoder_tokens)
#image="drive/MyDrive/akash.png"
#tf.keras.utils.plot_model(model.model, to_file=image, show_shapes=True)
print(train_encoder_input_data[0].shape)
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
model.model.summary()
model.model.compile(
    optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]
)
# NOTE(review): fit() uses batch_size=64 / epochs=1 while the model above was
# constructed with epochs=5 / batch_size=32 -- confirm which is intended.
model.model.fit([train_encoder_input_data, train_decoder_input_data],
    train_decoder_target_data,
    batch_size=64,
    epochs=1,
    validation_data=([val_encoder_input_data, val_decoder_input_data],val_decoder_target_data),
)
loss,accuracy=model.model.evaluate(x=[test_encoder_input_data, test_decoder_input_data],y=test_decoder_target_data,batch_size=32)

(20,)
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 256)    6912        ['input_1[0][0]']                
                                                                                                  
 lstm (LSTM)                    [(None, None, 256),  525312      ['embedding[0][0]']              
                                 (None, 256),                                                     
                                 (None, 256)]                                                     
                                                                                        

In [None]:
!pip install wandb



In [None]:
import wandb 
# Log in to Weights & Biases; the hyperparameter sweep in this notebook
# records its runs there.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33makashsainics21m003[0m (use `wandb login --relogin` to force relogin)


True

In [None]:
import numpy as np
from tensorflow.keras.optimizers import Adam,RMSprop
from wandb.keras import WandbCallback

# Lookup tables that turn the string choices of the sweep configuration
# ("base_models", "optimizer") into the classes to instantiate.
train_models={"RNN":RNN_network,"LSTM":LSTM_network,"GRU":GRU_network}
optimizer={"rmsprop":RMSprop,"adam":Adam}

In [None]:
def run_wandb():
    """Train and evaluate one model for the current W&B sweep run.

    The hyperparameters are read from ``wandb.config``: input_embedding,
    epochs, batch_size, no_encoder_layers, no_decoder_layers, learning_rate,
    drop_out, hidden_layers_size, optimizer and base_models. The model is
    trained on the training arrays with the validation arrays as
    ``validation_data``, then evaluated on the test arrays. The test accuracy
    is printed and logged under the key "test accuracy".

    The run is opened as a context manager, so it is closed even if training
    raises. The project name matches the one the sweep is created in.
    """
    with wandb.init(project="ASSIGNMENT_3", entity="cs21m003_cs21d406") as run:
        config = wandb.config
        base_model=train_models[config.base_models]
        optim=optimizer[config.optimizer]
        # Encode the hyperparameters in the run name so runs can be told apart.
        run.name=f"embd_{config.input_embedding}_e_{config.epochs}_bs_{config.batch_size}_enc_layers_{config.no_encoder_layers}_dec_layers_{config.no_decoder_layers}_rate_{config.learning_rate}_drp_{config.drop_out}_hidden_{config.hidden_layers_size}_optim_{config.optimizer}_model_{config.base_models}"
        model_1=base_model(config.input_embedding,config.no_encoder_layers,config.no_decoder_layers,config.hidden_layers_size,
                           config.drop_out,config.epochs,config.batch_size,decoder_tokens,encoder_tokens)
        model_1.model.compile(
            optimizer=optim(learning_rate=config.learning_rate),
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        # WandbCallback sends the Keras training metrics to the run.
        model_1.model.fit(
            [train_encoder_input_data, train_decoder_input_data],
            train_decoder_target_data,
            epochs=config.epochs,
            batch_size=config.batch_size,
            validation_data=([val_encoder_input_data, val_decoder_input_data],val_decoder_target_data),
            callbacks=[WandbCallback()],
        )
        _,accuracy=model_1.model.evaluate(x=[test_encoder_input_data, test_decoder_input_data],y=test_decoder_target_data,batch_size=config.batch_size)
        print(f'test accuracy:{accuracy}')
        wandb.log({"test accuracy":accuracy})

In [None]:
def do_hyperparameter_search_using_wandb(count=30):
    """Create a random-search W&B sweep and run it with ``run_wandb``.

    Args:
        count: number of runs the agent executes (default 30).

    The sweep metric is ``val_accuracy``, the validation-accuracy key the
    training runs actually log. The previous name "ValidationAccuracy" was
    never logged.
    """
    sweep_config = {
    "name": "random sweep",
    "method": "random",
    "metric":{
      "name": "val_accuracy",
      "goal": "maximize"
    },
    "parameters":{
      "input_embedding": {"values": [64,128,256]},
      "epochs": {"values": [5, 10,15]},
      "batch_size": {"values": [32,64,128]},
      "no_encoder_layers": {"values": [1, 2,3]},
      "no_decoder_layers": {"values": [1,2,3]},
      "learning_rate": {"values": [1e-3, 1e-4]},
      "drop_out": {"values": [0.2,0.3,0.4]},
      "hidden_layers_size": {"values": [64,128,256]},
      "optimizer": {"values": ["rmsprop","adam"]},
      "base_models":{"values":["RNN","LSTM","GRU"]}}}

    sweep_id = wandb.sweep(sweep_config, project = "ASSIGNMENT_3",entity='cs21m003_cs21d406')
    wandb.agent(sweep_id, function=run_wandb,count=count)

In [None]:
# Create the sweep and run it; each run trains and evaluates one model.
do_hyperparameter_search_using_wandb()

Create sweep with ID: auwe7blw
Sweep URL: https://wandb.ai/cs21m003_cs21d406/ASSIGNMENT_3/sweeps/auwe7blw


[34m[1mwandb[0m: Agent Starting Run: 64xcxnag with config:
[34m[1mwandb[0m: 	base_models: GRU
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	drop_out: 0.4
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_layers_size: 128
[34m[1mwandb[0m: 	input_embedding: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	no_decoder_layers: 3
[34m[1mwandb[0m: 	no_encoder_layers: 1
[34m[1mwandb[0m: 	optimizer: adam


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
test accuracy:0.8731357455253601


VBox(children=(Label(value='4.175 MB of 4.175 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▃▄▄▅▅▆▆▆▇▇▇██
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▆▅▅▄▄▃▃▃▂▂▂▂▁▁
test accuracy,▁
val_accuracy,▁▂▂▃▄▅▅▅▆▆▆▇▇██
val_loss,█▇▆▆▅▄▄▃▃▃▂▂▂▁▁

0,1
accuracy,0.82882
best_epoch,14.0
best_val_loss,0.44212
epoch,14.0
loss,0.572
test accuracy,0.87314
val_accuracy,0.86892
val_loss,0.44212


[34m[1mwandb[0m: Agent Starting Run: ec75mbsk with config:
[34m[1mwandb[0m: 	base_models: RNN
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	drop_out: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers_size: 128
[34m[1mwandb[0m: 	input_embedding: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	no_decoder_layers: 3
[34m[1mwandb[0m: 	no_encoder_layers: 2
[34m[1mwandb[0m: 	optimizer: adam


Epoch 1/5

[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)


Epoch 2/5
Epoch 3/5
Epoch 4/5
 165/1382 [==>...........................] - ETA: 4:02 - loss: 0.9031 - accuracy: 0.7637