In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

In [None]:
!curl https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar --output daksh.tar

In [4]:
%%capture
!tar -xvf  'daksh.tar' 

In [5]:
def data(path,input_tokenizer=None,target_tokenizer=None,input_length=None,target_length=None):
  """Read a tab-separated lexicon file and encode it as padded character-index matrices.

  The file is parsed as three columns named "1", "2" and "3", all cast to str.
  Column "2" supplies the input strings and column "1" the target strings
  (presumably romanised text and native script respectively - confirm against
  the dataset layout). Each target is wrapped in a leading tab and a trailing
  newline, which act as the start and end markers for the decoder.

  Args:
    path: location of the TSV file.
    input_tokenizer: fitted character tokenizer for the inputs. When None the
      call is treated as the training split: rows are shuffled and a new
      tokenizer is fitted on the inputs.
    target_tokenizer: fitted character tokenizer for the targets; a new one is
      fitted when None.
    input_length, target_length: when BOTH are given, the encoded matrices are
      brought to exactly these widths (zero padded on the right, clipped on
      the right if a sequence is longer).

  Returns:
    (input_texts, input_tensor, input_tokenizer,
     target_texts, target_tensor, target_tokenizer)
    The tensors are what pad_sequences returns, or the result of tf.concat
    when explicit lengths were requested.
  """
  df = pd.read_csv(path,sep="\t",names=["1", "2","3"]).astype(str)
  if input_tokenizer is None:
    df=df.sample(frac=1)

  # Collect the input/target pairs, skipping rows that only hold the '</s>' marker.
  input_texts = []
  target_texts = []
  for input_text, target_text in zip(df['2'], df['1']):
    if target_text =='</s>' or input_text=='</s>':
      continue
    input_texts.append(input_text)
    target_texts.append("\t" + target_text + "\n")

  #only train set will have input_tokenizer as none. Validation and test reuse the fitted ones.
  if input_tokenizer is None:
    input_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    input_tokenizer.fit_on_texts(input_texts)
  input_tensor = input_tokenizer.texts_to_sequences(input_texts)
  input_tensor = tf.keras.preprocessing.sequence.pad_sequences(input_tensor,padding='post')

  if target_tokenizer is None:
    target_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    target_tokenizer.fit_on_texts(target_texts)
  target_tensor = target_tokenizer.texts_to_sequences(target_texts)
  target_tensor = tf.keras.preprocessing.sequence.pad_sequences(target_tensor,padding='post')

  #for dataset which is not training we pad to make maximum length same as train set.
  if input_length is not None and target_length is not None:
    # Clip first: a split containing a sequence longer than the requested width
    # would otherwise ask tf.zeros for a negative number of columns.
    input_tensor=input_tensor[:,:input_length]
    target_tensor=target_tensor[:,:target_length]
    input_tensor=tf.concat([input_tensor,tf.zeros((input_tensor.shape[0],input_length-input_tensor.shape[1]))],axis=1)
    target_tensor=tf.concat([target_tensor,tf.zeros((target_tensor.shape[0],target_length-target_tensor.shape[1]))],axis=1)
  return input_texts,input_tensor,input_tokenizer,target_texts,target_tensor,target_tokenizer

In [6]:
%%capture
# Training split: no tokenizers are passed, so data() fits fresh ones here.
(input_texts, input_tensor, input_tokenizer,
 target_texts, target_tensor, target_tokenizer) = data(
    "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
)

In [7]:
%%capture
# Validation split: reuse the training tokenizers and pad to the training widths.
(val_input_texts, val_input_tensor, val_input_tokenizer,
 val_target_texts, val_target_tensor, val_target_tokenizer) = data(
    "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv",
    input_tokenizer,
    target_tokenizer,
    input_tensor.shape[1],
    target_tensor.shape[1],
)

In [8]:
%%capture
# Test split: reuse the training tokenizers and pad to the training widths.
(test_input_texts, test_input_tensor, test_input_tokenizer,
 test_target_texts, test_target_tensor, test_target_tokenizer) = data(
    "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv",
    input_tokenizer,
    target_tokenizer,
    input_tensor.shape[1],
    target_tensor.shape[1],
)

In [9]:
# Vocabulary sizes: one entry per known character plus one extra slot for index 0,
# which decode_batch treats as padding / end of word.
num_encoder_tokens = 1 + len(input_tokenizer.word_index)
num_decoder_tokens = 1 + len(target_tokenizer.word_index)
# Sequence widths are taken from the padded training matrices.
max_encoder_seq_length, max_decoder_seq_length = input_tensor.shape[1], target_tensor.shape[1]

In [10]:

# Reverse lookups: tokenizer index -> character, for both vocabularies.
index_to_char_input = {idx: ch for ch, idx in input_tokenizer.word_index.items()}
index_to_char_target = {idx: ch for ch, idx in target_tokenizer.word_index.items()}


In [11]:

#Build the model
def build_model(rnn_type,embedding_dim,encoder_layers,decoder_layers,dropout):
  """Assemble the teacher-forced encoder/decoder training model.

  Reads the module-level values latent_dim, num_encoder_tokens,
  num_decoder_tokens, max_encoder_seq_length and max_decoder_seq_length.

  Args:
    rnn_type: 'LSTM', 'GRU' or 'RNN' (the last one maps to SimpleRNN).
    embedding_dim: width of both embedding layers.
    encoder_layers: number of stacked recurrent layers in the encoder (>= 1).
    decoder_layers: number of stacked recurrent layers in the decoder (>= 1).
    dropout: dropout rate handed to every recurrent layer.

  Returns:
    A keras.Model taking [encoder_inputs, decoder_inputs] and producing a
    softmax over the target vocabulary at every decoder position.

  Raises:
    ValueError: for an unknown rnn_type or a layer count below 1.
  """
  # Validate before touching Keras so a bad configuration fails with a clear message
  # instead of an unbound-variable error further down.
  if rnn_type not in ('LSTM','GRU','RNN'):
    raise ValueError("rnn_type must be 'LSTM', 'GRU' or 'RNN', got %r" % (rnn_type,))
  if encoder_layers < 1 or decoder_layers < 1:
    raise ValueError("encoder_layers and decoder_layers must both be at least 1")
  rnn_layer = {
      'LSTM': keras.layers.LSTM,
      'GRU': keras.layers.GRU,
      'RNN': keras.layers.SimpleRNN,
  }[rnn_type]

  # ---- encoder ----
  #input layer ; takes in tokenize input
  encoder_inputs = keras.Input(shape=(max_encoder_seq_length,))
  encoder_seq = keras.layers.Embedding(num_encoder_tokens, embedding_dim)(encoder_inputs)
  # every layer except the last passes its full output sequence on to the next one
  for _ in range(encoder_layers-1):
    encoder_seq = rnn_layer(latent_dim, return_sequences=True,dropout=dropout)(encoder_seq)
  # the last layer returns its final state(s): (h, c) for LSTM, a single tensor otherwise
  encoder_result = rnn_layer(latent_dim, return_state=True,dropout=dropout)(encoder_seq)
  _, *encoder_states = encoder_result

  # ---- decoder ----
  decoder_inputs = keras.Input(shape=(max_decoder_seq_length,))
  decoder_seq = keras.layers.Embedding(num_decoder_tokens, embedding_dim)(decoder_inputs)
  # every decoder layer starts from the encoder's final state
  for _ in range(decoder_layers):
    decoder_rnn = rnn_layer(latent_dim, return_sequences=True, return_state=True,dropout=dropout)
    decoder_seq, *_ = decoder_rnn(decoder_seq, initial_state=encoder_states)
  #Adding dense layer at the end ('final' is looked up by name in build_inference)
  decoder_dense = keras.layers.Dense(num_decoder_tokens, activation="softmax",name='final')
  decoder_outputs = decoder_dense(decoder_seq)

  #specifying model inputs and outputs.
  # encoder_inputs -> Input to encoder
  # decoder_inputs -> Input to decoder for teacher forcing
  # decoder_outputs -> Output
  model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
  return model

In [12]:
import copy
def build_inference(model,encoder_layers,decoder_layers):
    """Split a trained model from build_model into step-wise encoder and decoder models.

    Layers are located by position in model.layers: index encoder_layers+3 is
    taken to be the last encoder recurrent layer, encoder_layers+2 the decoder
    embedding and encoder_layers+4+i the i-th decoder recurrent layer. The
    output layer is fetched by its name 'final'. Also reads the module-level
    latent_dim.

    Args:
      model: model whose layer ordering matches the indices above.
      encoder_layers: number of encoder recurrent layers in that model.
      decoder_layers: number of decoder recurrent layers in that model.

    Returns:
      (encoder_model, decoder_model). encoder_model maps the encoder input to
      the final encoder state(s). decoder_model takes one token per sample plus
      one set of state inputs per decoder layer (named 'inp3_<i>', and
      'inp4_<i>' for the LSTM cell state) and returns the token probabilities
      followed by the updated states, in the same order.

    Raises:
      ValueError: if the layer at the expected encoder index is not recurrent.
    """
    last_encoder = model.layers[encoder_layers+3]
    if not isinstance(last_encoder, keras.layers.RNN):
      raise ValueError("layer %d of the model is not a recurrent layer" % (encoder_layers+3))
    # LSTM carries two state tensors (h, c); GRU and SimpleRNN carry one.
    if isinstance(last_encoder, keras.layers.LSTM):
      state_prefixes = ['inp3_', 'inp4_']
    else:
      state_prefixes = ['inp3_']

    # ---- encoder: input -> final state(s) of the last encoder layer ----
    encoder_inputs = model.input[0]
    _, *encoder_states = last_encoder.output
    encoder_model = keras.Model(encoder_inputs, encoder_states)

    # ---- decoder: one time step, states fed in and out explicitly ----
    decoder_inputs = keras.Input(shape=(1,))
    decoder_states_inputs=[]  # input placeholders, one group per decoder layer
    decoder_states=[]         # matching updated states produced by each layer
    last = model.layers[encoder_layers+2](decoder_inputs)
    for i in range(decoder_layers):
      #every layer must have an input through which we can supply it's hidden state
      layer_state_inputs = [
          keras.Input(shape=(latent_dim,),name=prefix+str(i)) for prefix in state_prefixes
      ]
      decoder_rnn = model.layers[i+encoder_layers+4]
      last, *layer_states = decoder_rnn(last, initial_state=layer_state_inputs)
      decoder_states_inputs.extend(layer_state_inputs)
      decoder_states.extend(layer_states)

    decoder_dense = model.get_layer('final')
    decoder_outputs = decoder_dense(last)
    decoder_model = keras.Model(
        [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
    )
    return encoder_model,decoder_model


In [13]:
def decode_batch(input_seq,encoder_model,decoder_model,batch_size,encoder_layers,decoder_layers):
    """Greedy-decode a whole batch of input sequences into strings.

    Reads the module-level target_tokenizer, index_to_char_target and
    max_decoder_seq_length.

    Args:
      input_seq: encoded inputs handed to encoder_model.predict.
      encoder_model: model returning the encoder state(s); either a single
        array or a list of arrays.
      decoder_model: one-step decoder; predict() receives
        [previous tokens, *states] and returns [probabilities, *new states].
      batch_size: number of sequences in input_seq.
      encoder_layers: unused; kept for call compatibility.
      decoder_layers: number of decoder layers; the encoder state(s) are
        repeated once per layer.

    Returns:
      A list of batch_size decoded strings, without start/end markers.
    """
    # Get encoder output. A single-state encoder yields a bare array, a
    # multi-state one (LSTM: h and c) a list; normalise to a flat list.
    states_value = encoder_model.predict(input_seq)
    if not isinstance(states_value, (list, tuple)):
      states_value=[states_value]
    # every decoder layer is initialised with the same encoder state(s)
    states_value = list(states_value) * max(decoder_layers, 1)

    # Previously predicted character index for every word in the batch.
    prev_char_index = np.zeros((batch_size, 1))
    # We start with \t for every word in batch
    prev_char_index[:, 0] = target_tokenizer.word_index['\t']

    predicted_words = ["" for _ in range(batch_size)]
    done = [False] * batch_size
    for _ in range(max_decoder_seq_length):
        # nothing left to decode: skip the remaining predict calls
        if all(done):
          break
        out = decoder_model.predict([prev_char_index] + states_value)
        output_probability=out[0]
        states_value = list(out[1:])
        best_tokens = np.argmax(output_probability[:, -1, :], axis=-1)
        for j in range(batch_size):
          if done[j]:
            continue
          sampled_token_index = int(best_tokens[j])
          # index 0 is padding and is treated like the end marker
          if sampled_token_index == 0:
            sampled_char='\n'
          else:
            sampled_char = index_to_char_target[sampled_token_index]
          if sampled_char == '\n':
            done[j]=True
            continue
          predicted_words[j] += sampled_char
          #update the previously predicted characters
          prev_char_index[j,0]=sampled_token_index
    return predicted_words

In [14]:
def test_accuracy(encoder_model,decoder_model,encoder_layers,decoder_layers):
  """Decode the test split, log every prediction and return exact-match accuracy.

  Reads the module-level test_input_tensor, test_input_texts and
  test_target_texts. One line "<input> <target> <prediction>" is appended to
  success.txt for every exact match and to failure.txt otherwise.

  Args:
    encoder_model, decoder_model: inference models passed on to decode_batch.
    encoder_layers, decoder_layers: layer counts passed on to decode_batch.

  Returns:
    Fraction of test words whose prediction equals the target exactly.
  """
  total = test_input_tensor.shape[0]
  #Get all the predicted words
  pred=decode_batch(test_input_tensor,encoder_model,decoder_model,total,encoder_layers,decoder_layers)
  success=0
  # Open each log once (append mode, as before) and pin the encoding, since the
  # words being written are not ASCII.
  with open("success.txt", "a", encoding="utf-8") as hits, \
       open("failure.txt", "a", encoding="utf-8") as misses:
    for seq_index in range(total):
      predicted_word = pred[seq_index]
      # strip the start (tab) and end (newline) markers added by data()
      target_word=test_target_texts[seq_index][1:-1]
      line = test_input_texts[seq_index]+' '+target_word+' '+predicted_word+'\n'
      if target_word == predicted_word:
        success+=1
        hits.write(line)
      else:
        misses.write(line)
  return float(success)/float(total)

In [15]:
def batch_validate(encoder_model,decoder_model,encoder_layers,decoder_layers):
  """Return exact-match accuracy of the inference models on the validation split.

  Uses the module-level val_input_tensor and val_target_texts; decoding is
  delegated to decode_batch over the whole split at once.
  """
  n_words = val_input_tensor.shape[0]
  guesses = decode_batch(val_input_tensor,encoder_model,decoder_model,n_words,encoder_layers,decoder_layers)
  # targets carry a leading start marker and a trailing end marker; drop both before comparing
  hits = sum(1 for k in range(n_words) if guesses[k] == val_target_texts[k][1:-1])
  return float(hits)/float(n_words)

In [29]:
rnn_type=None
embedding_dim=None
model= None
latent_dim = None
enc_layers=None
dec_layers=None
# train() writes the layer counts to the singular names below; declare them as
# well so every global it assigns exists up front.
enc_layer=None
dec_layer=None
def train():
  """Run one Weights & Biases trial: build, fit and validate a model from wandb.config.

  Publishes rnn_type, embedding_dim, latent_dim, enc_layer, dec_layer and the
  built model as module-level globals (build_model, build_inference and
  decode_batch read some of them). Logs "train_loss" after every epoch and
  "val_acc" once at the end.

  Raises:
    ValueError: if the configured number of epochs is below 1.
  """
  global rnn_type
  global embedding_dim
  global model
  global latent_dim
  global enc_layer
  global dec_layer
  wandb.init()
  rnn_type=wandb.config.rnn_type
  embedding_dim=wandb.config.embedding_dim
  latent_dim=wandb.config.latent_dim
  enc_layer=wandb.config.enc_layer
  dec_layer=wandb.config.dec_layer
  dropout=wandb.config.dropout
  epochs=wandb.config.epochs
  bs=wandb.config.bs
  if epochs < 1:
    raise ValueError("epochs must be at least 1")
  wandb.run.name = 'epochs_'+str(epochs)+'_bs_'+str(bs)+'_rnn_type_'+str(rnn_type)+'_em_'+str(embedding_dim)+'_latd_'+str(latent_dim)+'_encs_'+str(enc_layer)+'_decs_'+str(dec_layer)+'_dr_'+str(dropout)

  # Use the configured dropout; it is part of the run name and was previously
  # overridden by a hard-coded .1.
  model=build_model(rnn_type=rnn_type,embedding_dim=embedding_dim,encoder_layers=enc_layer,decoder_layers=dec_layer,dropout=dropout)

  model.compile(
      optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(
                                                              reduction='none'), metrics=["accuracy"]
  )
  # Teacher forcing: the expected output is the decoder input shifted one step
  # to the left, with a zero appended to keep the width. Built once, reused every epoch.
  shifted_target = tf.concat([target_tensor[:,1:],tf.zeros((target_tensor[:,:].shape[0],1))], axis=1)
  for i in range(epochs):
    hist=model.fit(
        [input_tensor, target_tensor],
        shifted_target,
        batch_size=bs,
        epochs=1,shuffle=True
    )
    # Save model
    model.save("s2s.keras")
    #log train loss to wandb
    wandb.log({"train_loss": hist.history['loss'][0]})

  # Validation happens once, so the inference models are built once, from the
  # file that was just written (same relative path as the save above).
  inf = keras.models.load_model("s2s.keras")
  encoder_model,decoder_model=build_inference(inf,encoder_layers=enc_layer,decoder_layers=dec_layer)
  val_acc=batch_validate(encoder_model,decoder_model,enc_layer,dec_layer)
  wandb.log({"val_acc":val_acc})
  

In [30]:
rnn_type=None
embedding_dim=None
model= None
latent_dim = None
enc_layers=None
dec_layers=None
# manual_train() writes the layer counts to the singular names below; declare
# them as well so every global it assigns exists up front.
enc_layer=None
dec_layer=None
#this function is needed for training manually
def manual_train(config):
  """Train, validate and test a model from an explicit configuration object.

  Publishes rnn_type, embedding_dim, latent_dim, enc_layer, dec_layer and the
  built model as module-level globals (build_model, build_inference and
  decode_batch read some of them). Writes the architecture diagram to
  model.png, prints the validation accuracy after every epoch and the test
  accuracy once at the end.

  Args:
    config: object exposing rnn_type, embedding_dim, latent_dim, enc_layer,
      dec_layer, dropout, epochs and bs attributes.

  Raises:
    ValueError: if config.epochs is below 1.
  """
  global rnn_type
  global embedding_dim
  global model
  global latent_dim
  global enc_layer
  global dec_layer
  rnn_type=config.rnn_type
  embedding_dim=config.embedding_dim
  latent_dim=config.latent_dim
  enc_layer=config.enc_layer
  dec_layer=config.dec_layer
  dropout=config.dropout
  epochs=config.epochs
  bs=config.bs
  # Without at least one epoch there are no inference models to test at the end.
  if epochs < 1:
    raise ValueError("epochs must be at least 1")

  # Use the configured dropout instead of a hard-coded .1.
  model=build_model(rnn_type=rnn_type,embedding_dim=embedding_dim,encoder_layers=enc_layer,decoder_layers=dec_layer,dropout=dropout)

  model.compile(
      optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(
                                                              reduction='none'), metrics=["accuracy"]
  )
  tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_dtype=True,show_layer_names=True, dpi=96 )
  # Teacher forcing: the expected output is the decoder input shifted one step
  # to the left, with a zero appended to keep the width. Built once, reused every epoch.
  shifted_target = tf.concat([target_tensor[:,1:],tf.zeros((target_tensor[:,:].shape[0],1))], axis=1)
  for i in range(epochs):
    hist=model.fit(
        [input_tensor, target_tensor],
        shifted_target,
        batch_size=bs,
        epochs=1,shuffle=True
    )

    model.save("s2s.keras")

    # Reload from the same relative path that was just written.
    inf = keras.models.load_model("s2s.keras")
    encoder_model,decoder_model=build_inference(inf,encoder_layers=enc_layer,decoder_layers=dec_layer)

    val_acc=batch_validate(encoder_model,decoder_model,enc_layer,dec_layer)
    print("Validation Accuracy",val_acc)
  # the inference models from the final epoch are used for the test score
  print("Test Accuracy",test_accuracy(encoder_model,decoder_model,enc_layer,dec_layer))

In [18]:
%%capture
!pip install wandb


In [19]:
# Run mode switch: True logs in to Weights & Biases and starts the sweep agent,
# False runs a single configuration through manual_train.
wb=False

In [20]:
import wandb
# Authenticate only when the sweep path is enabled.
if wb:
  wandb.login()

In [21]:
# Sweep path: hand train() to the agent of the given sweep as the function to run.
if wb:
  wandb.agent('utsavdey/seq_to_seq/cdgdxg9i', function=train)

In [22]:
class configuration:
  """Plain holder for one set of hyper-parameters, read attribute by attribute by manual_train."""

  def __init__(self, rnn_type, embedding_dim,latent_dim,enc_layer,dec_layer,dropout,epochs,bs):
    # architecture
    self.rnn_type, self.embedding_dim, self.latent_dim = rnn_type, embedding_dim, latent_dim
    self.enc_layer, self.dec_layer = enc_layer, dec_layer
    # regularisation and optimisation schedule
    self.dropout, self.epochs, self.bs = dropout, epochs, bs


In [None]:
# Manual path: train a single hand-picked configuration.
if not wb:
  config=configuration(
      rnn_type='LSTM',
      embedding_dim=32,
      latent_dim=512,
      enc_layer=3,
      dec_layer=2,
      dropout=.3,
      epochs=20,
      bs=64,
  )
  manual_train(config)