In [1]:
# Colab-only step: opens a file picker so that attention.py (needed by the
# `from attention import AttentionLayer` import in the next cell) is uploaded
# into the runtime's working directory.
from google.colab import files
uploaded = files.upload()

Saving attention.py to attention.py


In [2]:
import warnings
warnings.filterwarnings('ignore')


import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import keras.backend as K
from tensorflow.keras import layers
from keras.utils.vis_utils import plot_model

from keras.layers.embeddings import Embedding
from keras.layers import Input,Dense,LSTM,GRU,RNN,SimpleRNN,Softmax,Dropout,Concatenate
from keras.layers import TimeDistributed
from keras.callbacks import Callback

from keras import Model
from attention import AttentionLayer

from math import log
import math

<h2>1.Get the train, val and test Data</h2>

In [3]:
!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xvf '/content/dakshina_dataset_v1.0.tar'


--2021-05-23 06:35:03--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.81.208, 172.217.7.240, 172.217.8.16, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.81.208|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2021-05-23 06:35:21 (104 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]

dakshina_dataset_v1.0/bn/
dakshina_dataset_v1.0/bn/lexicons/
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv
dakshina_dataset_v1.0/bn/native_script_wikipedia/
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.valid.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.info.sorted.tsv.gz
d

In [4]:
# Paths to the Hindi ("hi") lexicon splits inside the extracted Dakshina dataset.
# NOTE: the name `train` is rebound to a function by `def train()` in the sweep cell
# further down, so this path is only valid until that cell has been executed.
train = '/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv'
val =   '/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv'
test =  '/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv'

<h2>2 Preprocessing</h2>

In [5]:
class Preprocess:
  """Character-level preprocessing for tab-separated lexicon files.

  Every non-empty line of a lexicon file must hold exactly three tab-separated
  fields: target text, input text and an attestation value (read but unused).
  `extract_train` builds the vocabularies and maximum sequence lengths from one
  file; `vectorize_` converts any file into arrays using those vocabularies.
  """

  def __init__(self):
    # All attributes are populated by extract_train().
    self.num_encoder_tokens = None
    self.num_decoder_tokens = None
    self.input_token_index = None
    self.target_token_index = None
    self.reverse_input_char_index = None
    self.reverse_target_char_index = None
    self.max_encoder_seq_length = None
    self.max_decoder_seq_length = None

  def _read_pairs(self, path):
    """Read a lexicon file and return `(input_texts, target_texts)`.

    Target texts are wrapped as "\\t" + text + "\\n": tab is the start-of-sequence
    character and newline the end-of-sequence character.
    Raises ValueError if a non-empty line does not have exactly three fields.
    """
    input_texts = []
    target_texts = []
    with open(path, "r", encoding="utf-8") as f:
      lines = f.read().split("\n")
    for line in lines:
      # Skip empty lines instead of blindly dropping the last element: a file
      # without a trailing newline would otherwise lose its final record.
      if not line:
        continue
      target_text, input_text, _attestation = line.split("\t")
      input_texts.append(input_text)
      target_texts.append("\t" + target_text + "\n")
    return input_texts, target_texts

  def extract_train(self,path):
    """Build vocabularies, index maps and maximum lengths from the file at `path`.

    Returns the index -> character map of the target vocabulary
    (also stored as `self.reverse_target_char_index`).
    """
    input_texts, target_texts = self._read_pairs(path)

    input_characters = set()
    target_characters = set()
    for input_text in input_texts:
      input_characters.update(input_text)
    for target_text in target_texts:
      target_characters.update(target_text)

    # Sorting makes the character -> index assignment deterministic.
    input_characters = sorted(input_characters)
    target_characters = sorted(target_characters)
    self.num_encoder_tokens = len(input_characters)
    self.num_decoder_tokens = len(target_characters)
    self.max_encoder_seq_length = max([len(txt) for txt in input_texts])
    self.max_decoder_seq_length = max([len(txt) for txt in target_texts])
    self.input_token_index = dict([(char, i) for i, char in enumerate(input_characters)])
    self.target_token_index = dict([(char, i) for i, char in enumerate(target_characters)])

    self.reverse_input_char_index = dict((i, char) for char, i in self.input_token_index.items())
    self.reverse_target_char_index = dict((i, char) for char, i in self.target_token_index.items())

    return self.reverse_target_char_index

  def vectorize_(self,path):
    """Vectorize the lexicon file at `path` using the vocabularies from `extract_train`.

    Returns `(input_texts, target_texts, encoder_input_data, decoder_input_data,
    decoder_target_data)`:
      * encoder_input_data: (samples, max_encoder_seq_length) token indices
      * decoder_input_data: (samples, max_decoder_seq_length) token indices
      * decoder_target_data: (samples, max_decoder_seq_length, num_decoder_tokens)
        one-hot, shifted one step ahead of decoder_input_data

    A character missing from the training vocabulary raises KeyError and a
    sequence longer than the training maximum raises IndexError.
    NOTE: unused positions stay 0, which is also a valid token index
    (indices start at 0), so padding is not distinguishable from that token.
    """
    input_texts, target_texts = self._read_pairs(path)

    encoder_input_data = np.zeros((len(input_texts), self.max_encoder_seq_length), dtype="float32")
    decoder_input_data = np.zeros((len(input_texts), self.max_decoder_seq_length), dtype="float32")
    decoder_target_data = np.zeros((len(input_texts), self.max_decoder_seq_length, self.num_decoder_tokens), dtype="float32")

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
      for t, char in enumerate(input_text):
        encoder_input_data[i][t] = self.input_token_index[char]
      for t, char in enumerate(target_text):
        decoder_input_data[i][t] = self.target_token_index[char]
        if t > 0:
          # The target is the decoder input shifted left by one step,
          # so it never contains the start character.
          decoder_target_data[i, t - 1, self.target_token_index[char]] = 1.0

    return (input_texts ,target_texts ,encoder_input_data,decoder_input_data,decoder_target_data)

  def print_features(self):
    """Print vocabulary sizes and maximum sequence lengths."""
    print("Number of unique input tokens:", self.num_encoder_tokens)
    print("Number of unique output tokens:", self.num_decoder_tokens)
    print("Max sequence length for inputs:", self.max_encoder_seq_length)
    print("Max sequence length for outputs:", self.max_decoder_seq_length)

<h4>2.1 extract features from train data</h4>

In [6]:
# Build the vocabularies and maximum lengths from the training split only;
# the returned index -> character map is kept as a global for decoding predictions.
pr_obj=Preprocess()
reverse_target_char_index = pr_obj.extract_train(train)
pr_obj.print_features()

Number of unique input tokens: 26
Number of unique output tokens: 65
Max sequence length for inputs: 20
Max sequence length for outputs: 21


<h4>2.2 Vectorize the train,val,test data</h4>

In [7]:
# Vectorize every split with the training vocabulary. Name scheme:
# en_ip = encoder input, de_ip = decoder input, de_op = decoder output (one-hot target);
# tr / val / ts = train / validation / test; *_text = raw strings, *_data = arrays.
en_ip_tr_text ,de_ip_tr_text , en_ip_tr_data , de_ip_tr_data , de_op_tr_data  = pr_obj.vectorize_(train)
en_ip_val_text ,de_ip_val_text , en_ip_val_data , de_ip_val_data , de_op_val_data  = pr_obj.vectorize_(val)
en_ip_ts_text ,de_ip_ts_text , en_ip_ts_data , de_ip_ts_data , de_op_ts_data  = pr_obj.vectorize_(test)

print("Train data shape : ",en_ip_tr_data.shape)
print("Validation data shape : ",en_ip_val_data.shape)
print("Test data shape : ",en_ip_ts_data.shape)

Train data shape :  (44204, 20)
Validation data shape :  (4358, 20)
Test data shape :  (4502, 20)


Utility functions and Word Accuracy Callback

In [8]:
class Utility:
  """Decoding helpers: beam search over per-step distributions and index-to-text conversion."""

  def beam_search_decoder(self,data, k):
    """Return the `k` lowest-cost index sequences for a sequence of distributions.

    data: iterable of rows; row[j] is the probability of token j at that step.
    k: beam width (number of hypotheses kept after every step).

    Returns a list of `[token_index_list, cost]` pairs sorted by ascending cost,
    where cost is the summed negative log-probability of the chosen tokens.
    """
    decodedSequences = [[list(), 0.0]]
    # walk over each step in sequence
    for row in data:
      all_candidates = list()
      # expand each current candidate with every possible next token
      for seq, score in decodedSequences:
        for j in range(len(row)):
          prob = row[j]
          # math.log raises ValueError for 0 (a softmax output can underflow to
          # exactly 0), so a zero-probability token gets infinite cost instead.
          step_cost = -log(prob) if prob > 0 else float("inf")
          all_candidates.append([seq + [j], score + step_cost])
      # keep the k candidates with the lowest cost
      decodedSequences = sorted(all_candidates, key=lambda tup:tup[1])[:k]
    return decodedSequences

  def decode_translate(self,seq):
    """Map token indices to characters and join them into one string.

    Uses the module-level `reverse_target_char_index` (index -> character).
    """
    return "".join(reverse_target_char_index[x] for x in seq)
  

In [9]:
class WordAccuracyCallback(keras.callbacks.Callback):
  """Keras callback that reports word-level accuracy on the validation split after every epoch.

  The model is run on the module-level `en_ip_val_data` / `de_ip_val_data`
  (i.e. with the reference sequence as decoder input), the per-step
  distributions are decoded with beam search, and a word counts as correct
  if any of the beam hypotheses reproduces `de_ip_val_text[i]`.
  """
  def __init__(self,beam_size):
    # Let the Keras base class initialise its own attributes.
    super().__init__()
    self.beam_size=beam_size
  def on_epoch_end(self, epoch, logs=None):
    """Compute the accuracy, store it in `logs["WordAccuracy"]` (when logs is given) and print it."""
    yPred=self.model.predict([en_ip_val_data,de_ip_val_data])
    count=0
    U=Utility()
    k=self.beam_size
    for i in range(yPred.shape[0]):
      predictedSequences=U.beam_search_decoder(yPred[i],k)
      # The reference is "\t" + word + "\n"; predictions carry no start character,
      # so only the first len(reference) - 1 predicted steps are compared.
      targetLength=len(de_ip_val_text[i])-1
      # Iterate over the hypotheses actually returned (never more than k).
      for seq, _score in predictedSequences:
        if "\t"+U.decode_translate(seq[:targetLength])==de_ip_val_text[i]:
          count=count+1
          break
    # Truncate (not round) to four decimal places.
    factor = 10.0 ** 4
    wordAccuracy=math.trunc((count/yPred.shape[0])*factor)/factor
    # `logs` defaults to None, so only write into it when Keras supplied a dict.
    if logs is not None:
      logs["WordAccuracy"]=wordAccuracy
    print("- wordAccuracy:",wordAccuracy)

<h2>3.Model Class</h2>

In [10]:
class RNN_Model:
  """Encoder-decoder recurrent model with an attention layer on top of the decoder.

  The encoder and decoder are stacks of LSTM / GRU / SimpleRNN layers
  (selected by `cell_type`); decoder layer j is initialised with the final
  state of encoder layer j.
  """

  def __init__(self,pr_obj,embed_size ,no_of_encoder_layers,no_of_decoder_layers,latent_dimension,dropout,recurrent_dropout,cell_type,beam_size):
    """Store the hyperparameters; the Keras model is created later by BUILD_MODEL.

    pr_obj: the Preprocess instance (stored, not used by this class itself).
    embed_size: output dimension of both embedding layers.
    no_of_encoder_layers / no_of_decoder_layers: depth of each recurrent stack.
    latent_dimension: number of units in every recurrent layer.
    dropout / recurrent_dropout: passed to every recurrent layer.
    cell_type: one of 'LSTM', 'GRU', 'RNN'.
    beam_size: beam width handed to WordAccuracyCallback during training.
    """
    self.pr_obj=pr_obj
    self.embed_size = embed_size
    self.no_of_encoder_layers = no_of_encoder_layers
    self.no_of_decoder_layers = no_of_decoder_layers

    self.latent_dimension = latent_dimension
    self.dropout = dropout
    self.recurrent_dropout=recurrent_dropout
    self.cell_type=cell_type
    self.model = None

    # used to store all the encoder layers and decoder layers
    self.input_layers = None
    self.output_layers = None

    # the inference models (not populated anywhere in this class)
    self.encoder_model = None
    self.decoder_model = None

    # for beam search and greedy search
    self.beam_size=beam_size

  def _make_recurrent_layer(self):
    """Create one recurrent layer of the configured cell type that returns sequences and states."""
    layer_classes = {'LSTM': LSTM, 'GRU': GRU, 'RNN': SimpleRNN}
    return layer_classes[self.cell_type](
        self.latent_dimension,
        return_state=True,
        return_sequences=True,
        recurrent_dropout=self.recurrent_dropout,
        dropout=self.dropout)

  def BUILD_MODEL(self,max_encoder_seq_length,num_encoder_tokens,max_decoder_seq_length,num_decoder_tokens):
    """Build and compile the training model and store it in `self.model`.

    Raises ValueError if `cell_type` is unknown or if there are more decoder
    layers than encoder layers (each decoder layer needs an encoder state).
    """
    # Fail early with a clear message; without these checks the same
    # configurations die later with an opaque IndexError.
    if self.cell_type not in ('LSTM', 'GRU', 'RNN'):
      raise ValueError("cell_type must be one of 'LSTM', 'GRU', 'RNN'; got %r" % (self.cell_type,))
    if self.no_of_decoder_layers > self.no_of_encoder_layers:
      raise ValueError(
          "no_of_decoder_layers (%d) must not exceed no_of_encoder_layers (%d)"
          % (self.no_of_decoder_layers, self.no_of_encoder_layers))

    # Define the encoder
    encoder_inputs = Input(shape=(max_encoder_seq_length,))
    embd_enc_ip = Embedding(input_dim = num_encoder_tokens,output_dim = self.embed_size ,
                            input_length=max_encoder_seq_length ,name='enc_embd_layer')(encoder_inputs)

    outputs = embd_enc_ip
    encoder_states = []
    self.input_layers = []

    for j in range(self.no_of_encoder_layers):
      self.input_layers.append(self._make_recurrent_layer())
      all_op = self.input_layers[-1](outputs)
      # First element is the output sequence, the rest are the layer's final state(s).
      outputs,encoder_state = all_op[0],all_op[1:]
      encoder_states.append(encoder_state)

    encoder_outputs=outputs

    # Define the decoder
    decoder_inputs = Input(shape=(max_decoder_seq_length,))
    embd_dec_ip = Embedding(input_dim = num_decoder_tokens,output_dim = self.embed_size,
                            input_length=max_decoder_seq_length ,name='dec_embd_layer')(decoder_inputs)

    outputs = embd_dec_ip
    self.output_layers = []

    for j in range(self.no_of_decoder_layers):
      self.output_layers.append(self._make_recurrent_layer())
      # Decoder layer j starts from the final state of encoder layer j.
      all_op = self.output_layers[-1](outputs, initial_state = encoder_states[j])
      outputs = all_op[0]

    decoder_outputs=outputs

    # Attention over the top encoder outputs, concatenated with the decoder outputs.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs,decoder_outputs])

    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])
    dense = Dense(num_decoder_tokens, activation='softmax', name='dense_layer')

    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    self.model = keras.Model([encoder_inputs, decoder_inputs], decoder_pred)
    self.model.compile(optimizer='adam',loss='categorical_crossentropy' , metrics=['accuracy'])
    return

  def FIT_RNN(self ,  en_ip_tr_data , de_ip_tr_data , de_op_tr_data,epochs ,
              batch_size):
    """Train `self.model` on the given arrays, evaluating word accuracy after every epoch.

    BUILD_MODEL must have been called first.
    """
    self.model.fit(
        [en_ip_tr_data, de_ip_tr_data],
        de_op_tr_data,
        batch_size=batch_size,
        epochs=epochs,
        shuffle=True,
        callbacks = [WordAccuracyCallback(self.beam_size)],verbose=1)
    return

<h2>4. Hyperparameter Tuning Using Wandb</h2>

In [11]:
# Install the Weights & Biases client into the runtime (-qqq silences pip's output),
# then authenticate this session; login() returned True in the recorded run.
!pip install wandb -qqq
import wandb
wandb.login()

[K     |████████████████████████████████| 1.8MB 36.9MB/s 
[K     |████████████████████████████████| 174kB 51.1MB/s 
[K     |████████████████████████████████| 102kB 14.7MB/s 
[K     |████████████████████████████████| 133kB 56.3MB/s 
[K     |████████████████████████████████| 71kB 11.5MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [12]:
import wandb
from wandb.keras import WandbCallback

In [13]:
# Bayesian sweep that maximises the logged `accuracy` metric. Each row of the
# table below is (hyperparameter name, discrete values the sweep may choose).
sweep_config = {
  'name': 'RNN',
  'method': 'bayes',
  'metric': {'name': 'accuracy', 'goal': 'maximize'},
  'parameters': {
      hyperparameter: {'values': choices}
      for hyperparameter, choices in (
          ('embed_size', [32,64,128]),
          ('encoder_layers', [1,2,3]),
          ('decoder_layers', [1,2,3]),
          ('hidden_layer_size', [64,128,256]),
          ('cell_type', ['GRU', 'LSTM','RNN']),
          ('dropout', [0.0,0.3]),
          ('recurrent_dropout', [0.0]),
          ('beam_size', [1,3]),
      )
  },
}

# Register the sweep with the W&B server; the returned id is what agents attach to.
sweep_id = wandb.sweep(sweep_config, project='Assignment3', entity='manideepladi')

Create sweep with ID: bzfh2z0l
Sweep URL: https://wandb.ai/manideepladi/Assignment3/sweeps/bzfh2z0l


In [None]:
def train():
  """Run one sweep trial: build and train an RNN_Model from the W&B run configuration.

  Trials whose encoder and decoder depths differ are skipped without training.
  After training, the per-epoch metrics are sent to W&B so that the sweep's
  optimisation metric ('accuracy') is actually reported.
  """
  run = wandb.init()
  configuration=run.config
  if configuration.encoder_layers != configuration.decoder_layers:
    # Only equal encoder/decoder depths are trained in this notebook.
    print("Skipping trial: encoder_layers != decoder_layers")
    return

  parent = RNN_Model(pr_obj,
                     embed_size = configuration.embed_size,
                     no_of_encoder_layers = configuration.encoder_layers,
                     no_of_decoder_layers = configuration.decoder_layers,
                     latent_dimension = configuration.hidden_layer_size,
                     dropout = configuration.dropout,
                     recurrent_dropout = configuration.recurrent_dropout,
                     cell_type = configuration.cell_type,
                     beam_size=configuration.beam_size
                     )

  parent.BUILD_MODEL(pr_obj.max_encoder_seq_length ,pr_obj.num_encoder_tokens ,pr_obj.max_decoder_seq_length , pr_obj.num_decoder_tokens)

  parent.FIT_RNN(en_ip_tr_data,
             de_ip_tr_data,
             de_op_tr_data,
             epochs = 10,
             batch_size = 64)

  # FIT_RNN attaches no W&B callback, so nothing would reach the sweep otherwise.
  # Replay the per-epoch metrics from the History object Keras keeps on the model
  # after fit(). NOTE(review): "WordAccuracy" is expected to appear here because
  # WordAccuracyCallback writes it into the shared epoch logs; confirm on the
  # installed Keras version.
  history = getattr(parent.model, "history", None)
  per_epoch = getattr(history, "history", None) or {}
  n_epochs = max((len(values) for values in per_epoch.values()), default=0)
  for epoch in range(n_epochs):
    row = {name: float(values[epoch]) for name, values in per_epoch.items() if epoch < len(values)}
    row["epoch"] = epoch
    wandb.log(row)

# NOTE(review): this hard-coded id replaces the sweep_id returned by wandb.sweep()
# in the previous cell; it looks like an earlier sweep is being resumed - confirm.
sweep_id="9z4n9blj"
wandb.agent(sweep_id=sweep_id, function=train)

[34m[1mwandb[0m: Agent Starting Run: q05o1hyr with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	recurrent_dropout: 0
[34m[1mwandb[0m: Currently logged in as: [33mmanideepladi[0m (use `wandb login --relogin` to force relogin)


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: k0vtn6kp with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	recurrent_dropout: 0


Epoch 1/10
- wordAccuracy: 0.0293
Epoch 2/10
- wordAccuracy: 0.081
Epoch 3/10
- wordAccuracy: 0.1319
Epoch 4/10
- wordAccuracy: 0.1815
Epoch 5/10
- wordAccuracy: 0.2466
Epoch 6/10
- wordAccuracy: 0.2576
Epoch 7/10
- wordAccuracy: 0.2606
Epoch 8/10
- wordAccuracy: 0.3988
Epoch 9/10
- wordAccuracy: 0.4274
Epoch 10/10
- wordAccuracy: 0.4531


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: vscbx4ec with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	recurrent_dropout: 0


Epoch 1/10
- wordAccuracy: 0.2134
Epoch 2/10
- wordAccuracy: 0.2469
Epoch 3/10
- wordAccuracy: 0.279
Epoch 4/10
- wordAccuracy: 0.2833
Epoch 5/10
- wordAccuracy: 0.2797
Epoch 6/10
- wordAccuracy: 0.2868
Epoch 7/10
- wordAccuracy: 0.2914
Epoch 8/10
- wordAccuracy: 0.2868
Epoch 9/10
- wordAccuracy: 0.3088
Epoch 10/10
- wordAccuracy: 0.2957


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: cmysbedr with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	recurrent_dropout: 0


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: nc1lgqbr with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	recurrent_dropout: 0


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: 7bcuq2ng with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	recurrent_dropout: 0


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: lw5qsnk3 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	recurrent_dropout: 0


Epoch 1/10
- wordAccuracy: 0.4293
Epoch 2/10
- wordAccuracy: 0.4609
Epoch 3/10
- wordAccuracy: 0.4993
Epoch 4/10
- wordAccuracy: 0.4894
Epoch 5/10
- wordAccuracy: 0.5022
Epoch 6/10
- wordAccuracy: 0.4986
Epoch 7/10
- wordAccuracy: 0.5165
Epoch 8/10
- wordAccuracy: 0.5392
Epoch 9/10
- wordAccuracy: 0.5213
Epoch 10/10
- wordAccuracy: 0.5229


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qrb98sai with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	recurrent_dropout: 0


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: gv8d3i6h with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	recurrent_dropout: 0


Epoch 1/10
- wordAccuracy: 0.3033
Epoch 2/10
- wordAccuracy: 0.4265
Epoch 3/10
- wordAccuracy: 0.4639
Epoch 4/10
- wordAccuracy: 0.4687
Epoch 5/10
- wordAccuracy: 0.4598
Epoch 6/10
- wordAccuracy: 0.4873
Epoch 7/10
- wordAccuracy: 0.4903
Epoch 8/10
- wordAccuracy: 0.4997
Epoch 9/10
- wordAccuracy: 0.5181
Epoch 10/10
- wordAccuracy: 0.5091


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: kyz1ocpe with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	recurrent_dropout: 0


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: fx9i86ey with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	recurrent_dropout: 0


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

[34m[1mwandb[0m: Agent Starting Run: zaayz18x with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	recurrent_dropout: 0


Epoch 1/10
- wordAccuracy: 0.1952
Epoch 2/10
- wordAccuracy: 0.2877
Epoch 3/10
- wordAccuracy: 0.3097
Epoch 4/10
- wordAccuracy: 0.3494
Epoch 5/10
- wordAccuracy: 0.3575
Epoch 6/10
- wordAccuracy: 0.3698
Epoch 7/10
- wordAccuracy: 0.3586
Epoch 8/10
- wordAccuracy: 0.3848
Epoch 9/10
- wordAccuracy: 0.3932
Epoch 10/10

For Testing the Model

In [None]:
# Fixed configuration used for the attention inspection below:
# three-layer LSTM encoder/decoder, 256 units, no dropout, beam width 3.
rnn = RNN_Model(
    pr_obj,
    embed_size=64,
    no_of_encoder_layers=3,
    no_of_decoder_layers=3,
    latent_dimension=256,
    dropout=0,
    recurrent_dropout=0,
    cell_type="LSTM",
    beam_size=3,
)
rnn.BUILD_MODEL(
    max_encoder_seq_length=pr_obj.max_encoder_seq_length,
    num_encoder_tokens=pr_obj.num_encoder_tokens,
    max_decoder_seq_length=pr_obj.max_decoder_seq_length,
    num_decoder_tokens=pr_obj.num_decoder_tokens,
)

In [None]:
# Render the layer graph of the compiled model, annotated with tensor shapes.
plot_model(rnn.model, show_shapes=True)

In [None]:
# Train on the vectorized training split; FIT_RNN prints word accuracy on the
# validation split after every epoch via WordAccuracyCallback.
rnn.FIT_RNN(en_ip_tr_data,
               de_ip_tr_data,
               de_op_tr_data,
               epochs = 10,
               batch_size = 64)

In [None]:
# Print the per-layer summary of the trained model.
rnn.model.summary()

In [None]:
# Look up the attention layer by the name it was given in BUILD_MODEL.
attentionLayer=rnn.model.get_layer("attention_layer")

In [None]:
# Second output of the attention layer (named `attn_states` in BUILD_MODEL).
# NOTE(review): this is a tensor of the model graph, presumably symbolic rather
# than concrete values - confirm before using it with NumPy or plotting.
outputTensor=attentionLayer.output[1]

In [None]:
# Tensors expose their static shape as `.shape`; there is no `tensor_shape` attribute.
outputTensor.shape

In [None]:
def predictResponse_into_nparray(tensor):
    """Print the shape of `tensor` and return it reshaped to that same shape through NumPy.

    `tensor` must expose a `.shape` attribute.
    """
    dims = tensor.shape
    print(dims)
    reshaped = np.reshape(tensor, dims)
    return reshaped

In [None]:
# NOTE(review): `outputTensor` comes from `attentionLayer.output[1]`, i.e. a tensor of
# the model graph; np.reshape presumably cannot convert it to an array - confirm.
# Looks like a leftover exploratory cell.
predictResponse_into_nparray(outputTensor)

In [None]:
# Prints the tensor object itself (its description), presumably not attention
# values - TODO confirm; looks like a leftover exploratory cell.
print(attentionLayer.output[1])

In [None]:
import matplotlib.pyplot as plt
# `attentionLayer.output[1]` is a tensor of the model graph, not data, so it cannot be
# plotted directly. Build a probe model that shares the trained layers and returns the
# attention weights, then evaluate it on one validation example.
attention_probe = keras.Model(rnn.model.inputs, attentionLayer.output[1])
attention_weights = attention_probe.predict([en_ip_val_data[:1], de_ip_val_data[:1]])
fig, ax = plt.subplots()
# NOTE(review): assumes the weights for one example form a 2-D matrix
# (decoder steps x encoder steps) - confirm against attention.py.
sns.heatmap(attention_weights[0], ax=ax)
ax.set_title("Attention weights for the first validation word");