<a href="https://colab.research.google.com/github/AslanDevbrat/Seq2Seq/blob/dev/seq2seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup


In [1]:
%%capture
!pip install wandb --upgrade
!pip install tensorflow-addons

In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from tensorflow.keras.layers import Embedding, SimpleRNNCell, GRUCell, Dense, LSTMCell
from tensorflow.keras import Input
import pandas as pd
from numpy import argmax
from math import log
import pprint
import math
import wandb
import os
import io
from wandb.keras import WandbCallback
import time
import sys
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
wandb_api = user_secrets.get_secret("wandb_api")

#wandb.login(key=wandb_api)
! wandb login $wandb_api

os.environ["WANDB_SILENT"] = "false"
wandb.login()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


[34m[1mwandb[0m: Currently logged in as: [33maslan[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Download the data


## Configuration


In [3]:
!wget  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xf 'dakshina_dataset_v1.0.tar'
train_file_path = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
val_file_path= "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"
test_file_path  = "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"

--2022-06-23 14:15:35--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.134.128, 142.251.107.128, 142.250.98.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.134.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2022-06-23 14:15:45 (182 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]



In [14]:
batch_size = 64  # Batch size for training.
epochs = 100  # Number of epochs to train for.
latent_dim = 256  # Latent dimensionality of the encoding space.
num_samples = 100000  # Number of samples to train on.
# Path to the data txt file on disk.
data_path = train_file_path


## Prepare the data


In [15]:
def processData(filename,input_chars=set(),target_chars=set()):
  input=[]
  target=[]
  with open(filename, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")
  for line in lines[:len(lines)-1]:
      t_text,i_text, attestation = line.split("\t")
       # We use "\t" as the "start sequence" character and "\n" as "end sequence" character for the target text.
      input.append("\t"+i_text+"\n")
      target.append("\t"+t_text+"\n")
      for char in i_text:
        if char not in input_chars:
            input_chars.add(char)
      for char in t_text:
        if char not in target_chars:
            target_chars.add(char)
  target_chars.add("\t")
  target_chars.add("\n")
  input_chars.add("\t")
  input_chars.add("\n")

  input_chars = sorted(list(input_chars))
  target_chars = sorted(list(target_chars))
  num_encoder_tokens = len(input_chars)
  num_decoder_tokens = len(target_chars)
  max_encoder_seq_length = max([len(txt) for txt in input])
  max_decoder_seq_length = max([len(txt) for txt in target])
  return input,target,input_chars,target_chars,num_encoder_tokens,num_decoder_tokens, max_encoder_seq_length, max_decoder_seq_length     

In [16]:
# Vectorize the data.
input,target,input_chars,target_chars,num_encoder_tokens,num_decoder_tokens, max_encoder_seq_length, max_decoder_seq_length=processData(train_file_path)
print("Number of samples:", len(input))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)

Number of samples: 44204
Number of unique input tokens: 28
Number of unique output tokens: 65
Max sequence length for inputs: 22
Max sequence length for outputs: 21


In [17]:
# Vectorize the data.
# Vectorize the data.
validation_input,validation_target,input_chars,target_chars,num_encoder_tokens,num_decoder_tokens, validation_max_encoder_seq_length, validation_max_decoder_seq_length=processData(val_file_path,set(input_chars),set(target_chars))

print("Number of validation samples:", len(validation_input))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("validation Max sequence length for inputs:", validation_max_encoder_seq_length)
print("validation Max sequence length for outputs:", validation_max_decoder_seq_length)

Number of validation samples: 4502
Number of unique input tokens: 28
Number of unique output tokens: 65
validation Max sequence length for inputs: 18
validation Max sequence length for outputs: 17


In [18]:
# Vectorize the data.
test_input,test_target,test_input_chars,test_target_chars,test_num_encoder_tokens,test_num_decoder_tokens, test_max_encoder_seq_length, test_max_decoder_seq_length=processData(test_file_path)
print("Number of validation samples:", len(test_input))
print("Test Max sequence length for inputs:", test_max_encoder_seq_length)
print("Test Max sequence length for outputs:", test_max_decoder_seq_length)

Number of validation samples: 4358
Test Max sequence length for inputs: 20
Test Max sequence length for outputs: 16


In [19]:
# input_token = dict([(char, i) for i, char in enumerate(input_chars)])
# target_token = dict([(char, i) for i, char in enumerate(target_chars)])

# reverse_input_token = dict((i, char) for char, i in input_token.items())
# reverse_target_token = dict((i, char) for char, i in target_token.items())


# encoder_input_data = np.zeros(
#     (len(input), max_encoder_seq_length, num_encoder_tokens), dtype="float32"
# )
# validation_encoder_input_data=np.zeros(
#     (len(validation_input), max_encoder_seq_length, num_encoder_tokens), dtype="float32"
# )
# test_encoder_input_data=np.zeros(
#     (len(test_input), max_encoder_seq_length, num_encoder_tokens), dtype="float32"
# )
# decoder_input_data = np.zeros(
#     (len(input), max_decoder_seq_length, num_decoder_tokens), dtype="float32"
# )
# validation_decoder_input_data =np.zeros(
#     (len(validation_input), max_decoder_seq_length, num_decoder_tokens), dtype="float32"
# )
# decoder_target_data = np.zeros(
#     (len(input), max_decoder_seq_length, num_decoder_tokens), dtype="float32"
# )
# validation_decoder_target_data = np.zeros(
#     (len(validation_input), max_decoder_seq_length, num_decoder_tokens), dtype="float32"
# )

# for i, (input_text, target_text) in enumerate(zip(input, target)):
#     for t, char in enumerate(input_text):
#         encoder_input_data[i, t, input_token[char]] = 1.0
#     for t, char in enumerate(target_text):
#         # decoder_target_data is ahead of decoder_input_data by one timestep
#         decoder_input_data[i, t, target_token[char]] = 1.0
#         if t > 0:
#             # decoder_target_data will be ahead by one timestep
#             # and will not include the start character.
#             decoder_target_data[i, t - 1, target_token[char]] = 1.0
# # for validation data
# for i, (validation_input_text, validation_target_text) in enumerate(zip(validation_input, validation_target)):
#     for t, char in enumerate(validation_input_text):
#         validation_encoder_input_data[i, t, input_token[char]] = 1.0
#     for t, char in enumerate(validation_target_text):
#         # decoder_target_data is ahead of decoder_input_data by one timestep
#         validation_decoder_input_data[i, t, target_token[char]] = 1.0
#         if t > 0:
#             # decoder_target_data will be ahead by one timestep
#             # and will not include the start character.
#             validation_decoder_target_data[i, t - 1, target_token[char]] = 1.0

# # for test data
# for i, (test_input_text, test_target_text) in enumerate(zip(test_input, test_target)):
#     for t, char in enumerate(test_input_text):
#         test_encoder_input_data[i, t, input_token[char]] = 1.0

In [20]:
input_token = dict([(char, i) for i, char in enumerate(input_chars)])
target_token = dict([(char, i) for i, char in enumerate(target_chars)])

reverse_input_token = dict((i, char) for char, i in input_token.items())
reverse_target_token = dict((i, char) for char, i in target_token.items())

enc_input_data = np.zeros(
    (len(input), max_encoder_seq_length), dtype="float32"
)
dec_input_data = np.zeros(
    (len(input), max_decoder_seq_length), dtype="float32"
)
dec_target_data = np.ones(
    (len(input), max_decoder_seq_length), dtype="float32"
)
#Decoder Target Sequences are Padded to a maximum length of max_decoder SeqLen characters with a vocabulary of sizeofTeluguVocab different characters. 
for i, (input_text, target_text) in enumerate(zip(input, target)):
    for t, char in enumerate(input_text):
        enc_input_data[i, t] = input_token[char]
    #enc_input_data[i, t + 1 :] = input_token[" "]

    for t, char in enumerate(target_text):
        # dec_target_data is ahead of dec_input_data by one timestep
        dec_input_data[i, t] = target_token[char]
        if t > 0:
            # dec_target_data will not include the start character.
            dec_target_data[i, t - 1] = target_token[char]
    #dec_input_data[i, t + 1: ] = target_token[" "]
    #dec_target_data[i, t:, target_token[" "]] = 1.0
    
val_enc_input_data = np.zeros(
    (len(validation_input), validation_max_encoder_seq_length), dtype="float32"
)
val_dec_input_data = np.zeros(
    (len(validation_input), validation_max_decoder_seq_length), dtype="float32"
)
val_dec_target_data = np.ones(
    (len(validation_input), validation_max_decoder_seq_length), dtype="float32"
)

for i, (input_text, target_text) in enumerate(zip(validation_input,validation_target)):
    for t, char in enumerate(input_text):
        # Enumerate() method adds a counter to an iterable and returns it in a form of enumerating object. 
        # This enumerated object can then be used directly for loops or converted into a list of tuples using the list() method.
        val_enc_input_data[i, t] = input_token[char]
    #val_enc_input_data[i, t + 1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        val_dec_input_data[i, t] = target_token[char]
        if t > 0:
            # dec_target_data will be ahead by one timestep
            # and will not include the start character.
            val_dec_target_data[i, t - 1] =  target_token[char]
    #val_dec_input_data[i, t + 1: ] = target_token_index[" "]

In [21]:
# class NMTDataset:
#     def __init__(self, problem_type='en-hi'):
#         self.problem_type = 'en-'
#         self.inp_lang_tokenizer = None
#         self.targ_lang_tokenizer = None
    

#     def unicode_to_ascii(self, s):
#         return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')

#     ## Step 1 and Step 2 
#     def preprocess_sentence(self, w):
#         # w = self.unicode_to_ascii(w.lower().strip())

#         # # creating a space between a word and the punctuation following it
#         # # eg: "he is a boy." => "he is a boy ."
#         # # Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
#         # w = re.sub(r"([?.!,¿])", r" \1 ", w)
#         # w = re.sub(r'[" "]+', " ", w)

#         # # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
#         # w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)

#         # w = w.strip()

#         # adding a start and an end token to the sentence
#         # so that the model know when to start and stop predicting.
#         #print(w)
#         w = '\t' + w + '\n'
        
#         return w
    
#     def create_dataset(self, path, num_examples):
#         # path : path to spa-eng.txt file
#         # num_examples : Limit the total number of training example for faster training (set num_examples = len(lines) to use full data)
#         #lines = io.open(path, encoding='UTF-8').read().strip().split('\n')
#         #word_pairs = [[self.preprocess_sentence(w) for w in l.split('\t')]  for l in lines[:num_examples]]
#         data =  pd.read_csv(path,delimiter="\t", header= None, nrows = num_examples )
#         data = data.dropna()
#         print(data.info())
#         return data[0].apply(self.preprocess_sentence).values.astype(str), data[1].apply(self.preprocess_sentence).values.astype(str)

#     # Step 3 and Step 4
#     def tokenize(self, lang):
#         # lang = list of sentences in a language
        
#         # print(len(lang), "example sentence: {}".format(lang[0]))
#         lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level = True)
#         lang_tokenizer.fit_on_texts(lang)

#         ## tf.keras.preprocessing.text.Tokenizer.texts_to_sequences converts string (w1, w2, w3, ......, wn) 
#         ## to a list of correspoding integer ids of words (id_w1, id_w2, id_w3, ...., id_wn)
#         tensor = lang_tokenizer.texts_to_sequences(lang) 

#         ## tf.keras.preprocessing.sequence.pad_sequences takes argument a list of integer id sequences 
#         ## and pads the sequences to match the longest sequences in the given input
#         tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post')

#         return tensor, lang_tokenizer

#     def load_dataset(self, path, num_examples=None):
#         # creating cleaned input, output pairs
#         targ_lang, inp_lang = self.create_dataset(path, num_examples)

#         input_tensor, inp_lang_tokenizer = self.tokenize(inp_lang)
#         target_tensor, targ_lang_tokenizer = self.tokenize(targ_lang)

#         return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

#     def call(self, num_examples, BUFFER_SIZE, BATCH_SIZE):
#         #file_path = download_dakshina()
#         input_tensor_train, target_tensor_train, self.inp_lang_tokenizer, self.targ_lang_tokenizer = self.load_dataset(train_file_path, num_examples)
#         input_tensor_val, target_tensor_val, self.inp_lang_tokenizer, self.targ_lang_tokenizer = self.load_dataset(val_file_path, num_examples)
#         input_tensor_test, target_tensor_test, self.inp_lang_tokenizer, self.targ_lang_tokenizer = self.load_dataset(test_file_path, num_examples)
#         x = input_tensor_train
#         y  =target_tensor_train
#         #input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.4)

#         train_dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train,target_tensor_train))
#         train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

#         val_dataset = tf.data.Dataset.from_tensor_slices((input_tensor_val, target_tensor_val))
#         val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)

#         test_dataset = tf.data.Dataset.from_tensor_slices((input_tensor_test, target_tensor_test))
#         test_dataset = test_dataset.batch(BATCH_SIZE, drop_remainder=True)
#         return train_dataset, val_dataset, test_dataset, self.inp_lang_tokenizer, self.targ_lang_tokenizer


In [22]:
# BUFFER_SIZE = 32000
# BATCH_SIZE = 64
# # Let's limit the #training examples for faster training
# num_examples = 300000

# dataset_creator = NMTDataset('en-spa')
# train_dataset, val_dataset,test_dataset, inp_lang, targ_lang = dataset_creator.call(num_examples, BUFFER_SIZE, BATCH_SIZE)

In [23]:
# next(iter(train_dataset))[1][0]

In [24]:
# example_input_batch, example_target_batch = next(iter(train_dataset))
# example_input_batch.shape, example_target_batch.shape

## Build the model


In [112]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, num_of_layers, enc_unit_type, batch_sz, recurrent_dropout, dropout):
    super(Encoder, self).__init__()

    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.enc_unit_type = enc_unit_type
    self.num_of_layers = num_of_layers
    self.recurrent_dropout = recurrent_dropout
    self.dropout = dropout
    self.embedding = Embedding( vocab_size, embedding_dim)

    self.encoder_layer = self.get_encoder_layer(self.enc_units,
                                                self.num_of_layers, self.enc_unit_type)
    

  def get_encoder_layer(self, enc_units, num_of_layers, enc_unit_type):
    return tf.keras.layers.RNN(tf.keras.layers.StackedRNNCells( [self.get_cell(enc_unit_type, 
                                                                                 enc_units) for i in range(num_of_layers)],),
                                  return_sequences=True, return_state=True, name = "Encoder")

  def get_cell(self, cell_type = "lstm", num_of_cell = 1, name = None):
      #print(cell_type)
      if cell_type == "lstm":
        return LSTMCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout, )
      elif cell_type == "rnn":
        return SimpleRNNCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout)
      elif cell_type =="gru":
        return GRUCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout)
      else:
        print(f"Invalid cell type: {cell_type}")

    
  def call(self, x, hidden):
      x = self.embedding(x)
      output = self.encoder_layer(x,initial_state = hidden)

      #print(output)
      return output
    
  def initialize_hidden_state(self):
      print("Called")
        
      if self.enc_unit_type == 'rnn' or self.enc_unit_type == "gru":
        return [tf.zeros((self.batch_sz, self.enc_units))]*self.num_of_layers
      else:
        return [[tf.zeros((self.batch_sz, self.enc_units)),tf.zeros((self.batch_sz, self.enc_units))]]*self.num_of_layers

In [113]:
# enc_input_data[0].shape

In [119]:

#encoder = Encoder( num_encoder_tokens, 1024, 1024, 3, "lstm", batch_size, 0.0, 0.0).build(input_shape =(None,22))
encoder = Encoder( num_encoder_tokens, 2, 16, 2, "lstm", 64, 0.0, 0.0)
sample_hidden = encoder.initialize_hidden_state()
# encoder.build(input_shape =(None,26))
# encoder.summary()
sample_output = encoder(enc_input_data[:64], sample_hidden)
out , state = sample_output[0], sample_output[1:]

Called


In [100]:
# out, state

In [29]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, num_of_layers, 
               dec_unit_type, batch_sz, recurrent_dropout, dropout, 
               attention_type = None):
    
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.dec_unit_type = dec_unit_type
    self.num_of_layers = num_of_layers
    self.attention_type = attention_type
    self.recurrent_dropout = recurrent_dropout
    self.dropout = dropout
    #print("decoder embedding dim", embedding_dim)
    self.embedding = Embedding( vocab_size, embedding_dim)

    self.fc  = tf.keras.layers.Dense(vocab_size, activation = "softmax")

    self.decoder_cells = self.get_stacked_rnn_cell()
    self.sampler = tfa.seq2seq.sampler.TrainingSampler()
    if attention_type:        
        self.attention_mechanism = self.build_attention_mechanism(self.dec_units, None
                                                                  , self.batch_sz*[max_encoder_seq_length], 
                                                                  self.attention_type)

        self.cell = self.build_cell()

        #print(self.cell)

        self.decoder = tfa.seq2seq.BasicDecoder(self.cell, sampler = self.sampler, output_layer = self.fc)

    else:
        self.decoder = tfa.seq2seq.BasicDecoder(self.decoder_cells, self.sampler, self.fc)

  def build_cell(self):
    cell = tfa.seq2seq.AttentionWrapper(self.decoder_cells, self.attention_mechanism,
                                        attention_layer_size = self.dec_units)
    return cell
  
  def build_attention_mechanism(self, dec_units, memory, memory_sequence_length, attention_type='luong'):
    # ------------- #
    # typ: Which sort of attention (Bahdanau, Luong)
    # dec_units: final dimension of attention outputs 
    # memory: encoder hidden states of shape (batch_size, max_length_input, enc_units)
    # memory_sequence_length: 1d array of shape (batch_size) with every element set to max_length_input (for masking purpose)

    if(attention_type=='bahdanau'):
      return tfa.seq2seq.BahdanauAttention(units=dec_units, memory=memory, memory_sequence_length=memory_sequence_length)
    else:
      return tfa.seq2seq.LuongAttention(units=dec_units, memory=memory, memory_sequence_length=memory_sequence_length)

  def get_cell(self, cell_type = "lstm", num_of_cell = 1, name = None):
      #print(cell_type)
      if cell_type == "lstm":
        return LSTMCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout, )
      elif cell_type == "rnn":
        return SimpleRNNCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout)
      elif cell_type =="gru":
        return GRUCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout)
      else:
        print(f"Invalid cell type: {cell_type}")

  def get_stacked_rnn_cell(self,):
    return tf.keras.layers.StackedRNNCells( [self.get_cell(self.dec_unit_type, self.dec_units,) for i in range(self.num_of_layers)])

  def build_initial_state(self, batch_sz, encoder_state, Dtype):
    decoder_initial_state = self.cell.get_initial_state(batch_size=batch_sz, dtype=Dtype)
    #print(decoder_initial_state, len(decoder_initial_state))
    #print(batch_sz)
    #print(len(encoder_state))
    decoder_initial_state = decoder_initial_state.clone(cell_state=encoder_state)
    return decoder_initial_state

  def call(self, x, initial_state):
    x = self.embedding(x)
    #print("calles")
    output = self.decoder(x, initial_state=initial_state)
    return output

In [30]:

# #encoder = Encoder( num_encoder_tokens, 1024, 1024, 3, "lstm", batch_size, 0.0, 0.0).build(input_shape =(None,22))
# decoder = Decoder( num_decoder_tokens,  1, 16, 3, "lstm", 64, 0.0, 0.0)
# #sample_hidden = encoder.initialize_hidden_state()
# #decoder.build(input_shape =(None, ))
# # decoder.summary()
# #sample_x = tf.random.uniform((2  ,max_decoder_seq_length))
# decoder.attention_mechanism.setup_memory(out)
# initial_state = decoder.build_initial_state(64, tuple(state), tf.float32)
# # sample_output = decoder(dec_input_data[:8192], initial_state)
# # out1 , state1 = sample_output[0], sample_output[1:]

In [31]:
# out1

In [32]:
input_data = tf.data.Dataset.from_tensor_slices((enc_input_data, dec_input_data))
target_data =  tf.data.Dataset.from_tensor_slices(dec_target_data)
train_dataset  = tf.data.Dataset.zip((input_data, target_data)).batch(batch_size, drop_remainder=True)

input_data = tf.data.Dataset.from_tensor_slices((val_enc_input_data, val_dec_input_data))
target_data =  tf.data.Dataset.from_tensor_slices(val_dec_target_data)
val_dataset  = tf.data.Dataset.zip((input_data, target_data)).batch(batch_size, drop_remainder=True)

In [33]:
class Seq2Seq():
  def __init__(self, num_encoder_tokens, num_decoder_token, encoder_embedding_dim, decoder_embedding_dim,num_of_unit, num_of_layers, unit_type, batch_size, recurrent_dropout, dropout, attention_type = None):
    super().__init__()
    self.batch_size = batch_size
    self.attention_type = attention_type
    self.encoder = Encoder(  num_encoder_tokens, encoder_embedding_dim, num_of_unit, num_of_layers, unit_type, self.batch_size,  recurrent_dropout, dropout)
    #self.encoder.summary()
    self.dec = Decoder( num_decoder_tokens,  decoder_embedding_dim, num_of_unit, num_of_layers, unit_type, self.batch_size, recurrent_dropout, dropout, attention_type)
    #sample_x = tf.random.uniform((batch_size  ,max_decoder_seq_length))

  def call(self, enc_inp, dec_inp):
    #print("fsdfa",dec_inp.shape)
    x = self.encoder(enc_inp)
    enc_out, enc_state = x[0], x[1:]
    #print(enc_out.shape)
    if self.attention_type:
        self.dec.attention_mechanism.setup_memory(enc_out)
        enc_state = self.dec.build_initial_state(self.batch_size, tuple(enc_state), tf.float32)
    #print("fucck")
    x = self.dec(dec_inp,enc_state)
    return x

  @tf.function
  def validation_step(self, val_enc_input_data, val_dec_input_data, targ):
    #dec_input_data = val_dec_input_data[ : , :-1 ]
    out = self.call(val_enc_input_data, val_dec_input_data)
    logits = out[0].rnn_output
    #print(logits.item())
    loss = 0
    for (i, (ta, pre)) in enumerate(zip(tf.unstack(targ),tf.unstack(logits))):
        stop = tf.where( ta == 1)[0][0]
        self.metric.update_state(ta[:stop], pre[:stop])
        loss += self.loss_function(ta[:stop], pre[:stop])
    #loss += self.loss_function(real, logits)
    #print("Validation Loss = ", loss.numpy())
    #self.metric.update_state(real, logits)
    return loss/i, self.metric.result()

  @tf.function
  def train_step(self, enc_input_data, dec_input_data, targ):
    loss = 0

    with tf.GradientTape() as tape:
      
      out = self.call(enc_input_data, dec_input_data)
      logits = out[0].rnn_output
      loss = 0
      for (i, (ta, pre)) in enumerate(zip(tf.unstack(targ),tf.unstack(logits))):
        stop = tf.where( ta == 1)[0][0]
        self.metric.update_state(ta[:stop], pre[:stop])
        loss += self.loss_function(ta[:stop], pre[:stop])
 
      
    variables = self.encoder.variables + self.dec.variables
    gradients = tape.gradient(loss, variables)
    self.optimizer.apply_gradients(zip(gradients, variables))

    return loss/i, self.metric.result()

  def fit(self, train_dataset, val_dataset, epochs, loss, optimizer, checkpoint, metric):
    self.metric = metric

    
    self.loss_function = loss
    self.optimizer = optimizer
    steps_per_epoch = len(input)//batch_size
    step_per_val_epoch  = len(validation_input)//batch_size
    print(steps_per_epoch)
    for epoch in range(epochs):
      start = time.time()

      #enc_hidden = encoder.initialize_hidden_state()
      total_loss = 0
      total_acc = 0
      # print(enc_hidden[0].shape, enc_hidden[1].shape)

      self.metric.reset_states()
      for (batch, (inp, targ)) in enumerate(train_dataset.take(steps_per_epoch)):
        #print("targ", targ)
        batch_loss , acc = self.train_step(inp[0],inp[1] ,targ )
        total_loss += batch_loss
        total_acc += acc
        if batch % 100 == 0:
          #break
          print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                      batch,
                                                      batch_loss.numpy()))
      # saving (checkpoint) the model every 2 epochs
      #val_enc_inp, val_dec_inp , val_targ = val_dataset.take(-1)
      #val_enc_inp, val_dec_inp = val_inp.take(-1)
          #andb.log({"Epoch {epoch + 1} Batch {batch}": batch_loss.numpy()})
      total_val_loss = 0
      total_val_acc = 0

      self.metric.reset_states()
      for (batch, (inp, targ)) in enumerate(val_dataset.take(steps_per_epoch)):
        #print(batch)
       
        val_batch_loss, val_acc = self.validation_step(inp[0],inp[1] ,targ)
        total_val_loss +=val_batch_loss
        total_val_acc += val_acc

      print(f"Validatiion loss:  {total_val_loss.numpy()/  step_per_val_epoch}")
      print((f"Validatiion Acc:  {(total_val_acc.numpy()/  step_per_val_epoch)*100}"))
      # print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
      #                                                 batch,
      #                                                 val_batch_loss.numpy()))
      
        
      if val_acc>=95:
        checkpoint.save(file_prefix = checkpoint_prefix)
        break
      if (epoch + 1) % 2 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

      print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                          total_loss / steps_per_epoch))
      print("Accuracy ",(total_acc.numpy()/steps_per_epoch) *100)
      print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
         
#       wandb.log({"Epoch": epoch + 1,
#                 "Train loss": total_loss / steps_per_epoch,
#                  "Train Accuracy": (total_acc.numpy()/steps_per_epoch) *100,
#                  "Val Accuracy": (total_val_acc.numpy()/  step_per_val_epoch)*100,
#                  "Val Loss": total_val_loss.numpy()/  step_per_val_epoch
#                 })

        
            

    

In [34]:

s2s = Seq2Seq(num_encoder_tokens, num_decoder_tokens,  encoder_embedding_dim =1024,
              decoder_embedding_dim= 128,
              num_of_unit =64,
              num_of_layers = 2, 
              unit_type ="gru",
             batch_size = batch_size, 
              recurrent_dropout = 0.3,
              dropout = 0.4 , 
              attention_type = None)

sample_out = s2s.call(enc_input_data[:batch_size], dec_input_data[:batch_size])

In [35]:
#s2s.call(enc_input_data[:batch_size], dec_input_data[:batch_size])

In [110]:
def create_batch(appende, batch_size = 64):
  temp  = []
  for i in range(batch_size):
    temp.append(appende)
  return np.array(temp)
def idx_to_word(word):
  return  "".join([reverse_target_token[char] for char in word])
def word_to_index(word):
  return  [reverse_input_token[char] for char in word]


In [124]:
class BeamSearch():

  def __init__(self, beam_size):
    self.beam_size = beam_size

  def beam_search_decoder(aelf, data, k):
    sequences = [[list(), 0.0]]
    # walk over each step in sequence
    for row in data:
      all_candidates = list()
      # expand each current candidate
      for i in range(len(sequences)):
        seq, score = sequences[i]
        for j in range(len(row)):
          candidate = [seq + [j], score - log(row[j])]
          all_candidates.append(candidate)
      # order all candidates by score
      ordered = sorted(all_candidates, key=lambda tup:tup[1])
      # select k best
      sequences = ordered[:k]
    return sequences
  
  def on_epoch_end(self):
    acc = 0
    for (i, (inp, targ)) in enumerate(val_dataset.take(batch_size)):
        prediction = s2s.call(inp[0], inp[1])[0].rnn_output
    #prediction = self.model.predict([val_enc_input_data , val_dec_input_data])
        print(prediction.shape)
        for i, pred in enumerate(prediction):
          beam_search_prediction = self.beam_search_decoder(pred, self.beam_size)
          correct_prediction = 0
          for k in range(self.beam_size):
            translated_word = "\t"+"".join([reverse_target_token[x] for x in beam_search_prediction[k][0][:len(validation_target[i])-1]])
            print(translated_word, validation_target[i])
            #print(validation_target[i])

            def idx2char(idx_list):
              return "".join([reverse_target_token[x] for x in idx_list])

            if "\t"+ idx2char(beam_search_prediction[k][0][:len(validation_target[i])-1]) == validation_target[i]:
              correct_prediction+=1
              break
        mul = 10.0**4
        acc += ((correct_prediction/prediction.shape[0])*mul)/mul 
        
    logs["character_accuracy"] = ((correct_prediction/prediction.shape[0])*mul)/mul
    print("- character_accuracy",logs["character_accuracy"])
    #print(f"Accuracy by Beam Search {correct_prediction/len(validation_target)}")
      # print(len(beam_search_prediction))
      # print(beam_search_prediction)

In [36]:
optimizer = tf.keras.optimizers.RMSprop()


def loss_function(real, pred):
  # real shape = (BATCH_SIZE, max_length_output)
  # pred shape = (BATCH_SIZE, max_length_output, tar_vocab_size )
  #print(pred,"fucck", real)
  cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
  loss = cross_entropy(y_true=real, y_pred=pred)
  mask = tf.logical_not(tf.math.equal(real,0))   #output 0 for y=0 else output 1
  mask = tf.cast(mask, dtype=loss.dtype)  
  loss = mask* loss
  loss = tf.reduce_mean(loss)
  return loss  

In [37]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=s2s.encoder,
                                 decoder=s2s.dec,
                                 )

In [39]:
tf.config.run_functions_eagerly(False)
s2s.fit(train_dataset, val_dataset, 30, loss_function, optimizer, checkpoint , metric =  tf.keras.metrics.SparseCategoricalAccuracy())
#beam_search = BeamSearch(3)
#beam_search.on_epoch_end()

690
Epoch 1 Batch 0 Loss 3.5763
Epoch 1 Batch 100 Loss 3.4973
Epoch 1 Batch 200 Loss 3.4369
Epoch 1 Batch 300 Loss 3.5817
Epoch 1 Batch 400 Loss 3.7672
Epoch 1 Batch 500 Loss 3.6298
Epoch 1 Batch 600 Loss 3.7385
Validatiion loss:  3.685123988560268
Validatiion Acc:  70.04863739013672
Epoch 1 Loss 3.5447
Accuracy  68.08103699615036
Time taken for 1 epoch 97.48323965072632 sec

Epoch 2 Batch 0 Loss 3.6035
Epoch 2 Batch 100 Loss 3.4749
Epoch 2 Batch 200 Loss 3.4495
Epoch 2 Batch 300 Loss 3.5760
Epoch 2 Batch 400 Loss 3.7596
Epoch 2 Batch 500 Loss 3.6399
Epoch 2 Batch 600 Loss 3.7253
Validatiion loss:  3.680878121512277
Validatiion Acc:  70.39741516113281
Epoch 2 Loss 3.5444
Accuracy  70.13961570850317
Time taken for 1 epoch 95.45968866348267 sec

Epoch 3 Batch 0 Loss 3.6002
Epoch 3 Batch 100 Loss 3.4918
Epoch 3 Batch 200 Loss 3.4336
Epoch 3 Batch 300 Loss 3.5812
Epoch 3 Batch 400 Loss 3.7651
Epoch 3 Batch 500 Loss 3.6222
Epoch 3 Batch 600 Loss 3.7305
Validatiion loss:  3.673712158203125
V

In [151]:
s2s.dec.summary()

Model: "decoder_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_59 (Embedding)     multiple                  66560     
_________________________________________________________________
dense_29 (Dense)             multiple                  1105      
_________________________________________________________________
stacked_rnn_cells_59 (Stacke multiple                  66624     
_________________________________________________________________
basic_decoder_27 (BasicDecod multiple                  67729     
Total params: 134,289
Trainable params: 134,289
Non-trainable params: 0
_________________________________________________________________


In [152]:
sweep_config = {
    
    'method':'bayes',
    'metric': {
        'name':'Val Accuracy',
        'goal':'maximize'
    },
    'parameters':{
    
    "num_of_layer" : {'values': [1,2,3]},
    "unit_size": {"values":[16,32,64]},
    "unit_type": {"values":["lstm","rnn","gru"]},
    "dropout": {"values": [0.0, 0.2, 0.4]},
    'recurrent_dropout':{'values':[0.0,0.3]},
    "epochs":{"value":10},
    "encoder_embedding_dim":{"values": [64, 128, 1024]},
    "decoder_embedding_dim":{"values": [64, 128, 1024]},
    "optimizer":{"values": ["adam","rmsprop"]}             
                   }
}
pprint.pprint(sweep_config)


{'method': 'bayes',
 'metric': {'goal': 'maximize', 'name': 'Val Accuracy'},
 'parameters': {'decoder_embedding_dim': {'values': [64, 128, 1024]},
                'dropout': {'values': [0.0, 0.2, 0.4]},
                'encoder_embedding_dim': {'values': [64, 128, 1024]},
                'epochs': {'value': 10},
                'num_of_layer': {'values': [1, 2, 3]},
                'optimizer': {'values': ['adam', 'rmsprop']},
                'recurrent_dropout': {'values': [0.0, 0.3]},
                'unit_size': {'values': [16, 32, 64]},
                'unit_type': {'values': ['lstm', 'rnn', 'gru']}}}


In [153]:
sweep_id = wandb.sweep(sweep_config, project="Sweep_without_Attention")


Create sweep with ID: gk7yunpp
Sweep URL: https://wandb.ai/aslan/Sweep_without_Attention/sweeps/gk7yunpp


In [154]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

In [155]:

def train(config = None):
  with wandb.init(config=config):
    config = wandb.config
    #print(config)
    s2s = Seq2Seq(num_encoder_tokens,num_decoder_tokens,config.encoder_embedding_dim, config.decoder_embedding_dim, config.unit_size, config.num_of_layer,config.unit_type , batch_size, config.dropout,config.recurrent_dropout, attention_type = None)
    if config.optimizer == "adm":
        optimizer = tf.keras.optimizers.Adam()
    else:
        optimizer = tf.keras.optimizers.RMSprop()
    checkpoint_dir = './training_checkpoints'
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=s2s.encoder,
                                 decoder=s2s.dec,
                                 )

    # seq2seq.compile(optimizer=config.optimizer, loss="categorical_crossentropy", metrics=["accuracy",])
    s2s.fit(
        train_dataset,
        val_dataset,
        config.epochs,
        loss_function,
        optimizer,
        checkpoint,
        metric = tf.keras.metrics.SparseCategoricalAccuracy()
        )
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: 6gxy0b9z with config:
[34m[1mwandb[0m: 	decoder_embedding_dim: 64
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	encoder_embedding_dim: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	num_of_layer: 1
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	recurrent_dropout: 0
[34m[1mwandb[0m: 	unit_size: 64
[34m[1mwandb[0m: 	unit_type: lstm


690
Epoch 1 Batch 0 Loss 4.2406
Epoch 1 Batch 100 Loss 4.2131
Epoch 1 Batch 200 Loss 4.1920
Epoch 1 Batch 300 Loss 4.1047
Epoch 1 Batch 400 Loss 4.1948
Epoch 1 Batch 500 Loss 4.0740
Epoch 1 Batch 600 Loss 4.1345
Validatiion loss:  4.123540823800223
Validatiion Acc:  13.466736929757253
Epoch 1 Loss 4.1287
Accuracy  14.915398860323256
Time taken for 1 epoch 92.08108830451965 sec

Epoch 2 Batch 0 Loss 4.1785
Epoch 2 Batch 100 Loss 4.2118
Epoch 2 Batch 200 Loss 4.1484
Epoch 2 Batch 300 Loss 4.0263
Epoch 2 Batch 400 Loss 4.1537
Epoch 2 Batch 500 Loss 3.9949
Epoch 2 Batch 600 Loss 4.1288
Validatiion loss:  4.083060128348214
Validatiion Acc:  16.441647665841238
Epoch 2 Loss 4.0667
Accuracy  18.529208639393683
Time taken for 1 epoch 68.99501252174377 sec

Epoch 3 Batch 0 Loss 4.1539


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁█
Train Accuracy,▁█
Train loss,█▁
Val Accuracy,▁█
Val Loss,█▁

0,1
Epoch,2.0
Train Accuracy,18.52921
Train loss,4.06675
Val Accuracy,16.44165
Val Loss,4.08306


In [None]:
# !cat ./wandb/run-20220622_082243-tv6u3ywu/logs/debug.log

# Visualisation

In [None]:
# def idx_to_word(word, translate_dict):
#   return  "".join([translate_dict[char] for char in word])


# idx_to_word(next(iter(train_dataset))[1][0].numpy(), reverse_target_token)

In [None]:
# sample_out = s2s.call(val_enc_input_data[:batch_size], dec_input_data[:batch_size])

In [None]:
# next(iter(train_dataset))[1][0].numpy()

In [None]:
# argmax(sample_out[0].rnn_output[], axis = 1)

In [None]:
# reverse_target_token

In [None]:
# idx_to_word(argmax(sample_out[0].rnn_output[0], axis = 1), reverse_target_token)

In [None]:
# val_enc_input_data[0]

In [None]:
# val_dec_input_data.shape

In [None]:
# val_dec_target_data[110]

In [None]:
# sample_out = s2s.call(val_enc_input_data[:batch_size], val_dec_input_data[:batch_size])

In [None]:
# np.dot(1,np.equal(val_dec_target_data[0][:3], np.argmax(sample_out[0].rnn_output[0][:3], axis =1)))

In [None]:
# sample_out[0].rnn_output[0].shape

In [None]:
# val_dec_target_data[0][:3]

In [None]:
# validation_target[0]

In [None]:
# target_token

In [None]:
# #m = tf.keras.metrics.SparseCategoricalAccuracy()
# m.update_state(val_dec_target_data[0], sample_out[0].rnn_output[0][:3])
# m.result().numpy()

In [None]:
# np.where(val_dec_target_data[0] == 1)

In [None]:
# zip(val_dec_target_data, sample_out[0].rnn_output)

In [None]:
# loss_function(val_dec_input_data[:64], sample_out[0].rnn_output)

In [None]:
# m = tf.keras.metrics.SparseCategoricalAccuracy()
# step_per_val_epoch  = len(validation_input)//batch_size
# m.update_state(val_dec_input_data[:64], sample_out[0].rnn_output)/step_per_val_epoch

In [None]:
#  dec_input_data[0]

In [None]:
# t = list(val_dataset)[:2]
# x, y , z = t[0][0], t[0][1], t[1]

In [None]:
# for val in t:
#   enc_inp
#   print(len(val[0]))

In [None]:
# cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
# loss = cross_entropy(y_true=dec_target_data[:64], y_pred=sample_out[0].rnn_output)
# loss

In [None]:
# for x, y in val_dataset:
#   print(x.shape,y.shape)

In [None]:
# x,y=val_dataset.take(1)

In [None]:
# print(val_dataset.range(1))


In [None]:

# s2s.predict([val_enc_input_data,val_dec_input_data])

In [None]:
# s2s.compile(
#     optimizer="rmsprop", loss="categorical_crossentropy", metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')]
# )
# input_data = tf.data.Dataset.from_tensor_slices((enc_input_data, dec_input_data))
# target_data =  tf.data.Dataset.from_tensor_slices(dec_target_data)
# train_dataset  = tf.data.Dataset.zip((input_data, target_data)).batch(batch_size)
# #s2s.summary()
# s2s.fit(
#     train_dataset,
#     batch_size=64,
#     epochs=1,
# )

In [None]:

# class Seq2seq(tf.keras.Model):
#   def __init__(self, num_encoder_tokens, num_decoder_tokens,embedding_dim,num_of_layers,unit_type, dropout , recurrent_dropout):
#     super().__init__()
#     self.encoder_inputs = Input(shape = (None,), name = "Input_layer_1")
#     self.decoder_inputs = keras.Input(shape=(None,), name = "Input_layer_2")
#     self.num_encoder_tokens = num_encoder_tokens
#     self.embedding_dim = embedding_dim
#     self.dropout = dropout
#     self.recurrent_dropout = recurrent_dropout
#     self.num_decoder_tokens = num_decoder_tokens
#     self.num_of_encoder_layer  =num_of_layers
#     self.num_of_decoder_layer =num_of_layers
#     self.type_encoder_unit =unit_type 
#     self.type_decoder_unit =unit_type
#     self.train_step()
#     self.build_model()

#   def get_embedding_layer(self, num_encoder_tokens, embedding_dim,  name):
#     return Embedding(num_encoder_tokens, embedding_dim, mask_zero = True, name =name )

#   def get_cell(self, cell_type = "lstm", num_of_cell = 1, name = None):
#     #print(cell_type)
#     if cell_type == "lstm":
#       return LSTMCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout, )
#     elif cell_type == "rnn":
#       return SimpleRNNCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout)
#     elif cell_type =="gru":
#       return GRUCell(num_of_cell, dropout = self.dropout, recurrent_dropout = self.recurrent_dropout)
#     else:
#       print(f"Invalid cell type: {cell_type}")
#   def get_encoder(self,latent_dim, cell_type = "lstm", num_of_layer = 1, name = None ):
#     return tf.keras.layers.RNN(tf.keras.layers.StackedRNNCells( [self.get_cell(cell_type, latent_dim) for i in range(num_of_layer)],), return_sequences=True, return_state=True, name = name)

#   def get_decoder(self,latent_dim ,cell_type = "lstm", num_of_layer = 1, name = None ):
#     return tf.keras.layers.RNN(tf.keras.layers.StackedRNNCells( [self.get_cell(cell_type, latent_dim,) for i in range(num_of_layer)]), return_sequences=True, return_state=True)

#   def get_dense_layer(self, num_decoder_token, activation = "softmax"):
#     return Dense(num_decoder_tokens, activation= activation)

#   def train_step(self):
#     self.embedding_layer = self.get_embedding_layer( self.num_encoder_tokens, self.embedding_dim ,name = "encoder_embedding")
#     self.embedding_results = self.embedding_layer(self.encoder_inputs)
#     print(self.embedding_results.shape)
#     self.encoder = self.get_encoder( self.embedding_dim,self.type_encoder_unit, self.num_of_encoder_layer , name ="encoder" )
#     encoder_results = self.encoder(self.embedding_results)

#     self.encoder_outputs, self.encoder_states = encoder_results[0], encoder_results[1:]

#     self.embedding_layer2 = self.get_embedding_layer( self.num_decoder_tokens, self.embedding_dim, name = "decoder_embedding")
#     self.embedding_results2 = self.embedding_layer2(self.decoder_inputs,)

#     self.decoder = self.get_decoder( self.embedding_dim, self.type_decoder_unit, self.num_of_decoder_layer,)
#     self.decoder_results = self.decoder(self.embedding_results2, initial_state=self.encoder_states)

#     self.decoder_output = self.decoder_results[0]
#     self.decoder_dense = self.get_dense_layer(self.num_decoder_tokens)
#     self.dense_output = self.decoder_dense(self.decoder_output)

#   def build_model(self):
    
#     self.model = keras.Model([self.encoder_inputs, self.decoder_inputs], self.dense_output, name = "Seq2Seq_model")
#     return self.model



In [None]:

# seq2seq = Seq2seq(num_encoder_tokens,num_decoder_tokens, 1024,1,"rnn", 0.0, 0.0).build_model()
# seq2seq.summary()

## Train the model


In [None]:
# s2s.compile(
#     optimizer="rmsprop", loss="categorical_crossentropy", metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')]
# )

# s2s.fit(
#     [enc_input_data, dec_input_data],
#     dec_target_data,
#     batch_size=64,
#     epochs=1,
# )

In [None]:
# class BeamSearch(keras.callbacks.Callback):

#   def __init__(self, beam_size):
#     self.beam_size = beam_size

#   def beam_search_decoder(aelf, data, k):
#     sequences = [[list(), 0.0]]
#     # walk over each step in sequence
#     for row in data:
#       all_candidates = list()
#       # expand each current candidate
#       for i in range(len(sequences)):
#         seq, score = sequences[i]
#         for j in range(len(row)):
#           candidate = [seq + [j], score - log(row[j])]
#           all_candidates.append(candidate)
#       # order all candidates by score
#       ordered = sorted(all_candidates, key=lambda tup:tup[1])
#       # select k best
#       sequences = ordered[:k]
#     return sequences
  
#   def on_epoch_end(self, epoch, logs = None):
#     prediction = self.model.predict([val_enc_input_data , val_dec_input_data])
#     print(prediction.shape)
#     for i, pred in enumerate(prediction):
#       beam_search_prediction = self.beam_search_decoder(pred, self.beam_size)
#       correct_prediction = 0
#       for k in range(self.beam_size):
#         #translated_word = "\t"+"".join([reverse_target_token[x] for x in beam_search_prediction[k][0][:len(validation_target[i])-1]])
#         #print(translated_word, validation_target[i])
#         #print(validation_target[i])
        
#         def idx2char(idx_list):
#           return "".join([reverse_target_token[x] for x in idx_list])

#         if "\t"+ idx2char(beam_search_prediction[k][0][:len(validation_target[i])-1]) == validation_target[i]:
#           correct_prediction+=1
#           break
#     mul = 10.0**2
#     logs["character_accuracy"] = ((correct_prediction/prediction.shape[0])*mul)/mul
#     print("- character_accuracy",logs["character_accuracy"])
#     #print(f"Accuracy by Beam Search {correct_prediction/len(validation_target)}")
#       # print(len(beam_search_prediction))
#       # print(beam_search_prediction)


In [None]:
# def beam_search_decoder(data, k):
#     decodedWords = [[list(), 0.0]]
#     # walk over each step in sequence
#     for word in data:
#       candidates = list()
#       # expand each current candidate
#       for sequence in decodedWords:
#         seq, score = sequence
#         for j in range(len(word)):
#           candidate = [seq + [j], score - log(word[j])]
#           candidates.append(candidate)
#       # order all candidates by score
#       ordered = sorted(candidates, key=lambda a:a[1])
#       # select k best
#       decodedWords = ordered[:k]
#     return decodedWords
  
# def translate(seq):
#   sentence = [] 
#   for x in seq:
#     char = reverse_target_token[x]
#     sentence.append(char)
#   return "".join(sentence)
# class WordAccuracyCallback(keras.callbacks.Callback):
#   def __init__(self,beam_size):
#     self.beam_size=beam_size
#   def on_epoch_end(self, epoch, logs=None):
#     pred=self.model.predict([val_enc_input_data , val_dec_input_data])
#     count=0
#     for i in range(pred.shape[0]):
#       pSequences=beam_search_decoder(pred[i],self.beam_size)
#       for j in range(self.beam_size):
#         if "\t"+translate(pSequences[j][0][:len(validation_target[i])-1])==validation_target[i]:
#           count=count+1
#           break
#     factor = 10.0 ** 4
#     logs["WordAccuracy"]=math.trunc((count/pred.shape[0])*factor)/factor
#     print("- wordAccuracy:",logs["WordAccuracy"])

In [None]:
# sweep_config = {
    
#     'method':'bayes',
#     'metric': {
#         'name':'val_accuracy',
#         'goal':'maximize'
#     },
#     'parameters':{
    
#     "num_of_layer" : {'values': [1,2,3]},
#     "unit_size": {"values":[16,32,64]},
#     "unit_type": {"values":["lstm","rnn","gru"]},
#     "dropout": {"values": [0.0, 0.2, 0.4]},
#     'recurrent_dropout':{'values':[0.0,0.3]},
#     "beam_size" : {"values":[1,2,3,4]},
#     "epochs":{"value":20},  
#     "optimizer":{"values": ["adam","rmsprop"]}             
#                    }
# }



# pprint.pprint(sweep_config)

In [None]:
# sweep_id = wandb.sweep(sweep_config, project="seq2seq")

In [None]:
# def train(config = None):
#   with wandb.init(config=config):
#     config = wandb.config
#     #print(config)
#     seq2seq = Seq2seq(num_encoder_tokens,num_decoder_tokens, config.unit_size, config.num_of_layer,config.unit_type , config.dropout,config.recurrent_dropout).build_model()
#     seq2seq.compile(optimizer=config.optimizer, loss="categorical_crossentropy", metrics=["accuracy",])
#     seq2seq.fit(
#         [encoder_input_data, decoder_input_data],
#         decoder_target_data,
#         batch_size=batch_size,
#         epochs=config.epochs,
#         validation_data =  ([validation_encoder_input_data , validation_decoder_input_data] ,validation_decoder_target_data),
#         callbacks = [BeamSearch(config.beam_size), WandbCallback()],verbose = 1, 
#         )


    
    
# wandb.agent(sweep_id, train)

In [None]:
# seq2seq.compile(
#     optimizer="rmsprop", loss="categorical_crossentropy", metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')]
# )
# seq2seq.metrics_names



In [None]:
# pred=seq2seq.predict([val_enc_input_data , val_dec_input_data])
# count=0
# for i in range(pred.shape[0]//400):
#       pSequences=beam_search_decoder(pred[i],3)
#       for j in range(3):
#         print({"\t"+translate(pSequences[j][0][:len(validation_target[i])-1])}, "original =", {validation_target[i]} )
#         if "\t"+translate(pSequences[j][0][:len(validation_target[i])-1])==validation_target[i]:
#           count=count+1
#           print("yes")
#           break
# factor = 10.0 ** 4


In [None]:
# x = seq2seq.predict([val_enc_input_data , val_dec_input_data])
# x.shape

In [None]:

# histotry = seq2seq.fit(
#     [enc_input_data, dec_input_data],
#     dec_target_data,
#     batch_size=8192,
#     epochs=1,
#     callbacks = [WordAccuracyCallback(3), ],
# )
# # Save model
# seq2seq.save("s2s")


In [None]:
# for key in histotry.history.keys():
#       print(key , histotry.history[key])
#       #wandb.log({key : histotry.history[key]})

In [None]:
# seq2seq.metrics_names

## Run inference (sampling)

1. encode input and retrieve initial decoder state
2. run one step of decoder with this initial state
and a "start of sequence" token as target.
Output will be the next target token.
3. Repeat with the current target token and current states


In [None]:
# # Define sampling models
# # Restore the model and construct the encoder and decoder.
# model = keras.models.load_model("s2s")

# encoder_inputs = model.input[0]  # input_1
# temp = model.layers[2].output
# encoder_outputs, state = temp[0], temp[1:]  # lstm_1
# encoder_states = state
# encoder_model = keras.Model(encoder_inputs, encoder_states)

# decoder_inputs = model.input[1]  # input_2
# decoder_state_input_h = keras.Input(shape=(latent_dim,))
# decoder_state_input_c = keras.Input(shape=(latent_dim,))
# decoder_states_inputs = state
# decoder_lstm = model.layers[3]
# temp = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
# decoder_outputs, state_dec = temp[0], temp[1:]
# decoder_states = state_dec
# decoder_dense = model.layers[4]
# decoder_outputs = decoder_dense(decoder_outputs)
# decoder_model = keras.Model(
#     [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
# )

# # Reverse-lookup token index to decode sequences back to
# # something readable.
# # reverse_input_char_index = dict((i, char) for char, i in num_encoder_tokens.items())
# # reverse_target_char_index = dict((i, char) for char, i in num_decoder_tokens.items())
# # print(reverse_input_char_index)
# # print(input_token_index)

# reverse_input_token = dict((i, char) for char, i in input_token.items())
# reverse_target_token = dict((i, char) for char, i in target_token.items())
# def decode_sequence(input_seq):
#     # Encode the input as state vectors.
#     states_value = encoder_model.predict(input_seq)

#     # Generate empty target sequence of length 1.
#     target_seq = np.zeros((1, 1, num_decoder_tokens))
#     # Populate the first character of target sequence with the start character.
#     target_seq[0, 0, target_token_index["\t"]] = 1.0

#     # Sampling loop for a batch of sequences
#     # (to simplify, here we assume a batch of size 1).
#     stop_condition = False
#     decoded_sentence = ""
#     while not stop_condition:
#         temp = decoder_model.predict([target_seq] + states_value)
#         output_tokens, state = temp[0],temp[1:]

#         # Sample a token
#         sampled_token_index = np.argmax(output_tokens[0, -1, :])
#         #print(reverse_target_char_index)
#         sampled_char = reverse_target_token[sampled_token_index]
#         decoded_sentence += sampled_char

#         # Exit condition: either hit max length
#         # or find stop character.
#         if sampled_char == "\n" or len(decoded_sentence) > max_decoder_seq_length:
#             stop_condition = True

#         # Update the target sequence (of length 1).
#         target_seq = np.zeros((1, 1, num_decoder_tokens))
#         target_seq[0, 0, sampled_token_index] = 1.0

#         # Update states
#         states_value = state
#     return decoded_sentence



You can now generate decoded sentences as such:


In [None]:
# for seq_index in range(20):
#     # Take one sequence (part of the training set)
#     # for trying out decoding.
#     input_seq = encoder_input_data[seq_index : seq_index + 1]
#     decoded_sentence = decode_sequence(input_seq)
#     print("-")
#     print("Input sentence:", input_texts[seq_index])
#     print("Decoded sentence:", decoded_sentence)


In [None]:
# ! git log