In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [None]:
import os

In [None]:
# Locations of the train/dev/test TSV files, resolved relative to the
# current working directory under te/lexicons.
dataset_path = os.sep.join([os.getcwd(), 'te', 'lexicons'])
train_path = os.sep.join([dataset_path, 'te.translit.sampled.train.tsv'])
valid_path = os.sep.join([dataset_path, 'te.translit.sampled.dev.tsv'])
test_path = os.sep.join([dataset_path, 'te.translit.sampled.test.tsv'])

In [None]:
# Containers filled by the loader cells: encoder inputs and decoder targets
# per split, plus the character vocabularies seen on each side.
train_inputs, train_outputs = [], []
valid_inputs, valid_outputs = [], []
test_inputs, test_outputs = [], []
input_chars, output_chars = set(), set()

In [None]:
# When False, the validation/test loader cells skip rows based on the third TSV column.
include_all = True

In [None]:
# Load the training split. Each non-empty row has three tab-separated
# columns: the first is used as the decoder target, the second as the encoder
# input, and the third is unused here (every training row is kept).
# The file is streamed line by line so that the last record is kept even
# when the file has no trailing newline, and blank lines are skipped.
with open(train_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.rstrip("\n")
        if not line:
            continue
        out, inp, a = line.split('\t')
        # We use "tab" as the "start sequence" character
        # for the targets, and "\n" as "end sequence" character.
        out = "\t" + out + "\n"
        train_inputs.append(inp)
        train_outputs.append(out)
        # Grow the vocabularies with every character seen on each side.
        input_chars.update(inp)
        output_chars.update(out)

In [None]:
# Load the validation split (same three-column TSV layout as the training
# file: decoder target, encoder input, third column).
# The file is streamed so that the last record is kept even without a
# trailing newline, and blank lines are skipped.
with open(valid_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.rstrip("\n")
        if not line:
            continue
        out, inp, a = line.split('\t')
        # `a` is text read from the file, so it must be compared with the
        # string "1"; comparing with the integer 1 is always unequal and
        # would drop every row whenever include_all is False.
        if not include_all and a.strip() != "1":
            continue
        # We use "tab" as the "start sequence" character
        # for the targets, and "\n" as "end sequence" character.
        out = "\t" + out + "\n"
        valid_inputs.append(inp)
        valid_outputs.append(out)

In [None]:
# Load the test split (same three-column TSV layout as the training file:
# decoder target, encoder input, third column).
# The file is streamed so that the last record is kept even without a
# trailing newline, and blank lines are skipped.
with open(test_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.rstrip("\n")
        if not line:
            continue
        out, inp, a = line.split('\t')
        # `a` is text read from the file, so it must be compared with the
        # string "1"; comparing with the integer 1 is always unequal and
        # would drop every row whenever include_all is False.
        if not include_all and a.strip() != "1":
            continue
        # We use "tab" as the "start sequence" character
        # for the targets, and "\n" as "end sequence" character.
        out = "\t" + out + "\n"
        test_inputs.append(inp)
        test_outputs.append(out)

In [None]:
# Freeze the input vocabulary into a sorted list so that ids are deterministic.
input_chars = sorted(input_chars)
num_input_chars = len(input_chars)
print(input_chars)

In [None]:
# Freeze the output vocabulary into a sorted list so that ids are deterministic.
output_chars = sorted(output_chars)
num_output_chars = len(output_chars)
print(output_chars)

In [None]:
# Size of the training set and the longest input / target strings in it.
# Targets already include the "\t" start and "\n" end markers.
train_size = len(train_inputs)
max_input_size = max(map(len, train_inputs))
print(max_input_size)
max_output_size = max(map(len, train_outputs))
print(max_output_size)

In [None]:
# Character -> integer id lookups. Input ids start at 0; output ids start at 1
# so that id 0 stays free for padding on the decoder side.
input_index = {char: i for i, char in enumerate(input_chars)}
output_index = {char: i for i, char in enumerate(output_chars, start=1)}


In [None]:
# Inspect the output character -> id mapping (ids start at 1).
print(output_index)

In [None]:
#### Encoding strings as indexes of characters in the vocabulary
def encode_index(inputs, index):
    """Encode each string as an array of character ids.

    Args:
        inputs: sequence of strings to encode.
        index: mapping from character to numeric id.

    Returns:
        A 1-D NumPy object array with one entry per input string; entry ``i``
        is a float64 array holding the ids of the characters of ``inputs[i]``.

    Raises:
        KeyError: if a string contains a character missing from ``index``.
    """
    # Allocate the object array up front and fill it element by element.
    # Letting NumPy infer the shape from a list of unequal-length arrays is an
    # error on recent NumPy releases, and would yield a 2-D array whenever all
    # strings happen to have the same length.
    data = np.empty(len(inputs), dtype=object)
    for i, text in enumerate(inputs):
        data[i] = np.array([index[char] for char in text], dtype=np.float64)
    return data
    

In [None]:
# Encode the training inputs as id sequences and wrap them in a ragged tensor.
input_data = encode_index(train_inputs,input_index)
input_tensor = tf.ragged.constant(input_data)

In [None]:
# Encode the validation inputs with the training vocabulary (input_index).
val_input_data = encode_index(valid_inputs,input_index)
val_input_tensor = tf.ragged.constant(val_input_data)

In [None]:
# Encode the test inputs with the training vocabulary (input_index).
test_input_data = encode_index(test_inputs,input_index)
test_input_tensor = tf.ragged.constant(test_input_data)

In [None]:
# Sanity check: number of encoded training examples.
print(len(input_data))

In [None]:
# Longest input / target string in the validation and test splits.
max_val__input_size = len(max(valid_inputs, key=len))
max_val_output_size = len(max(valid_outputs, key=len))
max_test_input_size = len(max(test_inputs, key=len))
max_test_output_size = len(max(test_outputs, key=len))

In [None]:
# Teacher-forcing tensors for the training split.
#   decoder_input_data[i, t]     : id of the t-th target character (0 = padding)
#   decoder_output_data[i, t, :] : one-hot of the (t+1)-th target character,
#                                  i.e. the targets shifted one step ahead and
#                                  without the start character.
# The ids are written directly instead of building a one-hot tensor and
# taking its argmax; the resulting array is identical but needs far less memory.
decoder_input_data = np.zeros(
    (len(train_inputs), max_output_size), dtype="float32"
)
decoder_output_data = np.zeros(
    (len(train_inputs), max_output_size, num_output_chars + 1), dtype="float32"
)
for i, target_text in enumerate(train_outputs):
    for t, char in enumerate(target_text):
        char_id = output_index[char]
        decoder_input_data[i, t] = char_id
        if t > 0:
            # decoder_output_data is ahead of decoder_input_data by one
            # timestep and does not include the start character.
            decoder_output_data[i, t - 1, char_id] = 1.0


In [None]:
# Inspect the id sequence of the first training target.
decoder_input_data[0]

In [None]:
# Teacher-forcing tensors for the validation split.
#   decoder_val_input_data[i, t]     : id of the t-th target character (0 = padding)
#   decoder_val_output_data[i, t, :] : one-hot of the (t+1)-th target character.
# The ids are written directly instead of building a one-hot tensor and
# taking its argmax; the resulting array is identical but needs far less memory.
decoder_val_input_data = np.zeros(
    (len(valid_inputs), max_val_output_size), dtype="float32"
)
decoder_val_output_data = np.zeros(
    (len(valid_inputs), max_val_output_size, num_output_chars + 1), dtype="float32"
)
for i, target_text in enumerate(valid_outputs):
    for t, char in enumerate(target_text):
        char_id = output_index[char]
        decoder_val_input_data[i, t] = char_id
        if t > 0:
            # The output is ahead of the input by one timestep and does not
            # include the start character.
            decoder_val_output_data[i, t - 1, char_id] = 1.0


In [None]:
# Teacher-forcing tensors for the test split.
#   decoder_test_input_data[i, t]     : id of the t-th target character (0 = padding)
#   decoder_test_output_data[i, t, :] : one-hot of the (t+1)-th target character.
# The ids are written directly instead of building a one-hot tensor and
# taking its argmax; the resulting array is identical but needs far less memory.
decoder_test_input_data = np.zeros(
    (len(test_inputs), max_test_output_size), dtype="float32"
)
decoder_test_output_data = np.zeros(
    (len(test_inputs), max_test_output_size, num_output_chars + 1), dtype="float32"
)
for i, target_text in enumerate(test_outputs):
    for t, char in enumerate(target_text):
        char_id = output_index[char]
        decoder_test_input_data[i, t] = char_id
        if t > 0:
            # The output is ahead of the input by one timestep and does not
            # include the start character.
            decoder_test_output_data[i, t - 1, char_id] = 1.0


# Sample Model

In [None]:
# def get_sample_model(input_embed_size , hidden_size):
#     charinput = tf.keras.Input(shape=(None,),name="input")
#     embedding = tf.keras.layers.Embedding(num_input_chars,input_embed_size, name="embedding")(charinput)
    
#     encoder = tf.keras.layers.LSTM(hidden_size, return_state=True )
#     encoder_outputs, state_h, state_c = encoder(embedding)
#     encoder_states = [state_h, state_c]
    
#     decoder_inputs = tf.keras.Input(shape=(None,),name="decoder_input")
#     decoder_embedding = tf.keras.layers.Embedding(num_output_chars + 1,input_embed_size, name="decoder_embedding",mask_zero=True)(decoder_inputs)
    
#     decoder_lstm = tf.keras.layers.LSTM(hidden_size, return_sequences=True, return_state=True)
#     decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
#     decoder_dense = tf.keras.layers.Dense(num_output_chars + 1, activation="softmax")
#     decoder_outputs = decoder_dense(decoder_outputs)
#     model = tf.keras.Model([charinput,decoder_inputs],decoder_outputs)
    
#     encoder_model = tf.keras.Model(charinput, encoder_states)
#     # define inference decoder
#     decoder_state_input_h = tf.keras.Input(shape=(hidden_size,))
#     decoder_state_input_c = tf.keras.Input(shape=(hidden_size,))
#     decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
#     decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedding, initial_state=decoder_states_inputs)
#     decoder_states = [state_h, state_c]
#     decoder_outputs = decoder_dense(decoder_outputs)
#     decoder_model = tf.keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
#     return model, encoder_model, decoder_model

In [None]:
# sample_model, enc_model, dec_model = get_Model_v2(16,128,'RNN',1,1,0)

In [None]:
# sample_model.compile(
#     optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
# )
# sample_model.summary()

In [None]:
# Id -> character lookups used when turning predictions back into text.
# Output id 0 is not assigned to any character, so it is rendered as a space.
reverse_input_char_index = {idx: ch for ch, idx in input_index.items()}
reverse_target_char_index = {idx: ch for ch, idx in output_index.items()}
reverse_target_char_index[0] = ' '

def decode_single_sequence(input_seq):
    """Greedily decode one input sequence with the global encoder/decoder.

    Uses the module-level ``enc_model`` and ``dec_model``. Works with any
    number of recurrent state tensors, so it is not limited to a single-layer
    LSTM decoder.

    Args:
        input_seq: a batch containing exactly one encoded input sequence.

    Returns:
        The decoded string. Decoding stops after the first "\\n" (end marker)
        or " " (output id 0) is produced, or once the decoded string is longer
        than ``max_output_size``; the stopping character is included.
    """
    states_value = enc_model.predict(input_seq)
    # A model with a single output may return a bare array instead of a list.
    if not isinstance(states_value, (list, tuple)):
        states_value = [states_value]
    states_value = list(states_value)

    # Start decoding from the "\t" start-of-sequence id.
    target_seq = np.full((1, 1), output_index["\t"], dtype="float32")
    decoded_sentence = ""
    while True:
        outputs = dec_model.predict([target_seq] + states_value)
        # First output is the token distribution; the rest are the new states.
        output_tokens, states_value = outputs[0], list(outputs[1:])

        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit condition: either hit max length or find a stop character.
        if sampled_char == "\n" or sampled_char == ' ' or len(decoded_sentence) > max_output_size:
            break

        # Feed the sampled token back in as the next decoder input.
        target_seq = np.full((1, 1), sampled_token_index, dtype="float32")
    return decoded_sentence
def decode_sequence(input_seq):
    """Greedily decode a batch of input sequences with the global models.

    Uses the module-level ``enc_model`` and ``dec_model``. Works with any
    number of recurrent state tensors, so it is not limited to a single-layer
    LSTM decoder.

    Args:
        input_seq: batch of encoded input sequences; ``input_seq.shape[0]`` is
            the batch size.

    Returns:
        A list with one string per input, formatted as "\\t" + decoded text up
        to the first "\\n" + "\\n".
    """
    sz = input_seq.shape[0]
    states_value = enc_model.predict(input_seq)
    # A model with a single output may return a bare array instead of a list.
    if not isinstance(states_value, (list, tuple)):
        states_value = [states_value]
    states_value = list(states_value)

    # Every sequence starts from the "\t" start-of-sequence id.
    target_seq = np.full((sz, 1), output_index["\t"], dtype="float32")
    decoded_seqs = ["" for _ in range(sz)]
    for _ in range(max_output_size):
        outputs = dec_model.predict([target_seq] + states_value)
        # First output is the token distribution; the rest are the new states.
        output_tokens, states_value = outputs[0], list(outputs[1:])

        sampled_token_index = np.argmax(output_tokens[:, -1, :], axis=1)
        for i in range(sz):
            decoded_seqs[i] += reverse_target_char_index[int(sampled_token_index[i])]
        # Feed the sampled tokens back in as the next decoder inputs.
        target_seq = sampled_token_index.reshape(sz, 1).astype('float32')

    # Keep only the text before the first end marker.
    return [("\t" + st.split('\n')[0] + "\n") for st in decoded_seqs]

In [None]:
# for seqid in range(5):
#     input_seq = input_tensor[seqid:seqid+1]
# #     print(input_seq.shape,input_tensor.shape)
#     decoded_sentence = decode_single_sequence(input_seq)
#     print("-")
#     print("Input sentence:", train_inputs[seqid])
#     print("Decoded sentence:", decoded_sentence)

In [None]:
# tf.config.run_functions_eagerly(True)


In [None]:
# sample_model.fit(
#     [input_tensor,decoder_input_data],
#     decoder_output_data,
#     batch_size=64,
#     epochs=10,
#     validation_data=([val_input_tensor,decoder_val_input_data],decoder_val_output_data),
#     shuffle=True,
# )

In [None]:
# def evaluate(data_tensor,data_output,k):
#     crct = 0
#     input_seq = data_tensor[:k]
# #     print(input_seq.shape,input_tensor.shape)
#     decoded_sentences = decode_sequence(input_seq)
#     sts = data_output[:k]
#     crct += np.sum(np.array(sts) == np.array(decoded_sentences))

#     return crct/k,zip(decoded_sentences,sts)

# Beam Implementation

In [None]:
import math
# Id -> character lookups used by beam search to render predictions.
# Output id 0 is not assigned to any character, so it is rendered as a space.
reverse_input_char_index = {idx: ch for ch, idx in input_index.items()}
reverse_target_char_index = {idx: ch for ch, idx in output_index.items()}
reverse_target_char_index[0] = ' '

def beam_decode(input_seq, beam_size, enc_model, dec_model, cell_type):
    """Beam-search decode a batch of input sequences.

    Each hypothesis is stored as ``[token_ids, score, states, finished]`` where
    ``score`` is the accumulated negative log-probability (lower is better)
    and ``states`` holds the recurrent state slices belonging to that one
    example. Keeping the states per example matters: after re-ranking, the
    k-th hypothesis of different examples may descend from different parent
    beams, so a single shared batch of states would pair examples with the
    wrong decoder state.

    Args:
        input_seq: batch of encoded input sequences; ``input_seq.shape[0]`` is
            the batch size.
        beam_size: number of hypotheses kept per example.
        enc_model: model whose ``predict`` returns the initial decoder states.
        dec_model: model whose ``predict`` takes ``[tokens] + states`` and
            returns ``[token_probabilities] + new_states``.
        cell_type: one of 'LSTM', 'GRU' or 'RNN'. States are handled
            generically, so the value is only validated.

    Returns:
        A list with one string per input: the best hypothesis formatted as
        "\\t" + text up to the first "\\n" + "\\n".

    Raises:
        ValueError: if ``cell_type`` is not a supported value.
    """
    if cell_type not in ('LSTM', 'GRU', 'RNN'):
        raise ValueError("Unsupported cell_type: %r" % (cell_type,))

    sz = input_seq.shape[0]
    if sz == 0:
        return []

    def step(tokens, states):
        # Run the decoder one timestep: token probabilities plus new states.
        outputs = dec_model.predict([tokens] + list(states))
        return outputs[0], list(outputs[1:])

    def extend(prefix, score, token, probs, own_states):
        # Build the hypothesis obtained by appending `token` to `prefix`.
        return [prefix + [token],
                score - np.log(probs[token]),
                own_states,
                reverse_target_char_index[token] == '\n']

    states_value = enc_model.predict(input_seq)
    # A model with a single output may return a bare array instead of a list.
    if not isinstance(states_value, (list, tuple)):
        states_value = [states_value]

    # First step: every example starts from the "\t" start-of-sequence id.
    start_tokens = np.full((sz, 1), output_index["\t"], dtype="float32")
    output_tokens, states = step(start_tokens, states_value)
    num_states = len(states)
    top = np.argpartition(output_tokens[:, -1, :], -beam_size, axis=1)[:, -beam_size:]

    sequences = []
    for i in range(sz):
        own_states = [s[i] for s in states]
        candidates = [extend([], 0.0, int(tok), output_tokens[i, -1], own_states)
                      for tok in top[i]]
        sequences.append(sorted(candidates, key=lambda cand: cand[1])[:beam_size])

    for _ in range(1, max_output_size):
        # Nothing left to expand once every hypothesis has emitted "\n".
        if all(cand[3] for beams in sequences for cand in beams):
            break

        all_candidates = [[] for _ in range(sz)]
        for k in range(beam_size):
            # Batch the k-th hypothesis of every example: last token + own states.
            tokens = np.array([[sequences[i][k][0][-1]] for i in range(sz)],
                              dtype="float32")
            batch_states = [np.stack([sequences[i][k][2][s] for i in range(sz)])
                            for s in range(num_states)]
            output_tokens, new_states = step(tokens, batch_states)
            top = np.argpartition(output_tokens[:, -1, :], -beam_size, axis=1)[:, -beam_size:]

            for i in range(sz):
                prefix, score, _, finished = sequences[i][k]
                own_states = [s[i] for s in new_states]
                if finished:
                    # Finished hypotheses are carried over unchanged.
                    all_candidates[i].append([prefix, score, own_states, True])
                    continue
                for tok in top[i]:
                    all_candidates[i].append(
                        extend(prefix, score, int(tok), output_tokens[i, -1], own_states))

        for i in range(sz):
            sequences[i] = sorted(all_candidates[i], key=lambda cand: cand[1])[:beam_size]

    # Render the best hypothesis of each example, cut at the first end marker.
    output = []
    for i in range(sz):
        st = "".join(reverse_target_char_index[ind] for ind in sequences[i][0][0])
        output.append("\t" + st.split('\n')[0] + "\n")
    return output
        

In [None]:

def beam_evaluate(data_tensor,data_output,k,beam_size,enc_model, dec_model, cell_type):
    """Compute exact-match word accuracy of beam search on the first k items.

    Args:
        data_tensor: encoded inputs; the first ``k`` are decoded.
        data_output: reference target strings aligned with ``data_tensor``.
        k: maximum number of examples to evaluate.
        beam_size: beam width passed to ``beam_decode``.
        enc_model: encoder inference model.
        dec_model: decoder inference model.
        cell_type: recurrent cell type passed to ``beam_decode``.

    Returns:
        ``(accuracy, pairs)`` where ``accuracy`` is the fraction of evaluated
        examples whose decoded string equals the reference, and ``pairs`` is a
        one-shot ``zip`` iterator of ``(decoded, reference)`` tuples.
    """
    sts = data_output[:k]
    # Normalise by the number of examples actually evaluated: it is smaller
    # than k when fewer than k references are available.
    evaluated = len(sts)
    if evaluated == 0:
        return 0.0, zip([], sts)
    input_seq = data_tensor[:k]
    decoded_sentences = beam_decode(input_seq,beam_size,enc_model, dec_model, cell_type)
    crct = np.sum(np.array(sts) == np.array(decoded_sentences))
    return crct/evaluated,zip(decoded_sentences,sts)

In [None]:
# a1, b1 = beam_evaluate(test_input_tensor,test_outputs,100,1,enc_model,dec_model,'RNN')

# a2, b2 = evaluate(test_input_tensor,test_outputs,100)
# # print(a1,a2)
# # for ((l,m),(n,o)) in zip(b1,b2):
# #     print(l,m,n)

# Wandb Sweep

In [None]:
import wandb
from wandb.keras import WandbCallback
# SECURITY: credentials must not live in the notebook. The key is read from
# the WANDB_API_KEY environment variable; when it is unset, wandb falls back
# to its own lookup (stored credentials or an interactive prompt).
# NOTE(review): an API key was previously committed in this cell. Treat it as
# leaked and revoke it in the W&B account settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"), relogin=False)

In [None]:
# Hyper-parameter search space: each entry lists the discrete values the
# sweep may pick for that parameter.
_sweep_choices = {
    'beam_size': [2, 3],
    'input_embed_size': [16, 32],
    'hidden_size': [64, 128, 256],
    'cell_type': ['GRU', 'LSTM', 'RNN'],
    'num_hidden_layers': [1, 2, 3],
    'dropout': [0, 0.2],
}

# Random search that maximizes the logged 'val_word_accuracy' metric.
sweep_config = {
    'method': 'random',  # grid, random
    'metric': {'name': 'val_word_accuracy', 'goal': 'maximize'},
    'parameters': {
        name: {'values': choices} for name, choices in _sweep_choices.items()
    },
}

In [None]:
# Register the sweep defined by sweep_config with W&B; the returned id is what
# an agent needs in order to run it.
sweep_id = wandb.sweep(sweep_config, entity="mooizz",project="Rec_dakhashina")

In [None]:
from keras.models import Model
from keras.layers import Input, LSTM, RNN, GRU, Dense, SimpleRNN, Embedding

In [None]:
def get_Model_v2(input_embed_size, hidden_size, cell_type, num_decoder_layers, num_encoder_layers, dropout):
    """Build the seq2seq training model and its inference encoder/decoder.

    The encoder is an embedding followed by a stack of recurrent layers. Each
    decoder layer is initialised with the final state(s) of the encoder layer
    at the same depth. The inference decoder reuses the trained decoder layers
    but takes its initial states as explicit inputs.

    Args:
        input_embed_size: embedding dimension for both encoder and decoder.
        hidden_size: number of units of every recurrent layer.
        cell_type: 'LSTM', 'GRU' or 'RNN' (SimpleRNN).
        num_decoder_layers: number of stacked decoder layers (at least 1).
        num_encoder_layers: number of stacked encoder layers; must equal
            ``num_decoder_layers``.
        dropout: input dropout rate of every recurrent layer.

    Returns:
        ``(model, encoder_model, decoder_model)``:
        * ``model`` maps ``[encoder_tokens, decoder_tokens]`` to per-step
          softmax distributions over ``num_output_chars + 1`` classes.
        * ``encoder_model`` maps encoder tokens to the flat list of encoder
          states, layer by layer (``h, c`` per layer for LSTM, ``h`` otherwise).
        * ``decoder_model`` maps ``[decoder_tokens] + states`` to
          ``[distributions] + new_states`` with the same state ordering.

    Raises:
        ValueError: if ``cell_type`` is unknown or fewer than one layer is
            requested.
    """
    assert(num_encoder_layers == num_decoder_layers)
    cell_classes = {'LSTM': LSTM, 'GRU': GRU, 'RNN': SimpleRNN}
    if cell_type not in cell_classes:
        raise ValueError("Unsupported cell_type: %r" % (cell_type,))
    if num_encoder_layers < 1:
        raise ValueError("At least one recurrent layer is required")
    cell_class = cell_classes[cell_type]

    def make_layer():
        # All recurrent layers share the same configuration.
        return cell_class(hidden_size, return_sequences=True,
                          return_state=True, dropout=dropout)

    # ---- Encoder -----------------------------------------------------------
    charinput = Input(shape=(None,),name="input")
    embedding = Embedding(num_input_chars,input_embed_size, name="embedding")(charinput)

    layer_states = []  # one list of state tensors per encoder layer
    e_outputs = embedding
    for _ in range(num_encoder_layers):
        e_outputs, *states = make_layer()(e_outputs)
        layer_states.append(states)
    # Flat list handed to the inference encoder: [h1, c1, h2, c2, ...] for
    # LSTM and [h1, h2, ...] for GRU / SimpleRNN.
    encoder_states = [state for states in layer_states for state in states]

    # ---- Training decoder --------------------------------------------------
    decoder_inputs = Input(shape=(None,),name="decoder_input")
    # Id 0 is padding on the decoder side, hence mask_zero=True.
    decoder_embedding = Embedding(num_output_chars + 1, input_embed_size, name="decoder_embedding",mask_zero=True)(decoder_inputs)

    d_out_layers = [make_layer() for _ in range(num_decoder_layers)]
    d_outputs = decoder_embedding
    for layer, states in zip(d_out_layers, layer_states):
        d_outputs, *_ = layer(d_outputs, initial_state=states)

    decoder_dense = Dense(num_output_chars + 1, activation="softmax")
    decoder_outputs = decoder_dense(d_outputs)

    model = tf.keras.Model([charinput,decoder_inputs],decoder_outputs)

    # ---- Inference models --------------------------------------------------
    encoder_model = tf.keras.Model(charinput, encoder_states)

    states_per_layer = len(layer_states[0])
    decoder_state_inputs = []
    decoder_states = []
    inference_outputs = decoder_embedding
    for layer in d_out_layers:
        # shape must be a tuple: (hidden_size,) rather than the bare int
        # that (hidden_size) evaluates to.
        state_inputs = [Input(shape=(hidden_size,)) for _ in range(states_per_layer)]
        inference_outputs, *new_states = layer(inference_outputs, initial_state=state_inputs)
        decoder_state_inputs.extend(state_inputs)
        decoder_states.extend(new_states)

    decoder_outputs = decoder_dense(inference_outputs)
    decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states)

    return model, encoder_model, decoder_model

In [None]:
def train():
    """Run one W&B training run and log its validation word accuracy.

    Hyper-parameters come from ``wandb.config`` (the sweep), falling back to
    ``config_defaults``. The model is trained on the module-level training
    tensors, validated on the validation tensors, and finally scored with
    beam search; the result is logged as ``val_word_accuracy``.
    """
    config_defaults = {
        'epochs' : 10,
        'batch_size' : 64,
        'optimizer' : 'adam',
        'beam_size' : 1,
        'input_embed_size': 32,
        'hidden_size' : 256,
        'cell_type' : 'LSTM',
        'num_hidden_layers' : 1,
        'dropout' : 0,
    }
    wandb.init(config=config_defaults)

    config = wandb.config
    # Encoder and decoder use the same depth.
    model, enc_model, dec_model = get_Model_v2(config.input_embed_size,config.hidden_size,
                     config.cell_type,
                     config.num_hidden_layers,
                     config.num_hidden_layers,
                     config.dropout)
    # Honour the configured optimizer instead of a hard-coded one; the
    # default is still 'adam'.
    model.compile(
        optimizer=config.optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
    )
    # NOTE(review): patience=30 exceeds the default of 10 epochs, so early
    # stopping cannot trigger with the default configuration.
    EarlyStopCB = tf.keras.callbacks.EarlyStopping(patience=30, monitor='val_accuracy',
                                                  restore_best_weights=True)
    tf.config.run_functions_eagerly(True)
    model.fit(
        [input_tensor,decoder_input_data],
        decoder_output_data,
        batch_size=config.batch_size,
        epochs=config.epochs,
        validation_data=(
            [val_input_tensor,decoder_val_input_data],
            decoder_val_output_data
        ),
        shuffle=True,
        callbacks=[WandbCallback(), EarlyStopCB])
    # Word-level accuracy over the whole validation split.
    beam_acc , _ = beam_evaluate(val_input_tensor,valid_outputs,len(valid_outputs),config.beam_size,
                                enc_model,
                                dec_model,
                                config.cell_type)
    wandb.log({'val_word_accuracy' : beam_acc})


In [None]:
# wandb.agent(sweep_id, train)

# Best Model

In [None]:
# Start a W&B run for the best-model training below.
wandb.init( entity="mooizz",project="Rec_dakhashina")

In [None]:
# Build the chosen configuration: 3-layer LSTM encoder/decoder, 256 hidden
# units, 32-dimensional embeddings and dropout 0.2.
best_model, enc_model, dec_model = get_Model_v2(32, 256, 'LSTM', 3, 3, 0.2)
best_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
best_model.summary()


In [None]:
# Train the best model with teacher forcing, validating on the dev split.
EarlyStopCB = tf.keras.callbacks.EarlyStopping(
    patience=30,
    monitor='val_accuracy',
    restore_best_weights=True,
)
tf.config.run_functions_eagerly(True)
best_model.fit(
    [input_tensor, decoder_input_data],
    decoder_output_data,
    batch_size=64,
    epochs=10,
    validation_data=([val_input_tensor, decoder_val_input_data], decoder_val_output_data),
    shuffle=True,
    callbacks=[WandbCallback(), EarlyStopCB],
)

In [None]:
# Word-level accuracy on the whole validation split with beam width 1.
beam_acc, _ = beam_evaluate(
    val_input_tensor,
    valid_outputs,
    len(valid_outputs),
    1,
    enc_model,
    dec_model,
    'LSTM',
)
wandb.log({'val_word_accuracy': beam_acc})

In [None]:
# Validation word accuracy of the best model.
print(beam_acc)

In [None]:
# Word-level accuracy on the whole test split with beam width 1; `pred`
# pairs each decoded string with its reference.
beam_acc, pred = beam_evaluate(
    test_input_tensor,
    test_outputs,
    len(test_outputs),
    1,
    enc_model,
    dec_model,
    'LSTM',
)

In [None]:
# Test word accuracy of the best model.
print(beam_acc)

In [None]:
# tabular format
from tabulate import tabulate
import csv

# One row per test word: input, prediction, reference. [1:-1] strips the
# leading "\t" and trailing "\n" markers from the decoded / reference strings.
# NOTE: `pred` is a one-shot iterator, so this cell consumes it; re-run the
# evaluation cell before re-running this one.
data = [
    [source_word, predicted[1:-1], reference[1:-1]]
    for source_word, (predicted, reference) in zip(test_inputs, pred)
]

# Write UTF-8 explicitly: the words are non-ASCII and the platform default
# encoding may be unable to represent them.
with open('predictions_vanilla.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Input Word", "Predicted Word", "True Word"])
    writer.writerows(data)

In [None]:
import pandas as pd
# Read the predictions from the same relative path the export cell writes to,
# rather than an absolute path that only exists on one machine.
df = pd.read_csv('predictions_vanilla.csv')
# Shuffle all rows and keep up to 100 of them as a random sample.
frame = df.sample(frac=1).head(100)

In [None]:
# Upload the sampled predictions to W&B as a table.
wandb.log({'predictions': wandb.Table(dataframe=frame)})
