In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
import os
# %config IPCompleter.greedy=True
# For limiting GPU VRAM used by process

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    tf.config.experimental.set_virtual_device_configuration(gpus[0],
                                                            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=7500)])
  except RuntimeError as e:
    print(e)
    
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)

In [3]:
dataset_path = os.getcwd() + os.sep + 'te' + os.sep + 'lexicons'
train_path = dataset_path + os.sep + 'te.translit.sampled.train.tsv'
valid_path = dataset_path + os.sep + 'te.translit.sampled.dev.tsv'
test_path = dataset_path + os.sep + 'te.translit.sampled.test.tsv'

In [4]:
train_inputs = []
train_outputs = []
valid_inputs = []
valid_outputs = []
test_inputs = []
test_outputs = []
input_chars = set()
output_chars = set()

In [5]:
include_all = True

In [6]:
with open(train_path, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")
for line in lines[: (len(lines) - 1 )]:
    out,inp,a = line.split('\t')
#     if not include_all and a!=1:
#         continue
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
#     print(out,out[-1],inp,inp[-1])
    out = "\t" + out + "\n"
    train_inputs.append(inp)
    train_outputs.append(out)
    for char in inp:
        if char not in input_chars:
            input_chars.add(char)
    for char in out:
        if char not in output_chars:
            output_chars.add(char)

In [7]:
len(train_inputs)

58550

In [8]:
with open(valid_path, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")
for line in lines[: (len(lines) - 1 )]:
    out,inp,a = line.split('\t')
    if not include_all and a!=1:
        continue
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
#     print(out,out[-1],inp,inp[-1])
    out = "\t" + out + "\n"
    valid_inputs.append(inp)
    valid_outputs.append(out)
#     for char in inp:
#         if char not in input_chars:
#             input_chars.add(char)
#     for char in out:
#         if char not in output_chars:
#             output_chars.add(char)

In [9]:
with open(test_path, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")
for line in lines[: (len(lines) - 1 )]:
    out,inp,a = line.split('\t')
    if not include_all and a!=1:
        continue
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
#     print(out,out[-1],inp,inp[-1])
    out = "\t" + out + "\n"
    test_inputs.append(inp)
    test_outputs.append(out)
#     for char in inp:
#         if char not in input_chars:
#             input_chars.add(char)
#     for char in out:
#         if char not in output_chars:
#             output_chars.add(char)

In [10]:
input_chars = sorted(list(input_chars))
print(input_chars)
num_input_chars = len(input_chars)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [11]:
output_chars = sorted(list(output_chars))
print(output_chars)
num_output_chars = len(output_chars)

['\t', '\n', 'ం', 'ః', 'అ', 'ఆ', 'ఇ', 'ఈ', 'ఉ', 'ఊ', 'ఋ', 'ఎ', 'ఏ', 'ఐ', 'ఒ', 'ఓ', 'ఔ', 'క', 'ఖ', 'గ', 'ఘ', 'చ', 'ఛ', 'జ', 'ఝ', 'ఞ', 'ట', 'ఠ', 'డ', 'ఢ', 'ణ', 'త', 'థ', 'ద', 'ధ', 'న', 'ప', 'ఫ', 'బ', 'భ', 'మ', 'య', 'ర', 'ఱ', 'ల', 'ళ', 'వ', 'శ', 'ష', 'స', 'హ', 'ా', 'ి', 'ీ', 'ు', 'ూ', 'ృ', 'ె', 'ే', 'ై', 'ొ', 'ో', 'ౌ', '్', '\u200c']


In [12]:
train_size = len(train_inputs)
max_input_size = max([len(txt) for txt in train_inputs])
print(max_input_size)
max_output_size = max([len(txt) for txt in  train_outputs])
print(max_output_size)

25
22


In [13]:
input_index = dict([(char, i) for i, char in enumerate(input_chars)])
output_index = dict([(char, i+1) for i, char in enumerate(output_chars)])


In [14]:
print(output_index)

{'\t': 1, '\n': 2, 'ం': 3, 'ః': 4, 'అ': 5, 'ఆ': 6, 'ఇ': 7, 'ఈ': 8, 'ఉ': 9, 'ఊ': 10, 'ఋ': 11, 'ఎ': 12, 'ఏ': 13, 'ఐ': 14, 'ఒ': 15, 'ఓ': 16, 'ఔ': 17, 'క': 18, 'ఖ': 19, 'గ': 20, 'ఘ': 21, 'చ': 22, 'ఛ': 23, 'జ': 24, 'ఝ': 25, 'ఞ': 26, 'ట': 27, 'ఠ': 28, 'డ': 29, 'ఢ': 30, 'ణ': 31, 'త': 32, 'థ': 33, 'ద': 34, 'ధ': 35, 'న': 36, 'ప': 37, 'ఫ': 38, 'బ': 39, 'భ': 40, 'మ': 41, 'య': 42, 'ర': 43, 'ఱ': 44, 'ల': 45, 'ళ': 46, 'వ': 47, 'శ': 48, 'ష': 49, 'స': 50, 'హ': 51, 'ా': 52, 'ి': 53, 'ీ': 54, 'ు': 55, 'ూ': 56, 'ృ': 57, 'ె': 58, 'ే': 59, 'ై': 60, 'ొ': 61, 'ో': 62, 'ౌ': 63, '్': 64, '\u200c': 65}


In [15]:
#### Enoding in indexes of characters in the set
def encode_index(inputs,index):
    data = []
    for i in range(len(inputs)):
        a = np.zeros(len(inputs[i]))
        j = 0
        for char in inputs[i]:
            a[j] = index[char]
            j += 1
        data.append(a)
    data = np.asarray(data).astype(np.ndarray)
    return data
    

In [16]:
# input_data = []
# for i in range(train_size):
#     a = np.zeros(len(train_inputs[i]))
#     j = 0
#     for char in train_inputs[i]:
#         a[j] = input_index[char]
#         j += 1
#     input_data.append(a)
# input_data = np.asarray(input_data).astype(np.ndarray)

In [17]:
input_data = encode_index(train_inputs,input_index)
input_tensor = tf.ragged.constant(input_data)

  return array(a, dtype, copy=False, order=order)


In [18]:
val_input_data = encode_index(valid_inputs,input_index)
val_input_tensor = tf.ragged.constant(val_input_data)

In [19]:
test_input_data = encode_index(test_inputs,input_index)
test_input_tensor = tf.ragged.constant(test_input_data)

In [20]:
print(len(input_data))

58550


In [21]:
max_val__input_size = max([len(txt) for txt in valid_inputs])
max_val_output_size = max([len(txt) for txt in  valid_outputs])
max_test_input_size = max([len(txt) for txt in test_inputs])
max_test_output_size = max([len(txt) for txt in  test_outputs])

In [22]:
decoder_input_data = np.zeros(
    (len(train_inputs), max_output_size,num_output_chars+1), dtype="float32"
)
decoder_output_data = np.zeros(
    (len(train_inputs), max_output_size,num_output_chars+1), dtype="float32"
)
for i,target_text in enumerate(train_outputs):
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t, output_index[char]] = 1.0
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_output_data[i, t - 1, output_index[char]] = 1.0
#     decoder_input_data[i, t + 1 :, output_index[" "]] = 1.0
#     decoder_output_data[i, t:, output_index[" "]] = 1.0
# print(decoder_input_data[0])
decoder_input_data = np.argmax(decoder_input_data,axis=2).astype(dtype='float32')
# decoder_output_data = np.argmax(decoder_output_data,axis=2).astype(dtype='float32')
# decoder_input_data = tf.convert_to_tensor(decoder_input_data)
# decoder_output_data = tf.convert_to_tensor(decoder_output_data) 

In [23]:
decoder_input_data[0]

array([ 1.,  5.,  3., 18., 53., 32.,  2.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [24]:
decoder_val_input_data = np.zeros(
    (len(valid_inputs), max_val_output_size,num_output_chars+1), dtype="float32"
)
decoder_val_output_data = np.zeros(
    (len(valid_inputs), max_val_output_size,num_output_chars+1), dtype="float32"
)
for i,target_text in enumerate(valid_outputs):
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_val_input_data[i, t, output_index[char]] = 1.0
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_val_output_data[i, t - 1, output_index[char]] = 1.0
#     decoder_input_data[i, t + 1 :, output_index[" "]] = 1.0
#     decoder_output_data[i, t:, output_index[" "]] = 1.0
decoder_val_input_data = np.argmax(decoder_val_input_data,axis=2).astype(dtype='float32')
# decoder_val_output_data = np.argmax(decoder_val_output_data,axis=2).astype(dtype='float32')
# decoder_val_input_data = tf.convert_to_tensor(decoder_val_input_data)
# decoder_val_output_data = tf.convert_to_tensor(decoder_val_output_data) 

In [25]:
decoder_test_input_data = np.zeros(
    (len(test_inputs), max_test_output_size,num_output_chars+1), dtype="float32"
)
decoder_test_output_data = np.zeros(
    (len(test_inputs), max_test_output_size,num_output_chars+1), dtype="float32"
)
for i,target_text in enumerate(test_outputs):
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_test_input_data[i, t, output_index[char]] = 1.0
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_test_output_data[i, t - 1, output_index[char]] = 1.0
#     decoder_input_data[i, t + 1 :, output_index[" "]] = 1.0
#     decoder_output_data[i, t:, output_index[" "]] = 1.0
decoder_test_input_data = np.argmax(decoder_test_input_data,axis=2).astype(dtype='float32')
# decoder_test_output_data = np.argmax(decoder_test_output_data,axis=2).astype(dtype='float32')
# decoder_test_input_data = tf.convert_to_tensor(decoder_test_input_data)
# decoder_test_output_data = tf.convert_to_tensor(decoder_test_output_data) 

In [26]:
# charinput = tf.keras.Input(shape=(None,),name="input")
# embedding = tf.keras.layers.Embedding(num_input_chars,input_embed_size, name="embedding")(charinput)

In [27]:
# model = tf.keras.Model(charinput,embedding)

In [28]:
# model.compile("rmsprop","mse")

In [29]:
# out = model.predict(input_data[0])
# print(out)

# Sample Model

In [30]:
def get_sample_model(input_embed_size , hidden_size):
    charinput = tf.keras.Input(shape=(None,),name="input")
    embedding = tf.keras.layers.Embedding(num_input_chars,input_embed_size, name="embedding")(charinput)
    
    encoder = tf.keras.layers.LSTM(hidden_size, return_state=True )
    encoder_outputs, state_h, state_c = encoder(embedding)
    encoder_states = [state_h, state_c]
    
    decoder_inputs = tf.keras.Input(shape=(None,),name="decoder_input")
    decoder_embedding = tf.keras.layers.Embedding(num_output_chars + 1,64, name="decoder_embedding",mask_zero=True)(decoder_inputs)
    
    decoder_lstm = tf.keras.layers.LSTM(hidden_size, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
    decoder_dense = tf.keras.layers.Dense(num_output_chars + 1, activation="softmax")
    decoder_outputs = decoder_dense(decoder_outputs)
    model = tf.keras.Model([charinput,decoder_inputs],decoder_outputs)
    
    encoder_model = tf.keras.Model(charinput, encoder_states)
    # define inference decoder
    decoder_state_input_h = tf.keras.Input(shape=(hidden_size,))
    decoder_state_input_c = tf.keras.Input(shape=(hidden_size,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedding, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = tf.keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
    return model, encoder_model, decoder_model

In [31]:
# sample_model, enc_model, dec_model = get_sample_model(32,256)

In [32]:
# sample_model.compile(
#     optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
# )
# sample_model.summary()

In [33]:
reverse_input_char_index = dict((i, char) for char, i in input_index.items())
reverse_target_char_index = dict((i, char) for char, i in output_index.items())
reverse_target_char_index[0] = ' '

def decode_single_sequence(input_seq):
    states_value = enc_model.predict(input_seq)
    target_seq = np.zeros((1, 1, num_output_chars+1))
    target_seq[0, 0, output_index["\t"]] = 1.0
    target_seq = np.argmax(target_seq,axis=2).astype('float32')
    stop_condition = False
    decoded_sentence = ""
    while not stop_condition:
        output_tokens, h, c = dec_model.predict([target_seq] + states_value)

#         print(output_tokens)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
#         print(sampled_token_index)
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char
#         print(sampled_char)
        # Exit condition: either hit max length
        # or find stop character.
        if sampled_char == "\n" or sampled_char == ' ' or len(decoded_sentence) > max_output_size:
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_output_chars+1))
        target_seq[0, 0, sampled_token_index] = 1.0
        target_seq = np.argmax(target_seq,axis=2).astype('float32')
        # Update states
        states_value = [h, c]
    return decoded_sentence
def decode_sequence(input_seq):
    sz  = input_seq.shape[0]
    states_value = enc_model.predict(input_seq)
    target_seq = np.zeros((sz,1,num_output_chars+1))
    for i in range(sz):
        target_seq[i, 0, output_index["\t"]] = 1.0
    target_seq = np.argmax(target_seq,axis=2).astype('float32')
    decoded_seqs = ["" for i in range(sz)]
    j = 0
    while j < max_output_size:
        output_tokens, h, c = dec_model.predict([target_seq] + states_value)

#         print(output_tokens)
        sampled_token_index = np.argmax(output_tokens[:, -1, :],axis=1)
        target_seq = np.zeros((sz, 1, num_output_chars+1))
        for i in range(sz):
            sampled_char = reverse_target_char_index[sampled_token_index[i]]
            decoded_seqs[i] += sampled_char
            target_seq[i, 0, sampled_token_index[i]] = 1.0
        target_seq = np.argmax(target_seq,axis=2).astype('float32')
        # Update states
        states_value = [h,c]
        j+=1
    output = [ ("\t"+st.split('\n')[0]+"\n") for st in decoded_seqs]
    return output

In [34]:
# for seqid in range(5):
#     input_seq = input_tensor[seqid:seqid+1]
# #     print(input_seq.shape,input_tensor.shape)
#     decoded_sentence = decode_single_sequence(input_seq)
#     print("-")
#     print("Input sentence:", train_inputs[seqid])
#     print("Decoded sentence:", decoded_sentence)

In [35]:
# tf.config.run_functions_eagerly(True)


In [36]:
# sample_model.fit(
#     [input_tensor,decoder_input_data],
#     decoder_output_data,
#     batch_size=64,
#     epochs=10,
#     validation_data=([val_input_tensor,decoder_val_input_data],decoder_val_output_data),
#     shuffle=True,
# )

In [37]:
def evaluate(data_tensor,data_output,k):
    crct = 0
    input_seq = data_tensor[:k]
#     print(input_seq.shape,input_tensor.shape)
    decoded_sentences = decode_sequence(input_seq)
    sts = data_output[:k]
    crct += np.sum(np.array(sts) == np.array(decoded_sentences))
#         print(crct/(seqid+1))
#         for st,d in zip(sts,decoded_sentences):
#             print(st+"_o")
#             print(d+"_o")
    return crct/k,zip(decoded_sentences,sts)

# Beam Implementation

In [38]:
# def beam_search(outputs, k, output_words, reverse_index):
#     for i in range(len(outputs)):
#         seqs = beam_search_decoder(outputs[i],k)
import math
reverse_input_char_index = dict((i, char) for char, i in input_index.items())
reverse_target_char_index = dict((i, char) for char, i in output_index.items())
reverse_target_char_index[0] = ' '

def beam_decode(input_seq, beam_size, enc_model, dec_model, cell_type):
    sz  = input_seq.shape[0]
    
    states_value = enc_model.predict(input_seq)
    
    target_seq = np.zeros((sz,1,num_output_chars+1))
    
    for i in range(sz):
        target_seq[i, 0, output_index["\t"]] = 1.0
    
    target_seq = np.argmax(target_seq,axis=2).astype('float32')

    decoded_seqs = ["" for i in range(sz)]
    if cell_type == 'LSTM':
        l = dec_model.predict([target_seq] + states_value)
        output_tokens = l[0]
        states = l[1:]
    if cell_type == 'GRU' or cell_type == 'RNN':
        l = dec_model.predict([target_seq] + states_value)
        output_tokens = l[0] 
        states = l[1:]
    
    
    sequences = []
    for i in range(sz):
        sequences.append([])
#     beam_decoded 
    sampled_token_beam = np.argpartition(output_tokens[:, -1, :], -beam_size ,axis=1)[:,-beam_size:]
    sampled_token_indexes = np.argmax(output_tokens[:, -1, :],axis=1)
#     print(sampled_token_beam.shape,sampled_token_indexes.shape)
    for i in range(sz):
        allcandidates = list()
        for j in range(beam_size):
            allcandidates.append(
                    [ [ sampled_token_beam[i][j] ],
                        -np.log( 
                        output_tokens[i][-1][sampled_token_beam[i][j]]),
                        states ,
                        False])
        ordered = sorted(allcandidates, key=lambda tup:tup[1])
        sequences[i] = ordered[:beam_size]
        
    
    
    target_seq = np.zeros((sz, beam_size, num_output_chars+1))
#     states_beam = []
    for i in range(sz):
        for j in range(beam_size): 
#             sampled_char = reverse_target_char_index[sequences[i][j][0][-1]]
#             decoded_seqs[i]
#             print(j,sequences[i][j][0][-1]
            target_seq[i, j, sequences[i][j][0][-1]] = 1.0
    target_seq = np.argmax(target_seq,axis=2).astype('float32')
#     enc_out = states_value[0]
    
#     for i in range(beam_size):
#         a = []
# #         a.append(enc_out)
#         a.append(h)
#         a.append(c)
#         states_beam.append(a)
#     states_value[1] = h
#     states_value[2] = c
#     a = []
#     for j in range(beam_size):
#         st = ""
#         for ind in sequences[0][j][0]:
#             st += reverse_target_char_index[ind]
#         a.append( ("\t"+st+"\n",sequences[0][j][1]) )
#     print(a)
#     print()
    it = 1
    while it < max_output_size:
        allcandidates = [list() for i in range(sz)]
        for k in range(len(sequences[i])):
#             print(target_seq.shape)
#             print(target_seq[:,k],sequences[i][k][2])
            if cell_type == 'LSTM':
                l = dec_model.predict(
                [target_seq[:,k]] + 
                sequences[i][k][2])
                output_tokens = l[0]
                states = l[1:]
            if cell_type == 'GRU' or cell_type == 'RNN':
                l = dec_model.predict(
                [target_seq[:,k]] + 
                sequences[i][k][2])
                output_tokens = l[0]
                states = l[1:]
            sampled_token_beam = np.argpartition(output_tokens[:, -1, :], -beam_size ,axis=1)[:,-beam_size:]
            sampled_token_indexes = np.argmax(output_tokens[:, -1, :],axis=1)
            
#             print(output_tokens)
#             print(sampled_token_beam.shape,sampled_token_indexes.shape)
            for i in range(sz):
                    if sequences[i][k][3]:
                        allcandidates[i].append(
                                [ sequences[i][k][0]+[ sampled_token_beam[i][j] ],
                                 sequences[i][k][1],
                                           states, True ])
                        continue
                    for j in range(beam_size):
                        if reverse_target_char_index[sampled_token_beam[i][j]]=='\n':
                            allcandidates[i].append(
                                [ sequences[i][k][0]+[ sampled_token_beam[i][j] ],
                                 sequences[i][k][1]-np.log( 
                                     output_tokens[i][-1][sampled_token_beam[i][j]]),
                                           states, True ])
                        else:
                            allcandidates[i].append(
                            [ sequences[i][k][0]+[ sampled_token_beam[i][j] ],
                             sequences[i][k][1]-np.log( 
                                 output_tokens[i][-1][sampled_token_beam[i][j]]),
                                       states, False ])
        for i in range(sz):
            ordered = sorted(allcandidates[i], key=lambda tup:tup[1])
            sequences[i] = ordered[:beam_size]
#         for i in range(sz):
#             print("all")
#             a = []
#             for j in range(len(allcandidates[i])):
#                 st = ""
#                 for ind in allcandidates[i][j][0]:
#                     st += reverse_target_char_index[ind]
#                 a.append( ("\t"+st+"\n",allcandidates[i][j][1]) )
#             print(a)
#             print()
#             print("seq")
#             a = []
#             for j in range(beam_size):
#                 st = ""
#                 for ind in sequences[i][j][0]:
#                     st += reverse_target_char_index[ind]
#                 a.append( ("\t"+st+"\n",sequences[i][j][1]) )
#             print(a)
#             print()
        target_seq = np.zeros((sz, beam_size, num_output_chars+1))
        for i in range(sz):
            for j in range(beam_size): 
                target_seq[i, j, sequences[i][j][0][-1]] = 1.0
        target_seq = np.argmax(target_seq,axis=2).astype('float32')
        it+=1
#     output = [ ("\t"+st.split('\n')[0]+"\n") for st in decoded_seqs]
    output = []
    for i in range(sz):
        st = ""
        for ind in sequences[i][0][0]:
            st += reverse_target_char_index[ind]
        output.append("\t"+st.split('\n')[0]+"\n")
            
    return output
        

In [39]:

def beam_evaluate(data_tensor,data_output,k,beam_size,enc_model, dec_model, cell_type):
    crct = 0
    input_seq = data_tensor[:k]
    decoded_sentences = beam_decode(input_seq,beam_size,enc_model, dec_model, cell_type)
    sts = data_output[:k]
    crct += np.sum(np.array(sts) == np.array(decoded_sentences))
    return crct/k,zip(decoded_sentences,sts)

In [40]:
# a1, b1 = beam_evaluate(test_input_tensor,test_outputs,100,1,enc_model,dec_model,'LSTM')

# a2, b2 = evaluate(test_input_tensor,test_outputs,100)
# # print(a1,a2)
# # for ((l,m),(n,o)) in zip(b1,b2):
# #     print(l,m,n)

# Wandb Sweep

In [41]:
import wandb
from wandb.keras import WandbCallback
wandb.login(key="866040d7d81f67025d43e7d50ecd83d54b6cf977", relogin=False)

wandb: Currently logged in as: mooizz (use `wandb login --relogin` to force relogin)
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\Jaitesh/.netrc


True

In [42]:
sweep_config = {
    'method': 'bayes', #grid, random
    'metric': {
      'name': 'val_word_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
         'beam_size' : {
            'values' : [1, 2 ,3]
        },
        'input_embed_size': {
            'values' : [16,32]
        },
        'hidden_size' : {
            'values' : [64, 128, 256]
        },
        'cell_type' : {
            'values' : ['GRU','LSTM','RNN']
        },
        'num_hidden_layers' : {
            'values' : [1, 2, 3]
        },
        'dropout' : {
            'values' : [0, 0.2]
        },
       
    }
}

In [43]:
# sweep_id = wandb.sweep(sweep_config, entity="mooizz",project="Rec_dakhashina")

In [44]:
from keras.models import Model
from keras.layers import Input, LSTM, RNN, GRU, Dense, SimpleRNN, Embedding

In [45]:
def get_Model_v2(input_embed_size, hidden_size, cell_type, num_decoder_layers, num_encoder_layers, dropout, decoder_embed_size = 64):
    assert(num_encoder_layers == num_decoder_layers)
    charinput = Input(shape=(None,),name="input")
    embedding = Embedding(num_input_chars,input_embed_size, name="embedding")(charinput)    
    
    if cell_type == 'LSTM':
        hl = []
        cl = []
        encoder_states = []
        e_outputs, htemp, ctemp = LSTM(hidden_size, return_sequences=True,
                                        return_state=True, dropout=dropout)(embedding)
        hl.append(htemp)
        cl.append(ctemp)
        encoder_states.append(htemp)
        encoder_states.append(ctemp)
        for hln_iter in range(num_encoder_layers-1):
            e_outputs, htemp, ctemp = LSTM(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)(e_outputs)
            hl.append(htemp)
            cl.append(ctemp)
            encoder_states.append(htemp)
            encoder_states.append(ctemp)
    if cell_type == 'RNN':
        rnn_states = []
        encoder_states = []
        e_outputs, htemp = SimpleRNN(hidden_size, return_sequences=True,
                                        return_state=True, dropout=dropout)(embedding)
        rnn_states.append(htemp)
        encoder_states.append(htemp)
        for hln_iter in range(num_encoder_layers-1):
            e_outputs, htemp = SimpleRNN(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)(e_outputs)
            rnn_states.append(htemp)
            encoder_states.append(htemp)
    if cell_type == 'GRU':
        gru_states = []
        encoder_states = []
        e_outputs, htemp = GRU(hidden_size, return_sequences=True,
                                        return_state=True, dropout=dropout)(embedding)
        gru_states.append(htemp)
        encoder_states.append(htemp)
        for hln_iter in range(num_encoder_layers-1):
            e_outputs, htemp = GRU(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)(e_outputs)
            gru_states.append(htemp)
            encoder_states.append(htemp)
        
    
    decoder_inputs = Input(shape=(None,),name="decoder_input")
    decoder_embedding = Embedding(num_output_chars + 1, decoder_embed_size, name="decoder_embedding",mask_zero=True)(decoder_inputs)
    

    if cell_type == 'LSTM':
        d_out_layers = []
        out_layer = LSTM(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)
        d_out_layers.append(out_layer)
        d_outputs, _, _ = out_layer(decoder_embedding, initial_state=[hl[0], cl[0]]) 
        for hln_iter in range(num_decoder_layers-1):
            out_layer = LSTM(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)
            d_out_layers.append(out_layer)
            d_outputs, _, _ = out_layer(d_outputs, initial_state=[hl[hln_iter+1], cl[hln_iter+1]])

    if cell_type == 'RNN':
        d_out_layers = []
        out_layer = SimpleRNN(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)
        d_out_layers.append(out_layer)
        d_outputs, _ = out_layer(decoder_embedding, initial_state=rnn_states[0]) 
        for hln_iter in range(num_decoder_layers-1):
            out_layer = SimpleRNN(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)
            d_out_layers.append(out_layer)
            d_outputs, _ = out_layer(d_outputs, initial_state=rnn_states[hln_iter+1])

    if cell_type == 'GRU':
        d_out_layers = []
        out_layer = GRU(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)
        d_out_layers.append(out_layer)
        d_outputs, _ = out_layer(decoder_embedding, initial_state=gru_states[0]) 
        for hln_iter in range(num_decoder_layers-1):
            out_layer = GRU(hidden_size, return_sequences=True,
                                            return_state=True, dropout=dropout)
            d_out_layers.append(out_layer)
            d_outputs, _ = out_layer(d_outputs, initial_state=gru_states[hln_iter+1])


    decoder_dense = Dense(num_output_chars + 1, activation="softmax")
    decoder_outputs = decoder_dense(d_outputs)
    
    model = tf.keras.Model([charinput,decoder_inputs],decoder_outputs)

    ########2nd

    if cell_type == 'LSTM':
        encoder_model = tf.keras.Model(charinput, encoder_states)
        decoder_state_input_h = []
        decoder_state_input_c = []
        decoder_state_inputs = []
        for _ in range(num_decoder_layers):
            decoder_state_input_h.append(Input(shape=(hidden_size)))
            decoder_state_input_c.append(Input(shape=(hidden_size)))
            decoder_state_inputs.append(decoder_state_input_h[-1])
            decoder_state_inputs.append(decoder_state_input_c[-1])
        
        decoder_states = []
        decoder_outputs, dhtemp, dctemp = d_out_layers[0](decoder_embedding, initial_state=decoder_state_inputs[:2])
        decoder_states.append(dhtemp)
        decoder_states.append(dctemp)
        for i in range(num_decoder_layers-1):
            decoder_outputs, dhtemp, dctemp = d_out_layers[i+1](decoder_outputs, initial_state=decoder_state_inputs[2*(i+1):2*(i+2)])
            decoder_states.append(dhtemp)
            decoder_states.append(dctemp)

        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states)

    if cell_type == 'GRU':
        encoder_model = tf.keras.Model(charinput, encoder_states)
        decoder_state_input_gru = []
        for _ in range(num_decoder_layers):
            decoder_state_input_gru.append(Input(shape=(hidden_size)))
        
        decoder_states = []
        decoder_outputs, dhtemp = d_out_layers[0](decoder_embedding, initial_state=decoder_state_input_gru[0])
        decoder_states.append(dhtemp)
        for i in range(num_decoder_layers-1):
            decoder_outputs, dhtemp = d_out_layers[i+1](decoder_outputs, initial_state=decoder_state_input_gru[i+1])
            decoder_states.append(dhtemp)

        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_model = Model([decoder_inputs] + decoder_state_input_gru, [decoder_outputs] + decoder_states)

    if cell_type == 'RNN':
        encoder_model = tf.keras.Model(charinput, encoder_states)
        decoder_state_input_rnn = []
        for _ in range(num_decoder_layers):
            decoder_state_input_rnn.append(Input(shape=(hidden_size)))
        
        decoder_states = []
        decoder_outputs, dhtemp = d_out_layers[0](decoder_embedding, initial_state=decoder_state_input_rnn[0])
        decoder_states.append(dhtemp)
        for i in range(num_decoder_layers-1):
            decoder_outputs, dhtemp = d_out_layers[i+1](decoder_outputs, initial_state=decoder_state_input_rnn[i+1])
            decoder_states.append(dhtemp)

        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_model = Model([decoder_inputs] + decoder_state_input_rnn, [decoder_outputs] + decoder_states)          
        
    return model, encoder_model, decoder_model

In [46]:
def train():
    config_defaults = {
        'epochs' : 10,
        'batch_size' : 64,
        'optimizer' : 'adam',
        'beam_size' : 1,
        'input_embed_size': 32,
        'hidden_size' : 256,
        'cell_type' : 'LSTM',
        'num_hidden_layers' : 1,
        'dropout' : 0,
    }
    wandb.init(config=config_defaults)

    config = wandb.config
    model, enc_model, dec_model = get_Model_v2(config.input_embed_size,config.hidden_size,
                     config.cell_type,
                     config.num_hidden_layers,
                     config.num_hidden_layers,
                     config.dropout)
    model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
    )
#     sample_model.summary()
    EarlyStopCB = tf.keras.callbacks.EarlyStopping(patience=30, monitor='val_accuracy',
                                                  restore_best_weights=True)
    tf.config.run_functions_eagerly(True)
    model.fit(
        [input_tensor,decoder_input_data],
        decoder_output_data,
        batch_size=config.batch_size,
        epochs=config.epochs,
        validation_data=(
            [val_input_tensor,decoder_val_input_data],
            decoder_val_output_data
        ),
        shuffle=True,
        callbacks=[WandbCallback(), EarlyStopCB])
    beam_acc , _ = beam_evaluate(val_input_tensor,valid_outputs,len(valid_outputs),config.beam_size,
                                enc_model,
                                dec_model,
                                config.cell_type)
    wandb.log({'val_word_accuracy' : beam_acc})
#     model.save('models'+os.sep+str(sweep_id)+os.sep+wandb.run.name)

In [None]:
wandb.agent('qsllfslr', train, entity="mooizz",project="Rec_dakhashina")

wandb: Agent Starting Run: qhzy1tya with config:
wandb: 	beam_size: 2
wandb: 	cell_type: RNN
wandb: 	dropout: 0.2
wandb: 	hidden_size: 64
wandb: 	input_embed_size: 32
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




VBox(children=(Label(value=' 0.75MB of 0.75MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.55914
accuracy,0.53264
val_loss,0.4544
val_accuracy,0.58675
_runtime,4181.0
_timestamp,1621495495.0
_step,10.0
best_val_loss,0.4544
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▃▂▂▂▁▁▁
accuracy,▁▅▆▆▇▇▇███
val_loss,█▆▄▄▃▂▂▂▂▁
val_accuracy,▁▃▅▆▆▇▇▇▇█
_runtime,▁▁▂▃▄▅▆▆▇██
_timestamp,▁▁▂▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: ji103vrn with config:
wandb: 	beam_size: 3
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 31.36MB of 31.36MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
epoch,9.0
loss,0.06955
accuracy,0.85278
val_loss,0.13679
val_accuracy,0.81035
_runtime,3231.0
_timestamp,1621498731.0
_step,10.0
best_val_loss,0.13679
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▂▂▁▁▁▁▁
accuracy,▁▅▆▇▇█████
val_loss,█▄▂▂▁▁▁▁▁▁
val_accuracy,▁▅▆▇▇█████
_runtime,▁▂▃▅▅▆▆▇▇██
_timestamp,▁▂▃▅▅▆▆▇▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: l807tkcz with config:
wandb: 	beam_size: 3
wandb: 	cell_type: RNN
wandb: 	dropout: 0.2
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 8.10MB of 8.10MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.36881
accuracy,0.65471
val_loss,0.34657
val_accuracy,0.6645
_runtime,2579.0
_timestamp,1621501318.0
_step,10.0
best_val_loss,0.34657
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▂▂▂▁▁▁
accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▃▅▅▆▆▇███
_runtime,▁▂▂▃▄▅▅▆▇██
_timestamp,▁▂▂▃▄▅▅▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: d75hrree with config:
wandb: 	beam_size: 3
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 32
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 31.55MB of 31.55MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
epoch,9.0
loss,0.06113
accuracy,0.85812
val_loss,0.12757
val_accuracy,0.81373
_runtime,2239.0
_timestamp,1621503565.0
_step,10.0
best_val_loss,0.12469
best_epoch,6.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▂▂▁▁▁▁▁
accuracy,▁▅▆▇▇█████
val_loss,█▄▂▂▁▁▁▁▁▁
val_accuracy,▁▅▇▇██████
_runtime,▁▂▂▃▄▅▅▆▇▇█
_timestamp,▁▂▂▃▄▅▅▆▇▇█
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: o1v0fbej with config:
wandb: 	beam_size: 1
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	hidden_size: 64
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 2


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 1.55MB of 1.55MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.34086
accuracy,0.6795
val_loss,0.27131
val_accuracy,0.71674
_runtime,1663.0
_timestamp,1621505239.0
_step,10.0
best_val_loss,0.27131
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▅▄▃▂▂▁▁▁
accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▆▅▄▃▂▂▁▁▁
val_accuracy,▁▃▄▅▆▇▇▇██
_runtime,▁▂▂▃▄▄▅▅▆██
_timestamp,▁▂▂▃▄▄▅▅▆██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: 3j8516wy with config:
wandb: 	beam_size: 1
wandb: 	cell_type: RNN
wandb: 	dropout: 0.2
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 1


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 2.04MB of 2.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.65445
accuracy,0.4766
val_loss,0.72121
val_accuracy,0.43243
_runtime,1071.0
_timestamp,1621506314.0
_step,9.0
best_val_loss,0.69524
best_epoch,8.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▂▂▂▁▁▁
accuracy,▁▄▅▆▇▇▇███
val_loss,█▆▄▃▂▂▂▂▁▂
val_accuracy,▁▃▅▆▇▇▇██▇
_runtime,▁▂▃▄▅▅▆▇▇█
_timestamp,▁▂▃▄▅▅▆▇▇█
_step,▁▂▃▃▄▅▆▆▇█


Run 3j8516wy errored: ValueError('Layer model_2 expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 5683, 256), dtype=float32, numpy=\narray([[[1.9999998e+00, 6.7985296e-02, 1.5618563e-01, ...,\n         1.1011417e+00, 1.9999940e+00, 4.8611820e-02],\n        [1.9999998e+00, 1.5131074e-01, 1.1441231e-01, ...,\n         1.0832253e+00, 1.9999957e+00, 2.2667170e-02],\n        [1.9999998e+00, 1.8726516e-01, 1.4551163e-02, ...,\n         1.9252095e+00, 1.9999998e+00, 5.7880282e-03],\n        ...,\n        [1.9999998e+00, 1.8541539e-01, 2.3841858e-07, ...,\n         4.6169758e-04, 1.9999998e+00, 1.4865398e-04],\n        [1.9999998e+00, 1.5480143e-01, 2.3841858e-07, ...,\n         4.5889616e-04, 1.9999998e+00, 1.5658140e-04],\n        [1.9999998e+00, 1.7641264e-01, 2.3841858e-07, ...,\n         4.5996904e-04, 1.9999998e+00, 1.4835596e-04]]], dtype=float32)>]')
wandb: ERROR Run 3j8516wy errored: ValueError('Layer model_2 expects 2 input(s), but it receiv

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 1.55MB of 1.55MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.22633
accuracy,0.75556
val_loss,0.22389
val_accuracy,0.74584
_runtime,1362.0
_timestamp,1621507688.0
_step,10.0
best_val_loss,0.22389
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▄▃▃▂▂▁▁▁
accuracy,▁▃▄▅▆▇▇███
val_loss,█▆▅▄▃▂▂▁▁▁
val_accuracy,▁▃▄▅▆▇▇███
_runtime,▁▂▂▃▄▅▅▆▇▇█
_timestamp,▁▂▂▃▄▅▅▆▇▇█
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: emtjtkem with config:
wandb: 	beam_size: 3
wandb: 	cell_type: RNN
wandb: 	dropout: 0
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 8.10MB of 8.10MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.19044
accuracy,0.76907
val_loss,0.30211
val_accuracy,0.7008
_runtime,2381.0
_timestamp,1621510074.0
_step,10.0
best_val_loss,0.29499
best_epoch,8.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▂▂▂▁▁▁▁
accuracy,▁▅▆▇▇▇████
val_loss,█▄▂▃▂▁▁▂▁▁
val_accuracy,▁▅▆▆▇▇█▇██
_runtime,▁▂▂▃▄▅▅▆▇██
_timestamp,▁▂▂▃▄▅▅▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: kbbybyax with config:
wandb: 	beam_size: 1
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	hidden_size: 128
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 1


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 2.19MB of 2.19MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.22288
accuracy,0.75574
val_loss,0.19958
val_accuracy,0.76404
_runtime,745.0
_timestamp,1621510825.0
_step,10.0
best_val_loss,0.19958
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▂▂▂▁▁▁
accuracy,▁▃▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▆▇▇▇███
_runtime,▁▂▂▃▄▅▆▆▇██
_timestamp,▁▂▂▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: rjnluc8g with config:
wandb: 	beam_size: 3
wandb: 	cell_type: LSTM
wandb: 	dropout: 0.2
wandb: 	hidden_size: 64
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 2.33MB of 2.33MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.33603
accuracy,0.67901
val_loss,0.26979
val_accuracy,0.71622
_runtime,2049.0
_timestamp,1621512879.0
_step,10.0
best_val_loss,0.26979
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▃▂▂▂▁▁
accuracy,▁▃▄▅▆▆▇▇██
val_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▃▄▅▆▆▇▇██
_runtime,▁▂▂▃▄▅▅▆▇▇█
_timestamp,▁▂▂▃▄▅▅▆▇▇█
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: ysxj5mg4 with config:
wandb: 	beam_size: 3
wandb: 	cell_type: LSTM
wandb: 	dropout: 0
wandb: 	hidden_size: 128
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 1


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 2.19MB of 2.19MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.14848
accuracy,0.80614
val_loss,0.18539
val_accuracy,0.77194
_runtime,735.0
_timestamp,1621513620.0
_step,10.0
best_val_loss,0.18539
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▃▃▂▂▁▁▁▁
accuracy,▁▄▅▆▇▇████
val_loss,█▅▃▂▂▁▁▁▁▁
val_accuracy,▁▄▆▆▇█████
_runtime,▁▂▂▃▄▅▅▆▇▇█
_timestamp,▁▂▂▃▄▅▅▆▇▇█
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: wy3fk0ff with config:
wandb: 	beam_size: 3
wandb: 	cell_type: RNN
wandb: 	dropout: 0
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 8.10MB of 8.10MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.17207
accuracy,0.7819
val_loss,0.29162
val_accuracy,0.71253
_runtime,2364.0
_timestamp,1621515989.0
_step,10.0
best_val_loss,0.28065
best_epoch,6.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▂▂▁▁▁▁▁
accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▂▁▁
val_accuracy,▁▅▆▇▇▇████
_runtime,▁▂▂▃▄▅▅▆▇██
_timestamp,▁▂▂▃▄▅▅▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: 95ejbpuz with config:
wandb: 	beam_size: 3
wandb: 	cell_type: LSTM
wandb: 	dropout: 0
wandb: 	hidden_size: 128
wandb: 	input_embed_size: 32
wandb: 	num_hidden_layers: 1


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 2.29MB of 2.29MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.13058
accuracy,0.81722
val_loss,0.16596
val_accuracy,0.78343
_runtime,784.0
_timestamp,1621516779.0
_step,10.0
best_val_loss,0.16517
best_epoch,8.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▂▂▁▁▁▁▁
accuracy,▁▄▆▇▇▇████
val_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▄▆▇▇█████
_runtime,▁▂▂▃▄▅▅▆▇▇█
_timestamp,▁▂▂▃▄▅▅▆▇▇█
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: ctj5h5vs with config:
wandb: 	beam_size: 2
wandb: 	cell_type: RNN
wandb: 	dropout: 0
wandb: 	hidden_size: 64
wandb: 	input_embed_size: 32
wandb: 	num_hidden_layers: 1


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 0.33MB of 0.33MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.69308
accuracy,0.46501
val_loss,0.70566
val_accuracy,0.45227
_runtime,870.0
_timestamp,1621517670.0
_step,9.0
best_val_loss,0.70566
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▂▂▂▁▁▁
accuracy,▁▄▅▆▇▇▇███
val_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▆▇▇▇▇██
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


Run ctj5h5vs errored: ValueError('Layer model_2 expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 5683, 64), dtype=float32, numpy=\narray([[[1.821042  , 0.8365084 , 1.0051454 , ..., 0.02152371,\n         1.9936523 , 0.68204236],\n        [1.8289256 , 0.41307187, 1.6580634 , ..., 0.44160134,\n         1.9899341 , 0.68103653],\n        [1.8853003 , 1.5647914 , 0.8078265 , ..., 0.00447625,\n         1.9995476 , 0.43663305],\n        ...,\n        [1.3693285 , 0.7379951 , 0.08407372, ..., 0.00269192,\n         1.9905937 , 1.8223171 ],\n        [1.338457  , 0.7284463 , 0.08183938, ..., 0.00240868,\n         1.9891922 , 1.8067912 ],\n        [1.3145291 , 0.7966283 , 0.07268566, ..., 0.00218236,\n         1.9888995 , 1.8366071 ]]], dtype=float32)>]')
wandb: ERROR Run ctj5h5vs errored: ValueError('Layer model_2 expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 5683, 64), dtype=float32, numpy=\narray([[[1.8210

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 1.86MB of 1.86MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.28049
accuracy,0.71842
val_loss,0.22147
val_accuracy,0.75026
_runtime,2160.0
_timestamp,1621519845.0
_step,10.0
best_val_loss,0.22147
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▄▃▃▂▂▂▁▁
accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▆▅▃▃▂▂▁▁▁
val_accuracy,▁▃▄▅▆▇▇███
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: gw3ak27q with config:
wandb: 	beam_size: 1
wandb: 	cell_type: GRU
wandb: 	dropout: 0
wandb: 	hidden_size: 64
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 1


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 0.63MB of 0.63MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.3899
accuracy,0.64944
val_loss,0.38868
val_accuracy,0.64002
_runtime,695.0
_timestamp,1621520550.0
_step,9.0
best_val_loss,0.38868
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▅▄▃▃▂▂▁▁
accuracy,▁▃▄▅▆▆▇▇██
val_loss,█▇▅▄▄▃▂▂▁▁
val_accuracy,▁▂▃▅▅▆▇▇██
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


Run gw3ak27q errored: ValueError('Layer model_2 expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 5683, 64), dtype=float32, numpy=\narray([[[0.5965537 , 0.70451826, 1.0520122 , ..., 1.9713657 ,\n         0.7417288 , 0.51667184],\n        [0.6206449 , 0.9878473 , 1.1065133 , ..., 1.9633237 ,\n         0.99450165, 0.34481353],\n        [0.4307683 , 0.72340816, 1.4807289 , ..., 1.9962276 ,\n         0.8731511 , 0.27273995],\n        ...,\n        [0.8223995 , 0.8911724 , 1.4307868 , ..., 1.9980197 ,\n         1.6384444 , 1.3306001 ],\n        [0.8314328 , 1.4124658 , 1.4729568 , ..., 1.9978261 ,\n         1.686892  , 1.2350366 ],\n        [0.85714734, 1.4144787 , 1.3912349 , ..., 1.998105  ,\n         1.7088742 , 1.2392203 ]]], dtype=float32)>]')
wandb: ERROR Run gw3ak27q errored: ValueError('Layer model_2 expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 5683, 64), dtype=float32, numpy=\narray([[[0.5965

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 0.63MB of 0.63MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.44046
accuracy,0.61519
val_loss,0.44009
val_accuracy,0.60444
_runtime,705.0
_timestamp,1621521270.0
_step,9.0
best_val_loss,0.44009
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▅▄▄▃▂▂▁▁
accuracy,▁▃▄▄▅▆▇▇██
val_loss,█▇▆▅▄▃▃▂▁▁
val_accuracy,▁▂▃▄▅▆▆▇▇█
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


Run 9kex35bs errored: ValueError('Layer model_2 expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 5683, 64), dtype=float32, numpy=\narray([[[1.2581433 , 1.745116  , 0.9231166 , ..., 0.8892307 ,\n         1.3120826 , 1.0723695 ],\n        [1.169044  , 1.7230686 , 0.9726476 , ..., 0.9431375 ,\n         1.3212488 , 1.1052711 ],\n        [1.3606989 , 1.7866385 , 0.85719013, ..., 0.9501438 ,\n         1.3217638 , 1.4320225 ],\n        ...,\n        [0.7935209 , 1.6927521 , 0.42333597, ..., 1.4786463 ,\n         1.6420738 , 1.3521307 ],\n        [0.86961025, 1.6456761 , 0.4371115 , ..., 1.5414785 ,\n         1.5510821 , 1.4138    ],\n        [0.7031333 , 1.6440543 , 0.4423195 , ..., 1.5675039 ,\n         1.5511264 , 1.5710901 ]]], dtype=float32)>]')
wandb: ERROR Run 9kex35bs errored: ValueError('Layer model_2 expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 5683, 64), dtype=float32, numpy=\narray([[[1.2581

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 4.07MB of 4.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.09649
accuracy,0.83936
val_loss,0.15139
val_accuracy,0.7932
_runtime,1316.0
_timestamp,1621522595.0
_step,10.0
best_val_loss,0.15139
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▃▂▂▂▁▁▁▁
accuracy,▁▄▅▇▇▇████
val_loss,█▅▃▂▂▁▁▁▁▁
val_accuracy,▁▄▆▇▇█████
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Agent Starting Run: ifeiip2d with config:
wandb: 	beam_size: 1
wandb: 	cell_type: RNN
wandb: 	dropout: 0.2
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 32
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


VBox(children=(Label(value=' 8.15MB of 8.15MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.32941
accuracy,0.67929
val_loss,0.31565
val_accuracy,0.67947
_runtime,2431.0
_timestamp,1621525033.0
_step,10.0
best_val_loss,0.31565
best_epoch,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▃▂▂▂▁▁▁
accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▁▁▁▁
val_accuracy,▁▄▅▆▇▇████
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_word_accuracy,▁


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: p2i8a23g with config:
wandb: 	beam_size: 2
wandb: 	cell_type: RNN
wandb: 	dropout: 0
wandb: 	hidden_size: 256
wandb: 	input_embed_size: 16
wandb: 	num_hidden_layers: 3


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10