In [None]:
# Prerequisite: `pip install ipynb` -- lets this notebook import code from other notebooks in the same folder.

In [None]:
import wandb
from wandb.keras import WandbCallback
from tensorflow import keras
from ipynb.fs.full.load_data import load_data_prediction
from ipynb.fs.full.validation_code import *

In [None]:
# Authenticate this kernel session with Weights & Biases; the sweep cell further
# down creates a sweep and logs metrics through the wandb API.
wandb.login()

In [1]:
def obtain_params():
    """Prompt the user on stdin for the model and training hyper-parameters.

    The cell type is stripped and lower-cased, and the prompt is repeated until
    one of ``lstm``, ``rnn`` or ``gru`` is entered, because the model-building
    code compares it against exactly those lower-case strings.

    Returns:
        tuple: ``(batch_size, embedding_size, encoder_layers, decoder_layers,
        hidden_layer_size, cell_type, drop, dropout, beam_size, epochs)``.
        ``drop`` is the raw y/n answer as typed; ``dropout`` is the entered
        rate as a float, or ``0`` when dropout was declined.

    Raises:
        ValueError: if a numeric prompt receives text that ``int``/``float``
            cannot parse.
    """
    valid_cell_types = ("lstm", "rnn", "gru")

    batch_size = int(input("Batch size: "))
    embedding_size = int(input("Embedding size: "))
    encoder_layers = int(input("Number of encoder layers: "))
    decoder_layers = int(input("Number of decoder layers: "))
    hidden_layer_size = int(input("Size of hidden layer: "))

    # Normalise so that "LSTM" or " gru " are accepted; keep asking on anything else.
    cell_type = input("Enter the cell type -lstm/rnn/gru ").strip().lower()
    while cell_type not in valid_cell_types:
        cell_type = input("Enter the cell type -lstm/rnn/gru ").strip().lower()

    drop = input("Incorporate Dropout? - y/n ")
    # Compare case-insensitively so that "Y" is not silently treated as "no".
    if drop.strip().lower() == 'y':
        dropout = float(input("Enter dropout rate: "))
    else:
        dropout = 0
    print("\n")

    beam_size = int(input("Enter beam width - beam width=0 enables a standard decoder"))
    epochs = int(input("Enter the number of epochs: "))
    return batch_size, embedding_size, encoder_layers, decoder_layers, hidden_layer_size, cell_type, drop, dropout, beam_size, epochs

In [None]:
def train_test():
    """Train a seq2seq model from interactively entered settings and print its accuracy.

    Data splits and vocabulary properties come from ``load_data_split()`` and the
    hyper-parameters from ``obtain_params()``. A stacked encoder/decoder of the
    chosen cell type is built, fitted on the training split, and then evaluated
    with ``valid_func`` on the validation split and on the test split; the
    accuracy and the number of correct predictions are printed for each.

    Raises:
        ValueError: if the chosen cell type is not "rnn", "lstm" or "gru".
    """
    # Load train, val and test data. Also load the encoder and decoder properties.
    # NOTE(review): load_data_split is not imported by name in this notebook;
    # presumably it is provided by the star import from validation_code -- confirm.
    (
        (encoder_train_input_data, decoder_train_input_data, decoder_train_target_data),
        (encoder_val_input_data, decoder_val_input_data, decoder_val_target_data),
        (val_input_words, val_target_words),
        (encoder_test_input_data, test_input_words, test_target_words),
        (num_encoder_characters, num_decoder_characters, max_encoder_seq_length, max_decoder_seq_length),
        (target_characters_index, inverse_target_characters_index),
    ) = load_data_split()

    # Obtain parameters
    (batch_size, embedding_size, encoder_layers, decoder_layers, hidden_layer_size,
     cell_type, drop, dropout, beam_size, epochs) = obtain_params()

    recurrent_layers = {
        "rnn": keras.layers.SimpleRNN,
        "lstm": keras.layers.LSTM,
        "gru": keras.layers.GRU,
    }
    if cell_type not in recurrent_layers:
        # Without this check no recurrent layer would be added at all.
        raise ValueError("Unknown cell type %r; expected one of rnn/lstm/gru" % (cell_type,))
    recurrent_layer = recurrent_layers[cell_type]

    enc_dims = [hidden_layer_size] * encoder_layers
    dec_dims = [hidden_layer_size] * decoder_layers

    # Encoder: embedding followed by the stacked recurrent layers.
    encoder_inputs = keras.Input(shape=(None, ))
    encoder_outputs = keras.layers.Embedding(input_dim=num_encoder_characters, output_dim=embedding_size, input_length=max_encoder_seq_length)(encoder_inputs)

    encoder_states = list()
    for units in enc_dims:
        # With return_state=True the layer yields the sequence output followed by
        # its state tensor(s): one for SimpleRNN/GRU, two (h, c) for LSTM.
        # Only the states of the last encoder layer are kept.
        encoder_outputs, *encoder_states = recurrent_layer(units, dropout=dropout, return_state=True, return_sequences=True)(encoder_outputs)

    # Decoder: embedding followed by the stacked recurrent layers.
    decoder_inputs = keras.Input(shape=(None, ))
    decoder_outputs = keras.layers.Embedding(input_dim=num_decoder_characters, output_dim=embedding_size, input_length=max_decoder_seq_length)(decoder_inputs)

    decoder_states = encoder_states.copy()

    for units in dec_dims:
        decoder = recurrent_layer(units, dropout=dropout, return_sequences=True, return_state=True)
        # Every decoder layer starts from the final encoder states; the decoder's
        # own returned states are not needed during training.
        decoder_outputs = decoder(decoder_outputs, initial_state=decoder_states)[0]

    decoder_dense = keras.layers.Dense(num_decoder_characters, activation="softmax")
    decoder_outputs = decoder_dense(decoder_outputs)

    # Model definition
    model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

    model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])

    model.fit(
        [encoder_train_input_data, decoder_train_input_data],
        decoder_train_target_data,
        batch_size=batch_size,
        epochs=epochs)

    # Inference call for validation data: estimates the validation accuracy and
    # the number of correct predictions in the validation dataset.
    val_accuracy, count1 = valid_func(model, encoder_val_input_data, val_input_words, val_target_words, max_decoder_seq_length, max_encoder_seq_length, target_characters_index, inverse_target_characters_index, enc_dims, dec_dims, cell_type, beam_size)
    print("Validation accuracy", val_accuracy, "\n")
    print("Number of correct guesses", count1, "\n")

    # Inference call for test data. Comment out the following lines when training the model.
    test_accuracy, count1 = valid_func(model, encoder_test_input_data, test_input_words, test_target_words, max_decoder_seq_length, max_encoder_seq_length, target_characters_index, inverse_target_characters_index, enc_dims, dec_dims, cell_type, beam_size)
    print("test accuracy", test_accuracy, "\n")
    print("Number of correct guesses", count1, "\n")

In [None]:
# wandb sweep configuration
sweep_config = {

  "method": "random",

  'metric': {
      'name': 'accuracy',
      'goal': 'maximize'
  },

  "parameters": {
        "embedding_size": {
            "values": [256]
        },
        "encoder_layers" :{
            "values" : [3]
        },
        "decoder_layers": {
            "values": [1]
        },
        "hidden_layer_size": {
            "values": [256]
        },
        "cell_type": {
            "values": ["lstm"]
        },
        "dropout": {
            "values": [0.2]
        },
        "beam_size": {
            "values": [4]
        }
    }
}

# Load train, val and test data. Also load the encoder and decoder properties.
# This must happen before the agent is started because main() reads these
# module-level names.
# NOTE(review): load_data_split is not imported by name in this notebook;
# presumably it is provided by the star import from validation_code -- confirm.
(
    (encoder_train_input_data, decoder_train_input_data, decoder_train_target_data),
    (encoder_val_input_data, decoder_val_input_data, decoder_val_target_data),
    (val_input_words, val_target_words),
    (encoder_test_input_data, test_input_words, test_target_words),
    (num_encoder_characters, num_decoder_characters, max_encoder_seq_length, max_decoder_seq_length),
    (target_characters_index, inverse_target_characters_index),
) = load_data_split()


def main(config = None):
    """Train and evaluate one seq2seq model for a single wandb sweep run.

    Hyper-parameters are read from ``wandb.config``; batch size and epoch count
    are fixed inside the function. The data arrays and vocabulary properties are
    read from the module-level names loaded above. Training metrics are reported
    through ``WandbCallback``; ``val_accuracy``, ``count`` and ``test_accuracy``
    from ``valid_func`` are logged with ``wandb.log``.

    Args:
        config: optional configuration forwarded to ``wandb.init``.

    Raises:
        ValueError: if ``config.cell_type`` is not "rnn", "lstm" or "gru".
    """
    # wandb config
    run = wandb.init(config = config)
    config = wandb.config

    run.name = (
        f"Embedding Size: {config.embedding_size}"
        f" Cell Type: {config.cell_type}"
        f" Dropout: {config.dropout}"
        f" Beam Size: {config.beam_size}"
        f" Encoder Layers: {config.encoder_layers}"
        f" Decoder Layers: {config.decoder_layers}"
        f" Hidden Layer Size: {config.hidden_layer_size}"
    )

    # Parameters
    batch_size = 128
    epochs = 20
    embedding_size = config.embedding_size
    enc_dims = [config.hidden_layer_size] * config.encoder_layers
    dec_dims = [config.hidden_layer_size] * config.decoder_layers
    cell_type = config.cell_type
    dropout = config.dropout
    beam_size = config.beam_size

    recurrent_layers = {
        "rnn": keras.layers.SimpleRNN,
        "lstm": keras.layers.LSTM,
        "gru": keras.layers.GRU,
    }
    if cell_type not in recurrent_layers:
        # Without this check no recurrent layer would be added at all.
        raise ValueError("Unknown cell type %r; expected one of rnn/lstm/gru" % (cell_type,))
    recurrent_layer = recurrent_layers[cell_type]

    # Encoder: embedding followed by the stacked recurrent layers.
    encoder_inputs = keras.Input(shape = (None, ))
    encoder_outputs = keras.layers.Embedding(input_dim = num_encoder_characters, output_dim = embedding_size, input_length = max_encoder_seq_length)(encoder_inputs)

    encoder_states = list()
    for units in enc_dims:
        # With return_state=True the layer yields the sequence output followed by
        # its state tensor(s): one for SimpleRNN/GRU, two (h, c) for LSTM.
        # Only the states of the last encoder layer are kept.
        encoder_outputs, *encoder_states = recurrent_layer(units, dropout = dropout, return_state = True, return_sequences = True)(encoder_outputs)

    # Decoder: embedding followed by the stacked recurrent layers.
    decoder_inputs = keras.Input(shape=(None, ))
    decoder_outputs = keras.layers.Embedding(input_dim = num_decoder_characters, output_dim = embedding_size, input_length = max_decoder_seq_length)(decoder_inputs)

    decoder_states = encoder_states.copy()

    for units in dec_dims:
        decoder = recurrent_layer(units, dropout = dropout, return_sequences = True, return_state = True)
        # Every decoder layer starts from the final encoder states; the decoder's
        # own returned states are not needed during training.
        decoder_outputs = decoder(decoder_outputs, initial_state = decoder_states)[0]

    decoder_dense = keras.layers.Dense(num_decoder_characters, activation = "softmax")
    decoder_outputs = decoder_dense(decoder_outputs)

    # Create the model
    model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

    model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])

    model.fit(
        [encoder_train_input_data, decoder_train_input_data],
        decoder_train_target_data,
        batch_size = batch_size,
        epochs = epochs,
        callbacks = [WandbCallback()])

    # Inference call for validation data: estimates the validation accuracy and
    # the number of correct predictions in the validation dataset.
    val_accuracy, count1 = valid_func(model,encoder_val_input_data, val_input_words, val_target_words, max_decoder_seq_length,max_encoder_seq_length, target_characters_index, inverse_target_characters_index, enc_dims, dec_dims, cell_type, beam_size)
    wandb.log( { "val_accuracy": val_accuracy})
    wandb.log({"count": count1})

    # Inference call for test data. Comment out the following lines when training the model.
    test_accuracy,count1 = valid_func(model,encoder_test_input_data, test_input_words, test_target_words, max_decoder_seq_length, max_encoder_seq_length,target_characters_index, inverse_target_characters_index, enc_dims, dec_dims, cell_type, beam_size)
    wandb.log( { "test_accuracy": test_accuracy} )


# Start the sweep only now that main() and the data it reads are defined:
# wandb.agent invokes the function right away, so launching it earlier in the
# cell raises NameError on a fresh kernel.
sweep_id = wandb.sweep(sweep_config, project="Assignment 3 test data")
wandb.agent(sweep_id, project = "Assignment 3 test data", function = main,count=1)