In [None]:
# Libraries
import numpy as np
import matplotlib.pyplot as plt
from utils.preprocessing import *
from utils.model import *
from utils.config import *
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [None]:
# Paths
# All paths are relative, so they resolve against the directory the notebook
# is run from.
PATH_ENG = 'data/small_vocab_en'        # passed to read_english()
PATH_FR = 'data/small_vocab_fr'         # passed to read_french()
PATH_GLOVE = 'data/glove.6B.100d.txt'   # passed to glove_embedding()
MODEL_SAVE_PATH = 'weights/model.h5'    # destination of train_model.save()

In [None]:
# Reading dataset
# read_english / read_french are not defined in this notebook; they come from
# the star imports in the first cell.
# read_french returns two values — presumably the target sentences and their
# decoder-input counterparts; TODO confirm against the helper.
english = read_english(PATH_ENG)
french, french_inputs = read_french(PATH_FR)

In [None]:
# Finding the maximum length of an input sentence.
# The result is later passed to padding() and to model().
# NOTE(review): this is measured on `english` as returned by read_english(),
# before tokenization, whereas max_len_target is measured on the tokenized
# target sequences. If `english` holds raw strings, len(s) counts characters
# rather than tokens — confirm which is intended.
max_len_input = max(len(s) for s in english)

In [None]:
# Tokenizing English
# input_sequence is later handed to padding(); word2idx_english (presumably a
# word -> integer index mapping, TODO confirm) is later used by
# glove_embedding() and to size the input vocabulary (num_words).
input_sequence, word2idx_english = tokenize_english(english)

In [None]:
# Tokenizing French
# Produces two sequence lists (targets and decoder inputs) that are later
# handed to padding(), plus word2idx_french, whose size determines
# num_words_output.
target_sequence, target_sequence_inputs, word2idx_french = tokenize_french(french, french_inputs)

In [None]:
# Size of the output layer: one slot per entry of word2idx_french plus one
# extra (presumably for a reserved index 0 — TODO confirm).
num_words_output = 1 + len(word2idx_french)
# Length of the longest tokenized target sequence.
max_len_target = max(map(len, target_sequence))

In [None]:
# Padding all inputs for encoder and decoders
# padding() is a project helper; it receives the three tokenized sequence
# lists together with the two maximum lengths computed above and returns the
# three arrays that are fed to the model (two as inputs, one as the source of
# the one-hot targets).
encoder_inputs, decoder_inputs, decoder_targets = padding(input_sequence,
                                                          target_sequence, 
                                                          target_sequence_inputs, 
                                                          max_len_input, 
                                                          max_len_target)

In [None]:
# Loading GloVe Word Embedding
# glove_embedding() is a project helper that reads PATH_GLOVE. Of its two
# return values only embedding_matrix is used later in this notebook (it is
# passed to model()); word2vec is not referenced again here.
word2vec, embedding_matrix = glove_embedding(word2idx_english, PATH_GLOVE)

In [None]:
# Input vocabulary size handed to model(): the English mapping size plus one
# (presumably for a reserved index 0 — TODO confirm), capped at MAX_NUM_WORDS.
num_words = min(MAX_NUM_WORDS, len(word2idx_english) + 1)

In [None]:
# One-hot encoding the decoder targets.
# Result shape: (number of samples, max_len_target, num_words_output), float32.
decoder_targets_one_hot = np.zeros(
  (
    len(english),
    max_len_target,
    num_words_output
  ),
  dtype='float32'
)
# Set one_hot[i, t, decoder_targets[i, t]] = 1 for every sample i and timestep
# t in a single advanced-indexing assignment instead of a nested Python loop.
# Every position is marked, including those whose word index is 0.
# Assumes decoder_targets is a rectangular 2-D integer array, as returned by
# padding() — TODO confirm.
target_ids = np.asarray(decoder_targets)
sample_idx = np.arange(target_ids.shape[0])[:, None]   # column vector of i
step_idx = np.arange(target_ids.shape[1])[None, :]     # row vector of t
decoder_targets_one_hot[sample_idx, step_idx, target_ids] = 1

In [None]:
# creating object of model class
# `model` is a project class (star-imported in the first cell). It is given the
# input vocabulary size, the GloVe embedding matrix, both maximum sequence
# lengths and the output vocabulary size.
x = model(num_words, embedding_matrix, max_len_input, max_len_target, num_words_output)

In [None]:
# creating model
# Seq2SeqModel() returns the object that is compiled, fitted and saved below
# (presumably a Keras Model — TODO confirm in the model class).
train_model = x.Seq2SeqModel()

In [None]:
# compile the model
# categorical_crossentropy is paired with the one-hot encoded targets
# (decoder_targets_one_hot) that are passed to fit().
train_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# train the model
# z is the all-zero initial decoder state, passed twice (once for s, once for c).
# It is sized from encoder_inputs rather than from MAX_SAMPLES: fit() requires
# every input array to have the same number of samples, and this keeps that
# true whatever number of sentences was actually loaded.
z = np.zeros((len(encoder_inputs), LATENT_DIM_DECODER)) # initial [s, c]
# The returned History object `r` is used for the loss / accuracy plots below.
r = train_model.fit(
  [encoder_inputs, decoder_inputs, z, z], decoder_targets_one_hot,
  batch_size=BATCH_SIZE,
  epochs=EPOCHS,
  validation_split=0.2  # hold out 20% of the samples for validation
)

In [None]:
# Saving the trained model to MODEL_SAVE_PATH.
# NOTE(review): this calls save(), not save_weights(). If train_model is a
# Keras Model, save() writes the whole model (architecture, weights and
# optimizer state), not only the weights — confirm the loading code expects that.
train_model.save(MODEL_SAVE_PATH)

In [None]:
# Plotting the loss curve: training vs. validation loss per epoch, taken from
# the History object returned by fit().
plt.plot(r.history['loss'], label='loss')
plt.plot(r.history['val_loss'], label='val_loss')
# Label the axes so the figure can be read on its own.
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# Plotting the accuracy curve: training vs. validation accuracy per epoch.
# Depending on the Keras version, the metric requested as 'accuracy' is stored
# in the history under either 'acc' or 'accuracy'. Use whichever key this run
# produced instead of hard-coding one, which would raise KeyError on the other.
acc_key = 'acc' if 'acc' in r.history else 'accuracy'
plt.plot(r.history[acc_key], label='acc')
plt.plot(r.history['val_' + acc_key], label='val_acc')
# Label the axes so the figure can be read on its own.
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# See utils/config.py (star-imported in the first cell) for the hyperparameters used in this notebook.