In [1]:
import warnings
warnings.filterwarnings('ignore')

import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.losses import CategoricalCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, TimeDistributed

In [2]:
training_data = pickle.load(open("../data/train-v1.1.pkl", "rb"))
print("%d questions in our training set" % len(training_data))

87599 questions in our training set


In [3]:
val_data = pickle.load(open("../data/dev-v1.1.pkl", "rb"))
print("%d questions in our validation set" % len(val_data))

10570 questions in our validation set


In [4]:
word_to_index, index_to_words, word_to_vec_map = pickle.load(open("../embeddings/glove.6B.50d.pkl", "rb"))
vocab_len = len(index_to_words)
print("%d words in our model's vocabulary" % vocab_len)

70056 words in our model's vocabulary


In [5]:
max_qc_len = max([len(qac["question"] + ["<sep>"] + qac["context"]) for qac in training_data])
max_ans_len = max([len(qac["answer"]) + 1 for qac in training_data])

encoder_input_data = np.zeros((len(training_data), max_qc_len))
decoder_input_data = np.zeros((len(training_data), max_ans_len))
decoder_target_data = np.zeros((len(training_data), max_ans_len), dtype=np.uint8)

for i in range(encoder_input_data.shape[0]):
    qc = training_data[i]["question"] + ["<sep>"] + training_data[i]["context"]
    encoder_input_data[i,:] = [word_to_index[word] if word in word_to_index else word_to_index["<unk>"] for word in qc] + [0 for j in range(max_qc_len - len(qc))]
    
    ans_input = ["<start>"] + training_data[i]["answer"]
    decoder_input_data[i,:] = [word_to_index[word] if word in word_to_index else word_to_index["<unk>"] for word in ans_input] + [0 for j in range(max_ans_len - len(ans_input))]
    
    ans_output = training_data[i]["answer"] + ["<end>"]
    decoder_target_data[i,:] = [word_to_index[word] if word in word_to_index else word_to_index["<unk>"] for word in ans_output] + [0 for j in range(max_ans_len - len(ans_input))]

encoder_input_data.shape, decoder_input_data.shape

((87599, 766), (87599, 47))

In [6]:
emb_dim = len(word_to_vec_map['a'])
state_dim = 256
batch_size = 16
epochs = 1
learning_rate = 0.01
path = "saves/keras_LSTM.h5"
load = False

In [7]:
if load:
    model = load_model(path)
else:
    encoder_inputs = Input(shape=(max_qc_len,), dtype='int32')

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False, mask_zero=True)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    
    encoder_embeddings = embedding_layer(encoder_inputs)
    
    encoder = LSTM(state_dim, return_state=True)(encoder_embeddings)

    encoder_outputs, state_h, state_c = encoder

    encoder_states = [state_h, state_c]


    decoder_inputs = Input(shape=(max_ans_len,))
    decoder_embeddings = embedding_layer(decoder_inputs)
    decoder_lstm = LSTM(state_dim, return_sequences=True, return_state=True)

    decoder_outputs, _, _ = decoder_lstm(decoder_embeddings, initial_state=encoder_states)
    outputs = TimeDistributed(Dense(vocab_len, activation='softmax'))(decoder_outputs)


    model = Model([encoder_inputs, decoder_inputs], outputs)
model.summary()

W1205 01:16:57.752143 140386153781056 deprecation.py:506] From /opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1205 01:16:58.650388 140386153781056 deprecation.py:506] From /opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1205 01:16:59.715981 140386153781056 deprecation.py:323] From /opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:3794: add_dispatch_support.<locals>.wrapper (from tensorflow.pytho

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 47)]         0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 766)]        0                                            
__________________________________________________________________________________________________
embedding (Embedding)           multiple             3502800     input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
lstm (LSTM)                     [(None, 256), (None, 314368      embedding[0][0]              

In [None]:
opt = Adam(lr=0.01)
loss_fn = SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer=opt, loss=loss_fn)
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size, epochs=epochs)
model.save(path)

 1088/87599 [..............................] - ETA: 2:43:03 - loss: 1.0540