# Implementing a GRU model for SQuAD

Importing libraries

In [3]:
  
import numpy as np
import h5py

from keras.models import Sequential, Model
from keras.layers import Embedding, Dropout, Dense, Activation
from keras.layers import LSTM, Bidirectional, Merge, Input,GRU
from keras.layers import concatenate


Using TensorFlow backend.


Getting the pretrained GloVe embeddings

In [2]:
# Parse the pretrained GloVe text file into a dict mapping each word to its
# float32 coefficient vector. Each non-empty line is "<word> <c1> <c2> ...".
# NOTE(review): the path below is an absolute, machine-specific location;
# consider moving it to a configurable constant so the notebook runs elsewhere.
embeddings_index = {}
# `with` guarantees the file handle is released even if parsing a line raises.
with open( 'C:/Users/bhash/Downloads/glove.6B/glove.6B.300d.txt',encoding="utf8") as f:
    for line in f:
        values = line.split()
        if not values:
            # Blank line (e.g. a trailing newline at end of file): nothing to parse.
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

print('Found %s word vectors.' % len(embeddings_index))

Found 400000 word vectors.


Loading processed files from disk

In [4]:

# loading data
def _read_h5_dataset(path, key):
    """Open the HDF5 file at `path` read-only and return dataset `key` fully read into memory."""
    with h5py.File(path, 'r') as hf:
        return hf[key][:]


# One dataset per file; the names below are what the later cells consume.
context_array = _read_h5_dataset('context.h5', 'context')
question_array = _read_h5_dataset('questions.h5', 'questions')
begin_span = _read_h5_dataset('begin.h5', 'begin')
end_span = _read_h5_dataset('end.h5', 'end')

In [4]:

# loading vocabulary
# The .npy file holds a pickled Python dict wrapped in a 0-d object array, and
# `.item()` unwraps it. NumPy >= 1.16.3 refuses to unpickle object arrays unless
# allow_pickle=True is passed explicitly.
# NOTE(review): unpickling can execute arbitrary code; only load files you created or trust.
word_index = np.load('word_to_indx.npy', allow_pickle=True).item()




In [5]:
# Sizes and training settings shared by the cells below.
# One more row than there are vocabulary entries
# (presumably index 0 is reserved for padding -- TODO confirm against the preprocessing).
vocab_size = 1 + len(word_index)
# NOTE(review): not referenced by any cell visible here; the Embedding layers use 300 dimensions.
embedding_vector_length = 50
# Largest begin / end label present in the loaded answer-span arrays.
max_span_begin = begin_span.max()
max_span_end = end_span.max()
# Mini-batch size passed to model.fit.
batch = 64
# Number of leading examples used for training, since one epoch
# on the full data is expensive.
slce = 1000


Creating the embedding matrix

In [6]:

# Build the (len(word_index) + 1, 300) weight matrix for the Embedding layers:
# row `row` receives the pretrained vector of the word mapped to index `row`.
# Words with no pretrained vector keep their all-zero row.
embedding_matrix = np.zeros((len(word_index) + 1, 300))
for token, row in word_index.items():
    pretrained = embeddings_index.get(token)
    if pretrained is None:
        continue
    embedding_matrix[row] = pretrained

In [7]:
# Two-branch span-prediction network:
#   * context branch : 700 token ids -> frozen pretrained embedding -> GRU(128) -> dropout
#   * question branch: 100 token ids -> frozen pretrained embedding -> GRU(128) -> dropout
# The two sequences are concatenated along the time axis (700 + 100 steps),
# summarised by a bidirectional LSTM, and fed to two softmax heads that
# classify the answer's begin and end positions.

# model1 (context branch)
context_input = Input(shape=(700, ), dtype='int32', name='context_input')
x = Embedding(input_dim=vocab_size, output_dim=300, weights=[embedding_matrix],
              input_length=700, trainable=False)(context_input)
# `units` is the Keras 2 name of the argument formerly called `output_dim`.
gru_out = GRU(units=128, return_sequences=True)(x)

drop_1 = Dropout(0.5)(gru_out)

# model2 (question branch)
ques_input = Input(shape=(100, ), dtype='int32', name='ques_input')
x = Embedding(input_dim=vocab_size, output_dim=300, weights=[embedding_matrix],
              input_length=100, trainable=False)(ques_input)
gru_out = GRU(units=128, return_sequences=True)(x)
drop_2 = Dropout(0.5)(gru_out)

# merger model
merge_layer = concatenate([drop_1, drop_2], axis=1)
# merge_mode='mul' multiplies the forward and backward LSTM outputs element-wise.
biLSTM = Bidirectional(LSTM(512, implementation=2), merge_mode='mul')(merge_layer)
drop_3 =  Dropout(0.5)(biLSTM)
# sparse_categorical_crossentropy expects integer labels in [0, units), so a
# head needs (largest label + 1) units for the largest label to be a valid class.
# The heads read from drop_3 so the final dropout is actually applied
# (previously they were wired to biLSTM and drop_3 was dead code).
softmax_1 = Dense(int(max_span_begin) + 1, activation='softmax')(drop_3)
softmax_2 = Dense(int(max_span_end) + 1, activation='softmax')(drop_3)

model = Model(inputs=[context_input, ques_input], outputs=[softmax_1, softmax_2])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

  """


Instructions for updating:
keep_dims is deprecated, use keepdims instead


  del sys.path[0]


Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
context_input (InputLayer)       (None, 700)           0                                            
____________________________________________________________________________________________________
ques_input (InputLayer)          (None, 100)           0                                            
____________________________________________________________________________________________________
embedding_1 (Embedding)          (None, 700, 300)      35884800    context_input[0][0]              
____________________________________________________________________________________________________
embedding_2 (Embedding)          (None, 100, 3

In [20]:
# Fit on the first `slce` examples only. Inputs are [context, question] and
# targets are [begin, end], matching the order of the model's inputs/outputs.
# verbose=2 prints one summary line per epoch.
model_history = model.fit(
    [context_array[:slce], question_array[:slce]],
    [begin_span[:slce], end_span[:slce]],
    batch_size=batch,
    epochs=10,
    verbose=2,
)


Epoch 1/10
1017s - loss: 15.8509 - dense_3_loss: 7.9253 - dense_4_loss: 7.9257 - dense_3_acc: 0.0180 - dense_4_acc: 0.0060
Epoch 2/10
1540s - loss: 13.8766 - dense_3_loss: 6.9241 - dense_4_loss: 6.9525 - dense_3_acc: 0.0170 - dense_4_acc: 0.0100
Epoch 3/10
1334s - loss: 12.4359 - dense_3_loss: 6.2234 - dense_4_loss: 6.2125 - dense_3_acc: 0.0260 - dense_4_acc: 0.0050
Epoch 4/10
1277s - loss: 12.1654 - dense_3_loss: 6.0788 - dense_4_loss: 6.0866 - dense_3_acc: 0.0290 - dense_4_acc: 0.0060
Epoch 5/10


KeyboardInterrupt: 

Training was interrupted manually during epoch 5, i.e. after 4 completed epochs. Accuracy was barely improving and each epoch took roughly 17–26 minutes, so running further would have cost a lot of time and memory for little gain.

In [21]:
# Persist the model in two parts so it can be rebuilt later:
#   model.json -> architecture, as the JSON string returned by model.to_json()
#   model1.h5  -> weights, in HDF5 format
model1_json = model.to_json()
with open("model.json", "w") as arch_file:
    arch_file.write(model1_json)

model.save_weights("model1.h5")
print("Saved model to disk")

Saved model to disk


In [8]:
from keras.models import model_from_json

# Rebuild the network from its saved JSON architecture description...
with open('model.json', 'r') as arch_file:
    architecture_json = arch_file.read()
model = model_from_json(architecture_json)

# ...then restore the saved weights into the freshly built model.
model.load_weights('model1.h5')