# IMPLEMENTING ENSEMBLE OF BIDIRECTIONAL LSTM AND LSTM

In [13]:
import json
import numpy as np
import re
import io
import nltk
import h5py
import keras as k
from keras.layers.embeddings import Embedding
from keras.layers import Input, Dense, Dropout, RepeatVector, Activation, merge, Lambda, Flatten, Reshape,Permute
from keras.layers import LSTM, Bidirectional, TimeDistributed, GRU
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras import optimizers
from keras.optimizers import Adam, RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import concatenate
from sklearn.metrics import f1_score
from keras_self_attention import SeqSelfAttention

In [2]:
embeddings_index = {}
f = open( 'glove.6B.100d.txt',encoding="utf8")
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index))

Found 400000 word vectors.


In [3]:
context = h5py.File('context.h5','r')
questions = h5py.File('questions.h5','r')
answers = h5py.File('answers.h5','r')
ans_begin = h5py.File('begin.h5','r')
ans_end = h5py.File('end.h5','r')

In [4]:
c_data = context['context'][:]
qn_data = questions['questions'][:]
ans_data = answers['answers'][:]
begin_ans = ans_begin['begin'][:]
end_ans = ans_end['end'][:]

In [5]:
# loding vocabulary
word_index = np.load('words.npy').item()

In [6]:
embedding_matrix = np.zeros((len(word_index) + 1, 100))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

In [7]:
vocab_size = len(word_index) + 1
#embedding_vector_length = 50
batch_size = 64
max_span_begin = np.amax(begin_ans)
max_span_end = np.amax(end_ans)
train = 10000


In [8]:
print("Vocab Size")
vocab_size

Vocab Size


119616

In [9]:
context_input = Input(shape=(700, ), dtype='int32', name='c_data')
embed_c = Embedding(input_dim=vocab_size, output_dim=100, weights=[embedding_matrix], 
              input_length=700, trainable=False)(context_input)
lstm_1 = LSTM(256, return_sequences=True, implementation=2)(embed_c)
bidir_1 = Bidirectional(LSTM(500, return_sequences=True, implementation=2), merge_mode='concat')(lstm_1)
bidir_12 = Bidirectional(LSTM(500, return_sequences=True, implementation=2), merge_mode='concat')(bidir_1)
drop_1 = Dropout(0.5)(bidir_12)

In [10]:
ques_input = Input(shape=(100, ), dtype='int32', name='qn_data')
embed_q = Embedding(input_dim=vocab_size, output_dim=100, weights=[embedding_matrix], 
              input_length=100, trainable=False)(ques_input)
lstm_2 = LSTM(256, return_sequences=True, implementation=2)(embed_q)
bidir_2 = Bidirectional(LSTM(500, return_sequences=True, implementation=2), merge_mode='concat')(lstm_2)
bidir_22 = Bidirectional(LSTM(500, return_sequences=True, implementation=2), merge_mode='concat')(bidir_2)
drop_2 = Dropout(0.5)(bidir_22)

In [11]:
merge_layer = concatenate([drop_1, drop_2], axis=1)
bidir_3 = Bidirectional(LSTM(500, implementation=2), merge_mode='mul')(merge_layer)
drop_3 =  Dropout(0.4)(bidir_3)
softmax_1 = Dense(max_span_begin, activation='softmax')(bidir_3)
softmax_2 = Dense(max_span_end, activation='softmax')(bidir_3)
model = Model(inputs=[context_input, ques_input], outputs=[softmax_1, softmax_2])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
c_data (InputLayer)             (None, 700)          0                                            
__________________________________________________________________________________________________
qn_data (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 700, 100)     11961600    c_data[0][0]                     
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 100, 100)     11961600    qn_data[0][0]                    
__________________________________________________________________________________________________
lstm_1 (LS

In [12]:
model_history = model.fit([c_data[:train], qn_data[:train]],
                        [begin_ans[:train], end_ans[:train]], verbose=2,
                         batch_size=batch_size, epochs=100)

Epoch 1/100
 - 1962s - loss: 14.1034 - dense_1_loss: 7.0156 - dense_2_loss: 7.0878 - dense_1_acc: 0.0274 - dense_2_acc: 0.0032
Epoch 2/100
 - 1939s - loss: 13.3732 - dense_1_loss: 6.6457 - dense_2_loss: 6.7275 - dense_1_acc: 0.0280 - dense_2_acc: 0.0058
Epoch 3/100
 - 1940s - loss: 13.3343 - dense_1_loss: 6.6257 - dense_2_loss: 6.7086 - dense_1_acc: 0.0280 - dense_2_acc: 0.0048
Epoch 4/100
 - 1941s - loss: 13.3321 - dense_1_loss: 6.6259 - dense_2_loss: 6.7062 - dense_1_acc: 0.0280 - dense_2_acc: 0.0048
Epoch 5/100
 - 1940s - loss: 13.3299 - dense_1_loss: 6.6254 - dense_2_loss: 6.7045 - dense_1_acc: 0.0280 - dense_2_acc: 0.0046
Epoch 6/100
 - 1939s - loss: 13.3277 - dense_1_loss: 6.6239 - dense_2_loss: 6.7038 - dense_1_acc: 0.0273 - dense_2_acc: 0.0054
Epoch 7/100
 - 1939s - loss: 13.3250 - dense_1_loss: 6.6210 - dense_2_loss: 6.7040 - dense_1_acc: 0.0280 - dense_2_acc: 0.0059
Epoch 8/100
 - 1939s - loss: 13.3240 - dense_1_loss: 6.6225 - dense_2_loss: 6.7015 - dense_1_acc: 0.0280 - dens

KeyboardInterrupt: 

Stopping the Model, since there seems to be no improvement in the model performance for more than 10 epochs. Each epoch takes about 32 minutes in a GPU. This seems to be a waste of memory and time if run further.