# Ensemble of Bidirectional LSTM and LSTM Model

In [29]:
import json
import numpy as np
import re
import io
import nltk
import h5py
import keras as k
from keras.layers.embeddings import Embedding
from keras.layers import Input, Dense, Dropout, RepeatVector, Activation, merge, Lambda, Flatten, Reshape,Permute
from keras.layers import LSTM, Bidirectional, TimeDistributed, GRU
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras import optimizers
from keras.optimizers import Adam, RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import concatenate
from sklearn.metrics import f1_score
from keras_self_attention import SeqSelfAttention

In [2]:
embeddings_index = {}
f = open( 'glove.6B.100d.txt',encoding="utf8")
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index))

Found 400000 word vectors.


In [3]:
context = h5py.File('context.h5','r')
questions = h5py.File('questions.h5','r')
answers = h5py.File('answers.h5','r')
ans_begin = h5py.File('begin.h5','r')
ans_end = h5py.File('end.h5','r')

In [5]:
c_data = context['context'][:]
qn_data = questions['questions'][:]
ans_data = answers['answers'][:]
begin_ans = ans_begin['begin'][:]
end_ans = ans_end['end'][:]

In [6]:
# loding vocabulary
word_index = np.load('words.npy').item()

In [7]:
embedding_matrix = np.zeros((len(word_index) + 1, 100))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

In [22]:
vocab_size = len(word_index) + 1
#embedding_vector_length = 50
batch_size = 64
max_span_begin = np.amax(begin_ans)
max_span_end = np.amax(end_ans)
train = 10000


In [9]:
print("Vocab Size")
vocab_size

Vocab Size


119616

In [46]:
context_input = Input(shape=(700, ), dtype='int32', name='c_data')
embed_c = Embedding(input_dim=vocab_size, output_dim=100, weights=[embedding_matrix], 
              input_length=700, trainable=False)(context_input)
lstm_1 = LSTM(256, return_sequences=True, implementation=2)(embed_c)
bidir_1 = Bidirectional(LSTM(500, return_sequences=True, implementation=2), merge_mode='concat')(lstm_1)
drop_1 = Dropout(0.5)(bidir_1)

In [47]:
ques_input = Input(shape=(100, ), dtype='int32', name='qn_data')
embed_q = Embedding(input_dim=vocab_size, output_dim=100, weights=[embedding_matrix], 
              input_length=100, trainable=False)(ques_input)
lstm_2 = LSTM(256, return_sequences=True, implementation=2)(embed_q)
bidir_2 = Bidirectional(LSTM(500, return_sequences=True, implementation=2), merge_mode='concat')(lstm_2)
drop_2 = Dropout(0.5)(bidir_2)

In [48]:
merge_layer = concatenate([drop_1, drop_2], axis=1)
bidir_3 = Bidirectional(LSTM(500, implementation=2), merge_mode='mul')(merge_layer)
drop_3 =  Dropout(0.4)(bidir_3)
softmax_1 = Dense(max_span_begin, activation='softmax')(bidir_3)
softmax_2 = Dense(max_span_end, activation='softmax')(bidir_3)
model = Model(inputs=[context_input, ques_input], outputs=[softmax_1, softmax_2])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
c_data (InputLayer)             (None, 700)          0                                            
__________________________________________________________________________________________________
qn_data (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
embedding_16 (Embedding)        (None, 700, 100)     11961600    c_data[0][0]                     
__________________________________________________________________________________________________
embedding_17 (Embedding)        (None, 100, 100)     11961600    qn_data[0][0]                    
__________________________________________________________________________________________________
lstm_21 (L

In [49]:
model_history = model.fit([c_data[:train], qn_data[:train]],
                        [begin_ans[:train], end_ans[:train]], verbose=2,
                         batch_size=batch_size, epochs=20)

Epoch 1/20
 - 1294s - loss: 14.1160 - dense_11_loss: 7.0224 - dense_12_loss: 7.0936 - dense_11_acc: 0.0267 - dense_12_acc: 0.0044
Epoch 2/20
 - 1280s - loss: 13.3320 - dense_11_loss: 6.6253 - dense_12_loss: 6.7068 - dense_11_acc: 0.0280 - dense_12_acc: 0.0057
Epoch 3/20
 - 1275s - loss: 13.2753 - dense_11_loss: 6.5972 - dense_12_loss: 6.6782 - dense_11_acc: 0.0280 - dense_12_acc: 0.0055
Epoch 4/20
 - 1272s - loss: 13.8021 - dense_11_loss: 6.8672 - dense_12_loss: 6.9350 - dense_11_acc: 0.0263 - dense_12_acc: 0.0066
Epoch 5/20
 - 1275s - loss: 13.9166 - dense_11_loss: 6.9313 - dense_12_loss: 6.9853 - dense_11_acc: 0.0279 - dense_12_acc: 0.0049
Epoch 6/20
 - 1270s - loss: 13.4011 - dense_11_loss: 6.6634 - dense_12_loss: 6.7378 - dense_11_acc: 0.0280 - dense_12_acc: 0.0072
Epoch 7/20
 - 1276s - loss: 13.3434 - dense_11_loss: 6.6338 - dense_12_loss: 6.7097 - dense_11_acc: 0.0279 - dense_12_acc: 0.0073
Epoch 8/20
 - 1273s - loss: 13.4126 - dense_11_loss: 6.6698 - dense_12_loss: 6.7428 - dens

In [50]:
t_context = h5py.File('context_test.h5','r')
t_questions = h5py.File('questions_test.h5','r')
t_answers = h5py.File('answers_test.h5','r')
t_ans_begin = h5py.File('begin_test.h5','r')
t_ans_end = h5py.File('end_test.h5','r')

In [51]:
t_c_data = t_context['context'][:]
t_qn_data = t_questions['questions'][:]
t_ans_data = t_answers['answers'][:]
t_begin_ans = t_ans_begin['begin'][:]
t_end_ans = t_ans_end['end'][:]

In [52]:
predictions = model.predict([t_c_data,t_qn_data], batch_size=128)

In [53]:
print(predictions[0].shape, predictions[1].shape)
print(predictions[0].shape, predictions[1].shape)

(20302, 3126) (20302, 3136)
(20302, 3126) (20302, 3136)


In [54]:
ansBegin = np.zeros((predictions[0].shape[0],), dtype=np.int32)
ansEnd = np.zeros((predictions[0].shape[0],),dtype=np.int32) 

In [55]:
f1_b = f1_score(t_begin_ans, ansBegin, average='micro')  
f1_e = f1_score(t_end_ans, ansEnd, average='micro') 
print("F1 Score")
f1_b + f1_e

F1 Score


0.023002659836469312