In [1]:
import numpy as np
import h5py
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Embedding, Dropout, Dense, Activation
from keras.layers import LSTM, Bidirectional, Merge, Input
from keras.layers import concatenate
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
def _read_h5_dataset(path, key):
    """Read dataset `key` from the HDF5 file at `path` fully into memory.

    The file is opened read-only and closed again before returning.
    """
    with h5py.File(path, 'r') as hf:
        return hf[key][:]


# loading data: model inputs (context / question) and the two span targets
context_array = _read_h5_dataset('context.h5', 'context')
question_array = _read_h5_dataset('questions.h5', 'questions')
begin_span = _read_h5_dataset('begin.h5', 'begin')
end_span = _read_h5_dataset('end.h5', 'end')

# loading GloVe embeddings
embedding_matrix = _read_h5_dataset('embeddings_50.h5', 'embed')

# loading vocabulary
# The .npy file stores a pickled Python object (presumably a word -> index dict;
# verify against the script that wrote it). NumPy >= 1.16.3 refuses to unpickle
# unless allow_pickle=True is passed explicitly.
# NOTE(review): unpickling executes arbitrary code -- only load files you created.
word_index = np.load('word_to_indx.npy', allow_pickle=True).item()

In [4]:
# Sanity check: print the shape of every loaded array, one per line.
for loaded_array in (context_array, question_array, begin_span, end_span):
    print(loaded_array.shape)

(87598, 700)
(87598, 50)
(87598,)
(87598,)


In [9]:
#print((context_array[0]==context_array[1]))
#print((question_array[0]==question_array[1]))

In [5]:
# --- Training configuration ---
# Only the first `slce` examples are used per run, because one epoch over the
# full data set is expensive.
slce = 100
batch = 8

# --- Sizes derived from the loaded data ---
embedding_vector_length = 50
# One more than the number of vocabulary entries
# (presumably index 0 is reserved for padding -- TODO confirm).
vocab_size = 1 + len(word_index)
# Largest begin / end label value present in the targets.
max_span_begin = begin_span.max()
max_span_end = end_span.max()

In [6]:
# model1 -- context branch:
# 700 token ids -> frozen pretrained embeddings -> BiLSTM (per-step outputs) -> dropout
context_input = Input(name='context_input', shape=(700, ), dtype='int32')
context_embedding = Embedding(
    input_dim=vocab_size,
    output_dim=50,
    input_length=700,
    weights=[embedding_matrix],
    trainable=False,  # keep the pretrained vectors fixed during training
)
x = context_embedding(context_input)
context_encoder = Bidirectional(
    LSTM(256, implementation=2, return_sequences=True),
    merge_mode='concat',
)
lstm_out = context_encoder(x)
drop_1 = Dropout(0.5)(lstm_out)

In [7]:
# model2 -- question branch:
# 50 token ids -> frozen pretrained embeddings -> BiLSTM (per-step outputs) -> dropout
ques_input = Input(name='ques_input', shape=(50, ), dtype='int32')
question_embedding = Embedding(
    input_dim=vocab_size,
    output_dim=50,
    input_length=50,
    weights=[embedding_matrix],
    trainable=False,  # keep the pretrained vectors fixed during training
)
x = question_embedding(ques_input)
question_encoder = Bidirectional(
    LSTM(256, implementation=2, return_sequences=True),
    merge_mode='concat',
)
lstm_out = question_encoder(x)
drop_2 = Dropout(0.5)(lstm_out)

In [8]:
# merger model
# Join the encoded context and question along the time axis (axis=1), summarise
# the joint sequence with a second BiLSTM whose two directions are multiplied
# together, and predict the begin and end positions with two softmax heads.
merge_layer = concatenate([drop_1, drop_2], axis=1)
biLSTM = Bidirectional(LSTM(512, implementation=2), merge_mode='mul')(merge_layer)
drop_3 =  Dropout(0.5)(biLSTM)
# The targets are integer labels in 0..max inclusive, so each head needs max + 1
# output units. With only `max` units the largest label is out of range for
# sparse_categorical_crossentropy.
softmax_1 = Dense(int(max_span_begin) + 1, activation='softmax')(drop_3)
softmax_2 = Dense(int(max_span_end) + 1, activation='softmax')(drop_3)

model = Model(inputs=[context_input, ques_input], outputs=[softmax_1, softmax_2])
# Adam optimizers at three learning rates, picked up by the training cells.
adam01=optimizers.Adam(lr=0.1)
adam001=optimizers.Adam(lr=0.01)
adam=optimizers.Adam(lr=0.001)
# The model is intentionally left uncompiled here; each training cell calls
# model.compile(...) with the optimizer it wants before fitting.
model.summary()



__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
context_input (InputLayer)      (None, 700)          0                                            
__________________________________________________________________________________________________
ques_input (InputLayer)         (None, 50)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 700, 50)      5984650     context_input[0][0]              
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 50, 50)       5984650     ques_input[0][0]                 
__________________________________________________________________________________________________
bidirectio

In [10]:
# Disabled one-example smoke test of model.fit. It is wrapped in a string
# literal, so the code inside is never executed.
'''slce=1
model_history = model.fit([context_array[:slce], question_array[:slce]],
                        [begin_span[:slce], end_span[:slce]], verbose=2,
                         batch_size=batch, epochs=1)'''


In [11]:
from tensorflow.python.client import device_lib

def get_available_devices():
    """Return the names of all local devices reported by TensorFlow's device_lib."""
    device_names = []
    for device in device_lib.list_local_devices():
        device_names.append(device.name)
    return device_names

print(get_available_devices())


['/device:CPU:0']


In [14]:
# Disabled alternative: restore a previously saved model instead of training.
'''import keras
model=keras.models.load_model('modelstep30')
model.load_weights('./modelweights/QANet2_weights.h5')'''
# Phase 1: one epoch on examples [stindex, endindex) with the lr=0.1 Adam
# optimizer, then checkpoint the weights.
stindex = 0
endindex = 1000
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=adam01,
    metrics=['accuracy'],
)
history = model.fit(
    x=[context_array[stindex:endindex], question_array[stindex:endindex]],
    y=[begin_span[stindex:endindex], end_span[stindex:endindex]],
    batch_size=64,
    epochs=1,
    shuffle=True,
    verbose=2,
)
model.save_weights('./dropout/adam01epoch')

Epoch 1/1
 - 318s - loss: 29.2064 - dense_3_loss: 14.6762 - dense_4_loss: 14.5302 - dense_3_acc: 0.0040 - dense_4_acc: 0.0030


In [16]:

# Phase 2: resume from the phase-1 checkpoint, recompile with the lr=0.01 Adam
# optimizer and train five more epochs on the same slice.
# NOTE(review): stindex / endindex are inherited from an earlier cell.
model.load_weights('./dropout/adam01epoch')
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=adam001,
    metrics=['accuracy'],
)
history = model.fit(
    x=[context_array[stindex:endindex], question_array[stindex:endindex]],
    y=[begin_span[stindex:endindex], end_span[stindex:endindex]],
    batch_size=64,
    epochs=5,
    shuffle=True,
    verbose=2,
)
model.save_weights('./dropout/adam001_5epoch')

Epoch 1/5
 - 314s - loss: 31.9144 - dense_3_loss: 16.0539 - dense_4_loss: 15.8605 - dense_3_acc: 0.0040 - dense_4_acc: 0.0050
Epoch 2/5
 - 311s - loss: 31.8629 - dense_3_loss: 16.0539 - dense_4_loss: 15.8090 - dense_3_acc: 0.0040 - dense_4_acc: 0.0070
Epoch 3/5
 - 310s - loss: 31.8258 - dense_3_loss: 16.0360 - dense_4_loss: 15.7898 - dense_3_acc: 0.0040 - dense_4_acc: 0.0050
Epoch 4/5


KeyboardInterrupt: 

In [None]:
# Originally sketched as
#   triphase_modelfitter(model, optimizerlist, context_array, question_array,
#                        begin_span, end_span, batches, epochlist, slces)
# with a planned schedule of:
#   round 1: lr=0.1,   5 epochs,   10 questions, batch_size=32
#   round 2: lr=0.01,  5 epochs,  100 questions, batch_size=16
#   round 3: lr=0.001, 5 epochs, 1000 questions, batch_size=8
# This model has no dropout in the last layer.
# NOTE(review): the loop below does NOT follow that schedule yet. It runs a
# single step with hard-coded slice / batch size / epochs, and the four lists
# are defined but unused.
optimizerlist=[adam,adam,adam]
slces=[[0,1000],[0,1000],[1000,2000],[4500,5000]]#train first round with qs from 0 to 1000 
epochlist=[10,10,10,10]
batches=[64,64,16,16]
for step in range(1):
    # The model is not recompiled here, so it trains with whatever optimizer an
    # earlier cell compiled it with.
    #model.compile(optimizer=optimizerlist[step], loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    stindex=0
    endindex=1000

    history = model.fit([context_array[stindex:endindex], question_array[stindex:endindex]],
                    [begin_span[stindex:endindex], end_span[stindex:endindex]], verbose=2,
                     batch_size=64, epochs=25,shuffle=True)

    # summarize history for loss
    # fit() is called without validation data, so only the training curve
    # exists and the legend carries a single entry.
    plt.plot(history.history['loss'])
    plt.title('model loss step:'+str(step))
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train'], loc='upper left')
    plt.show()

    # Persist both the full model and a weights-only checkpoint for this step.
    modelname='model95epochs'+str(step)
    model.save(modelname)
    model.save_weights('./modelweights/QANet95epochs'+str(step)+'.h5')

Epoch 1/25
 - 311s - loss: 9.7595 - dense_3_loss: 4.8560 - dense_4_loss: 4.9035 - dense_3_acc: 0.0590 - dense_4_acc: 0.0570
Epoch 2/25
 - 312s - loss: 9.7302 - dense_3_loss: 4.8421 - dense_4_loss: 4.8881 - dense_3_acc: 0.0550 - dense_4_acc: 0.0570
Epoch 3/25
 - 311s - loss: 9.6968 - dense_3_loss: 4.8254 - dense_4_loss: 4.8714 - dense_3_acc: 0.0590 - dense_4_acc: 0.0580
Epoch 4/25
 - 311s - loss: 9.6573 - dense_3_loss: 4.8054 - dense_4_loss: 4.8519 - dense_3_acc: 0.0610 - dense_4_acc: 0.0550
Epoch 5/25
 - 312s - loss: 9.6250 - dense_3_loss: 4.7894 - dense_4_loss: 4.8355 - dense_3_acc: 0.0620 - dense_4_acc: 0.0590
Epoch 6/25
 - 311s - loss: 9.5909 - dense_3_loss: 4.7729 - dense_4_loss: 4.8180 - dense_3_acc: 0.0620 - dense_4_acc: 0.0590
Epoch 7/25
 - 312s - loss: 9.5568 - dense_3_loss: 4.7561 - dense_4_loss: 4.8007 - dense_3_acc: 0.0630 - dense_4_acc: 0.0600
Epoch 8/25
 - 311s - loss: 9.5248 - dense_3_loss: 4.7413 - dense_4_loss: 4.7835 - dense_3_acc: 0.0640 - dense_4_acc: 0.0590
Epoch 9/

In [16]:
# Checkpoint the current weights of `model` under ./modelweights/.
model.save_weights('./modelweights/QANet2_weights.h5')

In [None]:

def _plot_training_curve(metrics, key, title, ylabel):
    """Plot metrics[key] per epoch, plus its 'val_' counterpart when recorded.

    metrics -- the dict held in a Keras History object's `.history`
    key     -- name of the training metric to plot (e.g. 'loss')
    title   -- figure title
    ylabel  -- y-axis label
    """
    plt.plot(metrics[key])
    labels = ['train']
    val_key = 'val_' + key
    # Validation curves only exist when fit() was given validation data.
    if val_key in metrics:
        plt.plot(metrics[val_key])
        labels.append('test')
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(labels, loc='upper left')
    plt.show()


# NOTE(review): `model_history` is only assigned by the rebuild-and-train cell,
# so that cell has to be run before this one.
history=model_history
metrics = history.history

# summarize history for accuracy
# Keras numbers Dense layers per session, so a rebuilt model logs its accuracy
# under a different name (e.g. 'dense_6_acc'). Fall back to the last training
# accuracy that was recorded instead of failing with a KeyError.
acc_key = 'dense_4_acc'
if acc_key not in metrics:
    train_acc_keys = [k for k in metrics
                      if k.endswith('_acc') and not k.startswith('val_')]
    acc_key = train_acc_keys[-1] if train_acc_keys else None
if acc_key is not None:
    _plot_training_curve(metrics, acc_key, 'model accuracy', 'accuracy')

# summarize history for loss
_plot_training_curve(metrics, 'loss', 'model loss', 'loss')

In [30]:
# List the metric names recorded in the training run currently held in `history`.
print(history.history.keys())

dict_keys(['loss', 'dense_3_loss', 'dense_4_loss', 'dense_3_acc', 'dense_4_acc'])


In [17]:
# Rebuild the two-input span-prediction model from scratch (fresh weights) and
# train it on the first `slce` examples.
vocab_size = len(word_index) + 1
embedding_vector_length = 50
# Largest begin / end label value present in the targets.
max_span_begin = np.amax(begin_span)
max_span_end = np.amax(end_span)
batch = 64
# slice of data to be used, as one epoch of training on the full data is expensive
slce = 1000

# model1: context branch (700 token ids -> frozen embeddings -> BiLSTM -> dropout)
context_input = Input(shape=(700, ), dtype='int32', name='context_input')
x = Embedding(input_dim=vocab_size, output_dim=embedding_vector_length, weights=[embedding_matrix],
              input_length=700, trainable=False)(context_input)
lstm_out = Bidirectional(LSTM(256, return_sequences=True, implementation=2), merge_mode='concat')(x)
drop_1 = Dropout(0.5)(lstm_out)

# model2: question branch (50 token ids -> frozen embeddings -> BiLSTM -> dropout)
ques_input = Input(shape=(50, ), dtype='int32', name='ques_input')
x = Embedding(input_dim=vocab_size, output_dim=embedding_vector_length, weights=[embedding_matrix],
              input_length=50, trainable=False)(ques_input)
lstm_out = Bidirectional(LSTM(256, return_sequences=True, implementation=2), merge_mode='concat')(x)
drop_2 = Dropout(0.5)(lstm_out)

# merger model: join both encodings along the time axis and summarise them
merge_layer = concatenate([drop_1, drop_2], axis=1)
biLSTM = Bidirectional(LSTM(512, implementation=2), merge_mode='mul')(merge_layer)
# NOTE(review): drop_3 is created but not connected to the outputs below, so
# this variant has no dropout before the softmax heads -- confirm this is intended.
drop_3 =  Dropout(0.5)(biLSTM)
# The targets are integer labels in 0..max inclusive, so each head needs max + 1
# output units. With only `max` units the largest label is out of range for
# sparse_categorical_crossentropy.
softmax_1 = Dense(int(max_span_begin) + 1, activation='softmax')(biLSTM)
softmax_2 = Dense(int(max_span_end) + 1, activation='softmax')(biLSTM)

model = Model(inputs=[context_input, ques_input], outputs=[softmax_1, softmax_2])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

model_history = model.fit([context_array[:slce], question_array[:slce]], [begin_span[:slce], end_span[:slce]], verbose=2, batch_size=batch, epochs=10)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
context_input (InputLayer)      (None, 700)          0                                            
__________________________________________________________________________________________________
ques_input (InputLayer)         (None, 50)           0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 700, 50)      5984650     context_input[0][0]              
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 50, 50)       5984650     ques_input[0][0]                 
__________________________________________________________________________________________________
bidirectio

In [24]:
# Stage A: 25 more epochs on the slice [stindex, endindex), then checkpoint.
# NOTE(review): stindex / endindex are whatever an earlier cell left behind.
# Re-running this cell uses 1000 / 2000 here, because they are reassigned below.
model.fit(
    x=[context_array[stindex:endindex], question_array[stindex:endindex]],
    y=[begin_span[stindex:endindex], end_span[stindex:endindex]],
    batch_size=batch,
    epochs=25,
    verbose=2,
)
model.save_weights('./dropout/adam01_25epochs')

# Stage B: replace the weights with an older checkpoint, recompile with the
# default 'adam' optimizer, and train 100 epochs on examples 1000..1999.
model.load_weights('./adam001_120epochs')
stindex = 1000
endindex = 2000
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
#model.summary()
history = model.fit(
    x=[context_array[stindex:endindex], question_array[stindex:endindex]],
    y=[begin_span[stindex:endindex], end_span[stindex:endindex]],
    batch_size=batch,
    epochs=100,
    verbose=2,
)
model.save_weights('./adam0001_220epochs')

Epoch 1/25
 - 362s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0110 - dense_6_acc: 0.0090
Epoch 2/25
 - 326s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0120 - dense_6_acc: 0.0000e+00
Epoch 3/25
 - 328s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0120 - dense_6_acc: 0.0000e+00
Epoch 4/25
 - 328s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0120 - dense_6_acc: 0.0000e+00
Epoch 5/25
 - 326s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0120 - dense_6_acc: 0.0000e+00
Epoch 6/25
 - 322s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0120 - dense_6_acc: 0.0000e+00
Epoch 7/25
 - 322s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0120 - dense_6_acc: 0.0000e+00
Epoch 8/25
 - 321s - loss: nan - dense_5_loss: nan - dense_6_loss: nan - dense_5_acc: 0.0120 - dense_6_acc: 0.0000e+00
Epoch 9/25
 - 322s - loss: nan - dense_5_loss: nan -

Epoch 43/100
 - 320s - loss: 7.0027 - dense_5_loss: 3.4892 - dense_6_loss: 3.5136 - dense_5_acc: 0.0990 - dense_6_acc: 0.0810
Epoch 44/100
 - 319s - loss: 6.9901 - dense_5_loss: 3.4837 - dense_6_loss: 3.5064 - dense_5_acc: 0.1010 - dense_6_acc: 0.0750
Epoch 45/100
 - 319s - loss: 7.0011 - dense_5_loss: 3.4858 - dense_6_loss: 3.5152 - dense_5_acc: 0.0940 - dense_6_acc: 0.0750
Epoch 46/100
 - 319s - loss: 6.9894 - dense_5_loss: 3.4857 - dense_6_loss: 3.5037 - dense_5_acc: 0.0990 - dense_6_acc: 0.0800
Epoch 47/100
 - 318s - loss: 6.9887 - dense_5_loss: 3.4823 - dense_6_loss: 3.5065 - dense_5_acc: 0.0990 - dense_6_acc: 0.0790
Epoch 48/100
 - 318s - loss: 6.9970 - dense_5_loss: 3.4866 - dense_6_loss: 3.5104 - dense_5_acc: 0.1010 - dense_6_acc: 0.0830
Epoch 49/100
 - 319s - loss: 6.9824 - dense_5_loss: 3.4802 - dense_6_loss: 3.5022 - dense_5_acc: 0.1020 - dense_6_acc: 0.0810
Epoch 50/100
 - 320s - loss: 6.9852 - dense_5_loss: 3.4821 - dense_6_loss: 3.5031 - dense_5_acc: 0.1010 - dense_6_acc:

In [None]:
# Resume training from a saved checkpoint on the first 1000 examples:
# compile, restore the weights, train 50 epochs, then checkpoint again.
stindex, endindex = 0, 1000

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)
model.load_weights('./model70_epochs0')
#model.summary()
history = model.fit(
    x=[context_array[stindex:endindex], question_array[stindex:endindex]],
    y=[begin_span[stindex:endindex], end_span[stindex:endindex]],
    batch_size=batch,
    epochs=50,
    verbose=2,
)
model.save_weights('./Weights_adam001_170epochs')

Epoch 1/50
 - 21140s - loss: 27.2224 - dense_1_loss: 13.5343 - dense_2_loss: 13.6881 - dense_1_acc: 0.0060 - dense_2_acc: 0.0000e+00
Epoch 2/50
