# Bi-Directional Attention Flow (BiDAF)

In [1]:
import os
import sys
import copy
#import spacy
import random
import pandas as pd
import json
import nltk
#import jsonlines
import numpy as np
import pickle
import re
from tqdm import tqdm
from six.moves.urllib.request import urlretrieve
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Conv1D,Dense, Activation, Multiply, Add, Lambda,Conv2D ,Conv3D, \
MaxPooling1D,MaxPooling2D,Input, TimeDistributed, LSTM, Bidirectional,Flatten,Embedding,Dense,Dropout,Concatenate,AveragePooling1D
from keras.initializers import Constant
from keras.models import Model, load_model
from keras.optimizers import Adadelta,Adam
import tensorflow as tf
from keras.activations import linear
from keras.layers.advanced_activations import Softmax
from six.moves.urllib.request import urlretrieve

nltk.download('punkt')

Using TensorFlow backend.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
''' The Highway layer forms a gated residual connection:
output = T * g(W y + b) + (1 - T) * y
where T = sigmoid(W_T y + b_T) is the transform gate (it has its own weights)
and g(W y + b) is the transformed data.
Output shape: (None, 350)'''


from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Dense, Activation, Multiply, Add, Lambda
from keras.initializers import Constant


class Highway(Layer):
    """Highway layer computing ``T * g(W x + b) + (1 - T) * x``.

    ``T`` is a sigmoid "transform gate" produced by its own dense layer and
    ``g`` is ``activation`` applied to a second dense layer. Both dense layers
    keep the size of the last input axis, so the output shape equals the
    input shape.

    Args:
        activation: activation applied to the transformed data.
        transform_gate_bias: constant used to initialise the bias of the
            transform gate.
    """

    activation = None
    transform_gate_bias = None

    def __init__(self, activation='relu', transform_gate_bias=-1, **kwargs):
        self.activation = activation
        self.transform_gate_bias = transform_gate_bias
        super(Highway, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the gate (``dense_1``) and transform (``dense_2``) sub-layers."""
        width = input_shape[-1]

        # Gate: its bias starts at ``transform_gate_bias``.
        self.dense_1 = Dense(units=width, bias_initializer=Constant(self.transform_gate_bias))
        self.dense_1.build(input_shape)

        # Transform branch.
        self.dense_2 = Dense(units=width)
        self.dense_2.build(input_shape)

        # Expose the sub-layers' weights as this layer's trainable weights.
        self.trainable_weights = self.dense_1.trainable_weights + self.dense_2.trainable_weights

        super(Highway, self).build(input_shape)  # must be called last

    def call(self, x):
        """Return the gated mix of the transformed input and the input itself."""
        width = K.int_shape(x)[-1]

        gate = Activation("sigmoid")(self.dense_1(x))                          # T
        carry = Lambda(lambda t: 1.0 - t, output_shape=(width,))(gate)         # 1 - T
        candidate = Activation(self.activation)(self.dense_2(x))               # g(W x + b)

        gated_candidate = Multiply()([gate, candidate])
        gated_input = Multiply()([carry, x])
        return Add()([gated_candidate, gated_input])

    def compute_output_shape(self, input_shape):
        """The layer preserves its input shape."""
        return input_shape

    def get_config(self):
        """Add the constructor arguments to the base config for serialisation."""
        config = super().get_config()
        config.update({
            'activation': self.activation,
            'transform_gate_bias': self.transform_gate_bias,
        })
        return config


In [3]:
''' Computes the similarity between the query and the context: every context vector is paired
with every query vector, the pair is concatenated together with its element-wise product, and
the concatenation is projected to a single score.

Pairwise similarity between query and context
input context_vectors (None, 250, 350),query_vectors (None, 35, 350)
output(None,250,35)
Z = W * [a : b : a*b] + bias
a->repeated_context_vectors
b->repeated_query_vectors
'''

from keras.engine.topology import Layer
tf.compat.v1.keras.backend.expand_dims

class Similarity(Layer):
    """Pairwise similarity between two sequences of vectors.

    For every pair ``(a, b)`` taken from the first and second input the layer
    scores ``kernel . [a ; b ; a * b] + bias``. Inputs are
    ``[context_vectors, query_vectors]``; the output has shape
    ``(batch, context_len, query_len)`` as reported by
    ``compute_output_shape``.
    """

    def __init__(self, **kwargs):
        super(Similarity, self).__init__(**kwargs)

    def compute_similarity(self, repeated_context_vectors, repeated_query_vectors):
        """Score already-tiled context/query tensors of identical shape."""
        product = repeated_context_vectors * repeated_query_vectors
        features = K.concatenate(
            [repeated_context_vectors, repeated_query_vectors, product], axis=-1)
        # Project the concatenated features onto one score and drop the unit axis.
        scores = K.squeeze(K.dot(features, self.kernel), axis=-1)
        return linear(scores + self.bias)

    def build(self, input_shape):
        """Create the projection vector (3 x feature dim, 1) and a scalar bias."""
        feature_dim = input_shape[0][-1]
        self.kernel = self.add_weight(name='similarity_weight',
                                      shape=(feature_dim * 3, 1),
                                      initializer='uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='similarity_bias',
                                    shape=(),
                                    initializer='ones',
                                    trainable=True)
        super(Similarity, self).build(input_shape)

    def call(self, inputs):
        """Tile both inputs to (batch, context_len, query_len, dim) and score them."""
        context_vectors, query_vectors = inputs
        context_len = K.shape(context_vectors)[1]
        query_len = K.shape(query_vectors)[1]
        expand_dims = tf.compat.v1.keras.backend.expand_dims

        # Tile multiples: repeat context along a new axis 2, query along a new axis 1.
        context_multiples = K.concatenate([[1, 1], [query_len], [1]], 0)
        query_multiples = K.concatenate([[1], [context_len], [1, 1]], 0)

        tiled_context = K.tile(expand_dims(context_vectors, axis=2), context_multiples)
        tiled_query = K.tile(expand_dims(query_vectors, axis=1), query_multiples)
        return self.compute_similarity(tiled_context, tiled_query)

    def compute_output_shape(self, input_shape):
        """Return (batch, length of first input, length of second input)."""
        context_shape, query_shape = input_shape[0], input_shape[1]
        return (context_shape[0], context_shape[1], query_shape[1])

    def get_config(self):
        """The layer has no constructor arguments beyond the base ones."""
        return super().get_config()


In [4]:
'''Context-to-Query attention: take the row-wise softmax of the similarity matrix and use it to
weight the question vectors.
input similarity_matrix(None, 250, 35),encoded_question(None, 35, 350)
output (None, 250, 350)'''


class C2QAttention(Layer):
    """Context-to-query attention.

    Inputs are ``[similarity_matrix, encoded_question]``. The similarity
    matrix is soft-maxed over its last axis and used to form, for every row of
    the matrix, a weighted sum of the question vectors.
    """

    def __init__(self, **kwargs):
        super(C2QAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights of its own.
        super(C2QAttention, self).build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted question vectors."""
        similarity_matrix, encoded_question = inputs
        attention = Softmax(axis=-1)(similarity_matrix)
        # Insert broadcast axes so weights and question vectors line up.
        question = K.expand_dims(encoded_question, axis=1)
        weights = K.expand_dims(attention, axis=-1)
        # Sum over the question-position axis.
        return K.sum(weights * question, -2)

    def compute_output_shape(self, input_shape):
        """Similarity shape with its last axis replaced by the question feature dim."""
        similarity_matrix_shape, encoded_question_shape = input_shape
        leading = similarity_matrix_shape[:-1]
        feature = encoded_question_shape[-1:]
        return leading + feature

    def get_config(self):
        return super().get_config()


In [5]:
'''Query-to-Context (Q2C) attention: take the row-wise max of similarity_matrix, apply a softmax
to the result, and use it to weight the context vectors.
input similarity_matrix(None, 250, 35),encoded_context(None, 250, 350)
output (None, None, 350)
'''

class Q2CAttention(Layer):
    """Query-to-context attention.

    Inputs are ``[similarity_matrix, encoded_context]``. The maximum of each
    similarity row is soft-maxed over the context positions, the context
    vectors are summed with those weights, and the single resulting vector is
    repeated once per context position.
    """

    def __init__(self, **kwargs):
        super(Q2CAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights of its own.
        super(Q2CAttention, self).build(input_shape)

    def call(self, inputs):
        """Return the attended context vector tiled along the context axis."""
        similarity_matrix, encoded_context = inputs
        best_match = K.max(similarity_matrix, axis=-1)
        # Softmax defaults to the last axis, i.e. the context positions here.
        context_weights = Softmax()(best_match)
        attended = K.sum(K.expand_dims(context_weights, axis=-1) * encoded_context, -2)
        attended = K.expand_dims(attended, 1)
        repeats = K.shape(encoded_context)[1]
        return K.tile(attended, [1, repeats, 1])

    def compute_output_shape(self, input_shape):
        """Similarity shape with its last axis replaced by the context feature dim."""
        similarity_matrix_shape, encoded_context_shape = input_shape
        leading = similarity_matrix_shape[:-1]
        feature = encoded_context_shape[-1:]
        return leading + feature

    def get_config(self):
        return super().get_config()


In [6]:
'''MergedContext multiplies the context vectors element-wise with the Context-to-Query and
Query-to-Context vectors, and then concatenates
context, context_to_query, multiply1, multiply2.
input encoded_context(None, 250, 350), context_to_query_attention(None, 250, 350),
query_to_context_attention(None, None, 350)
output (None, 250, 1400)
'''


class MergedContext(Layer):
    """Concatenate the context with its attention-derived features.

    Inputs are ``[encoded_context, context_to_query_attention,
    query_to_context_attention]``. The output concatenates, on the last axis,
    the context, the context-to-query attention, and the element-wise products
    of the context with each of the two attentions, giving four times the
    context feature dimension.
    """

    def __init__(self, **kwargs):
        super(MergedContext, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights of its own.
        super(MergedContext, self).build(input_shape)

    def call(self, inputs):
        """Return ``[c ; c2q ; c * c2q ; c * q2c]`` along the last axis."""
        encoded_context, context_to_query_attention, query_to_context_attention = inputs
        context_times_c2q = encoded_context * context_to_query_attention
        context_times_q2c = encoded_context * query_to_context_attention
        pieces = [
            encoded_context,
            context_to_query_attention,
            context_times_c2q,
            context_times_q2c,
        ]
        return K.concatenate(pieces, axis=-1)

    def compute_output_shape(self, input_shape):
        """Context shape with the feature axis multiplied by four."""
        encoded_context_shape, _, _ = input_shape
        widened = (encoded_context_shape[-1] * 4, )
        return encoded_context_shape[:-1] + widened

    def get_config(self):
        return super().get_config()

In [7]:
'''CombineOutputs is used to stack its input tensors along a new axis'''
class CombineOutputs(Layer):
    """Stack a list of equally-shaped tensors along a new axis 1.

    The layer accepts any number of input tensors (the original
    implementation unpacked exactly five, although ``compute_output_shape``
    was already written for an arbitrary count).
    """

    def __init__(self, **kwargs):
        super(CombineOutputs, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights of its own.
        super(CombineOutputs, self).build(input_shape)

    def call(self, inputs):
        """Return the inputs stacked on a new axis placed right after the batch axis."""
        return K.stack(list(inputs), axis=1)

    def compute_output_shape(self, input_shape):
        """Insert the number of stacked tensors after the batch dimension."""
        number_of_tensors = len(input_shape)
        return input_shape[0][0:1] + (number_of_tensors, ) + input_shape[0][1:]

    def get_config(self):
        """The layer has no constructor arguments beyond the base ones."""
        config = super().get_config()
        return config


In [8]:
'''MagnitudeVectors is used to load the GloVe and fastText vectors'''
import os
from pymagnitude import Magnitude, MagnitudeUtils


class MagnitudeVectors():
    """Load GloVe and fastText Magnitude models and combine them.

    ``emdim`` is the total embedding size: 300 fastText dimensions plus a
    GloVe model of ``emdim - 300`` dimensions, which must be 50, 100, 200 or
    300. Models are fetched through ``MagnitudeUtils.download_model`` into
    ``challenge/magnitude``.
    """

    def __init__(self, emdim):
        base_dir = "challenge"
        magnitude_dir = os.path.join(base_dir, 'magnitude')

        self.fasttext_dim = 300
        self.glove_dim = emdim - 300

        assert self.glove_dim in [50, 100, 200,
                                  300], "Embedding dimension must be one of the following: 350, 400, 500, 600"

        glove_path = MagnitudeUtils.download_model(
            'glove/medium/glove.6B.{}d'.format(self.glove_dim), download_dir=magnitude_dir)
        glove = Magnitude(glove_path, case_insensitive=True)

        fasttext_path = MagnitudeUtils.download_model(
            'fasttext/medium/wiki-news-300d-1M-subword', download_dir=magnitude_dir)
        fasttext = Magnitude(fasttext_path, case_insensitive=True)

        # NOTE(review): presumably Magnitude(a, b) concatenates the two models'
        # vectors (glove_dim + 300 = emdim) - confirm against pymagnitude docs.
        self.vectors = Magnitude(glove, fasttext)

    def load_vectors(self):
        """Return the combined Magnitude object built in ``__init__``."""
        return self.vectors

In [9]:
from keras.utils import multi_gpu_model


class ModelMGPU(Model):
    """Wrap a serial model with ``multi_gpu_model`` while keeping save/load on the serial model.

    The instance takes over the attribute dictionary of the parallel model and
    keeps a reference to the original serial model in ``_smodel``.
    """

    def __init__(self, ser_model, gpus=None):
        """Build the parallel model from ``ser_model`` and adopt its attributes.

        Note that ``Model.__init__`` is deliberately not called; the state is
        copied from the object returned by ``multi_gpu_model`` instead.
        """
        pmodel = multi_gpu_model(ser_model, gpus)
        # Graft the parallel model's state onto this instance.
        self.__dict__.update(pmodel.__dict__)
        # Must be set after the update above so it is not overwritten.
        self._smodel = ser_model

    def __getattribute__(self, attrname):
        '''Override load and save methods to be used from the serial-model. The
        serial-model holds references to the weights in the multi-gpu model.
        '''
        # return Model.__getattribute__(self, attrname)
        # Any attribute whose name contains "load" or "save" is served by the serial model.
        if 'load' in attrname or 'save' in attrname:
            return getattr(self._smodel, attrname)

        return super(ModelMGPU, self).__getattribute__(attrname)

In [18]:
from keras.optimizers import Adadelta,Adam
from scipy.sparse import hstack
from time import time
from keras.callbacks import TensorBoard

class BidirectionalAttentionFlow():
    """BiDAF-style multiple-choice model.

    The network takes pre-embedded inputs - a passage, a question and five
    answer choices of eight tokens each - and outputs a 5-way softmax over
    the choices. The built Keras model is available as ``self.model``.

    Args:
        emdim: size of the input word vectors. The bidirectional LSTMs use
            ``emdim // 2`` units per direction so that their output width
            matches the choice inputs (175 for ``emdim=350``); ``emdim`` is
            therefore expected to be even.
        max_passage_length: passage length in tokens; ``None`` means 250.
        max_query_length: question length in tokens; ``None`` means 35.
        num_highway_layers: number of highway layers shared by passage and question.
        num_decoders: number of bidirectional LSTM layers after the attention merge.
        encoder_dropout: recurrent dropout of the encoder LSTM.
        decoder_dropout: recurrent dropout of the decoder LSTMs.
    """

    def __init__(self, emdim, max_passage_length=None, max_query_length=None, num_highway_layers=2, num_decoders=1,
                 encoder_dropout=0, decoder_dropout=0):
        self.emdim = emdim
        # Honour the caller's lengths; fall back to the notebook's defaults for None.
        self.max_passage_length = 250 if max_passage_length is None else max_passage_length
        self.max_query_length = 35 if max_query_length is None else max_query_length

        # Two directions x (emdim // 2) keeps the encoder/decoder output at emdim,
        # which the element-wise product with the choice inputs below requires.
        lstm_units = emdim // 2
        choice_length = 8
        num_choices = 5

        passage_input = Input(shape=(self.max_passage_length, emdim), dtype='float32', name="passage_input")
        question_input = Input(shape=(self.max_query_length, emdim), dtype='float32', name="question_input")
        choice_inputs = [
            Input(shape=(choice_length, emdim), dtype='float32', name="choice_input_{}".format(number))
            for number in range(1, num_choices + 1)
        ]

        question_embedding = question_input
        passage_embedding = passage_input
        for i in range(num_highway_layers):
            # The same highway layer (and weights) is applied to question and passage.
            highway_layer = Highway(name='highway_{}'.format(i))
            question_layer = TimeDistributed(highway_layer, name=highway_layer.name + "_qtd")
            question_embedding = question_layer(question_embedding)
            passage_layer = TimeDistributed(highway_layer, name=highway_layer.name + "_ptd")
            passage_embedding = passage_layer(passage_embedding)

        # One shared encoder for question and passage.
        encoder_layer = Bidirectional(LSTM(lstm_units, recurrent_dropout=encoder_dropout,
                                           return_sequences=True), name='bidirectional_encoder')
        encoded_question = encoder_layer(question_embedding)
        encoded_passage = encoder_layer(passage_embedding)

        # Entry (i, j) is the similarity of passage word i and question word j.
        similarity_matrix = Similarity(name='similarity_layer')([encoded_passage, encoded_question])

        context_to_query_attention = C2QAttention(name='context_to_query_attention')([
            similarity_matrix, encoded_question])
        query_to_context_attention = Q2CAttention(name='query_to_context_attention')([
            similarity_matrix, encoded_passage])

        merged_context = MergedContext(name='merged_context')(
            [encoded_passage, context_to_query_attention, query_to_context_attention])

        modeled_passage = merged_context
        for i in range(num_decoders):
            hidden_layer = Bidirectional(LSTM(lstm_units, recurrent_dropout=decoder_dropout,
                                              return_sequences=True), name='bidirectional_decoder_{}'.format(i))
            modeled_passage = hidden_layer(modeled_passage)

        # Similarity of every choice token with every modeled passage position,
        # one independent Similarity layer per choice.
        choice_similarities = [
            Similarity(name='similarity_layer_choice_{}'.format(number))([choice_input, modeled_passage])
            for number, choice_input in enumerate(choice_inputs, start=1)
        ]

        # Stack the per-choice matrices on a new axis 1.
        stacked_choices = CombineOutputs(name='combine_outputs')(choice_similarities)

        hidden = Dense(150, activation='relu')(stacked_choices)
        hidden = Dropout(rate=0.5)(hidden)
        hidden = Dense(70, activation='relu')(hidden)
        hidden = Dropout(rate=0.5)(hidden)

        flat_out = Flatten()(hidden)
        output = Dense(num_choices, activation='softmax')(flat_out)

        model = Model([passage_input, question_input] + choice_inputs, [output])

        model.summary()

        try:
            model = ModelMGPU(model)
        except Exception:
            # Best effort: keep the single-device model when the multi-GPU
            # wrapper cannot be built. Narrowed from a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

        self.model = model

    def load_bidaf(self, path):
        """Replace ``self.model`` with the model stored at ``path``.

        The custom layers defined in this notebook are registered so that
        ``load_model`` can rebuild them.
        """
        custom_objects = {
            'Highway': Highway,
            'Similarity': Similarity,
            'C2QAttention': C2QAttention,
            'Q2CAttention': Q2CAttention,
            'MergedContext': MergedContext,
            'CombineOutputs': CombineOutputs
        }

        self.model = load_model(path, custom_objects=custom_objects)

    def train_model(self, train_generator, steps_per_epoch=None, epochs=1, validation_generator=None,
                    validation_steps=None,  use_multiprocessing=False, shuffle=True, initial_epoch=0,
                    save_history=False, save_model_per_epoch=True):
        """Train ``self.model`` with ``fit_generator`` and return ``(history, model)``.

        Args:
            train_generator / validation_generator: batch generators passed to Keras.
            steps_per_epoch, epochs, validation_steps, use_multiprocessing,
            shuffle, initial_epoch: forwarded unchanged to ``fit_generator``.
            save_history: when true, append per-epoch logs to ``history`` in
                the saved-items directory via ``CSVLogger``.
            save_model_per_epoch: when true, checkpoint after each epoch to
                ``saved_items/weights-improvement.hdf5`` and save the final
                model as ``bidaf.h5`` in the saved-items directory.
        """
        # Imported here so the method does not depend on a later notebook cell
        # having been executed first.
        from keras.callbacks import CSVLogger, ModelCheckpoint

        saved_items_dir = "/content/drive/My Drive/case_study1/saved_items"
        os.makedirs(saved_items_dir, exist_ok=True)

        # A single list is used for every callback; previously the CSV logger
        # was never passed on and disabling checkpoints raised a NameError.
        callbacks = []

        if save_history:
            history_file = os.path.join(saved_items_dir, 'history')
            callbacks.append(CSVLogger(history_file, append=True))

        if save_model_per_epoch:
            checkpoint_path = "saved_items/weights-improvement.hdf5"
            # The checkpoint path is relative; make sure its directory exists.
            os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
            callbacks.append(ModelCheckpoint(checkpoint_path, verbose=1))

        # NOTE(review): class_weight='auto' is kept as in the original call; it
        # is not a dict of class weights - confirm how the installed Keras
        # version treats it.
        history = self.model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs,
                                           callbacks=callbacks, validation_data=validation_generator,
                                           validation_steps=validation_steps,
                                           use_multiprocessing=use_multiprocessing, shuffle=shuffle,
                                           initial_epoch=initial_epoch, class_weight='auto')
        if save_model_per_epoch:
            self.model.save(os.path.join(saved_items_dir, 'bidaf.h5'))

        return history, self.model


In [19]:
# Reset the backend state before building the model. The model is built with
# the standalone `keras` package (imported as K in the first cell), so its own
# backend is cleared rather than the separate `tf.keras` one.
K.clear_session()


bidaf_model = BidirectionalAttentionFlow(emdim=350, max_passage_length=None,
                                             max_query_length=None,
                                             num_highway_layers=2, num_decoders=2,
                                             encoder_dropout=0.5, decoder_dropout=0.5)

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
passage_input (InputLayer)      (None, 250, 350)     0                                            
__________________________________________________________________________________________________
question_input (InputLayer)     (None, 35, 350)      0                                            
__________________________________________________________________________________________________
highway_0_ptd (TimeDistributed) (None, 250, 350)     245700      passage_input[0][0]              
__________________________________________________________________________________________________
highway_0_qtd (TimeDistributed) (None, 35, 350)      245700      question_input[0][0]             
____________________________________________________________________________________________

In [20]:
# Single softmax output over the five choices, hence categorical cross-entropy.
bidaf_model.model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

In [17]:
#bidaf_model.load_bidaf('saved_items/weights-improvement (5).hdf5')

In [21]:

def load_data_generators(batch_size, emdim, squad_version=1.1, max_passage_length=None, max_query_length=None,
                         shuffle=False):
    """Build the ``'train'`` and ``'dev'`` batch generators with identical settings.

    Returns:
        A ``(train_generator, validation_generator)`` tuple of ``BatchGenerator``s.
    """
    shared_args = (batch_size, emdim, squad_version, max_passage_length, max_query_length, shuffle)
    train_generator = BatchGenerator('train', *shared_args)
    validation_generator = BatchGenerator('dev', *shared_args)
    return train_generator, validation_generator


In [22]:
# Lower-case words dropped from contexts and questions before embedding.
# Written as a whitespace-separated block and split into a list, so the order
# of the entries is preserved.
stopwords = """
i me my myself we our ours ourselves you you're you've
you'll you'd your yours yourself yourselves he him his himself
she she's her hers herself it it's its itself they them their
theirs themselves what which who whom this that that'll these those
am is are was were be been being have has had having do does
did doing a an the and but if or because as until while of
at by for with about against between into through during before after
above below to from up down in out on off over under again further
then once here there when where why how all any both each few more
most other some such only own same so than too very
s t can will just don don't should should've now d ll m o re
ve y ain aren aren't couldn couldn't didn didn't doesn doesn't hadn
hadn't hasn hasn't haven haven't isn isn't ma mightn mightn't mustn
mustn't needn needn't shan shan't shouldn shouldn't wasn wasn't weren weren't
won won't wouldn wouldn't
""".split()

In [23]:
'''BatchGenerator generates batches of inputs: a context of 250 words, a query of 35 words and choices of 8 words each.
The combined Easy and Challenge data are used for both training and validation.'''

from keras.utils import Sequence



class BatchGenerator(Sequence):
    """Keras ``Sequence`` yielding embedded (passage, question, five choices) batches.

    Every example occupies the same line number in four parallel text files
    under ``all_data``: ``<gen_type>-v<version>.cha_context``,
    ``.cha_question``, ``.cha_choices`` (comma-separated choices) and
    ``.cha_answer`` (one of ``A``-``E`` or ``1``-``4``).

    Each batch is ``([context, question, choice_1, ..., choice_5], [targets])``
    where ``targets`` is a one-hot array with five columns. A trailing partial
    batch is not served.

    Args:
        gen_type: file prefix, e.g. ``'train'``, ``'dev'`` or ``'test'``.
        batch_size: number of examples per batch.
        emdim: embedding size passed to ``MagnitudeVectors``.
        squad_version: version used in the file names.
        max_passage_length: tokens kept per context; ``None`` means 250.
        max_query_length: tokens kept per question; ``None`` means 35.
        shuffle: reshuffle the example order after every epoch.
    """

    vectors = None

    # Lengths used when the caller passes None for the corresponding argument.
    DEFAULT_PASSAGE_LENGTH = 250
    DEFAULT_QUERY_LENGTH = 35
    CHOICE_LENGTH = 8      # tokens kept per answer choice
    NUM_CHOICES = 5        # number of choice inputs and of one-hot target columns
    # NOTE(review): short token lists are padded with the integer 0, not a
    # string; how Magnitude.query embeds this entry is not visible here - confirm.
    PAD_TOKEN = 0
    ANSWER_TO_INDEX = {
        'A': 0, '1': 0,
        'B': 1, '2': 1,
        'C': 2, '3': 2,
        'D': 3, '4': 3,
        'E': 4,
    }

    def __init__(self, gen_type, batch_size, emdim, squad_version, max_passage_length, max_query_length, shuffle):
        """Load the vectors, resolve the data files and count the examples."""
        self.vectors = MagnitudeVectors(emdim).load_vectors()
        self.squad_version = squad_version

        self.max_passage_length = max_passage_length
        self.max_query_length = max_query_length
        self._passage_length = (self.DEFAULT_PASSAGE_LENGTH if max_passage_length is None
                                else max_passage_length)
        self._query_length = (self.DEFAULT_QUERY_LENGTH if max_query_length is None
                              else max_query_length)

        def data_path(suffix):
            return os.path.join('all_data', gen_type + '-v{}.{}'.format(squad_version, suffix))

        self.context_file = data_path('cha_context')
        self.question_file = data_path('cha_question')
        self.span_file_ans = data_path('cha_choices')
        self.span_file = data_path('cha_answer')

        self.gen_type = gen_type
        self.batch_size = batch_size

        with open(self.span_file, 'r', encoding='utf-8') as answers:
            num_examples = sum(1 for _ in answers)
        self.num_of_batches = num_examples // self.batch_size
        # 1-based line numbers, matching the enumeration used when reading the
        # files, so every example (including the last one) can be selected.
        self.indices = np.arange(1, num_examples + 1)
        self.shuffle = shuffle

    def __len__(self):
        'Denotes the number of batches per epoch'
        return self.num_of_batches

    @staticmethod
    def _selected_lines(path, wanted):
        """Yield, in file order, the lines whose 1-based number is in ``wanted``."""
        with open(path, 'r', encoding='utf-8') as handle:
            for line_number, line in enumerate(handle, start=1):
                if line_number in wanted:
                    # Only strip the newline so a final unterminated line keeps
                    # its last character.
                    yield line.rstrip('\n')

    @classmethod
    def _tokenize(cls, text, length, drop_stopwords):
        """Lower-case and split ``text`` on spaces, then truncate/pad to ``length`` entries."""
        tokens = []
        for raw in text.split(' '):
            if len(tokens) >= length:
                break
            word = raw.lower().strip()
            if not word:
                continue
            if drop_stopwords and word in stopwords:
                continue
            tokens.append(word)
        tokens.extend([cls.PAD_TOKEN] * (length - len(tokens)))
        return tokens

    def _one_hot_answer(self, raw_label):
        """Translate an answer key into a one-hot list; unknown keys are an error."""
        label = raw_label.strip()
        if label not in self.ANSWER_TO_INDEX:
            # Skipping the row silently would leave the targets shorter than
            # the inputs of the batch.
            raise ValueError("Unrecognised answer label {!r} in {}".format(label, self.span_file))
        one_hot = [0] * self.NUM_CHOICES
        one_hot[self.ANSWER_TO_INDEX[label]] = 1
        return one_hot

    def _tokenize_choices(self, line):
        """Split a comma-separated choices line into exactly ``NUM_CHOICES`` token lists."""
        choices = [self._tokenize(piece, self.CHOICE_LENGTH, drop_stopwords=False)
                   for piece in line.split(',')]
        # Questions with fewer choices get all-padding choices appended; any
        # choices beyond the fifth are ignored.
        while len(choices) < self.NUM_CHOICES:
            choices.append([self.PAD_TOKEN] * self.CHOICE_LENGTH)
        return choices[:self.NUM_CHOICES]

    def __getitem__(self, index):
        'Generate one batch of data'
        start = index * self.batch_size
        batch_lines = self.indices[start:start + self.batch_size]
        wanted = set(int(line_number) for line_number in batch_lines)

        contexts = [self._tokenize(line, self._passage_length, drop_stopwords=True)
                    for line in self._selected_lines(self.context_file, wanted)]
        questions = [self._tokenize(line, self._query_length, drop_stopwords=True)
                     for line in self._selected_lines(self.question_file, wanted)]
        answer_spans = [self._one_hot_answer(line)
                        for line in self._selected_lines(self.span_file, wanted)]

        # choices_by_slot[k] collects choice k of every example in the batch.
        choices_by_slot = [[] for _ in range(self.NUM_CHOICES)]
        for line in self._selected_lines(self.span_file_ans, wanted):
            for slot, choice_tokens in zip(choices_by_slot, self._tokenize_choices(line)):
                slot.append(choice_tokens)

        context_batch = self.vectors.query(contexts, pad_to_length=self._passage_length)
        question_batch = self.vectors.query(questions, pad_to_length=self._query_length)
        # Choices are padded to the choice length, not to the question length.
        choice_batches = [self.vectors.query(slot, pad_to_length=self.CHOICE_LENGTH)
                          for slot in choices_by_slot]

        targets = np.array(answer_spans, dtype='float32')
        return [context_batch, question_batch] + choice_batches, [targets]

    def on_epoch_end(self):
        """Reshuffle the example order between epochs when ``shuffle`` is set."""
        if self.shuffle:
            np.random.shuffle(self.indices)

In [None]:
'''Build the batch generators and train the model with a batch size of 15 for 20 epochs'''
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.callbacks import TensorBoard
import warnings 
from time import time

import datetime
warnings.filterwarnings('ignore')

# Build the train/dev generators; lengths of None let the generator use its
# built-in defaults.
train_generator, validation_generator = load_data_generators(
    batch_size=15,
    emdim=350,
    squad_version=1.1,
    max_passage_length=None,
    max_query_length=None,
    shuffle=False,
)

# Train for 20 epochs; the (history, model) return value is not kept here.
bidaf_model.train_model(
    train_generator,
    steps_per_epoch=None,
    epochs=20,
    validation_generator=validation_generator,
    validation_steps=None,
    use_multiprocessing=False,
    shuffle=False,
    save_history=False,
    save_model_per_epoch=True,
)

print("Training Completed!")

In [21]:

def load_data_generators_test(batch_size, emdim, squad_version=1.1, max_passage_length=None, max_query_length=None,
                         shuffle=False):
    """Build and return the ``BatchGenerator`` for the ``'test'`` files."""
    return BatchGenerator('test', batch_size, emdim, squad_version,
                          max_passage_length, max_query_length, shuffle)

In [22]:
import warnings 
from time import time

import datetime
warnings.filterwarnings('ignore')

'''Only the Challenge data is used for testing.'''

# Generator over the 'test' files, configured like the training generators.
test_generator = load_data_generators_test(
    batch_size=15,
    emdim=350,
    squad_version=1.1,
    max_passage_length=None,
    max_query_length=None,
    shuffle=False,
)

# The model was compiled with one loss and one metric, so evaluate returns two values.
loss, acc = bidaf_model.model.evaluate(test_generator, verbose=2)

Will download magnitude files from the server if they aren't avaialble locally.. So, grab a cup of coffee while the downloading is under progress..


In [22]:
# Model outputs for every batch of the test generator.
class_out = bidaf_model.model.predict(test_generator)

In [23]:
# Report the metrics returned by the evaluate call.
print("Test accuracy: ", acc, " Test Loss: ", loss)

Test accuracy:  0.2777777910232544  Test Loss:  1.4846593141555786


On the leaderboard, the BiDAF model has an accuracy of 26.54 %; here I have improved the model to 27.77 %.

https://leaderboard.allenai.org/arc/submissions/public

# Failure case:

1) When more than 300 context words are fed in, the model's loss does not improve.

2) The model overfits when the batch size is 5 or 10.

3) If the context or query contains a lot of padding, the model's loss does not improve.

4) Sending all the choices as a single string and computing the similarity on it also leads to overfitting.

5) Sending the choices together with the question does not improve the model either.