In [3]:
import numpy as np
import pandas as pd
import json
import os
import re
import spacy
# Load spaCy's small English pipeline. `nlp` is not referenced by any of the cells
# visible in this notebook section - NOTE(review): confirm it is still needed.
nlp = spacy.load('en_core_web_sm')
import os
import pickle

In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Concatenate, Dense, Dropout, MultiHeadAttention, Attention
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Embedding weight matrix, later handed to the Embedding layer as its initial weights.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; only load
# files from a trusted source.
embedding_matrix_path = "./weights_matrix_100D.npy"
word_embeddings = np.load(embedding_matrix_path, allow_pickle=True)

In [7]:
# Read the pickled training data; later cells select columns from it by name.
# NOTE(review): pickle.load executes arbitrary code - only use trusted files.
with open('traindata.pkl', 'rb') as train_pickle:
    train_df = pickle.load(train_pickle)

In [8]:
#train_df

In [9]:
# Load the three pickled vocabulary artefacts in one go: the vocabulary itself and
# the two lookup tables (word -> index, index -> word).
with open('./data/word_vocab.pkl', "rb") as vocab_file, \
     open('./data/word2index.pkl', "rb") as word2idx_file, \
     open('./data/index2word.pkl', "rb") as idx2word_file:
    word_vocab = pickle.load(vocab_file)
    word2idx = pickle.load(word2idx_file)
    idx2word = pickle.load(idx2word_file)

In [10]:
# --- Sequence-length limits --------------------------------------------------
# `maxlen` used when padding the context / question id sequences below.
max_contex_length = 200
max_question_length = 30
# Not referenced by the cells visible in this section.
max_sequence_length = 256

# --- Training hyper-parameters -----------------------------------------------
batch_size = 16
epochs = 5
learning_rate = 0.001

In [11]:
# Keep only the three columns the model pipeline consumes: the context ids,
# the question ids and the answer label indices.
model_columns = ['context_ids', 'question_ids', 'label_idx']
Input_data = train_df[model_columns]

In [12]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
training_data, Val_data = train_test_split(
    Input_data,
    test_size=0.2,
    random_state=25,
)

In [13]:
# --- Training inputs -----------------------------------------------------------
# Contexts are padded AND truncated at the end. pad_sequences truncates from the
# front by default, which would shift every token of an over-long context and leave
# the answer positions pointing at the wrong tokens.
train_context_sequences = training_data['context_ids']
train_context_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    train_context_sequences, maxlen=max_contex_length,
    padding='post', truncating='post')

# Questions carry no position labels, so their truncation behaviour is left as it was.
train_question_sequences = training_data['question_ids']
train_question_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    train_question_sequences, maxlen=max_question_length, padding='post')

# --- Training targets ----------------------------------------------------------
# Each label is read as a (start, end) pair.
# Assumes these are token positions into `context_ids` - TODO confirm.
train_Answer = training_data['label_idx']
train_Ans_start = [span[0] for span in train_Answer]
train_Ans_end = [span[1] for span in train_Answer]
train_Ans_start_AA = np.array(train_Ans_start)
train_Ans_end_AA = np.array(train_Ans_end)

# An answer that starts or ends beyond the truncated context can no longer be
# located in the model input, so those examples are left out of the arrays that
# are fed to the model (the un-suffixed variables above stay unfiltered).
train_in_window = (
    (train_Ans_start_AA >= 0) & (train_Ans_start_AA < max_contex_length)
    & (train_Ans_end_AA >= 0) & (train_Ans_end_AA < max_contex_length)
)
train_context_sequences_AA = np.array(train_context_sequences)[train_in_window]
train_question_sequences_AA = np.array(train_question_sequences)[train_in_window]
train_Ans_start_AA = train_Ans_start_AA[train_in_window]
train_Ans_end_AA = train_Ans_end_AA[train_in_window]

In [14]:
# --- Validation inputs ---------------------------------------------------------
# Contexts are padded AND truncated at the end. pad_sequences truncates from the
# front by default, which would shift every token of an over-long context and leave
# the answer positions pointing at the wrong tokens.
val_context_sequences = Val_data['context_ids']
val_context_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    val_context_sequences, maxlen=max_contex_length,
    padding='post', truncating='post')

# Questions carry no position labels, so their truncation behaviour is left as it was.
val_question_sequences = Val_data['question_ids']
val_question_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    val_question_sequences, maxlen=max_question_length, padding='post')

# --- Validation targets --------------------------------------------------------
# Each label is read as a (start, end) pair.
# Assumes these are token positions into `context_ids` - TODO confirm.
val_Answer = Val_data['label_idx']
val_Ans_start = [span[0] for span in val_Answer]
val_Ans_end = [span[1] for span in val_Answer]
val_Ans_start_AA = np.array(val_Ans_start)
val_Ans_end_AA = np.array(val_Ans_end)

# An answer that starts or ends beyond the truncated context can no longer be
# located in the model input, so those examples are left out of the arrays that
# are fed to the model (the un-suffixed variables above stay unfiltered).
val_in_window = (
    (val_Ans_start_AA >= 0) & (val_Ans_start_AA < max_contex_length)
    & (val_Ans_end_AA >= 0) & (val_Ans_end_AA < max_contex_length)
)
val_context_sequences_AA = np.array(val_context_sequences)[val_in_window]
val_question_sequences_AA = np.array(val_question_sequences)[val_in_window]
val_Ans_start_AA = val_Ans_start_AA[val_in_window]
val_Ans_end_AA = val_Ans_end_AA[val_in_window]

In [15]:
# Sanity check: shape of the first padded question sequence.
np.shape(train_question_sequences[0])

(30,)

In [16]:
# Display the array of answer start indices (first element of each label pair).
train_Ans_start_AA

array([43,  9,  1, ..., 46, 10, 82])

In [17]:
#MODEL
context_input = Input(shape=(None,), dtype='int32', name='context_input')
question_input = Input(shape=(None,), dtype='int32', name='question_input')
embedding_layer = Embedding(input_dim=len(word2idx), output_dim=100, 
                            weights=[word_embeddings], trainable=False, mask_zero=True)
context_embedded = embedding_layer(context_input)
question_embedded = embedding_layer(question_input)
lstm_layer1 = Bidirectional(LSTM(256, return_sequences=True))
#lstm_layer2 = Bidirectional(LSTM(256, return_sequences=True))
context_output = lstm_layer1(context_embedded)
question_output = lstm_layer1(question_embedded)
# Multi-head attention layer
attention_output = Attention()([context_output, question_output])
concat_output = Concatenate(axis=-1)([context_output, attention_output])

# Dropout layer for regularization
dropout_layer = Dropout(0.2)(concat_output)
#dense1 = Dense(1024, activation='softmax', name='dense1')(dropout_layer)
#dense2 = Dense(1024, activation='softmax', name='start_output')(dense1)
# Output layers for start and end position prediction
start_output = Dense(1, activation='softmax', name='start_output')(dropout_layer)
end_output = Dense(1, activation='softmax', name='end_output')(dropout_layer)
model = Model(inputs=[context_input, question_input], outputs=[start_output, end_output])
model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError(), metrics=['accuracy'])

In [18]:
# Print the layer-by-layer overview of the model (output shapes, parameter counts).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 context_input (InputLayer)     [(None, None)]       0           []                               
                                                                                                  
 question_input (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 100)    9420000     ['context_input[0][0]',          
                                                                  'question_input[0][0]']         
                                                                                                  
 bidirectional (Bidirectional)  (None, None, 512)    731136      ['embedding[0][0]',          

In [19]:
# Checkpoint callback: writes the full model (not just weights) to 'model.h5',
# overwriting it only when the monitored quantity improves. No `monitor` is given,
# so the Keras default is used.
checkpoint = ModelCheckpoint(
    filepath='model.h5',
    save_best_only=True,
    save_weights_only=False,
)

In [20]:
# Train on (context, question) -> (answer start, answer end) and evaluate on the
# held-out split after every epoch. Epoch count and batch size come from the
# configuration cell instead of being repeated here as literals.
history = model.fit(
    x=[train_context_sequences_AA, train_question_sequences_AA],
    y=[train_Ans_start_AA, train_Ans_end_AA],
    validation_data=(
        [val_context_sequences_AA, val_question_sequences_AA],
        [val_Ans_start_AA, val_Ans_end_AA],
    ),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[checkpoint],  # saves the best model seen so far
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [36]:
# Save the current in-memory model to 'final_model.h5'.
# NOTE(review): this cell's execution count (36) is out of sequence with its
# neighbours - re-run the notebook top to bottom before relying on this file.
model.save('final_model.h5')

In [22]:
# Persist the per-epoch loss/metric values recorded by fit().
# Written relative to the working directory, like every other file this notebook
# reads or writes; the previous '/trainHistoryDict' targeted the filesystem root.
with open('./trainHistoryDict', 'wb') as history_file:
    pickle.dump(history.history, history_file)