In [None]:
# importing necessary libraries

import tensorflow as tf
import pandas as pd
import numpy as np
import json
from tensorflow.keras.layers import Input,LSTM,Bidirectional,Dense,Dropout,Flatten,Activation
from tensorflow.keras.models import Model
from random import shuffle

In [None]:
# Configuring the TPU: connect to the TPU runtime and build a distribution
# strategy. Later cells create and compile the model inside
# `tpu_strategy.scope()`.

tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [None]:
# Mount Google Drive at /content/drive; the trained weights are written to a
# folder under /content/drive/MyDrive by the last cell of the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Loading the training data. The cells below index `k` by position and read the
# 'correct_answer', 'distractor1', 'distractor2', 'distractor3' and 'question'
# keys of every entry, so `k` is expected to be a list of dicts with those keys.

with open('train.json','rb') as f:
  k=json.load(f)

In [None]:
# Parallel lists with one entry per training example:
#   q -> question text, a -> correct answer, b / c / d -> the three distractors

a, b, c, d, q = [], [], [], [], []

In [None]:
# Pull each field of the training records into its own parallel list.
# The lists are rebuilt from scratch instead of being appended to, so this cell
# is idempotent: running it a second time does not duplicate every entry.

a = [record['correct_answer'] for record in k]
b = [record['distractor1'] for record in k]
c = [record['distractor2'] for record in k]
d = [record['distractor3'] for record in k]
q = [record['question'] for record in k]

In [None]:
!pip install transformers

In [None]:
from transformers import BertTokenizerFast

In [None]:
# Initializing the tokenizer from the pretrained 'bert-base-uncased' checkpoint

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

In [None]:
# Build one options string per example: the correct answer and the three
# distractors in random order, separated by the literal marker ' <sep> '.

s = []
for i in range(len(k)):
    # gather the i-th entry of every answer list, then shuffle in place
    p = [column[i] for column in (a, b, c, d)]
    shuffle(p)
    s.append(' <sep> '.join(p))

In [None]:
# Register '<sep>' (the marker used between answer options) as an extra token
# in the tokenizer; the value shown below the cell is add_tokens' return value.
tokenizer.add_tokens(['<sep>'])

1

In [None]:
# Finding the encodings: tokenize each (question, options string) pair with
# padding enabled.
# NOTE: `q` must still be the list of question strings here; a later cell
# rebinds `q` to the input-id array, so this cell cannot be re-run after it.

train_encodings=tokenizer(q,s,padding=True)

In [None]:
# Character span (start, end-exclusive) of the correct answer inside each
# options string. The span is located by matching a whole option instead of by
# substring search, so an answer such as 'rain' is not mistakenly found inside
# an earlier distractor such as 'acid rain' (which would yield a wrong label).
al=[]
for options_text,answer in zip(s,a):
    offset=0
    for option in options_text.split(' <sep> '):
        if option==answer:
            break
        # skip this option plus the separator that follows it
        offset+=len(option)+len(' <sep> ')
    else:
        # No option matched exactly (e.g. the answer itself contains the
        # separator): fall back to the plain substring search.
        offset=options_text.index(answer)
    al.append((offset,offset+len(answer)))

In [None]:
# function to find the token positions for the question-answering model

def add_token_positions(encodings, al):
    """Map the character span of each answer to start/end token indices.

    Args:
        encodings: tokenizer output for the (question, options) pairs; it must
            provide ``char_to_token(batch_index, char_index, sequence_index)``.
        al: one ``(start_char, end_char)`` pair per example, with ``end_char``
            exclusive. Offsets refer to the second sequence of each pair
            (sequence index 1).

    Returns:
        ``(start_positions, end_positions)``: two lists holding one token index
        per entry of ``al``.
    """
    start_positions = []
    end_positions = []
    # Iterate over the spans that were passed in (not over the notebook-level
    # training list), so the function works for any dataset.
    for i, (start_char, end_char) in enumerate(al):
        start_token = encodings.char_to_token(i, start_char, 1)
        # end_char is exclusive, so the last answer character is end_char - 1
        end_token = encodings.char_to_token(i, end_char - 1, 1)
        # char_to_token gave no token for this character: use the fallback index.
        # NOTE(review): model_max_length - 1 may be larger than the padded
        # sequence length the model is built with (121 in this notebook) --
        # confirm this fallback is a valid label.
        if start_token is None:
            start_token = tokenizer.model_max_length - 1
        if end_token is None:
            end_token = tokenizer.model_max_length - 1
        start_positions.append(start_token)
        end_positions.append(end_token)
    return start_positions, end_positions

In [None]:
# finding the start and end token by calling above function

start,end=add_token_positions(train_encodings,al)

In [None]:
from transformers import TFBertModel

In [None]:
# Transfer learning: load the pretrained 'bert-base-uncased' encoder inside the
# TPU strategy scope. It is used as the backbone of the model built below.

with tpu_strategy.scope():
    model=TFBertModel.from_pretrained('bert-base-uncased')

In [None]:
# Resizing token embeddings to the tokenizer's current size, which grew when
# '<sep>' was registered with tokenizer.add_tokens.

with tpu_strategy.scope():
    model.resize_token_embeddings(len(tokenizer))

In [None]:
from tensorflow.keras.layers import Input,LSTM,Bidirectional,Dense,Dropout,Flatten,Activation
from tensorflow.keras.models import Model

In [None]:
import keras

In [None]:
# Converting the encodings to Numpy arrays (the three inputs passed to m.fit).
# NOTE: `q` is rebound here from the list of question strings to the input-id
# array, and `q1` is rebound again by the validation cell further down.

q=np.array(train_encodings['input_ids'])
q1=np.array(train_encodings['attention_mask'])
q2=np.array(train_encodings['token_type_ids'])

In [None]:
# Converting the start/end token positions to Numpy arrays; they are the two
# training targets passed to m.fit.

train_start=np.array(start)
train_end=np.array(end)

In [None]:
# Creating the architecture for our model: BERT followed by two heads that each
# produce a softmax over token positions (answer start and answer end).

# Input width is taken from the padded training encodings instead of being
# hard-coded, so it always matches the arrays passed to m.fit.
seq_len=q.shape[1]

with tpu_strategy.scope():
  inp1=Input((seq_len,),dtype='int32')  # input ids
  inp2=Input((seq_len,),dtype='int32')  # attention mask
  inp3=Input((seq_len,),dtype='int32')  # token type ids
  # [0] selects the first output of the BERT model (one vector per token)
  emb=model(inp1,attention_mask=inp2,token_type_ids=inp3)[0]
  # start head: one score per token, flattened, then softmax over positions
  s1=Dense(1,use_bias=False)(emb)
  s1=Flatten()(s1)
  # string activation resolves inside tensorflow.keras, the same package the
  # layers and Model come from (no dependency on the standalone keras import)
  s1=Activation('softmax')(s1)
  # end head: same structure with its own weights
  s2=Dense(1,use_bias=False)(emb)
  s2=Flatten()(s2)
  s2=Activation('softmax')(s2)
  m=Model(inputs=[inp1,inp2,inp3],outputs=[s1,s2])

In [None]:
# Compiling the model with SGD. The optimizer is taken from tf.keras so that it
# comes from the same Keras implementation as the tensorflow.keras Model.
# One sparse categorical cross-entropy loss per output (start and end position).

with tpu_strategy.scope():
  m.compile(
      optimizer=tf.keras.optimizers.SGD(learning_rate=2e-5),
      loss=['sparse_categorical_crossentropy','sparse_categorical_crossentropy'],
      metrics=['accuracy'],
      steps_per_execution=64,
  )

In [None]:
# Training our model: inputs are [input ids, attention mask, token type ids],
# targets are the start and end token positions of the correct answer.
# NOTE: `q1` is rebound to a list of question strings by the validation cell
# further down, so this cell must not be re-run after that cell.

m.fit([q,q1,q2],[train_start,train_end],epochs=10,batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f97a1488450>

In [None]:
# Re-compiling with a different optimizer and learning rate (Adam, 5e-5) to
# continue training. The optimizer is taken from tf.keras so that it comes from
# the same Keras implementation as the tensorflow.keras Model.

with tpu_strategy.scope():
  m.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
      loss=['sparse_categorical_crossentropy','sparse_categorical_crossentropy'],
      metrics=['accuracy'],
      steps_per_execution=64,
  )

In [None]:
# Training with the newly compiled optimizer, on the same inputs and targets.
# NOTE: `q1` is rebound to a list of question strings by the validation cell
# further down, so this cell must not be re-run after that cell.

m.fit([q,q1,q2],[train_start,train_end],epochs=7,batch_size=64)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<tensorflow.python.keras.callbacks.History at 0x7f96ab877790>

In [None]:
# Function to find the answer

def find_answer(context,question,max_length=121):
    """Predict the answer text for a single question.

    Args:
        context: the answer options as one string (this notebook joins them
            with ' <sep> ').
        question: the question text.
        max_length: length the token sequence is padded to. It has to equal
            the input width the model ``m`` was built with, which is 121 in
            this notebook and therefore the default.

    Returns:
        The decoded text of the tokens from the most probable start position
        to the most probable end position (inclusive). When the predicted end
        lies before the start, the slice of token ids is empty.
    """
    enc = tokenizer(question, context, padding='max_length', max_length=max_length)
    # wrap each field in a list to add a leading batch dimension of 1
    input_ids = np.array([enc['input_ids']])
    attention_mask = np.array([enc['attention_mask']])
    token_type_ids = np.array([enc['token_type_ids']])
    outputs = m([input_ids, attention_mask, token_type_ids])
    # outputs[0] / outputs[1] are the start / end distributions over positions
    start = np.argmax(outputs[0].numpy()[0])
    end = np.argmax(outputs[1].numpy()[0])
    return tokenizer.decode(input_ids[0][start:end+1])

In [None]:
# Validation data: loaded into `k1`; the next cell reads the same keys from its
# entries as the training cells read from `k`.

with open('valid.json','rb') as f:
  k1=json.load(f)

In [None]:
# Preparing the validation data: split every record into its fields and build
# the shuffled, ' <sep> '-joined options string for each question.
# NOTE: this rebinds `q1` (until now the attention-mask array passed to m.fit)
# and `s1`.

a1 = [record['correct_answer'] for record in k1]
b1 = [record['distractor1'] for record in k1]
c1 = [record['distractor2'] for record in k1]
d1 = [record['distractor3'] for record in k1]
q1 = [record['question'] for record in k1]

s1 = []
for i in range(len(k1)):
    # gather the i-th entry of every answer list, then shuffle in place
    p = [column[i] for column in (a1, b1, c1, d1)]
    shuffle(p)
    s1.append(' <sep> '.join(p))

In [None]:
# Index of the validation example inspected in the cells below

i=75

In [None]:
# Show the validation question at index i

q1[i]

'What is caused by the reaction of nonmetal oxides with water in the atmosphere?'

In [None]:
# The shuffled answer options of that question, joined with ' <sep> '

s1[i]

'ozone rain <sep> yellow rain <sep> carbon rain <sep> acid rain'

In [None]:
# Correct answer for that question (ground truth from the validation file)
a1[i]

'acid rain'

In [None]:
# Predicted answer: the options string is passed as the context, the question
# text as the question
find_answer(s1[i],q1[i])

'acid rain'

We can see that the predicted answer matches the correct answer for this question.

In [None]:
# Saving the model weights to the mounted Google Drive folder to reuse later.
# NOTE(review): experimental_io_device='/job:localhost' presumably makes the
# checkpoint files be written from the local host rather than from the TPU
# worker -- confirm against the SaveOptions documentation.

save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
m.save_weights('/content/drive/MyDrive/mcq model/model_weights', options=save_locally)