In [21]:
# Importing the necessary libraries

import json
import re
import string
from collections import Counter

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import torch
from nltk.corpus import stopwords
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AdamW, BertForQuestionAnswering, BertConfig

In [18]:
# Restore the serialised question-answering model onto the CPU.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only point MODEL_PATH at checkpoints from a trusted source.
MODEL_PATH = '/kaggle/input/finetunedmodel/finetunedmodel.h5'
cpu_device = torch.device('cpu')
model = torch.load(MODEL_PATH, map_location=cpu_device)

# Switch to inference mode (the dropout layers become no-ops); kept as the last
# expression so the cell still displays the module structure.
model.eval()

BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_

In [26]:
# Tokenizer for the 'bert-base-uncased' checkpoint. return_answer uses it both to
# encode (question, paragraph) pairs and to turn the predicted token ids back into text.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
# stop_words = set(stopwords.words('english'))

def preprocess(message):
    """Normalise answer text before token-level comparison.

    Steps, in order: lowercase, delete every ASCII punctuation character,
    replace the standalone articles "a", "an" and "the" with a space, and
    collapse all runs of whitespace into single spaces.

    Args:
        message: Raw answer string.

    Returns:
        The normalised string (possibly empty).
    """
    lowered = message.lower()

    # Punctuation is deleted outright (not replaced by a space), so "potter's"
    # becomes "potters" while a detached "'" simply disappears.
    strip_punctuation = str.maketrans("", "", string.punctuation)
    unpunctuated = lowered.translate(strip_punctuation)

    # Articles are matched as whole words only, e.g. "theme" is left untouched.
    without_articles = re.sub(r'\b(a|an|the)\b', " ", unpunctuated, flags=re.UNICODE)

    # split()/join squeezes the gaps left behind and trims both ends.
    return " ".join(without_articles.split())
#     message_temp = []
#     for i in message.split():
#         if i not in stop_words:
#             message_temp.append(i)
#     return "".join(message_temp)

def return_answer(paragraph, question):
    """Extract the model's answer to ``question`` from ``paragraph``.

    The question and paragraph are encoded as one sequence pair, passed through
    the module-level ``model``, and the tokens between the highest-scoring
    start and end positions are decoded back into a string.

    Args:
        paragraph: Context text that is searched for the answer.
        question: Question text.

    Returns:
        The decoded answer span as produced by the tokenizer (lower-cased,
        with word pieces merged). Empty when the predicted end position is
        not after the predicted start position.
    """
    # Truncate only the paragraph, never the question, so that an over-long
    # context cannot exceed the model's position-embedding table.
    inputs = tokenizer.encode_plus(
        question,
        paragraph,
        return_tensors='pt',
        truncation='only_second',
        max_length=model.config.max_position_embeddings,
    )

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    # outputs[0] / outputs[1] are the start / end scores; a single example is
    # encoded, so the best position is taken along the last (sequence) axis.
    start_idx = int(torch.argmax(outputs[0], dim=-1)[0])
    end_idx = int(torch.argmax(outputs[1], dim=-1)[0])

    # NOTE(review): the end index is treated as exclusive here. The recorded
    # notebook outputs mostly show complete answers with this convention, so it
    # presumably matches how the checkpoint was trained; confirm against the
    # fine-tuning code before switching to an inclusive end (end_idx + 1).
    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][start_idx:end_idx])
    return tokenizer.convert_tokens_to_string(tokens)

def calculate_f1(pred, y):
    """Token-level F1 score between a predicted and a reference answer.

    Both strings are normalised with ``preprocess`` and split on whitespace.
    The overlap is a multiset intersection: a token occurring k times in both
    answers contributes k matches, so identical answers always score 1.0 even
    when they contain repeated words.

    Args:
        pred: Predicted answer text.
        y: Reference (ground-truth) answer text.

    Returns:
        F1 as a float in [0.0, 1.0]. When either answer normalises to no
        tokens, the score is 1.0 if both are empty and 0.0 otherwise.
    """
    list_y = preprocess(y).split()
    list_pred = preprocess(pred).split()

    # Precision/recall are undefined for an empty side; treat "both empty" as
    # an exact match and "one empty" as a complete miss.
    if not list_y or not list_pred:
        return float(list_y == list_pred)

    # Counter & Counter keeps, per token, the smaller of the two counts.
    overlap = sum((Counter(list_y) & Counter(list_pred)).values())
    if overlap == 0:
        return 0.0

    precision = overlap / len(list_pred)
    recall = overlap / len(list_y)
    return (2 * precision * recall) / (precision + recall)

def predict_answer(context, question, true_answer):
    """Answer ``question`` from ``context`` and print a short report.

    Prints the question, the model's predicted answer, the reference answer
    and the token-level F1 between the two. Returns nothing.

    Args:
        context: Paragraph passed to ``return_answer`` as the text to search.
        question: Question text.
        true_answer: Reference answer used for scoring.
    """
    predicted_answer = return_answer(context, question)

    # Everything (including the score) is computed before anything is printed.
    report = (
        ("Question:", question),
        ("Predicted Answer:", predicted_answer),
        ("True Answer:", true_answer),
        ("F1:", calculate_f1(predicted_answer, true_answer)),
    )
    for label, value in report:
        print(label, value)

In [31]:
# Demo: questions about a short passage on Mount Olympus.
context = """ Mount Olympus is the highest mountain in Greece. It is part of the Olympus massif near 
              the Gulf of Thérmai of the Aegean Sea, located in the Olympus Range on the border between 
              Thessaly and Macedonia, between the regional units of Pieria and Larissa, about 80 km (50 mi) 
              southwest from Thessaloniki. Mount Olympus has 52 peaks and deep gorges. The highest peak, 
              Mytikas, meaning "nose", rises to 2917 metres (9,570 ft). It is one of the 
              highest peaks in Europe in terms of topographic prominence. """

# queries[i] is scored against answers[i].
queries = ["Where Olympus is near?", "How far away is Olympus from Thessaloniki?"]
answers = ["Gulf of Thérmai of the Aegean Sea", "80 km (50 mi)"]

# Print one report per question, separated by blank lines.
for question, answer in zip(queries, answers):
    print("\n")
    predict_answer(context, question, answer)



Question: Where Olympus is near?
Predicted Answer: gulf of therma
True Answer: Gulf of Thérmai of the Aegean Sea
F1: 0.4444444444444444


Question: How far away is Olympus from Thessaloniki?
Predicted Answer: 80 km ( 50 mi )
True Answer: 80 km (50 mi)
F1: 1.0


In [34]:
# Demo: questions about a passage on the Harry Potter book series.
context = """ Harry Potter is a series of seven fantasy novels written by British author, J. K. Rowling. The novels chronicle the lives of a young wizard, 
              Harry Potter, and his friends Hermione Granger and Ron Weasley, all of whom are students at Hogwarts School of Witchcraft and Wizardry. 
              The main story arc concerns Harry's struggle against Lord Voldemort, a dark wizard who intends to become immortal, overthrow the wizard 
              governing body known as the Ministry of Magic and subjugate all wizards and Muggles (non-magical people). Since the release of the first novel, 
              Harry Potter and the Philosopher's Stone, on 26 June 1997, the books have found immense popularity, positive reviews, and commercial success worldwide. 
              They have attracted a wide adult audience as well as younger readers and are often considered cornerstones of modern young adult literature.[2] 
              As of February 2018, the books have sold more than 500 million copies worldwide, making them the best-selling book series in history, and have been translated 
              into eighty languages.[3] The last four books consecutively set records as the fastest-selling books in history, with the final installment selling roughly 
              eleven million copies in the United States within twenty-four hours of its release.  """

# queries[i] is scored against answers[i].
queries = [
    "Who wrote Harry Potter's novels?",
    "Who are Harry Potter's friends?",
    "Which is the name of Harry Poter's first novel?",
    "When did the first novel release?",
]
answers = [
    "J. K. Rowling",
    "Hermione Granger and Ron Weasley",
    "Harry Potter and the Philosopher's Stone",
    "26 June 1997",
]

# Print one report per question, separated by blank lines.
for question, answer in zip(queries, answers):
    print("\n")
    predict_answer(context, question, answer)



Question: Who wrote Harry Potter's novels?
Predicted Answer: j. k. rowling
True Answer: J. K. Rowling
F1: 1.0


Question: Who are Harry Potter's friends?
Predicted Answer: hermione granger and ron weasley
True Answer: Hermione Granger and Ron Weasley
F1: 1.0


Question: Which is the name of Harry Poter's first novel?
Predicted Answer: harry potter and the philosopher ' s stone
True Answer: Harry Potter and the Philosopher's Stone
F1: 0.7272727272727272


Question: When did the first novel release?
Predicted Answer: 26 june 1997
True Answer: 26 June 1997
F1: 1.0
