In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import QuestionAnsweringPipeline

sns.set_palette(palette='Paired')
sns.set_style('whitegrid')

In [17]:
# Albert
albert_tokenizer = AutoTokenizer.from_pretrained("Firat/albert-base-v2-finetuned-squad")
albert = AutoModelForQuestionAnswering.from_pretrained("Firat/albert-base-v2-finetuned-squad")
albert_pipeline = QuestionAnsweringPipeline(model=albert, tokenizer=albert_tokenizer)

In [19]:
# Distilbert
distilbert_tokenizer = AutoTokenizer.from_pretrained("Firat/distilbert-base-uncased-finetuned-squad")
distilbert = AutoModelForQuestionAnswering.from_pretrained("Firat/distilbert-base-uncased-finetuned-squad")
distilbert_pipeline = QuestionAnsweringPipeline(model=distilbert, tokenizer=distilbert_tokenizer)

In [18]:
# Roberta
roberta_tokenizer = AutoTokenizer.from_pretrained("Firat/roberta-base-finetuned-squad")
roberta = AutoModelForQuestionAnswering.from_pretrained("Firat/roberta-base-finetuned-squad")
roberta_pipeline = QuestionAnsweringPipeline(model=roberta, tokenizer=roberta_tokenizer)

In [5]:
# Load Questions
data = pd.read_excel('./questions_collection.xlsx')
data.columns = [col.lower() for col in data.columns]
data.head(2)

Unnamed: 0,id,topic,difficulty (1-3),text,question 1,answer 1,question 2,answer 2,question 3,answer 3
0,1.0,history,1.0,"World War II or the Second World War, often ab...",When took the second World War place?,1939 to 1945,How many personnel was involved?,more than 100 million,Who were the majority of fatalities?,civilians
1,2.0,health,3.0,Occupational therapy (OT) is a profession with...,Occupational therapy is profession within whic...,healthcare,By who is it performed?,occupational therapists and occupational thera...,What are common occupational therapy inerventi...,"helping children with disabilities, injury reh..."


In [7]:
answer_tokens = roberta_pipeline(question=data.loc[3, 'question 1'], context=data.loc[3, 'text'])
print("Real Answer:" , data.loc[3, 'answer 1'])
print("Predicted Answer:" , answer_tokens["answer"])

Real Answer: Monkey D. Ruffy
Predicted Answer: Monkey D. Luffy


In [30]:
pipelines = [albert_pipeline,distilbert_pipeline,roberta_pipeline]
models = ["albert","distilbert","roberta"]

In [90]:
pred_data = data.iloc[:,1:4]

In [91]:
for k in range(3):
    question = "question " + str(k+1)
    answer = "answer " + str(k+1)
    for j in range(len(pipelines)):
        pred_answer = []
        score_answer = []
        for i in range(len(data)):
            answer_tokens = pipelines[j](question=data.loc[i, question], context=data.loc[i, 'text'])
            pred_answer.append(answer_tokens["answer"])
            score_answer.append(answer_tokens["score"])
        pred_data[question] = data[question]
        pred_data[answer] = data[answer]
        pred_data[models[j]+" pred "+str(k+1)] = pred_answer
        #pred_data[models[j]+" score "+str(k+1)] = score_answer

In [93]:
pd.options.display.width = 100
pd.options.display.max_columns = 100
pred_data

Unnamed: 0,topic,difficulty (1-3),text,question 1,answer 1,albert pred 1,distilbert pred 1,roberta pred 1,question 2,answer 2,albert pred 2,distilbert pred 2,roberta pred 2,question 3,answer 3,albert pred 3,distilbert pred 3,roberta pred 3
0,history,1.0,"World War II or the Second World War, often abbreviated as WWII or WW2, was a global war that la...",When took the second World War place?,1939 to 1945,1939 to 1945.,1939 to 1945,1945,How many personnel was involved?,more than 100 million,more than 100 million,more than 100 million,more than 100 million,Who were the majority of fatalities?,civilians,civilians.,civilians,civilians
1,health,3.0,Occupational therapy (OT) is a profession within healthcare. It is the use of assessment and int...,Occupational therapy is profession within which field?,healthcare,healthcare.,healthcare,healthcare,By who is it performed?,occupational therapists and occupational therapy assistants,occupational therapists and occupational therapy assistants (OTA).,occupational therapists and occupational therapy assistants,occupational therapists and occupational therapy assistants (OTA).,What are common occupational therapy inerventions?,"helping children with disabilities, injury rehabilitation and providing supports for older adults","helping children with disabilities to participate fully in school and social situations,","helping children with disabilities to participate fully in school and social situations, injury ...",helping children with disabilities to participate fully in school and social situations
2,tech,2.0,"In deep learning, each level learns to transform its input data into a slightly more abstract an...",What can a raw input may be?,a matrix of pixels,a matrix of pixels;,a matrix of pixels,a matrix of pixels,What does the second layer?,compose and encode arrangements,may compose and encode arrangements of edges;,compose and encode arrangements of edges,compose and encode arrangements of edges,What network is it about?,,"deep learning,",image recognition application,deep learning
3,offtopic,1.0,"The series focuses on Monkey D. Luffy, a young man made of rubber, whom, inspired by his childho...",On who focuses the series?,Monkey D. Ruffy,"Monkey D. Luffy,",Monkey D. Luffy,Monkey D. Luffy,What is the name of the pirate crew?,Straw Hat Pirates,the Straw Hat Pirates.,Straw Hat Pirates,Straw Hat Pirates,How is the mythical treasure called?,One Piece,"the One Piece,",the One Piece,One Piece
4,news,2.0,"He told a news conference Mr Putin would pay a ""serious and dear price"" for invading, but indica...",What would Putin pay for invading?,serious and dear price,"""serious and dear price""",serious and dear price,"a ""serious and dear price",Who warned that the comments could further destabilise the situation?,The Kremlin,The Kremlin,The Kremlin,The Kremlin,How many troops are near of the border?,100.0,100000,100000,100000
5,tech,2.0,"Bitcoin is a decentralized digital currency that you can buy, sell and exchange directly, withou...",What is Bitcoin?,decentralized digital currency,a decentralized digital currency,a decentralized digital currency,"a decentralized digital currency that you can buy, sell and exchange directly",Who is the creator of bitcoin?,Satoshi Nakamoto,"Satoshi Nakamoto,",Satoshi Nakamoto,Satoshi Nakamoto,Why Bitcoins aren't backed by the government?,Because there’s nothing to guarantee their value.,"their decentralized nature,",nothing to guarantee their value besides the proof baked in the heart of the system,there’s nothing to guarantee their value
6,history,1.0,"Inhabited since at least 1800 BC, Varanasi is well known for being among the oldest living citie...",What is Varanasi well known as?,for being among the oldest living cities on Earth,"among the oldest living cities on Earth,",oldest living cities on Earth,among the oldest living cities on Earth,How many hindus live in Varanasi?,1.2 billion,1.2 billion,1.2 billion,1.2 billion,How many stone ghat steps does the city have?,88.0,88,88,88
7,bibliography,1.0,"After Will Smith met Jeff Townes at age 16, the duo launched a highly successful rap career as D...",How long starred Will Smith on the sitcom The Fresh Prince of Bel-Air ?,six seasons,"six seasons,",six seasons,six seasons,Where was Will Smith born?,in Philadelphia,"Philadelphia, Pennsylvania,","Philadelphia, Pennsylvania","Philadelphia, Pennsylvania",For what Will Smith earned acclaim?,Concussion,Concussion,Concussion,Concussion
8,product description,1.0,"The Apple MacBook Air is a laptop computer with a sleek design and a thin, light aluminum body. ...",How much does the computer weigh?,3 pounds,3 pounds,3 pounds,3 pounds,Why does the computer have no moving parts?,It has a fanless design,"fanless design,",fanless design,fanless design,How much percent can i charge in 30 minutes?,0.5,50%,50%,50%
9,tech,3.0,"With worldwide unique measuring systems the Swiss Institute for Forest, Snow and Landscapes (WSL...",In what is WSL pioneer?,research of bedload transport in flowing waters,bedload transport in flowing waters.,"Swiss Institute for Forest, Snow and Landscapes",research of bedload transport in flowing waters,Since when are measurement signals recorded?,2018.0,2018.,2018,2018,What are the names of the three measuring systems?,"Miniplate Accelorometer (MPA), Square Pipe System (SPG) and Swiss Plate Geophones (SPS)","Miniplate Accelorometer (MPA), Square Pipe System","Miniplate Accelorometer (MPA), Square Pipe System","Miniplate Accelorometer (MPA),"


In [95]:
df.to_csv('df_pred.csv',index=False)

NameError: name 'df' is not defined