<a href="https://colab.research.google.com/github/Rehman047/Question-Answering-System--Pytorch-/blob/main/QA_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
import torch

In [17]:
# Load the question/answer pairs from a CSV expected in the working directory,
# then preview the first rows (columns used later: 'question' and 'answer').
df=pd.read_csv('100_Unique_QA_Dataset.csv')
df.head()

Unnamed: 0,question,answer
0,What is the capital of France?,Paris
1,What is the capital of Germany?,Berlin
2,Who wrote 'To Kill a Mockingbird'?,Harper-Lee
3,What is the largest planet in our solar system?,Jupiter
4,What is the boiling point of water in Celsius?,100


In [18]:
def tokenize(text):
  """Lower-case `text`, drop '?' and apostrophes, and split on whitespace.

  Returns a list of tokens; an empty or whitespace-only string gives [].
  Other punctuation (e.g. hyphens) is left inside the tokens.
  """
  cleaned=text.lower()
  # Remove the only two characters this tokenizer strips.
  for unwanted in ('?',"'"):
    cleaned=cleaned.replace(unwanted,'')
  return cleaned.split()

In [19]:
# Token -> integer index. Index 0 is reserved for the 'UNV' placeholder that
# text_to_indices substitutes for words missing from the vocabulary.
vocab={'UNV':0}
def build_vocab(row):
  """Add every unseen token from a row's question and answer to `vocab`.

  `row` must support row['question'] and row['answer'] returning strings.
  Mutates the module-level `vocab` in place and returns None; new tokens
  receive the next free index (the current size of `vocab`).
  """
  for token in tokenize(row['question'])+tokenize(row['answer']):
    if token not in vocab:
      vocab[token]=len(vocab)

In [20]:
# Populate `vocab` from every row. build_vocab returns nothing, so the trailing
# semicolon suppresses the all-None Series the notebook would otherwise display.
df.apply(build_vocab,axis=1);


Unnamed: 0,0
0,
1,
2,
3,
4,
...,...
85,
86,
87,
88,


In [21]:
# Number of distinct tokens collected, counting the 'UNV' placeholder at index 0.
len(vocab)

324

In [22]:
def text_to_indices(text,vocab):
  """Convert `text` into a list of vocabulary indices.

  The text is tokenized with `tokenize`; any token absent from `vocab` is
  mapped to the index stored under the 'UNV' key.
  """
  return [
    vocab[token] if token in vocab else vocab['UNV']
    for token in tokenize(text)
  ]

In [23]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [30]:
class Data(Dataset):
  """Dataset view over a question/answer DataFrame.

  Each item is a pair of tensors: the vocabulary indices of the row's
  question and the vocabulary indices of the row's answer.
  """
  def __init__(self,df,vocab):
    # Keep references only; nothing is tokenized until an item is requested.
    self.df=df
    self.vocab=vocab
  def __len__(self):
    """Number of rows in the wrapped DataFrame."""
    return len(self.df)
  def __getitem__(self,index):
    """Return (question_indices, answer_indices) for the row at position `index`."""
    row=self.df.iloc[index]
    question=torch.tensor(text_to_indices(row['question'],self.vocab))
    answer=torch.tensor(text_to_indices(row['answer'],self.vocab))
    return question,answer

In [31]:
# Wrap the DataFrame and vocabulary so rows can be served as index tensors.
data=Data(df,vocab)

In [32]:
# Sanity check: the first sample as a (question indices, answer indices) pair.
data[0]

(tensor([1, 2, 3, 4, 5, 6]), tensor([7]))

In [119]:
# One question/answer pair per step, reshuffled each epoch.
# NOTE(review): batch_size=1 is presumably required because questions have
# different token counts and no padding/collate_fn is provided — confirm.
loader=DataLoader(data,batch_size=1,shuffle=True)

In [120]:
import torch.nn as nn

In [137]:
class Model(nn.Module):
  """Embedding -> single-layer RNN -> linear classifier over the vocabulary.

  Args:
    vocab_size: number of tokens; sizes both the embedding table and the
      output layer, so logits always cover exactly the vocabulary.
    embedding_dim: width of each token embedding (default 50).
    hidden_size: width of the RNN hidden state (default 64).
  """
  def __init__(self,vocab_size,embedding_dim=50,hidden_size=64):
    super().__init__()
    self.embedding=nn.Embedding(vocab_size,embedding_dim=embedding_dim)
    self.rnn=nn.RNN(embedding_dim,hidden_size,batch_first=True)
    # The output width follows vocab_size instead of a hard-coded 324, so the
    # model stays valid when the dataset (and therefore the vocabulary) changes.
    self.output=nn.Linear(hidden_size,vocab_size)
  def forward(self,x):
    """Map a (batch, seq_len) tensor of token indices to (batch, vocab_size) logits."""
    embedded=self.embedding(x)
    # Only the final hidden state is used; it has a leading num_layers axis of
    # size 1, which squeeze(0) removes.
    _,hidden=self.rnn(embedded)
    return self.output(hidden.squeeze(0))

In [138]:
# Network sized to the current vocabulary.
model=Model(vocab_size=len(vocab))
# Cross-entropy between the vocabulary logits and the target token index.
criterion=nn.CrossEntropyLoss()
# RMSprop over all model parameters with a learning rate of 0.001.
optimizer=torch.optim.RMSprop(params=model.parameters(),lr=0.001)

In [141]:
# Number of full passes over the dataset performed by the training loop.
epochs=50

In [143]:
# Train for `epochs` passes over `loader`, printing the mean per-sample loss
# after each pass.
for epoch in range(epochs):
  epoch_loss=0
  samples_seen=0
  for que,ans in loader:
    y_pred=model(que)
    # ans has shape (batch, answer_len); with batch_size=1, ans[0] is the
    # answer's token indices. The loss call requires as many targets as rows
    # in y_pred, so this assumes single-token answers — TODO confirm for the
    # whole dataset.
    loss=criterion(y_pred,ans[0])
    epoch_loss+=loss.item()*que.size(0)
    samples_seen+=que.size(0)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  # Average over the samples actually processed rather than a hard-coded 100,
  # so the reported loss is correct for any dataset size.
  epoch_loss=epoch_loss/max(samples_seen,1)
  print(f'Epoch {epoch},loss:{epoch_loss}')




Epoch 0,loss:2.2649759834791893e-07
Epoch 1,loss:1.8954273414806266e-07
Epoch 2,loss:1.3828275577054683e-07
Epoch 3,loss:1.1086462670562014e-07
Epoch 4,loss:9.536742084037541e-08
Epoch 5,loss:7.629393863339829e-08
Epoch 6,loss:4.649161915892819e-08
Epoch 7,loss:4.529952661869174e-08
Epoch 8,loss:3.457069176704408e-08
Epoch 9,loss:2.980232032712138e-08
Epoch 10,loss:3.933906278064114e-08
Epoch 11,loss:2.5033948887198675e-08
Epoch 12,loss:4.1723247932168307e-08
Epoch 13,loss:1.7881392224694537e-08
Epoch 14,loss:2.7418134962431397e-08
Epoch 15,loss:1.549720657578746e-08
Epoch 16,loss:2.1457670698055155e-08
Epoch 17,loss:2.861022714739647e-08
Epoch 18,loss:1.788139208258599e-08
Epoch 19,loss:2.3841856204853684e-08
Epoch 20,loss:2.622604185376076e-08
Epoch 21,loss:1.9073485191256622e-08
Epoch 22,loss:2.264976330934587e-08
Epoch 23,loss:1.549720657578746e-08
Epoch 24,loss:2.6226041995869308e-08
Epoch 25,loss:2.1457670555946607e-08
Epoch 26,loss:2.6226041995869308e-08
Epoch 27,loss:1.78813923

In [148]:
# Ask for a question, run it through the trained model, and print the
# predicted answer token (or "I don't know" when the model is not confident).
question=input('Question: ')
indices=text_to_indices(question,vocab)
if not indices:
  # A blank or '?'-only question yields no tokens. An empty tensor would be
  # float-typed and zero-length, which the embedding/RNN reject, so answer
  # directly instead of crashing.
  print("Model: I don't know")
else:
  # Shape (1, seq_len): a batch holding the single question.
  question=torch.tensor(indices).reshape(1,len(indices))
  with torch.no_grad():
    y_pred=model(question)
  probs=torch.softmax(y_pred,dim=-1)
  # Treat low-confidence predictions as unknown rather than guessing.
  if torch.max(probs)<0.5:
    print("Model: I don't know")
  else:
    pred_class=torch.argmax(probs,dim=-1).item()
    # Reverse lookup: find the token whose vocabulary index is the predicted class.
    for k,v in vocab.items():
      if v==pred_class:
        ans=k
        break
    print('Model:',ans)

Question: Where is paris
Model: I don't know
