In [1]:
# test reading data

import json

with open("/content/train/questions.json", "r") as json_data:
    questions_json = json.loads(json_data.read())
    json_data.close()

questions_json["questions"][0]

{'id': 'Q0001',
 'query_type': 'SINGLE_FACT',
 'question': {'string': 'What are the papers written by the person Wazir Muhammad?'},
 'paraphrased_question': {'string': 'Which papers did the author Wazir Muhammad write?'},
 'query': {'sparql': 'SELECT DISTINCT ?answer WHERE { ?answer <https://dblp.org/rdf/schema#authoredBy> <https://dblp.org/pid/211/3355> }'},
 'template_id': 'TC01',
 'entities': ['<https://dblp.org/pid/211/3355>'],
 'relations': ['<https://dblp.org/rdf/schema#authoredBy>'],
 'temporal': False,
 'held_out': False}

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install sparqlwrapper

Collecting sparqlwrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Collecting rdflib>=6.1.1 (from sparqlwrapper)
  Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m531.9/531.9 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting isodate<0.7.0,>=0.6.0 (from rdflib>=6.1.1->sparqlwrapper)
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: isodate, rdflib, sparqlwrapper
Successfully installed isodate-0.6.1 rdflib-7.0.0 sparqlwrapper-2.0.0


In [None]:
# test getting answer for each question query

from SPARQLWrapper import SPARQLWrapper, JSON

def get_sparql_query_answer(q):

  sparql = SPARQLWrapper("https://dblp-kg.ltdemos.informatik.uni-hamburg.de/sparql")

  sparql.setReturnFormat(JSON)

  question_query = q["query"]["sparql"]

  sparql.setQuery(question_query)

  try:
      ret = sparql.queryAndConvert()

      for r in ret["results"]["bindings"]:
          print(r)
  except Exception as e:
      print(e)

  return ret

In [4]:
# start preparing for QA pipeline
! pip install -U accelerate
! pip install -U transformers

Collecting accelerate
  Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.2/251.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.22.0
Collecting transformers
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.1-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.8/294.8 kB[0m [31m38.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m105.7 MB/s[0m eta 

In [2]:
# load answers dataset
with open("/content/train/answers.json", "r") as json_data:
    answers_json = json.loads(json_data.read())
    json_data.close()

answers_json["answers"][0]

{'id': 'Q0001',
 'answer': {'head': {'link': [], 'vars': ['answer']},
  'results': {'distinct': False,
   'ordered': True,
   'bindings': [{'answer': {'type': 'uri',
      'value': 'https://dblp.org/rec/journals/frai/HartYHLNMD20'}},
    {'answer': {'type': 'uri',
      'value': 'https://dblp.org/rec/journals/frai/MuhammadHNFJLD19'}},
    {'answer': {'type': 'uri',
      'value': 'https://dblp.org/rec/journals/fdata/HartNMLHD19'}},
    {'answer': {'type': 'uri',
      'value': 'https://dblp.org/rec/journals/fdata/NartowtHMLSD20'}},
    {'answer': {'type': 'uri',
      'value': 'https://dblp.org/rec/journals/cma/SabirSMAB17'}}]}}}

In [None]:
str(get_sparql_query_answer(questions_json["questions"][0]))

In [3]:
def prepare_dataset(input_questions_json, input_answers_json):
  qa_dataset = []
  for q in input_questions_json["questions"]:
    q_answer = str(q["query"]) + " [SEP] " + str(q["entities"])
    #q_answer = [a["answer"] for a in input_answers_json["answers"] if a["id"] == q["id"]][0]
    #q_context = str(get_sparql_query_answer(q))
    q_context = "[CLS] " + q["query_type"] + " [SEP] " + q["template_id"] + " [SEP] " + str(q["query"]) + " [SEP] " + str(q["entities"])
    qa_dataset_elem = {} # define a dictionary for the standard question
    qa_dataset_elem["id"] = q["id"]
    qa_dataset_elem["question"] = q["question"]["string"]
    qa_dataset_elem["context"] =  q_context
    qa_dataset_elem["answer"] = q_answer
    qa_dataset_elem["answer_start"] = len("[CLS] " + q["query_type"] + " [SEP] " + q["template_id"] + " [SEP] ")
    qa_dataset.append(qa_dataset_elem)
    qa_dataset_elem_p = {} # define a dictionary for the paraphrased question
    qa_dataset_elem_p["id"] = q["id"]
    qa_dataset_elem_p["question"] = q["paraphrased_question"]["string"]
    qa_dataset_elem_p["context"] =  q_context
    qa_dataset_elem_p["answer"] = q_answer
    qa_dataset_elem_p["answer_start"] = len("[CLS] " + q["query_type"] + " [SEP] " + q["template_id"] + " [SEP] ")
    qa_dataset.append(qa_dataset_elem_p)
    print("**** Processed "+ str(q["id"]) + " ****")

  print("Finished loading QA Dataset")
  return qa_dataset

In [4]:
# prepare train dataset
qa_train_data = prepare_dataset(questions_json, answers_json)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
**** Processed Q2002 ****
**** Processed Q2003 ****
**** Processed Q2004 ****
**** Processed Q2005 ****
**** Processed Q2006 ****
**** Processed Q2007 ****
**** Processed Q2008 ****
**** Processed Q2009 ****
**** Processed Q2010 ****
**** Processed Q2011 ****
**** Processed Q2012 ****
**** Processed Q2013 ****
**** Processed Q2014 ****
**** Processed Q2015 ****
**** Processed Q2016 ****
**** Processed Q2017 ****
**** Processed Q2018 ****
**** Processed Q2019 ****
**** Processed Q2020 ****
**** Processed Q2021 ****
**** Processed Q2022 ****
**** Processed Q2023 ****
**** Processed Q2024 ****
**** Processed Q2025 ****
**** Processed Q2026 ****
**** Processed Q2027 ****
**** Processed Q2028 ****
**** Processed Q2029 ****
**** Processed Q2030 ****
**** Processed Q2031 ****
**** Processed Q2032 ****
**** Processed Q2033 ****
**** Processed Q2034 ****
**** Processed Q2035 ****
**** Processed Q2036 ****
**** Processed Q2037 ****

In [5]:
# prepare validation dataset
with open("/content/valid/questions.json", "r") as json_data:
    valid_questions_json = json.loads(json_data.read())
    json_data.close()

with open("/content/valid/answers.json", "r") as json_data:
    valid_answers_json = json.loads(json_data.read())
    json_data.close()

qa_valid_data = prepare_dataset(valid_questions_json, valid_answers_json)


**** Processed Q0001 ****
**** Processed Q0002 ****
**** Processed Q0003 ****
**** Processed Q0004 ****
**** Processed Q0005 ****
**** Processed Q0006 ****
**** Processed Q0007 ****
**** Processed Q0008 ****
**** Processed Q0009 ****
**** Processed Q0010 ****
**** Processed Q0011 ****
**** Processed Q0012 ****
**** Processed Q0013 ****
**** Processed Q0014 ****
**** Processed Q0015 ****
**** Processed Q0016 ****
**** Processed Q0017 ****
**** Processed Q0018 ****
**** Processed Q0019 ****
**** Processed Q0020 ****
**** Processed Q0021 ****
**** Processed Q0022 ****
**** Processed Q0023 ****
**** Processed Q0024 ****
**** Processed Q0025 ****
**** Processed Q0026 ****
**** Processed Q0027 ****
**** Processed Q0028 ****
**** Processed Q0029 ****
**** Processed Q0030 ****
**** Processed Q0031 ****
**** Processed Q0032 ****
**** Processed Q0033 ****
**** Processed Q0034 ****
**** Processed Q0035 ****
**** Processed Q0036 ****
**** Processed Q0037 ****
**** Processed Q0038 ****
**** Process

In [6]:
qa_final_train_data = qa_train_data + qa_valid_data

In [7]:
# prepare test dataset
with open("/content/test/questions.json", "r") as json_data:
    test_questions_json = json.loads(json_data.read())
    json_data.close()

with open("/content/test/answers.json", "r") as json_data:
    test_answers_json = json.loads(json_data.read())
    json_data.close()

qa_test_data = prepare_dataset(test_questions_json, test_answers_json)


**** Processed Q0001 ****
**** Processed Q0002 ****
**** Processed Q0003 ****
**** Processed Q0004 ****
**** Processed Q0005 ****
**** Processed Q0006 ****
**** Processed Q0007 ****
**** Processed Q0008 ****
**** Processed Q0009 ****
**** Processed Q0010 ****
**** Processed Q0011 ****
**** Processed Q0012 ****
**** Processed Q0013 ****
**** Processed Q0014 ****
**** Processed Q0015 ****
**** Processed Q0016 ****
**** Processed Q0017 ****
**** Processed Q0018 ****
**** Processed Q0019 ****
**** Processed Q0020 ****
**** Processed Q0021 ****
**** Processed Q0022 ****
**** Processed Q0023 ****
**** Processed Q0024 ****
**** Processed Q0025 ****
**** Processed Q0026 ****
**** Processed Q0027 ****
**** Processed Q0028 ****
**** Processed Q0029 ****
**** Processed Q0030 ****
**** Processed Q0031 ****
**** Processed Q0032 ****
**** Processed Q0033 ****
**** Processed Q0034 ****
**** Processed Q0035 ****
**** Processed Q0036 ****
**** Processed Q0037 ****
**** Processed Q0038 ****
**** Process

In [10]:
!pip install transformers
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m845.9 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from evaluate)
  Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━

In [8]:
import torch
import json
from tqdm import tqdm
import torch.nn as nn
from torch.optim import Adam
import nltk
import spacy
import string
import evaluate  # Bleu
from torch.utils.data import Dataset, DataLoader, RandomSampler
import pandas as pd
import numpy as np
import transformers
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, T5TokenizerFast

import warnings
warnings.filterwarnings("ignore")

In [9]:
TOKENIZER = T5TokenizerFast.from_pretrained("t5-base")
MODEL = T5ForConditionalGeneration.from_pretrained("t5-base", return_dict=True)
OPTIMIZER = Adam(MODEL.parameters(), lr=0.00001)
Q_LEN = 512   # Question Length
T_LEN = 512    # Target Length
BATCH_SIZE = 4
DEVICE = "cuda:0"

In [10]:
# Create a Dataframe from train data
qa_train_df = pd.DataFrame(qa_final_train_data)
# convert df values to string
qa_train_df = qa_train_df.applymap(str)
# Create a Dataframe from test data
qa_test_df = pd.DataFrame(qa_test_data)
qa_test_df = qa_test_df.applymap(str)

In [11]:
class QA_Dataset(Dataset):
    def __init__(self, tokenizer, dataframe, q_len, t_len):
        self.tokenizer = tokenizer
        self.q_len = q_len
        self.t_len = t_len
        self.data = dataframe
        self.questions = self.data["question"]
        self.context = self.data["context"]
        self.answer = self.data['answer']

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        question = self.questions[idx]
        context = self.context[idx]
        answer = self.answer[idx]

        question_tokenized = self.tokenizer(question, context, max_length=self.q_len, padding="max_length",
                                                    truncation=True, pad_to_max_length=True, add_special_tokens=True)
        answer_tokenized = self.tokenizer(answer, max_length=self.t_len, padding="max_length",
                                          truncation=True, pad_to_max_length=True, add_special_tokens=True)

        labels = torch.tensor(answer_tokenized["input_ids"], dtype=torch.long)
        labels[labels == 0] = -100

        return {
            "input_ids": torch.tensor(question_tokenized["input_ids"], dtype=torch.long),
            "attention_mask": torch.tensor(question_tokenized["attention_mask"], dtype=torch.long),
            "labels": labels,
            "decoder_attention_mask": torch.tensor(answer_tokenized["attention_mask"], dtype=torch.long)
        }

In [12]:
qa_train_dataset = QA_Dataset(TOKENIZER, qa_train_df, Q_LEN, T_LEN)
qa_test_dataset = QA_Dataset(TOKENIZER, qa_test_df, Q_LEN, T_LEN)

train_loader = DataLoader(qa_train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(qa_test_dataset, batch_size=BATCH_SIZE)

In [14]:
#torch.cuda.empty_cache()
MODEL.to('cuda')

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [15]:
train_loss = 0
val_loss = 0
train_batch_count = 0
val_batch_count = 0

for epoch in range(2):
    MODEL.train()
    for batch in tqdm(train_loader, desc="Training batches"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)
        decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

        outputs = MODEL(
                          input_ids=input_ids,
                          attention_mask=attention_mask,
                          labels=labels,
                          decoder_attention_mask=decoder_attention_mask
                        )

        OPTIMIZER.zero_grad()
        outputs.loss.backward()
        OPTIMIZER.step()
        train_loss += outputs.loss.item()
        train_batch_count += 1

    #Evaluation
    MODEL.eval()
    for batch in tqdm(val_loader, desc="Validation batches"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)
        decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

        outputs = MODEL(
                          input_ids=input_ids,
                          attention_mask=attention_mask,
                          labels=labels,
                          decoder_attention_mask=decoder_attention_mask
                        )

        OPTIMIZER.zero_grad()
        outputs.loss.backward()
        OPTIMIZER.step()
        val_loss += outputs.loss.item()
        val_batch_count += 1

    print(f"{epoch+1}/{2} -> Train loss: {train_loss / train_batch_count}\tValidation loss: {val_loss/val_batch_count}")

Training batches: 100%|██████████| 4000/4000 [1:13:29<00:00,  1.10s/it]
Validation batches: 100%|██████████| 1000/1000 [17:27<00:00,  1.05s/it]


1/2 -> Train loss: 0.010722651088426573	Validation loss: 0.00016018379920251392


Training batches: 100%|██████████| 4000/4000 [1:13:31<00:00,  1.10s/it]
Validation batches: 100%|██████████| 1000/1000 [17:27<00:00,  1.05s/it]

2/2 -> Train loss: 0.005694573415488549	Validation loss: 9.907609644093896e-05





In [16]:
MODEL.save_pretrained("t5_qa_model")
TOKENIZER.save_pretrained("t5_qa_tokenizer")

('t5_qa_tokenizer/tokenizer_config.json',
 't5_qa_tokenizer/special_tokens_map.json',
 't5_qa_tokenizer/tokenizer.json')

In [17]:
# Saved files
"""('qa_tokenizer/tokenizer_config.json',
 'qa_tokenizer/special_tokens_map.json',
 'qa_tokenizer/spiece.model',
'qa_tokenizer/added_tokens.json',
'qa_tokenizer/tokenizer.json')"""

"('qa_tokenizer/tokenizer_config.json',\n 'qa_tokenizer/special_tokens_map.json',\n 'qa_tokenizer/spiece.model',\n'qa_tokenizer/added_tokens.json',\n'qa_tokenizer/tokenizer.json')"

In [18]:
def predict_answer(context, question, ref_answer=None):
    inputs = TOKENIZER(question, context, max_length=Q_LEN, padding="max_length", truncation=True, add_special_tokens=True)

    input_ids = torch.tensor(inputs["input_ids"], dtype=torch.long).to(DEVICE).unsqueeze(0)
    attention_mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).to(DEVICE).unsqueeze(0)

    outputs = MODEL.generate(input_ids=input_ids, attention_mask=attention_mask)

    predicted_answer = TOKENIZER.decode(outputs.flatten(), skip_special_tokens=True)

    if ref_answer:
        # Load the Bleu metric
        bleu = evaluate.load("google_bleu")
        score = bleu.compute(predictions=[predicted_answer],
                            references=[ref_answer])

        print("Context: \n", context)
        print("\n")
        print("Question: \n", question)
        return {
            "Reference Answer: ": ref_answer,
            "Predicted Answer: ": predicted_answer,
            "BLEU Score: ": score
        }
    else:
        return predicted_answer

In [19]:
# test predictions
context = qa_train_df.iloc[0]["context"]
question = qa_train_df.iloc[0]["question"]
answer = qa_train_df.iloc[0]["answer"]

predict_answer(context, question, answer)

Downloading builder script:   0%|          | 0.00/8.64k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Context: 
 [CLS] SINGLE_FACT [SEP] TC01 [SEP] {'sparql': 'SELECT DISTINCT ?answer WHERE { ?answer <https://dblp.org/rdf/schema#authoredBy> <https://dblp.org/pid/211/3355> }'} [SEP] ['<https://dblp.org/pid/211/3355>']


Question: 
 What are the papers written by the person Wazir Muhammad?


{'Reference Answer: ': "{'sparql': 'SELECT DISTINCT ?answer WHERE { ?answer <https://dblp.org/rdf/schema#authoredBy> <https://dblp.org/pid/211/3355> }'} [SEP] ['<https://dblp.org/pid/211/3355>']",
 'Predicted Answer: ': "'sparql': 'SELECT DISTINCT?",
 'BLEU Score: ': {'google_bleu': 0.05426356589147287}}

In [None]:
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, T5TokenizerFast

# test loading model
MODEL = T5ForConditionalGeneration.from_pretrained("/content/t5_qa_model")
TOKENIZER = T5TokenizerFast.from_pretrained("/content/t5_qa_tokenizer")

In [None]:
MODEL.to('cuda')

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [20]:
# test predictions
context = qa_train_df.iloc[0]["context"]
question = qa_train_df.iloc[0]["question"]
answer = qa_train_df.iloc[0]["answer"]

predict_answer(context, question, answer)

Context: 
 [CLS] SINGLE_FACT [SEP] TC01 [SEP] {'sparql': 'SELECT DISTINCT ?answer WHERE { ?answer <https://dblp.org/rdf/schema#authoredBy> <https://dblp.org/pid/211/3355> }'} [SEP] ['<https://dblp.org/pid/211/3355>']


Question: 
 What are the papers written by the person Wazir Muhammad?


{'Reference Answer: ': "{'sparql': 'SELECT DISTINCT ?answer WHERE { ?answer <https://dblp.org/rdf/schema#authoredBy> <https://dblp.org/pid/211/3355> }'} [SEP] ['<https://dblp.org/pid/211/3355>']",
 'Predicted Answer: ': "'sparql': 'SELECT DISTINCT?",
 'BLEU Score: ': {'google_bleu': 0.05426356589147287}}