In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
def template_to_sparql(template, subjectentityid):
  if template == "1":
    return f"SELECT ?obj WHERE {{ wd:{subjectentityid} wdt:P47 ?obj}}"
  elif template == "2":
    return f"SELECT ?obj WHERE {{ wd:{subjectentityid} wdt:P20 ?obj}}"
  elif template == "3":
    return f"SELECT ?obj WHERE {{ wd:{subjectentityid} wdt:P414 ?obj}}"
  elif template == "4":
    return f"SELECT ?obj WHERE {{ wd:{subjectentityid} wdt:P1113 ?obj}}"
  elif template == "5":
    return f"SELECT ?sub WHERE {{ ?sub wdt:P166 wd:{subjectentityid}}}"
  else:
    return []

In [3]:
!pip install sparqlwrapper



In [64]:
# get object entities
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

def sparql_id_to_label(qid):
  sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
  sparql.setQuery(f"SELECT * WHERE {{ SERVICE wikibase:label {{ bd:serviceParam wikibase:language 'en'. wd:{qid} rdfs:label ?label}}}}")
  sparql.setReturnFormat(JSON)
  results = sparql.query().convert()
  results_df = pd.json_normalize(results['results']['bindings'])
  if not results_df.empty:
    final_res = results_df["label.value"].values[0]
  else:
    final_res = ""
  return final_res

def get_object_entities(query):
  sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
  sparql.setQuery(query)
  sparql.setReturnFormat(JSON)
  results = sparql.query().convert()
  results_df = pd.json_normalize(results['results']['bindings'])
  if not results_df.empty:
    if 'obj.value' in results_df.columns:
      final_res = [val[val.find('Q'):] for val in results_df["obj.value"].values]
    else:
      final_res = [val[val.find('Q'):] for val in results_df["sub.value"].values]
  else:
    final_res = []
  return final_res

In [5]:
# test reading data

import json
from sklearn.model_selection import train_test_split

with open("/content/drive/MyDrive/Colab Notebooks/lm-kbc/data/train.jsonl", "r") as json_data:
    train_json = json_data.readlines()
    #train_json = json.loads(json_data.read())
    json_data.close()

print(train_json[0])

# transform json data to pandas dataframe
#wordnet_df = pd.DataFrame.from_dict(wordnet_json)
#print(wordnet_df.head())

# train-test split data
train_split, test_split = train_test_split(train_json, test_size=0.2)

{"SubjectEntityID": "Q80061", "SubjectEntity": "Nobel Prize in Physiology or Medicine", "ObjectEntitiesID": ["Q46928", "Q44597", "Q42985", "Q37064", "Q37193", "Q35703", "Q35698", "Q60060", "Q60108", "Q60031", "Q57568", "Q58054", "Q57132", "Q57128", "Q57089", "Q57192", "Q57191", "Q80323", "Q80917", "Q81218", "Q78501", "Q78496", "Q78479", "Q78478", "Q75847", "Q76464", "Q76425", "Q76807", "Q77104", "Q77081", "Q77173", "Q77174", "Q77160", "Q77152", "Q77126", "Q72194", "Q68118", "Q97270", "Q84405", "Q83333", "Q83598", "Q110974", "Q108602", "Q108576", "Q108896", "Q108891", "Q108880", "Q109553", "Q109731", "Q109726", "Q110060", "Q110106", "Q110101", "Q106547", "Q107402", "Q107462", "Q107609", "Q104607", "Q104600", "Q104592", "Q104913", "Q105605", "Q105494", "Q105830", "Q103598", "Q103844", "Q102034", "Q123280", "Q115448", "Q115495", "Q115490", "Q115478", "Q115475", "Q115470", "Q116064", "Q211940", "Q211787", "Q207719", "Q208670", "Q205667", "Q206455", "Q204733", "Q202152", "Q202136", "Q199654

In [6]:
# start preparing for QA pipeline
! pip install -U accelerate
! pip install -U transformers



In [7]:
# prepare train dataset
lm_train_data = []
for i in train_split:
  lm_train_data.append(i)


# prepare test dataset
lm_test_data = []
for i in test_split:
  lm_test_data.append(i)

In [8]:
!pip install transformers
!pip install evaluate



In [9]:
import torch
import json
from tqdm import tqdm
import torch.nn as nn
from torch.optim import Adam
import nltk
import spacy
import string
import evaluate  # Bleu
from torch.utils.data import Dataset, DataLoader, RandomSampler
import numpy as np
import transformers
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, T5TokenizerFast

import warnings
warnings.filterwarnings("ignore")

In [None]:
MODEL_NAME = "google/flan-t5-small"
TOKENIZER = T5TokenizerFast.from_pretrained(MODEL_NAME)
MODEL = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, return_dict=True)
OPTIMIZER = Adam(MODEL.parameters(), lr=0.0001)
Q_LEN = 512   # Question Length
T_LEN = 512    # Target Length
BATCH_SIZE = 4
DEVICE = "cuda:0"

In [10]:
# Create a Dataframe from train data
train_df = pd.DataFrame(columns=['SubjectEntity', 'SubjectEntityID', 'ObjectEntities', 'ObjectEntitiesID', 'Relation'])
for item in lm_train_data:
  train_df = pd.concat([train_df, pd.DataFrame([json.loads(item)])], ignore_index=True)
# convert df values to string
train_df = train_df.applymap(str)

# Create a Dataframe from test data
test_df = pd.DataFrame(columns=['SubjectEntity', 'SubjectEntityID', 'ObjectEntities', 'ObjectEntitiesID', 'Relation'])
for item in lm_test_data:
  test_df = pd.concat([test_df, pd.DataFrame([json.loads(item)])], ignore_index=True)
# convert df values to string
test_df = test_df.applymap(str)

In [11]:
class QA_Dataset(Dataset):
    def __init__(self, tokenizer, dataframe, q_len, t_len):
        self.tokenizer = tokenizer
        self.q_len = q_len
        self.t_len = t_len
        self.data = dataframe
        self.subjectentity = self.data["SubjectEntity"]
        self.subjectentityid = self.data["SubjectEntityID"]
        self.objectentity = self.data["ObjectEntities"]
        self.objectentityid = self.data["ObjectEntitiesID"]
        self.relation = self.data["Relation"]

    def __len__(self):
        return len(self.subjectentity)

    def __getitem__(self, idx):
        subjectentity = self.subjectentity[idx]
        subjectentityid = self.subjectentityid[idx]
        objectentity = self.objectentity[idx]
        objectentityid = self.objectentityid[idx]
        relation = self.relation[idx]
        if relation == "countryLandBordersCountry":
          gold_answer = "1"
        elif relation == "personHasCityOfDeath":
          gold_answer = "2"
        elif relation == "companyTradesAtStockExchange":
          gold_answer = "3"
        elif relation == "numberofseriesepisodes":
          gold_answer = "4"
        else: # relation == "awardWonBy"
          gold_answer = "5"

        term_tokenized = self.tokenizer(subjectentityid, relation, max_length=self.q_len, padding="max_length",
                                                    truncation=True, pad_to_max_length=True, add_special_tokens=True)
        type_tokenized = self.tokenizer(gold_answer, max_length=self.t_len, padding="max_length",
                                          truncation=True, pad_to_max_length=True, add_special_tokens=True)

        labels = torch.tensor(type_tokenized["input_ids"], dtype=torch.long)
        labels[labels == 0] = -100

        return {
            "input_ids": torch.tensor(term_tokenized["input_ids"], dtype=torch.long),
            "attention_mask": torch.tensor(term_tokenized["attention_mask"], dtype=torch.long),
            "labels": labels,
            "decoder_attention_mask": torch.tensor(type_tokenized["attention_mask"], dtype=torch.long)
        }

In [12]:
train_dataset = QA_Dataset(TOKENIZER, train_df, Q_LEN, T_LEN)
test_dataset = QA_Dataset(TOKENIZER, test_df, Q_LEN, T_LEN)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

NameError: name 'TOKENIZER' is not defined

In [None]:
#torch.cuda.empty_cache()
MODEL.to('cuda')

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [None]:
train_loss = 0
val_loss = 0
train_batch_count = 0
val_batch_count = 0

for epoch in range(20):
    MODEL.train()
    for batch in tqdm(train_loader, desc="Training batches"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)
        decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

        outputs = MODEL(
                          input_ids=input_ids,
                          attention_mask=attention_mask,
                          labels=labels,
                          decoder_attention_mask=decoder_attention_mask
                        )

        OPTIMIZER.zero_grad()
        outputs.loss.backward()
        OPTIMIZER.step()
        train_loss += outputs.loss.item()
        train_batch_count += 1

    #Evaluation
    MODEL.eval()
    for batch in tqdm(val_loader, desc="Validation batches"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)
        decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

        outputs = MODEL(
                          input_ids=input_ids,
                          attention_mask=attention_mask,
                          labels=labels,
                          decoder_attention_mask=decoder_attention_mask
                        )

        OPTIMIZER.zero_grad()
        outputs.loss.backward()
        OPTIMIZER.step()
        val_loss += outputs.loss.item()
        val_batch_count += 1

    print(f"{epoch+1}/{20} -> Train loss: {train_loss / train_batch_count}\tValidation loss: {val_loss/val_batch_count}")

Training batches: 100%|██████████| 76/76 [00:14<00:00,  5.18it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.52it/s]


1/20 -> Train loss: 0.6679885457269847	Validation loss: 0.0005363052340199877


Training batches: 100%|██████████| 76/76 [00:14<00:00,  5.08it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.44it/s]


2/20 -> Train loss: 0.3425135072818528	Validation loss: 0.0002821936231109583


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.03it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.38it/s]


3/20 -> Train loss: 0.23034388574211462	Validation loss: 0.00019171468478050788


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.02it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.28it/s]


4/20 -> Train loss: 0.17356574065855201	Validation loss: 0.00014516710261528045


Training batches: 100%|██████████| 76/76 [00:15<00:00,  4.91it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.20it/s]


5/20 -> Train loss: 0.13943231577619658	Validation loss: 0.00011663686863668869


Training batches: 100%|██████████| 76/76 [00:15<00:00,  4.94it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.31it/s]


6/20 -> Train loss: 0.11671525739515537	Validation loss: 9.801184896567767e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.03it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.35it/s]


7/20 -> Train loss: 0.10063809451471604	Validation loss: 8.557921977253704e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.02it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.30it/s]


8/20 -> Train loss: 0.08827658479699015	Validation loss: 7.513886171301764e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  4.95it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.26it/s]


9/20 -> Train loss: 0.07856012291193759	Validation loss: 6.693248791447204e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  4.97it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.31it/s]


10/20 -> Train loss: 0.0707977347900085	Validation loss: 6.033154776751441e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.03it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.30it/s]


11/20 -> Train loss: 0.06439393561082728	Validation loss: 5.491423759145688e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.03it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.32it/s]


12/20 -> Train loss: 0.05906382895856248	Validation loss: 5.0390597269281366e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.02it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.30it/s]


13/20 -> Train loss: 0.054550458886565636	Validation loss: 4.6552223472722306e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.01it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.29it/s]


14/20 -> Train loss: 0.050678635788270365	Validation loss: 4.326017209230475e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.02it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.29it/s]


15/20 -> Train loss: 0.04732071078918428	Validation loss: 4.0403191826773344e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.03it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.30it/s]


16/20 -> Train loss: 0.04438027600466542	Validation loss: 3.790294195830861e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  5.03it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.28it/s]


17/20 -> Train loss: 0.04178569942175467	Validation loss: 3.569001136758969e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  4.98it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.27it/s]


18/20 -> Train loss: 0.039479372461060286	Validation loss: 3.372348480079677e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  4.97it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.26it/s]


19/20 -> Train loss: 0.03740911102217783	Validation loss: 3.196169077295301e-05


Training batches: 100%|██████████| 76/76 [00:15<00:00,  4.98it/s]
Validation batches: 100%|██████████| 19/19 [00:03<00:00,  5.26it/s]

20/20 -> Train loss: 0.035547389018001085	Validation loss: 3.0375174236122634e-05





In [None]:
MODEL.save_pretrained("flan-t5-small_lm-kbc_model")
TOKENIZER.save_pretrained("flan-t5-small_lm-kbc_tokenizer")

('flan-t5-small_lm-kbc_tokenizer/tokenizer_config.json',
 'flan-t5-small_lm-kbc_tokenizer/special_tokens_map.json',
 'flan-t5-small_lm-kbc_tokenizer/spiece.model',
 'flan-t5-small_lm-kbc_tokenizer/added_tokens.json',
 'flan-t5-small_lm-kbc_tokenizer/tokenizer.json')

In [60]:
import argparse
import json
from pathlib import Path
from typing import List, Dict, Union


def read_jsonl_file(file_path: Union[str, Path]) -> List[Dict]:
    with open(file_path, "r") as f:
        rows = [json.loads(line) for line in f]
    return rows


def true_positives(preds: List, gts: List) -> int:
    tp = 0
    for pred in preds:
        if pred in gts:
            tp += 1

    return tp


def precision(preds: List[str], gts: List[str]) -> float:
    try:
        # When nothing is predicted, precision = 1
        # irrespective of the ground truth value
        if len(preds) == 0:
            return 1
        # When the predictions are not empty
        return min(true_positives(preds, gts) / len(preds), 1.0)
    except TypeError:
        return 0.0


def recall(preds: List[str], gts: List[str]) -> float:
    try:
        # When ground truth is empty return 1
        # even if there are predictions (edge case)
        if len(gts) == 0:
            return 1.0
        # When the ground truth is not empty
        return min(true_positives(preds, gts) / len(gts), 1.0)
    except TypeError:
        return 0.0


def f1_score(p: float, r: float) -> float:
    try:
        return (2 * p * r) / (p + r)
    except ZeroDivisionError:
        return 0.0


def rows_to_dict(rows: List[Dict]) -> Dict:
    """Index the ground truth/prediction rows by subject entity and relation."""
    return {
        (r["SubjectEntity"], r["Relation"]): list(set(r["ObjectEntitiesID"]))
        for r in rows
    }

def predict_answer(relation, subjectentityid, subjectentity):
    inputs = TOKENIZER(subjectentityid, relation, max_length=Q_LEN, padding="max_length", truncation=True, add_special_tokens=True)

    input_ids = torch.tensor(inputs["input_ids"], dtype=torch.long).to(DEVICE).unsqueeze(0)
    attention_mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).to(DEVICE).unsqueeze(0)

    outputs = MODEL.generate(input_ids=input_ids, attention_mask=attention_mask)

    predicted_type = TOKENIZER.decode(outputs.flatten(), skip_special_tokens=True)

    print("TEMPLATE:",predicted_type)
    sparql_query = template_to_sparql(predicted_type, subjectentityid)
    objectentitiesid = get_object_entities(sparql_query)
    objectentities = [sparql_id_to_label(objectentityid) for objectentityid in objectentitiesid]
    return {"SubjectEntityID": subjectentityid, "SubjectEntity": subjectentity, "Relation": relation, "ObjectEntitiesID": objectentitiesid, "ObjectEntities": objectentities}


def evaluate_per_sr_pair(pred_rows, gt_rows) -> List[Dict[str, float]]:
    """Evaluate the predictions per Subject-Relation pair"""
    pred_dict = rows_to_dict(pred_rows)
    gt_dict = rows_to_dict(gt_rows)

    results = []

    for subj, rel in gt_dict:
        # get the ground truth objects
        gts = gt_dict[(subj, rel)]

        # get the predictions
        preds = pred_dict[(subj, rel)]

        # calculate the scores
        p = precision(preds, gts)
        r = recall(preds, gts)
        f1 = f1_score(p, r)

        results.append({
            "SubjectEntity": subj,
            "Relation": rel,
            "p": p,
            "r": r,
            "f1": f1,
            "tp": true_positives(preds, gts),
            "total_pred": len(preds),
            "total_gt": len(gts),
        })

    return sorted(results, key=lambda x: (x["Relation"], x["SubjectEntity"]))


def macro_average_per_relation(scores_per_sr: List[Dict[str, float]]) -> dict:
    """Compute the macro average scores per relation"""
    scores = {}
    for r in scores_per_sr:
        if r["Relation"] not in scores:
            scores[r["Relation"]] = []
        scores[r["Relation"]].append({
            "p": r["p"],
            "r": r["r"],
            "f1": r["f1"],
        })

    macro_averages = {}
    for rel in scores:
        macro_averages[rel] = {
            "macro-p": sum([x["p"] for x in scores[rel]]) / len(scores[rel]),
            "macro-r": sum([x["r"] for x in scores[rel]]) / len(scores[rel]),
            "macro-f1": sum([x["f1"] for x in scores[rel]]) / len(scores[rel]),
        }

    # Macro average for all relations
    all_rel_macro_p = sum([x["p"] for x in scores_per_sr]) / len(scores_per_sr)
    all_rel_macro_r = sum([x["r"] for x in scores_per_sr]) / len(scores_per_sr)
    all_rel_macro_f1 = sum([x["f1"] for x in scores_per_sr]) / len(
        scores_per_sr)

    macro_averages["*** All Relations ***"] = {
        "macro-p": all_rel_macro_p,
        "macro-r": all_rel_macro_r,
        "macro-f1": all_rel_macro_f1,
    }

    return macro_averages


def micro_average_per_relation(scores_per_sr: List[Dict[str, float]]) -> dict:
    """Compute the micro average scores per relation"""
    scores = {}
    for r in scores_per_sr:
        if r["Relation"] not in scores:
            scores[r["Relation"]] = {
                "tp": 0,
                "total_pred": 0,
                "total_gt": 0,
            }
        scores[r["Relation"]]["tp"] += r["tp"]
        scores[r["Relation"]]["total_pred"] += r["total_pred"]
        scores[r["Relation"]]["total_gt"] += r["total_gt"]

    micro_averages = {}
    for rel in scores:
        micro_p = scores[rel]["tp"] / scores[rel]["total_pred"] if scores[rel][
                                                                       "total_pred"] > 0 else 1.0
        micro_r = scores[rel]["tp"] / scores[rel]["total_gt"] if scores[rel][
                                                                     "total_gt"] > 0 else 1.0

        micro_averages[rel] = {
            "micro-p": micro_p,
            "micro-r": micro_r,
            "micro-f1": f1_score(micro_p, micro_r),
        }

    # Micro average for all relations
    total_tp = sum([x["tp"] for x in scores.values()])
    total_pred = sum([x["total_pred"] for x in scores.values()])
    total_gt = sum([x["total_gt"] for x in scores.values()])

    all_rel_micro_p = total_tp / total_pred if total_pred > 0 else 1.0
    all_rel_micro_r = total_tp / total_gt if total_gt > 0 else 1.0

    micro_averages["*** All Relations ***"] = {
        "micro-p": all_rel_micro_p,
        "micro-r": all_rel_micro_r,
        "micro-f1": f1_score(all_rel_micro_p, all_rel_micro_r),
    }

    return micro_averages


def prediction_statistics(scores_per_sr: List[Dict[str, float]]) -> dict:
    """Get the average numbers of predictions and the numbers of empty predictions per relation."""
    stats = {}
    for r in scores_per_sr:
        if r["Relation"] not in stats:
            stats[r["Relation"]] = {
                "num_sr_pairs": 0,
                "total_pred": 0,
                "empty_pred": 0,
            }
        stats[r["Relation"]]["num_sr_pairs"] += 1
        stats[r["Relation"]]["total_pred"] += r["total_pred"]
        if r["total_pred"] == 0:
            stats[r["Relation"]]["empty_pred"] += 1

    final_stats = {}
    for rel in stats:
        final_stats[rel] = {
            "avg. #preds": stats[rel]["total_pred"] / stats[rel][
                "num_sr_pairs"],
            "#empty preds": stats[rel]["empty_pred"],
        }

    # Average numbers of predictions and the numbers of empty predictions for all relations
    total_sr_pairs = len(scores_per_sr)
    total_preds = sum([x["total_pred"] for x in stats.values()])
    total_empty_preds = sum([x["empty_pred"] for x in stats.values()])

    final_stats["*** All Relations ***"] = {
        "avg. #preds": total_preds / total_sr_pairs,
        "#empty preds": total_empty_preds,
    }

    return final_stats


def generate_evaluation(predictions, ground_truth):
    # Read the predictions and ground truth
    ## pred_rows = [predictions]
    ## gt_rows = [ground_truth]
    #pred_rows = read_jsonl_file(predictions)
    #gt_rows = read_jsonl_file(ground_truth)

    # Evaluate the predictions
    scores_per_sr_pair = evaluate_per_sr_pair(predictions, ground_truth)

    # Macro average
    macro_per_relation = macro_average_per_relation(scores_per_sr_pair)
    macro_df = pd.DataFrame(macro_per_relation).transpose().round(3)

    # Micro average
    micro_per_relation = micro_average_per_relation(scores_per_sr_pair)
    micro_df = pd.DataFrame(micro_per_relation).transpose().round(3)

    # Statistics
    stats = prediction_statistics(scores_per_sr_pair)
    stats_df = pd.DataFrame(stats).transpose().round(3)
    stats_df["#empty preds"] = stats_df["#empty preds"].astype(int)

    # Combine the results
    results = pd.concat([macro_df, micro_df, stats_df], axis=1)
    print(results)

In [15]:
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, T5TokenizerFast

# test loading model
MODEL = T5ForConditionalGeneration.from_pretrained("/content/drive/MyDrive/Colab Notebooks/lm-kbc/model/flan-t5-small_lm-kbc_model")
TOKENIZER = T5TokenizerFast.from_pretrained("/content/drive/MyDrive/Colab Notebooks/lm-kbc/model/flan-t5-small_lm-kbc_tokenizer")

In [16]:
MODEL.to('cuda')

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [62]:
Q_LEN = 512   # Question Length
T_LEN = 512    # Target Length
DEVICE = "cuda:0"
# test predictions
relation = train_df.iloc[0]["Relation"]
subjectentity = train_df.iloc[0]["SubjectEntity"]
subjectentityid = train_df.iloc[0]["SubjectEntityID"]
objectentity = train_df.iloc[0]["ObjectEntities"]
objectentityid = train_df.iloc[0]["ObjectEntitiesID"]
predicted_result = predict_answer(relation, subjectentityid, subjectentity)
print("SUBJECT ENTITY:", subjectentity)
print("SUBJECT ENTITY ID:", subjectentityid)
print("RELATION:", relation)
print("OBJECT ENTITY:", objectentity)
print("OBJECT ENTITY ID:", objectentityid)
print("PREDICTED OBJECT:", predicted_result["ObjectEntitiesID"])

source = {"SubjectEntity": subjectentity,
                    "SubjectEntityID": subjectentityid,
            "Relation": relation,
            "ObjectEntitiesID": train_df.iloc[0]["ObjectEntitiesID"],
          "ObjectEntities": train_df.iloc[0]["ObjectEntities"]}

print("*** EVALUATION:", generate_evaluation([source], [predicted_result]))

TEMPLATE: 2
  label.xml:lang label.type  label.value
0             en    literal  Baden-Baden
SUBJECT ENTITY: Herbert Joos
SUBJECT ENTITY ID: Q1498204
RELATION: personHasCityOfDeath
OBJECT ENTITY: ['Baden-Baden']
OBJECT ENTITY ID: ['Q4100']
PREDICTED OBJECT: ['Q4100']
                       macro-p  macro-r  macro-f1  micro-p  micro-r  micro-f1  \
personHasCityOfDeath       0.0      0.0       0.0      0.0      0.0       0.0   
*** All Relations ***      0.0      0.0       0.0      0.0      0.0       0.0   

                       avg. #preds  #empty preds  
personHasCityOfDeath           7.0             0  
*** All Relations ***          7.0             0  
*** EVALUATION: None


In [45]:
# test predictions
reference_types = []
predicted_types = []
for index, row in test_df.iterrows():
  relation = row["Relation"]
  subjectentity = row["SubjectEntity"]
  subjectentityid = row["SubjectEntityID"]
  objectentity = row["ObjectEntities"]
  objectentityid = row["ObjectEntitiesID"]
  print("SubjectEntity:", subjectentity)
  print("SubjectEntityID:", subjectentityid)
  print("Relation:", relation)

  predicted_type = predict_answer(relation, subjectentityid, subjectentity)
  reference_types.append({"SubjectEntity": subjectentity, "SubjectEntityID": subjectentityid, "Relation": relation, "ObjectEntitiesID": objectentityid})
  predicted_types.append(predicted_type)

generate_evaluation(reference_types, predicted_types)

SubjectEntity: Dave Keon
SubjectEntityID: Q326190
Relation: personHasCityOfDeath
TEMPLATE: 2
SubjectEntity: The Royal Bodyguard
SubjectEntityID: Q7761669
Relation: seriesHasNumberOfEpisodes
TEMPLATE: 5
SubjectEntity: Remedy
SubjectEntityID: Q15582364
Relation: seriesHasNumberOfEpisodes
TEMPLATE: 5
SubjectEntity: Moravské kovárny
SubjectEntityID: Q18006568
Relation: companyTradesAtStockExchange
TEMPLATE: 3
SubjectEntity: Estonia
SubjectEntityID: Q191
Relation: countryLandBordersCountry
TEMPLATE: 1
SubjectEntity: Uganda
SubjectEntityID: Q1036
Relation: countryLandBordersCountry
TEMPLATE: 1
SubjectEntity: D-Wave Systems
SubjectEntityID: Q5203294
Relation: companyTradesAtStockExchange
TEMPLATE: 3
SubjectEntity: Zimbabwe
SubjectEntityID: Q954
Relation: countryLandBordersCountry
TEMPLATE: 1
SubjectEntity: Akkad Bakkad Bambey Bo
SubjectEntityID: Q4701303
Relation: seriesHasNumberOfEpisodes
TEMPLATE: 5
SubjectEntity: Natalia del Mar
SubjectEntityID: Q370493
Relation: seriesHasNumberOfEpisodes


In [51]:
with open("/content/drive/MyDrive/Colab Notebooks/lm-kbc/data/val.jsonl", "r") as json_data:
    val_json = json_data.readlines()
    json_data.close()

print(val_json[0])

# prepare val dataset
lm_val_data = []
for i in val_json:
  lm_val_data.append(i)

# Create a Dataframe from val data
val_df = pd.DataFrame(columns=['SubjectEntity', 'SubjectEntityID', 'ObjectEntities', 'ObjectEntitiesID', 'Relation'])
for item in lm_val_data:
  val_df = pd.concat([val_df, pd.DataFrame([json.loads(item)])], ignore_index=True)
# convert df values to string
val_df = val_df.applymap(str)

{"SubjectEntityID": "Q38215093", "SubjectEntity": "honorary doctor of Stockholm University", "ObjectEntitiesID": ["Q34253", "Q49103066", "Q58023631", "Q63827235", "Q4981282", "Q4991105", "Q4948961", "Q5704340", "Q6291052", "Q6179597", "Q6208845", "Q6129350", "Q6029586", "Q5812644", "Q7666528", "Q8008226", "Q7926219", "Q471777", "Q214582", "Q214607", "Q164170", "Q179735", "Q76613", "Q102395", "Q123666", "Q910163", "Q823151", "Q701550", "Q1358507", "Q1643909", "Q3085655", "Q10625133", "Q21264283", "Q21165958", "Q18159169", "Q30539721", "Q114228447", "Q112554949", "Q81521986", "Q5284"], "ObjectEntities": ["Linus Torvalds", "Eva Hedlund", "Achim Trebst", "Ewa \u015awie\u017cewska", "Lena Sundstr\u00f6m", "Maj \u00d6dman", "H\u00e9di Fried", "Helga Hernes", "Harry Guldberg", "Peter Skautrup", "Arthur Thomson", "James Barber", "Sten Orrenius", "James R. Holton", "S\u00f8ren Brunak", "William E. Doolittle", "Victor P. Whittaker", "Harald Cram\u00e9r", "Benny Andersson", "Walter Arthur Berends

In [65]:
# test predictions on val set
reference_types = []
predicted_types = []
for index, row in val_df.iterrows():
  relation = row["Relation"]
  subjectentity = row["SubjectEntity"]
  subjectentityid = row["SubjectEntityID"]
  objectentity = row["ObjectEntities"]
  objectentityid = row["ObjectEntitiesID"]
  print("SubjectEntity:", subjectentity)
  print("SubjectEntityID:", subjectentityid)
  print("Relation:", relation)

  predicted_type = predict_answer(relation, subjectentityid, subjectentity)
  reference_types.append({"SubjectEntity": subjectentity, "SubjectEntityID": subjectentityid, "Relation": relation, "ObjectEntitiesID": objectentityid})
  predicted_types.append(predicted_type)

generate_evaluation(reference_types, predicted_types)

SubjectEntity: honorary doctor of Stockholm University
SubjectEntityID: Q38215093
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Grammy Award for Best Rock Album
SubjectEntityID: Q691892
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: honorary doctor of the Yale University
SubjectEntityID: Q63072513
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Order of the Aztec Eagle
SubjectEntityID: Q93956
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: AAAI Fellow
SubjectEntityID: Q18748042
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Sakharov Prize
SubjectEntityID: Q48786
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Presidential Medal of Freedom
SubjectEntityID: Q17144
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Turing Award
SubjectEntityID: Q185667
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Félix Houphouët-Boigny Peace Prize
SubjectEntityID: Q1479435
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: American Library Association Honorary Membership
SubjectEntityID: Q107183707
Relat

In [66]:
with open("/content/drive/MyDrive/Colab Notebooks/lm-kbc/data/test.jsonl", "r") as json_data:
    testset_json = json_data.readlines()
    json_data.close()

print(testset_json[0])

# prepare val dataset
lm_testset_data = []
for i in testset_json:
  lm_testset_data.append(i)

# Create a Dataframe from val data
testset_df = pd.DataFrame(columns=['SubjectEntity', 'SubjectEntityID', 'ObjectEntities', 'ObjectEntitiesID', 'Relation'])
for item in lm_testset_data:
  testset_df = pd.concat([testset_df, pd.DataFrame([json.loads(item)])], ignore_index=True)
# convert df values to string
testset_df = testset_df.applymap(str)

# test predictions on val set
reference_types = []
predicted_types = []
for index, row in testset_df.iterrows():
  relation = row["Relation"]
  subjectentity = row["SubjectEntity"]
  subjectentityid = row["SubjectEntityID"]
  objectentity = row["ObjectEntities"]
  objectentityid = row["ObjectEntitiesID"]
  print("SubjectEntity:", subjectentity)
  print("SubjectEntityID:", subjectentityid)
  print("Relation:", relation)

  predicted_type = predict_answer(relation, subjectentityid, subjectentity)
  reference_types.append({"SubjectEntity": subjectentity, "SubjectEntityID": subjectentityid, "Relation": relation, "ObjectEntitiesID": objectentityid})
  predicted_types.append(predicted_type)

generate_evaluation(reference_types, predicted_types)

{"SubjectEntityID": "Q317038", "SubjectEntity": "Max Planck Medal", "ObjectEntitiesID": [], "ObjectEntities": [], "Relation": "awardWonBy"}

SubjectEntity: Max Planck Medal
SubjectEntityID: Q317038
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Best Female Tennis Player ESPY Award
SubjectEntityID: Q16275104
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Franklin Medal
SubjectEntityID: Q3141777
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: BBC World Sport Star of the Year
SubjectEntityID: Q3494048
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Pulitzer Prize for Biography
SubjectEntityID: Q285117
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Nobel Prize in Literature
SubjectEntityID: Q37922
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Sydney Peace Prize
SubjectEntityID: Q1975046
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Aga Khan Award for Architecture
SubjectEntityID: Q389808
Relation: awardWonBy
TEMPLATE: 5
SubjectEntity: Mark Twain Prize for American Humor
SubjectEntit

In [67]:
# save official predictions to jsonl file
with open("predictions.jsonl", 'w') as f:
    f.write('\n'.join(map(json.dumps, predicted_types)))