In [None]:
# Install the Hugging Face `transformers` package into the running kernel's environment.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip install transformers

In [None]:
import sys
# Add the parent directory (relative to the kernel's working directory) to the module
# search path. Presumably this is where the local modules imported below
# (Aspect_Dense_Baselines, helper) live -- TODO confirm.
sys.path.append('..')

In [None]:
from Aspect_Dense_Baselines import NeuralEmbedder, NeuralSearchEngine
from helper import *
import numpy as np
import json
import pandas as pd

In [None]:
# Load the experiment configuration and the train/validation/test splits.
# load_config and load_data are not defined in this notebook; presumably they come
# from the `from helper import *` star import -- TODO confirm.
config = load_config()
train_data, val_data, test_data = load_data(config)

# Maps a model label to its list of accuracy percentages; filled in by the
# evaluation cell and turned into a DataFrame at the end of the notebook.
results_data = {}

In [None]:
def custom_gmean(lst):
  """Return the geometric mean of ``lst``: the ``len(lst)``-th root of the product of its values.

  The values are converted to float64 before being multiplied. Multiplying a
  list of Python ints directly with ``np.prod`` uses a fixed-width integer
  dtype, which overflows silently for large products.

  Args:
    lst: non-empty sequence of numbers.

  Returns:
    The geometric mean as a NumPy float.

  Raises:
    ZeroDivisionError: if ``lst`` is empty.
  """
  values = np.asarray(lst, dtype=np.float64)
  gmean = np.prod(values)**(1/len(lst))
  return gmean

In [None]:
def model_score(data, model_name, agg_fcn):
  """Count how many multiple-choice queries a model answers correctly, overall and per query type.

  For every sample, the option descriptions are indexed in a fresh
  ``NeuralSearchEngine``. The keys of ``sample["correctness_explanation"]``
  (converted to strings) are used as the query aspects, and the description
  returned by ``engine.search`` is mapped back to its option id and compared
  with ``sample["answer"]``.

  Args:
    data: sized iterable of samples. Each sample is a mapping with the keys
      ``"query_type"`` (type name -> flag, where 1 marks the type as active),
      ``"options"`` (option id -> description), ``"correctness_explanation"``
      (mapping whose keys are the aspects) and ``"answer"`` (id of the
      correct option).
    model_name: identifier passed to ``NeuralEmbedder`` as both of its
      arguments (presumably tokenizer name and model name -- TODO confirm
      against Aspect_Dense_Baselines).
    agg_fcn: aggregation callable forwarded unchanged to ``engine.search``.

  Returns:
    Tuple ``(correct, total, type_correct, type_count)``. ``correct`` is the
    number of correctly answered samples and ``total`` is ``len(data)``.
    ``type_correct`` and ``type_count`` are dicts keyed by the seven query
    types, in a fixed order, holding the number of correct and of all samples
    flagged with each type.

  Raises:
    KeyError: if a sample flags a query type outside the seven known types.
  """
  query_types = (
    "Specific",
    "Subjective",
    "Commonsense",
    "Compound",
    "Negated",
    "Analogical",
    "Temporal",
  )
  type_correct = dict.fromkeys(query_types, 0)
  type_count = dict.fromkeys(query_types, 0)

  # number of correct predictions
  correct = 0

  # one embedder is shared by the per-query search engines
  embedder = NeuralEmbedder(model_name, model_name)

  # loop through each query
  for sample in data:
    # query types flagged with 1 for this sample
    active_types = [key for key, flag in sample['query_type'].items() if flag == 1]
    for key in active_types:
      type_count[key] += 1

    docs = list(sample["options"].values())

    # create a search engine object for this query and index its options
    engine = NeuralSearchEngine(embedder)
    engine.index(docs)

    # the aspects of the query are the keys of the correctness explanation
    aspects = [str(a) for a in sample["correctness_explanation"].keys()]
    predicted_description = engine.search(aspects, agg_fcn)

    # Map the predicted description back to its option id. Reset the id for
    # every sample so that an unmatched description can neither reuse the
    # previous sample's id nor leave the name unbound. When several options
    # share the description, the last one wins.
    predicted_id = None
    for option, description in sample["options"].items():
      if description == predicted_description:
        predicted_id = option

    # a sample is only correct when a prediction was found and matches the answer
    if predicted_id is not None and predicted_id == sample["answer"]:
      correct += 1
      for key in active_types:
        type_correct[key] += 1

  return correct, len(data), type_correct, type_count

In [None]:
# Aggregation function forwarded by model_score to the search engine.
agg_fcn = min


def _accuracy_column(correct, total, type_correct, type_count):
  """Return per-query-type accuracy percentages followed by the overall percentage.

  A query type (or a whole data set) without any samples yields NaN instead
  of raising ZeroDivisionError.
  """
  nan = float("nan")
  column = [x*100/y if y else nan for x, y in zip(type_correct.values(), type_count.values())]
  column.append(correct*100/total if total else nan)
  return column


# BERT model
correct, total, type_correct, type_count = model_score(test_data, "bert-base-uncased", agg_fcn)
print("Total correct answers: {} out of {}".format(correct, total))
bert_results = _accuracy_column(correct, total, type_correct, type_count)
results_data.update({'BERT':bert_results})

# TASB model
correct, total, type_correct, type_count = model_score(test_data, "sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco", agg_fcn)
print("Total correct answers: {} out of {}".format(correct, total))
tasb_results = _accuracy_column(correct, total, type_correct, type_count)
results_data.update({'TAS-B':tasb_results})

In [None]:
# Row labels: the seven query types in the order model_score reports them,
# followed by the overall accuracy.
row_names = [
  'Specific',
  'Subjective',
  'Commonsense',
  'Compound',
  'Negated',
  'Analogical',
  'Temporal',
  'All',
]
# One column per evaluated model, one row per label above.
results_df = pd.DataFrame(data=results_data, index=row_names)
print(results_df)