In [1]:
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from sentence_transformers import util
import plotly.express as px
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_names = {
    "deberta": "deepset/deberta-v3-large-squad2",
    "bert_large_deepset": "deepset/bert-large-uncased-whole-word-masking-squad2",
    "bert_base": "deepset/bert-base-cased-squad2",
}

models = {}
for model_name in model_names:
    models[model_name] = pipeline("question-answering", model=model_names[model_name], tokenizer=model_names[model_name])

# Importing the data

In [3]:
df = pd.read_excel('./data/Example forms.xlsx', sheet_name='Lunch_Extractive', dtype=str)

df_melted = df.melt(id_vars='Full story', var_name='question', value_name='answer')
df_melted.columns = ['context', 'question', 'answers']

In [4]:
# Run questions through models
def generate_predictions(models, dataset):
    results = pd.DataFrame()
    index = 0
    for model in models:
        for row in dataset.iterrows():
            prediction = models[model](context=row[1]['context'], question=row[1]['question'])
            new_row = pd.DataFrame({
                'context': row[1]['context'], 
                'question': row[1]['question'], 
                'answer': row[1]['answers'], 
                'model': model, 
                'prediction': prediction['answer'], 
                'model_confidence': prediction['score']
            }, index=[index])
            results = pd.concat([results, new_row])
            index += 1
    return results

In [46]:
similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
def average_answer(dataset): # only answers for a single question
    # get all embeddings from similarity model
    vector_embeddings = {}
    for index, row in dataset.iterrows():
        model_name = row['model']
        vector_embeddings[model_name] = similarity_model.encode(row["prediction"])
    # calculate average vector
    average_vector = np.average(list(vector_embeddings.values()), axis=0)

    # Return answer closest to average
    closest_to_average = max(vector_embeddings, key=lambda key: util.pytorch_cos_sim(average_vector, vector_embeddings[key]))
    row_closest_to_average = dataset[dataset['model'] == closest_to_average]
    return row_closest_to_average

questions = df_melted['question'].unique()
contexts = df_melted['context'].unique()

In [8]:
answers = generate_predictions(models, df_melted)

In [48]:
# for each question, get the average answer
average_answers = pd.DataFrame()
for question in questions:
    average_answers = pd.concat([average_answers, average_answer(answers[answers['question'] == question])])

# count per model
count_per_model = average_answers.groupby('model').count()
count_per_model

Unnamed: 0_level_0,context,question,answer,prediction,model_confidence
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bert_large_deepset,18,18,18,18,18
deberta,30,30,30,30,30
