In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils.evaluate_v2 import compute_f1

# Read output file

In [None]:
def read_output_json(filepath: str) -> list:
    """Read a file that holds one JSON document per line (JSON Lines).

    The output files are not single valid JSON documents, so ``json.load``
    cannot parse them as a whole; each line is decoded on its own instead.

    Parameters
    ----------
    filepath : str
        Path of the file to read.

    Returns
    -------
    list
        The decoded value of every non-blank line, in file order.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform default.
    with open(filepath, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a stray empty line at the end of the file) carry no
        # record, so skip them instead of letting json.loads fail on them.
        return [json.loads(line) for line in f if line.strip()]

In [None]:
folder = 'outputs/'
dfs = {}
# Sorted so that the load order (and therefore the dict order) is the same on
# every run; os.listdir returns the entries in arbitrary order.
for filename in sorted(os.listdir(folder)):
    path = os.path.join(folder, filename)
    # Only regular *.json files are model outputs. Anything else in the folder
    # (sub-folders, OS metadata files, ...) cannot be parsed and is skipped.
    if not (filename.endswith('.json') and os.path.isfile(path)):
        continue

    # transform the filename from e.g. "BT_output_42.json" to the key "bert_42"
    # transform the filename from e.g. "BT_output_history_42.json" to the key "bert_history_42"
    key = filename.split('.')[0]
    key = key.split('_')
    # Any prefix other than "BT" is treated as a RoBERTa run.
    name = 'bert' if key[0] == 'BT' else 'roberta'
    # Drop the second token ("output" in the examples above) and keep the rest.
    key = '_'.join([name] + key[2:])

    dfs[key] = pd.DataFrame(read_output_json(path))


In [None]:
# Add two derived columns to every loaded output frame (the frames are modified in place).
for df in dfs.values():
    # Score each prediction against the reference answer of the same row.
    df['f1'] = list(map(compute_f1, df['pred'], df['answer']))
    # Integer id per distinct story text. groupby sorts its keys by default, so the
    # ids follow the sorted order of the stories, not their order of appearance.
    df['story_id'] = df.groupby('story').ngroup()

# Analyze output

In [None]:
def print_answers(*df, idx=0):
    """Print stories with their questions, true answers and model predictions.

    Parameters
    ----------
    *df : pd.DataFrame
        One or more dataframes with the output results. Each must provide the
        columns 'story', 'pred' and 'f1'; the first one must also provide
        'question' and 'answer'. The first dataframe supplies the stories,
        questions and true answers; every dataframe contributes one predicted
        answer (and its F1 score) per question.
    idx : int, array or array-like
        Index of the stories to print, counted in order of first appearance in
        the first dataframe. Python ints and NumPy integer scalars select a
        single story. Pass None to print all the stories. Default is 0.

    Notes
    -----
    Rows are matched by position within a story, so all dataframes are assumed
    to list the same questions in the same order for that story.
    """
    stories = df[0]['story'].unique()
    if idx is not None:
        stories = stories[idx]
        # A scalar index yields one story string. Wrap it so the loop below
        # iterates over stories rather than over the characters of that string.
        # NumPy integers (e.g. an index taken from another array) are scalars
        # too, which a plain `type(idx) is int` check would miss.
        if isinstance(idx, (int, np.integer)):
            stories = [stories]

    for story in stories:
        # Rows of this story in every dataframe, renumbered from 0 so that the
        # same position refers to the same question in each of them.
        per_model = [d.loc[d['story'] == story].reset_index() for d in df]
        reference = per_model[0]

        print(55*'=', 'Story', 60*'=')
        print(reference['story'][0])

        print()
        print(50*'-', 'Question answering', 50*'-')
        for i in reference.index:
            print("\nQuestion:", ' '*3, reference['question'][i])
            print("True answer:", ' '*4, reference['answer'][i])
            for model_rows in per_model:
                print("\nPred answer:", ' '*4, model_rows['pred'][i])
                print("F1 score:", ' '*7, f"{model_rows['f1'][i]:.2g}")
            print()

In [None]:
print_answers(dfs['roberta_42'], dfs['roberta_2022'], dfs['roberta_1337'], idx=0)

In [None]:
print_answers(dfs['bert_42'], dfs['bert_history_42'], dfs['roberta_42'], dfs['roberta_history_42'], idx=100)