In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils.evaluate_v2 import compute_f1

# Read output file

In [None]:
def read_output_json(filepath: str):
    """Read a file that stores one JSON document per line.

    The output files are not single valid JSON documents, so ``json.load``
    cannot parse them as a whole; every line is decoded on its own instead.

    Parameters
    ----------
    filepath : str
        Path of the file to read.

    Returns
    -------
    list
        The decoded value of every non-blank line, in file order.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a stray empty line at the end of the file) hold no
        # record and would make json.loads raise, so they are skipped.
        return [json.loads(line) for line in f if line.strip()]

In [None]:
# One dataframe per model run, built from the records parsed out of its output file.
bert_records = read_output_json('outputs/BT_output_2022.json')
roberta_records = read_output_json('outputs/RB_output_2022.json')
bert_history_records = read_output_json('outputs/BT_output_history_2022.json')
roberta_history_records = read_output_json('outputs/RB_output_history_2022.json')

df_bert = pd.DataFrame(bert_records)
df_roberta = pd.DataFrame(roberta_records)
df_bert_history = pd.DataFrame(bert_history_records)
df_roberta_history = pd.DataFrame(roberta_history_records)

# Add two derived columns to every dataframe in place:
#   'f1'       - compute_f1 applied row by row to ('pred', 'answer')
#   'story_id' - integer group number of the row's 'story' value
for df in (df_bert, df_roberta, df_bert_history, df_roberta_history):
    df['f1'] = list(map(compute_f1, df['pred'], df['answer']))
    df['story_id'] = df.groupby('story').ngroup()

# Analyze output

In [None]:
def print_answers(df, idx=0):
    """Print the selected stories, each followed by its questions and answers.

    For every selected story the story text is printed once, then one entry
    per row of that story with the question, the true answer, the predicted
    answer and the F1 score.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with the output results. The columns 'story', 'question',
        'answer', 'pred' and 'f1' are read.
    idx : int, slice, array-like or None
        Position(s) of the stories to print within ``df['story'].unique()``.
        Pass None to print all the stories. Default is 0.
    """
    stories = df['story'].unique()
    if idx is not None:
        if isinstance(idx, (int, np.integer)):
            # A scalar index selects a single story string; wrap it so the loop
            # below does not iterate over its characters. NumPy integers
            # (e.g. np.int64) are scalars too and must be treated the same way.
            stories = [stories[idx]]
        else:
            stories = stories[idx]

    for story in stories:
        story_rows = df.loc[df['story'] == story]

        print(55*'=', 'Story', 60*'=')
        print(story)

        print()
        print(50*'-', 'Question answering', 50*'-')
        rows = zip(
            story_rows['question'],
            story_rows['answer'],
            story_rows['pred'],
            story_rows['f1'],
        )
        for question, answer, pred, f1 in rows:
            print("Question:\t", question)
            print("True answer:\t", answer)
            print("Pred answer:\t", pred)
            print(f"F1 score: \t {f1:.2g}")
            print()

In [None]:
# Story at position 2 (the third distinct story) with the answers in df_bert.
print_answers(df_bert, idx=2)

In [None]:
# Story at position 2 (the third distinct story) with the answers in df_roberta.
print_answers(df_roberta, idx=2)

In [None]:
# Story at position 2 (the third distinct story) with the answers in df_bert_history.
print_answers(df_bert_history, idx=2)

In [None]:
# Story at position 2 (the third distinct story) with the answers in df_roberta_history.
print_answers(df_roberta_history, idx=2)