In [6]:
import re
import pandas as pd
from prettytable import PrettyTable

In [7]:
# Relative path of the directory that holds the final annotation exports.
ANNOTATION_BASE_PATH = '../../../../03_datasets/sentiment-analysis_stories/annotations/final'

# Generator display name -> annotation JSON file for that generator.
# Insertion order is the order in which rows appear in the summary table.
MODEL = dict([
    ('Llama-3.3-70B', ANNOTATION_BASE_PATH + '/25-05-09_Annotations_Llama3-3.json'),
    ('Llama-2-7B', ANNOTATION_BASE_PATH + '/25-05-09_Annotations_Llama2.json'),
    ('Mistral-7B', ANNOTATION_BASE_PATH + '/25-05-09_Annotations_Mistral.json'),
])


In [8]:
def extract_sentiment(input):
    """Pull the requested sentiment out of a prompt string.

    Looks for the phrase ``The story should have a <X> sentiment`` anywhere
    in ``input`` and returns the shortest ``<X>`` that satisfies it.

    Args:
        input: Prompt text to search.

    Returns:
        The captured sentiment text, or ``input`` unchanged when the phrase
        does not occur.
    """
    hit = re.search('The story should have a (.+?) sentiment', input)
    # Fall back to the raw prompt so callers always get a value back.
    return hit.group(1) if hit else input

def get_annotations(path):
    """Load one annotation file and flag rows whose oracle label matches the prompt.

    Args:
        path: Location of a JSON file readable by ``pd.read_json``. The data
            must provide ``oracle`` and ``input`` columns.

    Returns:
        A DataFrame restricted to rows with a non-null ``oracle`` value, with
        two added columns: ``condition`` (the result of ``extract_sentiment``
        applied to ``input``) and ``match`` (whether ``condition`` equals
        ``oracle``).
    """
    annotations = pd.read_json(path)

    # Take an explicit copy of the filtered rows: adding columns to the result
    # of a boolean selection otherwise triggers pandas' SettingWithCopyWarning.
    annotated = annotations.loc[annotations['oracle'].notna()].copy()

    annotated['condition'] = annotated['input'].apply(extract_sentiment)
    annotated['match'] = annotated['condition'] == annotated['oracle']

    return annotated

def pretty_print_latex(latex_str, indent="    "):
    r"""Re-indent LaTeX source according to its ``\begin``/``\end`` nesting.

    Row separators written inline as `` \\ `` (surrounded by spaces) are
    first turned into line breaks. Each resulting line is then prefixed with
    one ``indent`` per currently open environment. ``\begin`` and ``\end``
    lines are placed at the depth of the environment they delimit, so nested
    environments are indented as well. Existing leading whitespace on a line
    is kept as-is.

    Args:
        latex_str: LaTeX source to format.
        indent: String used for one nesting level (four spaces by default).

    Returns:
        The formatted source with lines joined by ``"\n"``.
    """
    lines = latex_str.replace(r" \\ ", r" \\" + "\n").splitlines()
    formatted_lines = []
    depth = 0
    for line in lines:
        # Plain substring counts, so \begingroup / \endgroup are counted too.
        opens = line.count(r"\begin")
        closes = line.count(r"\end")
        if closes > opens:
            # Net-closing line: dedent first so \end lines up with its \begin.
            # Clamp at zero so unbalanced input cannot push the depth negative.
            depth = max(depth - (closes - opens), 0)
            formatted_lines.append(indent * depth + line)
        else:
            # Net-opening (or balanced) line: emit at the current depth, then
            # indent whatever follows. A line that opens and closes the same
            # environment leaves the depth untouched.
            formatted_lines.append(indent * depth + line)
            depth += opens - closes
    return "\n".join(formatted_lines)

In [9]:
# Summary table: one row per generator model.
t = PrettyTable(['Generator', 'Title match (%)', 'Condition Match (%)'])

# NOTE(review): the headers say "(%)" but the rows hold raw counts (number of
# rows with a non-null oracle, and number of rows where condition == oracle).
# These only read as percentages if each file has exactly 100 rows -- confirm.
for model_name, path in MODEL.items():
    df = get_annotations(path)

    # Summing the boolean column counts the True entries and yields 0 when
    # there are none, whereas value_counts()[True] raises KeyError in that case.
    match = int(df['match'].sum())

    t.add_row([model_name, len(df), match])

t

Generator,Title match (%),Condition Match (%)
Llama-3.3-70B,100,87
Llama-2-7B,98,80
Mistral-7B,96,76


In [10]:
# Export the summary table as LaTeX, re-indent it, and print the result.
latex_source = t.get_latex_string()
l = pretty_print_latex(latex_source)
print(l)

\begin{tabular}{ccc}
    Generator & Title match (%) & Condition Match (%) \\
    Llama-3.3-70B & 100 & 87 \\
    Llama-2-7B & 98 & 80 \\
    Mistral-7B & 96 & 76 \\
\end{tabular}
