# Notebook: Convert Model Results to Latex

This notebook loads the `.json` files containing the model performance results and converts them into LaTeX table rows for the paper.

In [2]:
# TODO: Check whether the metrics are also available for the other models
# TODO: 1.000 <- insert thousands separators
# TODO: Should work for the real-only setting
# TODO: Should work for all tasks
# TODO: Use 3 decimal places for f1 micro etc.
# Make sure every task provides "eval_f1_micro", "eval_f1_macro", "accuracy"

## Packages

In [3]:
import json

## Settings / Constants

In [4]:
# Shared prefix of every result file; the run-specific suffix is appended on load.
RESULTS_PATH_BASE = "../07 train models/results_json/results_"

# Display name of each LLM as it is printed in the table rows.
LLM_PAPER_TITLE = {"Llama70B": "Llama-2-70B", "GPT-3": "GPT-3.5-turbo"}

# LLMs that are reported ("GPT-3" has a display name above but is not listed here).
LLMS = ["Llama70B"]

# ABSA tasks for which results are reported.
ABSA_TASKS = [
    "aspect_category",
    "aspect_category_sentiment",
    "end_2_end_absa",
    "target_aspect_sentiment_detection",
]

# Real/synthetic example counts per few-shot condition.
# The condition name ("random" / "fixed") is also the suffix of the result file name.
SYNTH_COMBINATIONS = {
    "random": [{"real": 500, "synth": n_synth} for n_synth in (500, 1000, 1500)],
    "fixed": [{"real": 25, "synth": n_synth} for n_synth in (475, 975, 1975)],
}

In [5]:
# Few-shot strategy label for a table row, looked up by the row's number of
# real examples (the "real" values used in SYNTH_COMBINATIONS).
FEW_SHOT_STRATEGY = {
    25: "25 fixed examples",
    500: "25 random examples",
}

## Code

In [6]:
def format_thousands(number):
    """Return ``number`` as a string with commas as thousands separators.

    Example: ``1000`` becomes ``"1,000"``; values below 1000 are unchanged.
    """
    return f"{number:,}"

def round_to_three_decimals(number):
    """Round ``number`` to at most three decimal places.

    The result is still a number, so trailing zeros are not kept when it is
    converted to text (``0.74`` stays ``0.74``, not ``0.740``).
    """
    return round(number, ndigits=3)

### Report Main Metrics

In [7]:
# Print the LaTeX rows of the main result table for every ABSA task.
# Per task: first the rows trained on real examples only, then one group of
# rows per few-shot condition, each group closed by \hline.
for absa_task in ABSA_TASKS:
    print(absa_task, ":\n")

    # For "target_aspect_sentiment_detection" a single "eval_f1" value is read;
    # all other tasks are reported with micro and macro F1.
    if absa_task != "target_aspect_sentiment_detection":
        metric_keys = ["eval_f1_micro", "eval_f1_macro", "eval_accuracy"]
    else:
        metric_keys = ["eval_f1", "eval_accuracy"]

    # Rows without synthetic data (LLM and few-shot columns stay "-").
    for n_real in [500, 1000, 2000]:
        json_path = RESULTS_PATH_BASE + \
            f"only_real_real{n_real}_synth0_{absa_task}_random.json"
        with open(json_path, "r", encoding="utf-8") as json_file:
            results = json.load(json_file)
        # ":.3f" keeps trailing zeros so every cell has exactly three decimals.
        metrics = " & ".join(
            f"{round_to_three_decimals(results[key]):.3f}" for key in metric_keys)
        print(f"- & - & {format_thousands(n_real)} & 0 & {metrics} \\\\")
    print("\\hline")

    # Rows trained on a mix of real and synthetic examples.
    for few_shot_condition, combinations in SYNTH_COMBINATIONS.items():
        for llm in LLMS:
            for freq in combinations:
                n_real = freq["real"]
                n_synth = freq["synth"]
                json_path = RESULTS_PATH_BASE + llm + \
                    f"_real{n_real}_synth{n_synth}_{absa_task}_{few_shot_condition}.json"
                with open(json_path, "r", encoding="utf-8") as json_file:
                    results = json.load(json_file)
                metrics = " & ".join(
                    f"{round_to_three_decimals(results[key]):.3f}" for key in metric_keys)
                print(
                    f"{LLM_PAPER_TITLE[llm]} & {FEW_SHOT_STRATEGY[n_real]} & {format_thousands(n_real)} & {format_thousands(n_synth)} & {metrics} \\\\")
        # Escaped backslash: "\h" is not a valid escape sequence.
        print("\\hline")

aspect_category :

- & - & 500 & 0 & 0.867 & 0.866 & 0.778 \\
- & - & 1,000 & 0 & 0.899 & 0.889 & 0.824 \\
- & - & 2,000 & 0 & 0.903 & 0.893 & 0.826 \\
\hline
Llama-2-70B & 25 random examples & 500 & 500 & 0.839 & 0.833 & 0.726 \\
Llama-2-70B & 25 random examples & 500 & 1,000 & 0.854 & 0.834 & 0.742 \\
Llama-2-70B & 25 random examples & 500 & 1,500 & 0.832 & 0.823 & 0.734 \\
\hline
Llama-2-70B & 25 fixed examples & 25 & 475 & 0.693 & 0.701 & 0.476 \\
Llama-2-70B & 25 fixed examples & 25 & 975 & 0.74 & 0.747 & 0.596 \\
Llama-2-70B & 25 fixed examples & 25 & 1,975 & 0.79 & 0.779 & 0.676 \\
\hline
aspect_category_sentiment :

- & - & 500 & 0 & 0.789 & 0.552 & 0.68 \\
- & - & 1,000 & 0 & 0.819 & 0.592 & 0.732 \\
- & - & 2,000 & 0 & 0.833 & 0.671 & 0.748 \\
\hline
Llama-2-70B & 25 random examples & 500 & 500 & 0.736 & 0.534 & 0.602 \\
Llama-2-70B & 25 random examples & 500 & 1,000 & 0.739 & 0.568 & 0.604 \\
Llama-2-70B & 25 random examples & 500 & 1,500 & 0.742 & 0.546 & 0.58 \\
\hline
Lla

### Report Fine-Grained Metrics

#### ACD

In [8]:
# Print the LaTeX rows with class-wise F1 and accuracy for the
# "aspect_category" task.
absa_task = "aspect_category"

# Column order of the table: per aspect category first F1, then accuracy.
class_wise_keys = [
    f"eval_{metric}_{ac}"
    for ac in ["GENERAL-IMPRESSION", "FOOD", "SERVICE", "AMBIENCE", "PRICE"]
    for metric in ["f1", "accuracy"]
]

# Rows without synthetic data (LLM and few-shot columns stay "-").
for n_real in [500, 1000, 2000]:
    json_path = RESULTS_PATH_BASE + \
        f"only_real_real{n_real}_synth0_{absa_task}_random.json"
    with open(json_path, "r", encoding="utf-8") as json_file:
        results = json.load(json_file)
    # ":.3f" keeps trailing zeros so every cell has exactly three decimals.
    class_wise_metrics = " & ".join(
        f"{round_to_three_decimals(results[key]):.3f}" for key in class_wise_keys)
    print(f"- & - & {format_thousands(n_real)} & 0 & {class_wise_metrics} \\\\")
print("\\hline")

# Rows trained on a mix of real and synthetic examples, one group per
# few-shot condition, each group closed by \hline.
for few_shot_condition, combinations in SYNTH_COMBINATIONS.items():
    for llm in LLMS:
        for freq in combinations:
            n_real = freq["real"]
            n_synth = freq["synth"]
            json_path = RESULTS_PATH_BASE + llm + \
                f"_real{n_real}_synth{n_synth}_{absa_task}_{few_shot_condition}.json"
            with open(json_path, "r", encoding="utf-8") as json_file:
                results = json.load(json_file)
            class_wise_metrics = " & ".join(
                f"{round_to_three_decimals(results[key]):.3f}" for key in class_wise_keys)
            print(
                f"{LLM_PAPER_TITLE[llm]} & {FEW_SHOT_STRATEGY[n_real]} & {format_thousands(n_real)} & {format_thousands(n_synth)} & {class_wise_metrics} \\\\")
    # Escaped backslash: "\h" is not a valid escape sequence.
    print("\\hline")

- & - & 500 & 0 & 0.783 &0.896 &0.894 &0.91 &0.896 &0.926 &0.812 &0.952 &0.944 &0.992  \\
- & - & 1,000 & 0 & 0.821 &0.91 &0.92 &0.932 &0.955 &0.97 &0.844 &0.954 &0.904 &0.986  \\
- & - & 2,000 & 0 & 0.836 &0.912 &0.933 &0.944 &0.949 &0.966 &0.827 &0.948 &0.921 &0.988  \\
\hline
Llama-2-70B & 25 random examples & 500 & 500 & 0.776 &0.894 &0.866 &0.896 &0.865 &0.906 &0.788 &0.944 &0.868 &0.98  \\
Llama-2-70B & 25 random examples & 500 & 1,000 & 0.797 &0.902 &0.886 &0.908 &0.911 &0.942 &0.753 &0.928 &0.825 &0.972  \\
Llama-2-70B & 25 random examples & 500 & 1,500 & 0.792 &0.892 &0.824 &0.866 &0.897 &0.936 &0.797 &0.948 &0.805 &0.966  \\
\hline
Llama-2-70B & 25 fixed examples & 25 & 475 & 0.555 &0.634 &0.729 &0.774 &0.836 &0.902 &0.677 &0.92 &0.706 &0.95  \\
Llama-2-70B & 25 fixed examples & 25 & 975 & 0.654 &0.816 &0.735 &0.808 &0.823 &0.896 &0.71 &0.91 &0.811 &0.972  \\
Llama-2-70B & 25 fixed examples & 25 & 1,975 & 0.74 &0.844 &0.811 &0.856 &0.839 &0.902 &0.733 &0.936 &0.771 &0.968  \\

#### ACSA

In [23]:
# Print the LaTeX rows with class-wise F1 / precision / recall for the
# "aspect_category_sentiment" task. The columns are split over two tables,
# one per group of aspect categories.
absa_task = "aspect_category_sentiment"

for idx, aspect_categories in enumerate([["GENERAL-IMPRESSION", "FOOD"], ["SERVICE", "AMBIENCE", "PRICE"]]):
    print("Table for:", aspect_categories, "\n\n")

    # Only the first table carries the leading condition columns
    # (LLM, few-shot strategy, #real, #synth); the second prints metrics only.
    with_condition_columns = idx == 0

    # Column order: per aspect category and polarity -> F1, precision, recall.
    class_wise_keys = [
        f"{metric}_{ac}-{polarity}"
        for ac in aspect_categories
        for polarity in ["POSITIVE", "NEUTRAL", "NEGATIVE"]
        for metric in ["eval_f1", "eval_precision", "eval_recall"]
    ]

    # Rows without synthetic data.
    for n_real in [500, 1000, 2000]:
        json_path = RESULTS_PATH_BASE + \
            f"only_real_real{n_real}_synth0_{absa_task}_random.json"
        with open(json_path, "r", encoding="utf-8") as json_file:
            results = json.load(json_file)

        condition_string = f"- & - & {format_thousands(n_real)} & 0 &"
        # ":.3f" keeps trailing zeros so every cell has exactly three decimals.
        metrics_class_wise = " & ".join(
            f"{round_to_three_decimals(results[key]):.3f}" for key in class_wise_keys)

        if with_condition_columns:
            print(f"{condition_string} {metrics_class_wise} \\\\")
        else:
            print(f"{metrics_class_wise} \\\\")

    print("\\hline")

    # Rows trained on a mix of real and synthetic examples, one group per
    # few-shot condition, each group closed by \hline.
    for few_shot_condition, combinations in SYNTH_COMBINATIONS.items():
        for llm in LLMS:
            for freq in combinations:
                n_real = freq["real"]
                n_synth = freq["synth"]
                json_path = RESULTS_PATH_BASE + llm + \
                    f"_real{n_real}_synth{n_synth}_{absa_task}_{few_shot_condition}.json"
                with open(json_path, "r", encoding="utf-8") as json_file:
                    results = json.load(json_file)

                condition_string = f"{LLM_PAPER_TITLE[llm]} & {FEW_SHOT_STRATEGY[n_real]} & {format_thousands(n_real)} & {format_thousands(n_synth)} &"
                metrics_class_wise = " & ".join(
                    f"{round_to_three_decimals(results[key]):.3f}" for key in class_wise_keys)

                if with_condition_columns:
                    print(f"{condition_string} {metrics_class_wise} \\\\")
                else:
                    print(f"{metrics_class_wise} \\\\")
        # Escaped backslash: "\h" is not a valid escape sequence.
        print("\\hline")

Table for: ['GENERAL-IMPRESSION', 'FOOD'] 


- & - & 500 & 0 & 0.797 & 0.833 & 0.764 & 0.0 & 0.0 & 0.0 & 0.654 & 0.723 & 0.596 & 0.869 & 0.88 & 0.858 & 0.727 & 0.75 & 0.706 & 0.68 & 0.718 & 0.646 \\
- & - & 1,000 & 0 & 0.832 & 0.877 & 0.792 & 0.0 & 0.0 & 0.0 & 0.673 & 0.72 & 0.632 & 0.893 & 0.92 & 0.867 & 0.833 & 0.789 & 0.882 & 0.732 & 0.757 & 0.709 \\
- & - & 2,000 & 0 & 0.824 & 0.803 & 0.847 & 0.571 & 1.0 & 0.4 & 0.694 & 0.829 & 0.596 & 0.911 & 0.93 & 0.892 & 0.756 & 0.607 & 1.0 & 0.765 & 0.747 & 0.785 \\
\hline
Llama-2-70B & 25 random examples & 500 & 500 & 0.718 & 0.933 & 0.583 & 0.0 & 0.0 & 0.0 & 0.66 & 0.767 & 0.579 & 0.812 & 0.875 & 0.758 & 0.3 & 1.0 & 0.176 & 0.691 & 0.675 & 0.709 \\
Llama-2-70B & 25 random examples & 500 & 1,000 & 0.776 & 0.839 & 0.722 & 0.0 & 0.0 & 0.0 & 0.646 & 0.762 & 0.561 & 0.8 & 0.88 & 0.733 & 0.286 & 0.364 & 0.235 & 0.618 & 0.864 & 0.481 \\
Llama-2-70B & 25 random examples & 500 & 1,500 & 0.818 & 0.862 & 0.778 & 0.0 & 0.0 & 0.0 & 0.729 & 0.78 & 0.684 &