In [18]:
import os
from typing import List
import json
from typing import Callable
from collections import Counter
from collections import defaultdict
from typing import Dict

from src.utils import task_2_categories

# Directory template for one task's data; "{task_header}" is a str.format field.
store_dir = os.path.join("src", "data", "{task_header}")
# Path template of a single prediction file inside that directory; all four
# "{...}" fields are filled in with str.format by the code that opens the file.
file_path = os.path.join(
    store_dir, "{task_header}_train_preds_{model_name}_{keyword_flag}_{file_ending}.json"
)

In [19]:
def keyword_flag(x: bool) -> str:
    """Map the keyword switch to the fragment used in prediction file names.

    Args:
        x: Whether the keyword variant is selected.

    Returns:
        ``"with_keyword"`` when ``x`` is truthy, ``"without_keyword"`` otherwise.
    """
    return "with_keyword" if x else "without_keyword"

In [20]:
def show_labels(model_names: List[str]) -> Dict:
    """Tally the ``"output"`` values stored under ``"golds"`` in each prediction file.

    For every model, task and parameter setting listed in the body, the
    matching JSON file (located through the module-level ``file_path``
    template) is loaded and the ``"output"`` field of each ``"golds"`` entry is
    counted.

    Args:
        model_names: Model identifiers substituted into the file name.

    Returns:
        ``{model_name: {task_name: {setting_key: Counter}}}`` where
        ``setting_key`` is the entries-per-category value followed by the
        setting's numeric parameters, joined by single spaces
        (for example ``"120 5 5"``).
    """
    # (task name, entries per category, settings). The last element of every
    # setting selects the with/without-keyword variant of the file.
    task_specs = [
        ("LaMP_1", 120, [(2, False), (4, False), (5, 5, True), (10, 10, True)]),
        ("LaMP_2", 16, [(2, False), (4, False), (5, 15, True), (10, 30, True)]),
    ]

    counts_by_model = {}
    for model_name in model_names:
        counts_by_task = defaultdict(dict)
        for task_name, entries_per_category, settings in task_specs:
            for *numeric_params, uses_keyword in settings:
                name_parts = [
                    str(part) for part in (entries_per_category, *numeric_params)
                ]
                predictions_path = file_path.format(
                    task_header=task_name,
                    model_name=model_name,
                    keyword_flag=keyword_flag(uses_keyword),
                    file_ending="_".join(name_parts),
                )
                with open(predictions_path, "r", encoding="utf-8") as handle:
                    gold_entries = json.load(handle)["golds"]
                counts_by_task[task_name][" ".join(name_parts)] = Counter(
                    entry["output"] for entry in gold_entries
                )
        counts_by_model[model_name] = counts_by_task
    return counts_by_model

In [21]:
# Reference LaTeX table skeleton. The raw string is this cell's only expression
# and is not bound to a name, so running the cell just echoes it back.
# The "{}" cells in the Accuracy / F1 rows look like value placeholders
# (unconfirmed).
# NOTE(review): the LaMP-2 header row uses $k_4$ in one column and $k_3$ in the
# other -- confirm which subscript is intended.
r"""
\begin{tabular}{ l l c c c c}
    \hline
    &   & \multicolumn{4}{c}{Conversational: ChatGPT-3.5-Turbo} \\
    \cline{3-6}
    &   & \multicolumn{2}{c}{Without Keyphrase Extraction} & \multicolumn{2}{c}{With Keyphrase Extraction} \\ 
    \cline{3-6}
    Dataset & Metric & $k_1=2$ & $k_1=4$ & $k_1=5, k_2=5, k_3=13$ & $k_1=10, k_2=10, k_3=50$  \\  
    \hline
    \vspace{-1.5mm}
    LaMP-1: Personalized & Accuracy & {} & {} & {} & {} \\
    Citation Identification & F1 & {} & {} & {} & {} \\
    \hline
    &  & $k_1=2$ & $k_1=4$ &  $k_2=5, k_4=15$ &  $k_2=10, k_3=30$  \\  
    \hline
    \vspace{-1.5mm}
    LaMP-2: Personalized & Accuracy & {} & {} & {} & {}   \\
    News Categorization & F1 & {} & {} & {} & {}   \\
    \hline
\end{tabular}
"""

'\n\\begin{tabular}{ l l c c c c}\n    \\hline\n    &   & \\multicolumn{4}{c}{Conversational: ChatGPT-3.5-Turbo} \\\\\n    \\cline{3-6}\n    &   & \\multicolumn{2}{c}{Without Keyphrase Extraction} & \\multicolumn{2}{c}{With Keyphrase Extraction} \\\\ \n    \\cline{3-6}\n    Dataset & Metric & $k_1=2$ & $k_1=4$ & $k_1=5, k_2=5, k_3=13$ & $k_1=10, k_2=10, k_3=50$  \\\\  \n    \\hline\n    \\vspace{-1.5mm}\n    LaMP-1: Personalized & Accuracy & {} & {} & {} & {} \\\\\n    Citation Identification & F1 & {} & {} & {} & {} \\\\\n    \\hline\n    &  & $k_1=2$ & $k_1=4$ &  $k_2=5, k_4=15$ &  $k_2=10, k_3=30$  \\\\  \n    \\hline\n    \\vspace{-1.5mm}\n    LaMP-2: Personalized & Accuracy & {} & {} & {} & {}   \\\\\n    News Categorization & F1 & {} & {} & {} & {}   \\\\\n    \\hline\n\\end{tabular}\n'

In [51]:
def _setting_counters(
    labels_dict: Dict,
    model_name: str,
    task_name: str,
    entries_per_category: int,
    task_params: List[tuple],
) -> List[Counter]:
    """Look up one task's label counters, one per parameter setting, in order."""
    counters = []
    for task_param in task_params:
        # Same space-joined key layout that ``show_labels`` stores its counters under.
        setting_key = " ".join(
            str(elem) for elem in (entries_per_category, *task_param)
        )
        counters.append(labels_dict[model_name][task_name][setting_key])
    return counters


def convert_labels_to_table(
    model_names: List[str], labels_dict: Dict
) -> Dict[str, Dict[str, str]]:
    """Render selected label counts as LaTeX table-row snippets.

    For ``"LaMP_1"`` the snippet holds two rows: the counts of the ``"[0]"``
    label and of the ``"[3]"`` label, one column per parameter setting. For
    ``"LaMP_2"`` the snippet holds the count of the empty-string label per
    setting, each wrapped in a two-column multicolumn cell.

    Args:
        model_names: Models to render; each must be a key of ``labels_dict``.
        labels_dict: ``{model_name: {task_name: {setting_key: counter}}}`` as
            returned by ``show_labels``.

    Returns:
        ``{model_name: {"LaMP_1": str, "LaMP_2": str}}``.

    Raises:
        KeyError: If a model, task or setting key is missing from a plain-dict
            level of ``labels_dict``.
    """
    # Backslashes are escaped explicitly: a bare "\m" is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    template_1 = "LaMP-1: Personalized & [0] & {} & {} & {} & {} \\\\ \n Citation Identification & [3] & {} & {} & {} & {} \\\\"
    template_2 = 'LaMP-2: Personalized & \\multicolumn{{2}}{{c}}{{""}} & {} & {} & {} & {} \\\\ News Categorization &  &  &  &  &  \\\\'

    result_dict = defaultdict(dict)
    for model_name in model_names:
        task_1_counters = _setting_counters(
            labels_dict, model_name, "LaMP_1", 120, [(2,), (4,), (5, 5), (10, 10)]
        )
        # First row: "[0]" counts for all settings; second row: "[3]" counts.
        result_dict[model_name]["LaMP_1"] = template_1.format(
            *(counter.get("[0]", 0) for counter in task_1_counters),
            *(counter.get("[3]", 0) for counter in task_1_counters),
        )

        task_2_counters = _setting_counters(
            labels_dict, model_name, "LaMP_2", 16, [(2,), (4,), (5, 15), (10, 30)]
        )
        result_dict[model_name]["LaMP_2"] = template_2.format(
            *(
                "\\multicolumn{2}{c}{" + str(counter.get("", 0)) + "}"
                for counter in task_2_counters
            )
        )
    return result_dict

In [23]:
# Models passed to show_labels / convert_labels_to_table below.
# "BERTSERINI" is currently commented out of the list.
model_names = ["DistilBERT", "MiniLM", "OpenAI"]

In [52]:
# Count the labels per model, turn the counts into LaTeX row snippets, and
# pretty-print the resulting mapping as JSON.
result = convert_labels_to_table(model_names, show_labels(model_names))
print(json.dumps(result, indent=4))

{
    "DistilBERT": {
        "LaMP_1": "LaMP-1: Personalized & [0] & 33 & 52 & 2 & 14 \\\\ \n Citation Identification & [3] & 4 & 8 & 2 & 5 \\\\",
        "LaMP_2": "LaMP-2: Personalized & \\multicolumn{2}{c}{\"\"} & \\multicolumn{2}{c}{1} & \\multicolumn{2}{c}{1} & \\multicolumn{2}{c}{9} & \\multicolumn{2}{c}{7} \\\\ News Categorization &  &  &  &  &  \\\\"
    },
    "MiniLM": {
        "LaMP_1": "LaMP-1: Personalized & [0] & 0 & 3 & 0 & 0 \\\\ \n Citation Identification & [3] & 1 & 0 & 1 & 0 \\\\",
        "LaMP_2": "LaMP-2: Personalized & \\multicolumn{2}{c}{\"\"} & \\multicolumn{2}{c}{0} & \\multicolumn{2}{c}{0} & \\multicolumn{2}{c}{0} & \\multicolumn{2}{c}{0} \\\\ News Categorization &  &  &  &  &  \\\\"
    },
    "OpenAI": {
        "LaMP_1": "LaMP-1: Personalized & [0] & 23 & 103 & 1 & 6 \\\\ \n Citation Identification & [3] & 98 & 65 & 5 & 10 \\\\",
        "LaMP_2": "LaMP-2: Personalized & \\multicolumn{2}{c}{\"\"} & \\multicolumn{2}{c}{16} & \\multicolumn{2}{c}{14} & \\mu