In [1]:
import os
import pickle
from collections import defaultdict

import pandas as pd

In [2]:
# Lookup tables that translate the raw identifiers found in the result
# records into the labels printed in the LaTeX tables.

# Raw model identifier -> model label.
model_map_name = {
    "qwen": "Qwen2",
    "Mistral-7B-Instruct-v0.3": "Mistral v0.3",
    "gemma-7b-it": "Gemma 1.1",
    "Meta-Llama-3-8B-Instruct": "Llama 3",
    "llama3.1": "Llama 3.1",
    "led_tiny": "LED-Tiny",
    "primera_tiny": "Primera-Tiny",
    "led_short": "LED-Short",
    "primera_short": "Primera-Short",
    "led_long": "LED-Long",
    "primera_long": "Primera-Long",
}

# Raw selection/pipeline identifier -> pipeline label.
# The string "None" (not the None object) marks rows without a selection step.
pipeline_map_name = {
    "first5last5_bert": "F5L5 + BES",
    "first5last5_textrank": "F5L5 + TR",
    "first5last5": "F5L5",
    "random_selection_bert": "RS + BES",
    "random_selection_textrank": "RS + TR",
    "random_selection": "RS",
    "None": "None",
}

# Pipeline labels for the EUR-Lex-Sum tables: same raw identifiers, but the
# label omits the "RS + " prefix.
pipeline_map_name_eurlexsum = {
    "random_selection_bert": "BES",
    "random_selection_textrank": "TR",
    "None": "None",
}

# Raw prompt identifier -> prompt label ("None" for rows without a prompt).
prompt_type_map = {
    "basic": "Basic",
    "detailed": "Policy-Informed",
    "cod": "CoD",
    "None": "None",
}


In [19]:
# Build the LaTeX table rows for the "long" summaries.
#
# Steps:
#   1. load every result file in the working directory whose name contains
#      `summ_type`,
#   2. collapse each run of four score rows into one wide row (R1, R2, RL, BS),
#   3. replace raw identifiers with their display labels,
#   4. format every score as a string, bolding the best value per prompt type,
#   5. emit one LaTeX row per configuration, split into three tables.
#
# Results: `df` (wide table), `latex_dict`, `latex_rows_elas`,
# `latex_rows_longcontext`, `latex_rows_ablation`.

dfs = []
summ_type = "long"
# os.listdir() returns names in arbitrary order; sorting keeps the order of the
# generated table rows stable between runs and machines.
for result_file in sorted(os.listdir()):
    if summ_type not in result_file:
        continue
    # NOTE(security): unpickling can execute arbitrary code; only load result
    # files that were produced by trusted runs.
    # The object is used below like a dict of lists — TODO confirm.
    record = pd.read_pickle(result_file)
    if "qwen" in record["model"] or "llama3.1" in record["model"]:
        # These models get the placeholder selection type "None".
        record["selection_type"] = ["None"] * len(record["model"])
    if "primera" in record["model"][0] or "led" in record["model"][0]:
        if len(record["score_value"]) == 5:
            # Files with exactly five scores are left out of the tables.
            continue
        # Ten scores: the first five are labelled random_selection, the last
        # five first5last5; expand the two model entries to one per score.
        record["model"] = [record["model"][0]] * 5 + [record["model"][1]] * 5
        record["prompt_type"] = ["None"] * len(record["score_value"])
        record["selection_type"] = ["random_selection"] * 5 + ["first5last5"] * 5
    dfs.append(pd.DataFrame(record))

if not dfs:
    raise ValueError(f"no result files containing {summ_type!r} found in {os.getcwd()!r}")

scores_long = pd.concat(dfs)
scores_long = scores_long[scores_long["score_type"] != "mover_score"]
scores_long = scores_long[~scores_long["model"].str.contains("mixtral")]
scores_long = scores_long.reset_index(drop=True)
scores_long["prompt_type"] = scores_long["prompt_type"].apply(lambda x: x.split("_")[-1])

metric_cols = ["R1", "R2", "RL", "BS"]
scores_per_config = len(metric_cols)
if len(scores_long) % scores_per_config != 0:
    raise ValueError(
        f"expected a multiple of {scores_per_config} score rows, got {len(scores_long)}"
    )

# Every consecutive run of four rows is assumed to hold the R1, R2, RL and BS
# scores (in that order) of a single configuration — TODO confirm against the
# code that writes the result files.
# Columns are read by name: the column order after pd.concat depends on which
# file is loaded first, so positional access is not reliable.
rows = []
for start in range(0, len(scores_long), scores_per_config):
    chunk = scores_long.iloc[start:start + scores_per_config]
    head = chunk.iloc[0]
    rows.append(
        [head["model"], head["selection_type"], head["prompt_type"]]
        + chunk["score_value"].tolist()
    )

df = pd.DataFrame(rows, columns=["model", "selection_type", "prompt_type"] + metric_cols)
# List comprehensions (not Series.map) so an unknown identifier raises KeyError.
df["model"] = [model_map_name[name] for name in df["model"]]
df["selection_type"] = [pipeline_map_name[name] for name in df["selection_type"]]
df["prompt_type"] = [prompt_type_map[name] for name in df["prompt_type"]]

# Best value of every metric per prompt type, computed once up front.
best_by_prompt = {
    prompt: {col: max(group[col]) for col in metric_cols}
    for prompt, group in df.groupby("prompt_type")
}

latex_dict = defaultdict(dict)
for _, row in df.iterrows():
    prefix = "" if row["selection_type"] == "None" else row["selection_type"] + " + "
    key = prefix + row["model"]
    best = best_by_prompt[row["prompt_type"]]

    cells = []
    for col in metric_cols:
        value, top = row[col], best[col]
        if col == "BS":
            # Only BS is rounded here; the other metrics are printed as stored.
            value, top = round(value, 3), round(top, 3)
        text = str(value)
        cells.append(f"\\textbf{{{text}}}" if text == str(top) else text)

    latex_dict[key][row["prompt_type"]] = cells

prompt_types = ["Basic", "Policy-Informed", "CoD"]
latex_rows_elas = []
latex_rows_longcontext = []
latex_rows_ablation = []

for model, cells_by_prompt in latex_dict.items():
    label = model.lower()
    if "primera" in label or "led" in label:
        # These rows have a single "None" prompt column group.
        row = model + " & " + " & ".join(cells_by_prompt["None"]) + "\\\\"
        latex_rows_ablation.append(row)
        continue

    cells = [cell for prompt in prompt_types for cell in cells_by_prompt[prompt]]
    row = model + " & " + " & ".join(cells) + " \\\\"
    if "qwen" in label or "llama 3.1" in label:
        latex_rows_longcontext.append(row)
    else:
        latex_rows_elas.append(row)
    

In [20]:
# Print the three tables built above, each under a title and a dashed rule.
# A blank line follows every table except the last one.
table_sections = [
    ("Pipeline", latex_rows_elas),
    ("LongContext", latex_rows_longcontext),
    ("Ablation", latex_rows_ablation),
]
last_section = len(table_sections) - 1
for position, (title, table_rows) in enumerate(table_sections):
    print(title + "\n----------------------")
    table_text = "\n".join(table_rows)
    print(table_text if position == last_section else table_text + "\n")

Pipeline
----------------------
F5L5 + BES + Mistral v0.3 & 0.398 & 0.153 & 0.205 & 0.597 & 0.425 & 0.166 & 0.209 & \textbf{0.602} & 0.381 & 0.133 & 0.197 & 0.542 \\
RS + BES + Mistral v0.3 & 0.394 & 0.151 & 0.203 & 0.596 & 0.421 & 0.164 & 0.207 & 0.601 & 0.379 & 0.131 & 0.196 & 0.541 \\
RS + TR + Mistral v0.3 & 0.383 & 0.153 & 0.203 & 0.593 & 0.41 & 0.165 & 0.209 & 0.598 & 0.377 & 0.134 & 0.195 & 0.541 \\
F5L5 + TR + Mistral v0.3 & 0.383 & 0.154 & 0.203 & 0.594 & 0.411 & 0.167 & 0.209 & 0.599 & 0.38 & 0.137 & 0.197 & 0.542 \\
F5L5 + BES + Gemma 1.1 & 0.387 & 0.129 & 0.193 & 0.567 & 0.394 & 0.135 & 0.193 & 0.568 & 0.348 & 0.12 & 0.183 & 0.56 \\
RS + BES + Gemma 1.1 & 0.385 & 0.129 & 0.193 & 0.567 & 0.394 & 0.134 & 0.193 & 0.567 & 0.346 & 0.118 & 0.182 & 0.559 \\
RS + TR + Gemma 1.1 & 0.383 & 0.131 & 0.196 & 0.566 & 0.392 & 0.134 & 0.197 & 0.566 & 0.357 & 0.128 & 0.189 & 0.564 \\
F5L5 + TR + Gemma 1.1 & 0.386 & 0.132 & 0.197 & 0.567 & 0.395 & 0.136 & 0.198 & 0.567 & 0.361 & 0.13 & 0.192

In [27]:
def get_prompt_type(x):
    """Pick the prompt-type token out of an underscore-separated identifier.

    Args:
        x: identifier string such as ``"a_b"`` or ``"a_b_c"``.

    Returns:
        The second token when the identifier has more than two
        underscore-separated tokens, otherwise the last token (which is the
        whole string when it contains no underscore).
    """
    tokens = x.split("_")
    return tokens[1] if len(tokens) > 2 else tokens[-1]

# Build the LaTeX table rows for the EUR-Lex-Sum "test" results.
#
# Steps:
#   1. load every result file in the working directory whose name contains
#      both `summ_type` and "eurlexsum",
#   2. collapse each run of four score rows into one wide row (R1, R2, RL, BS),
#   3. replace raw identifiers with their display labels,
#   4. format every score as a string, bolding the best value per prompt type,
#   5. emit one LaTeX row per configuration, split into two tables.
#
# Results: `df` (wide table), `latex_dict`, `latex_rows_elas`,
# `latex_rows_longcontext` (`latex_rows_ablation` stays empty here).

dfs = []
summ_type = "test"
# os.listdir() returns names in arbitrary order; sorting keeps the order of the
# generated table rows stable between runs and machines.
for result_file in sorted(os.listdir()):
    if summ_type not in result_file or "eurlexsum" not in result_file:
        continue
    # NOTE(security): unpickling can execute arbitrary code; only load result
    # files that were produced by trusted runs.
    # The object is used below like a dict of lists — TODO confirm.
    record = pd.read_pickle(result_file)
    if "qwen" in record["model"] or "llama3.1" in record["model"]:
        # These models get the placeholder selection type "None".
        record["selection_type"] = ["None"] * len(record["model"])
    dfs.append(pd.DataFrame(record))

if not dfs:
    raise ValueError(
        f"no eurlexsum result files containing {summ_type!r} found in {os.getcwd()!r}"
    )

scores_long = pd.concat(dfs)
scores_long = scores_long[scores_long["score_type"] != "mover_score"]
scores_long = scores_long[~scores_long["model"].str.contains("mixtral")]
scores_long = scores_long.reset_index(drop=True)
scores_long["prompt_type"] = scores_long["prompt_type"].apply(get_prompt_type)

metric_cols = ["R1", "R2", "RL", "BS"]
scores_per_config = len(metric_cols)
if len(scores_long) % scores_per_config != 0:
    raise ValueError(
        f"expected a multiple of {scores_per_config} score rows, got {len(scores_long)}"
    )

# Every consecutive run of four rows is assumed to hold the R1, R2, RL and BS
# scores (in that order) of a single configuration — TODO confirm against the
# code that writes the result files.
# Columns are read by name: the column order after pd.concat depends on which
# file is loaded first, so positional access is not reliable.
rows = []
for start in range(0, len(scores_long), scores_per_config):
    chunk = scores_long.iloc[start:start + scores_per_config]
    head = chunk.iloc[0]
    rows.append(
        [head["model"], head["selection_type"], head["prompt_type"]]
        + chunk["score_value"].tolist()
    )

df = pd.DataFrame(rows, columns=["model", "selection_type", "prompt_type"] + metric_cols)
# List comprehensions (not Series.map) so an unknown identifier raises KeyError.
df["model"] = [model_map_name[name] for name in df["model"]]
df["selection_type"] = [pipeline_map_name_eurlexsum[name] for name in df["selection_type"]]
df["prompt_type"] = [prompt_type_map[name] for name in df["prompt_type"]]

# Best value of every metric per prompt type, computed once up front.
best_by_prompt = {
    prompt: {col: max(group[col]) for col in metric_cols}
    for prompt, group in df.groupby("prompt_type")
}

latex_dict = defaultdict(dict)
for _, row in df.iterrows():
    prefix = "" if row["selection_type"] == "None" else row["selection_type"] + " + "
    key = prefix + row["model"]
    best = best_by_prompt[row["prompt_type"]]

    cells = []
    for col in metric_cols:
        value, top = row[col], best[col]
        if col == "BS":
            # Only BS is rounded here; the other metrics are printed as stored.
            value, top = round(value, 3), round(top, 3)
        text = str(value)
        cells.append(f"\\textbf{{{text}}}" if text == str(top) else text)

    latex_dict[key][row["prompt_type"]] = cells

prompt_types = ["Basic", "Policy-Informed", "CoD"]
latex_rows_elas = []
latex_rows_longcontext = []
latex_rows_ablation = []

for model, cells_by_prompt in latex_dict.items():
    cells = [cell for prompt in prompt_types for cell in cells_by_prompt[prompt]]
    row = model + " & " + " & ".join(cells) + " \\\\"
    label = model.lower()
    if "qwen" in label or "llama 3.1" in label:
        latex_rows_longcontext.append(row)
    else:
        latex_rows_elas.append(row)

In [28]:
# Print the two EUR-Lex-Sum tables, each under a title and a dashed rule.
# A blank line follows the first table only.
pipeline_text = "\n".join(latex_rows_elas)
longcontext_text = "\n".join(latex_rows_longcontext)

print("Pipeline" + "\n----------------------")
print(pipeline_text + "\n")
print("LongContext" + "\n----------------------")
print(longcontext_text)

Pipeline
----------------------
BES + Mistral v0.3 & 0.322 & 0.107 & 0.155 & 0.506 & 0.386 & 0.138 & 0.182 & 0.545 & 0.333 & 0.09 & 0.159 & 0.545 \\
TR + Mistral v0.3 & 0.371 & 0.129 & 0.171 & 0.536 & 0.425 & 0.16 & 0.196 & 0.576 & 0.343 & 0.102 & 0.164 & 0.554 \\
BES + Gemma 1.1 & 0.265 & 0.087 & 0.138 & 0.505 & 0.307 & 0.105 & 0.155 & 0.523 & 0.216 & 0.064 & 0.117 & 0.498 \\
TR + Gemma 1.1 & 0.32 & 0.112 & 0.162 & 0.518 & 0.329 & 0.114 & 0.164 & 0.522 & 0.23 & 0.077 & 0.125 & 0.502 \\
BES + Llama 3 & 0.354 & 0.119 & 0.17 & 0.518 & 0.42 & 0.153 & 0.198 & 0.564 & 0.427 & 0.121 & 0.19 & 0.611 \\
TR + Llama 3 & 0.384 & 0.141 & 0.184 & 0.535 & 0.442 & 0.169 & 0.206 & 0.578 & 0.425 & 0.131 & 0.201 & 0.601 \\

LongContext
----------------------
Llama 3.1 & \textbf{0.505} & \textbf{0.211} & \textbf{0.236} & \textbf{0.625} & \textbf{0.51} & \textbf{0.213} & \textbf{0.236} & \textbf{0.633} & \textbf{0.48} & \textbf{0.189} & \textbf{0.228} & \textbf{0.627} \\
Qwen2 & 0.478 & 0.187 & 0.212 & 0.5