In [47]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines


# Result dumps for the two elimination variants, relative to this notebook.
poe_path = "../process_of_elimination.csv"
iter_poe_path = "../iterative_process_of_elimination.csv"

# Read both CSVs as-is; all cleaning happens in later cells.
poe_raw_df = pd.read_csv(poe_path)
iter_poe_raw_df = pd.read_csv(iter_poe_path)

# Keep only the single-pass rows produced with the "lowest" mask strategy
# (the author also noted "below_average" as a possible value here).
poe_raw_df = poe_raw_df.loc[poe_raw_df["mask_strategy"].eq("lowest")]
# The matching filter for the iterative results is disabled, so that frame is
# left unfiltered. NOTE(review): as written it names `iter_poe_df`, which is
# only created in a later cell; it would need `iter_poe_raw_df` to run here.
# iter_poe_df = iter_poe_df[iter_poe_df["mask_strategy"] == "lowest"] # "below_average" "lowest"


In [48]:
def process(df, drop_columns=None):
    """Deduplicate a results frame, drop bookkeeping columns, shorten checkpoints.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw results. Must contain a string-valued ``checkpoint`` column and
        every column that ends up in the drop list.
    drop_columns : list of str, optional
        Extra columns to drop on top of the always-dropped ones
        (``model_family``, ``seed``, ``batch_size``, ``loading_precision``,
        ``sample``). Any value that is not a ``list`` is ignored, exactly as
        ``None`` is.

    Returns
    -------
    pandas.DataFrame
        A new frame; neither ``df`` nor ``drop_columns`` is modified.

    Raises
    ------
    KeyError
        If a column scheduled for dropping is missing from ``df``.
    """
    always_dropped = ["model_family", "seed", "batch_size", "loading_precision", "sample"]
    # Build a fresh list instead of `drop_columns += ...`: in-place extension
    # would mutate the caller's list and make it grow on every reuse.
    if isinstance(drop_columns, list):
        columns_to_drop = drop_columns + always_dropped
    else:
        columns_to_drop = always_dropped

    # Exact duplicates are removed before any column is dropped, so rows that
    # differ only in a to-be-dropped column are all kept.
    df = df.drop_duplicates()
    df = df.drop(columns=columns_to_drop)
    # Shorten checkpoint names: keep only the part after the last "/".
    df["checkpoint"] = df["checkpoint"].apply(lambda name: name.split("/")[-1])
    return df

In [85]:
# Clean both raw frames; the single-pass PoE results have extra
# configuration columns removed before averaging.
poe_df = process(
    poe_raw_df,
    drop_columns=["n_shot", "prompting_method", "scoring_method", "mask_accuracy", "mask_strategy"],
)
iter_poe_df = process(iter_poe_raw_df, drop_columns=["n_shot"])

# Restrict the single-pass results to the datasets under comparison. The
# iterative frame is not filtered here; the inner merge below keeps only
# datasets present in both frames.
datasets = "anli cqa siqa logical_deduction_five_objects disambiguation_qa conceptual_combinations strange_stories symbol_interpretation".split()
poe_df = poe_df[poe_df["dataset"].isin(datasets)]


def _summarize_accuracy(results, accuracy_name):
    """Average per (dataset, checkpoint, method) and expose accuracy as `accuracy_name`.

    The mean is taken over numeric columns only. `numeric_only=True` is passed
    explicitly because pandas >= 2.0 raises a TypeError on non-numeric columns
    instead of silently dropping them, as older versions did. The `checkpoint`
    and `method` keys are dropped afterwards and the renamed accuracy column is
    rounded to 3 decimal places.
    """
    summary = (
        results.groupby(["dataset", "checkpoint", "method"])
        .mean(numeric_only=True)
        .reset_index()
        .drop(columns=["checkpoint", "method"])
        .rename(columns={"accuracy": accuracy_name})
    )
    summary[accuracy_name] = summary[accuracy_name].apply(lambda value: round(value, 3))
    return summary


# Group accuracy by dataset, checkpoint and method, then keep one accuracy
# column per variant.
poe_df = _summarize_accuracy(poe_df, "poe_accuracy")
iter_poe_df = _summarize_accuracy(iter_poe_df, "iter_poe_accuracy")

# Merge the two summaries on dataset (inner join).
# NOTE(review): this assumes each frame holds one row per dataset, i.e. a
# single checkpoint/method combination; with several, the rows of matching
# datasets are paired in every combination.
df = pd.merge(poe_df, iter_poe_df, on=["dataset"])

# Difference between the already-rounded accuracies, itself rounded to 3
# decimal places.
df["diff"] = df["poe_accuracy"] - df["iter_poe_accuracy"]
df["diff"] = df["diff"].apply(lambda value: round(value, 3))

# Save the comparison table next to the notebook.
df.to_csv("poe_iter_poe.csv", index=False)

In [87]:
df

Unnamed: 0,dataset,poe_accuracy,iter_poe_accuracy,diff
0,anli,0.556,0.556,0.0
1,conceptual_combinations,0.76,0.742,0.018
2,cqa,0.895,0.884,0.011
3,disambiguation_qa,0.678,0.678,0.0
4,logical_deduction_five_objects,0.56,0.572,-0.012
5,siqa,0.817,0.82,-0.003
6,strange_stories,0.756,0.766,-0.01
7,symbol_interpretation,0.236,0.238,-0.002
