In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines


poe_path = f"./process_of_elimination.csv"
mcp_path = f"./multiple_choice_prompt.csv"

poe_raw_df = pd.read_csv(poe_path)
mcp_raw_df = pd.read_csv(mcp_path)

In [2]:
def process(df, drop_columns=None): 
    # get rid of identical rows 
    df = df.drop_duplicates()
    if isinstance(drop_columns, list):
        drop_columns += ["model_family", "seed", "batch_size", "loading_precision", "sample"]
    else:
        drop_columns = ["model_family", "seed", "batch_size", "loading_precision", "sample"]
    df = df.drop(columns=drop_columns)
    # shorten checkpoint names
    df['checkpoint'] = df["checkpoint"].apply(lambda x: x.split("/")[-1])
    return df

def process_v2(df):
    datasets="logical_deduction_five_objects conceptual_combinations".split()
    df = df[df["dataset"].isin(datasets)]
    df = df.groupby(["dataset", "checkpoint", "method", "n_shot"]).mean().reset_index()
    df = df.drop(columns=["checkpoint"])
    # accuracy: 3 decimal places
    df["accuracy"] = df["accuracy"].apply(lambda x: round(x, 3))
    return df

In [63]:
poe_df = process(poe_raw_df, drop_columns=["prompting_method", "scoring_method", "mask_strategy", "mask_accuracy", "mask_token"])
mcp_df = process(mcp_raw_df, drop_columns=[])

poe_df = process_v2(poe_df)
mcp_df = process_v2(mcp_df)

# rename: accuracy -> PoE
poe_df = poe_df.rename(columns={"accuracy": "PoE"})
poe_df = poe_df.drop(columns=["method"])

# PoE: convert to percentage and 3 decimal places
poe_df["PoE"] = poe_df["PoE"].apply(lambda x: round(x * 100, 3))

# rename: accuracy -> MCP
mcp_df = mcp_df.rename(columns={"accuracy": "MCP"})
mcp_df = mcp_df.drop(columns=["method"])

# MCP: convert to percentage and 3 decimal places
mcp_df["MCP"] = mcp_df["MCP"].apply(lambda x: round(x * 100, 3))

# merge
df = pd.merge(mcp_df, poe_df, on=["dataset", "n_shot"])

# compute difference, 3 decimal places
df["Improvement"] = df["PoE"] - df["MCP"]
df["Improvement"] = df["Improvement"].apply(lambda x: round(x, 3))

# rename: Improvement -> PoE - MCP
df = df.rename(columns={"Improvement": "PoE - MCP"})

# rename dataset -> Task
df = df.rename(columns={"dataset": "Task",
                        "n_shot": "N Shot",})

temp_df = df.set_index("Task")
temp_df = temp_df.rename(index={
                                "logical_deduction_five_objects": "LD",
                                "conceptual_combinations": "CC",
                                })


temp_df = temp_df.reset_index()
# save to csv
temp_df.to_csv("few_shot_v3.csv", index=False)
# save to markdown
temp_df.to_markdown("few_shot_v3.md", index=False)

In [64]:
temp_df

Unnamed: 0,Task,N Shot,MCP,PoE,Improvement
0,CC,0,60.4,76.0,15.6
1,CC,3,76.8,77.0,0.2
2,LD,0,39.8,56.0,16.2
3,LD,3,17.6,19.2,1.6


In [4]:
# Find the index of the row with the highest accuracy for each dataset
max_accuracy_indices = mask_token_df.groupby('dataset')['accuracy'].idxmax()
# Get the rows with the highest accuracy for each dataset
rows_with_highest_accuracy = mask_token_df.loc[max_accuracy_indices]
rows_with_highest_accuracy

Unnamed: 0,dataset,mask_token,accuracy
4,anli,empty,0.572
6,conceptual_combinations,[N/A],0.792
13,cqa,dasjhasjkdhjskdhds,0.896
16,disambiguation_qa,[N/A],0.682
21,logical_deduction_five_objects,[N/A],0.568
27,siqa,[mask],0.826
34,strange_stories,empty,0.786
35,symbol_interpretation,[MASK],0.234


In [10]:
temp_df = rows_with_highest_accuracy.copy()

temp_df = temp_df.set_index("dataset")
temp_df = temp_df.rename(columns={"accuracy": "Accuracy",
                                  "mask_token": "Best Mask Token",
                                  },
                          index={"anli":"ANLI",
                                "cqa": "CQA",
                                "siqa": "SIQA",
                                "logical_deduction_five_objects": "LD",
                                "disambiguation_qa": "DQA",
                                "conceptual_combinations": "CC",
                                "strange_stories": "SS",
                                "symbol_interpretation": "SIT",
                                })


temp_df = temp_df.reset_index()
temp_df = temp_df.rename(columns={"dataset": "Task"})

# convert accuracy to percentage
temp_df["Accuracy"] = temp_df["Accuracy"].apply(lambda x: f"{x*100:.1f}")

# change some names
temp_df['Best Mask Token'] = temp_df['Best Mask Token'].apply(lambda x: f'"{x}"')

# save to csv
temp_df.to_csv("mask_token_best.csv", index=False)
# save to markdown
temp_df.to_markdown("mask_token_best.md", index=False)


Unnamed: 0,Task,Best Mask Token,Accuracy
0,ANLI,"""empty""",57.2
1,CC,"""[N/A]""",79.2
2,CQA,"""dasjhasjkdhjskdhds""",89.6
3,DQA,"""[N/A]""",68.2
4,LD,"""[N/A]""",56.8
5,SIQA,"""[mask]""",82.6
6,SS,"""empty""",78.6
7,SIT,"""[MASK]""",23.4


In [5]:
sns.barplot(
    x='dataset', 
    y='accuracy', 
    hue='mask_token', 
    data=mask_token_df, 
    palette='Set1',
    )

plt.show()

NameError: name 'sns' is not defined