# Analyze probing results
- Preprocess a bit. Select the best results from each configuration. Export to the same directory.

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

In [29]:
def analyze_results(report_dir):
    dfs = []
    for p in Path(report_dir).iterdir():
        if not p.name.startswith("report"): 
            continue
        dfs.append(pd.read_csv(p))
    df = pd.concat(dfs, axis=0)
    tasks = []
    layers = []
    for i, row in df.iterrows():
        tasks.append("_".join(row["task"].split("_")[:-2]))
        layers.append(int(row["task"].split("_")[-1]))
    df.drop(columns=["task"], inplace=True)
    df["task"] = tasks
    df["layer"] = layers
    return df 


all_model_best_results = []
for p in Path("../reports").iterdir():
    if not p.is_dir():
        continue
    df_best_results = analyze_results(p)
    df_best_results["LM"] = [p.name] * len(df_best_results)
    all_model_best_results.append(df_best_results) 
df_all = pd.concat(all_model_best_results, axis=0)
# There are 6 LM x 13 layers x 7 (probing) tasks x 3 configs x 7 models x 5 rs
# For each {LM, layer, task, config}, average by rs, and take the best model.
df_avg = df_all.groupby(["LM", "layer", "task", "config", "model"], as_index=False).mean()  # 11466 rows
df_best = df_avg.sort_values("val_acc", ascending=False).groupby(["LM", "layer", "task", "config"], as_index=False).max(1)
df_best.head()

Unnamed: 0,LM,layer,task,config,train_acc,train_loss,val_acc,val_loss,test_acc,test_loss,rs,train_size_per_class,nclasses
0,embeddings_roberta_base,0,bigram_shift,Full,0.5,0.693867,0.5,0.693867,0.5,0.693867,6560.2,1200.0,2.0
1,embeddings_roberta_base,0,bigram_shift,Nonzero,0.5,0.693378,0.5,0.693378,0.5,0.693378,6560.2,1200.0,2.0
2,embeddings_roberta_base,0,bigram_shift,ZeroMI,0.5,0.693281,0.5,0.693281,0.5,0.693281,6560.2,1200.0,2.0
3,embeddings_roberta_base,0,coordination_inversion,Full,0.5,0.693549,0.5,0.693549,0.5,0.693549,6560.2,1200.0,2.0
4,embeddings_roberta_base,0,coordination_inversion,Nonzero,0.5,0.693411,0.5,0.693411,0.5,0.693411,6560.2,1200.0,2.0


In [31]:
df_best.to_csv("../reports/probing_roberta_base.csv", index=False)