In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide pandas display settings: show up to 100 rows and do not
# truncate the number of columns, the output width or the cell contents.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None means "no limit". The former value -1 is deprecated since pandas 1.0
# and is rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)

# Keyword dictionaries handed to sns.boxplot in the plotting cell:
# a thin, semi-transparent black mean line and zero-width whiskers.
meanprops = dict(linestyle='-', linewidth=1, color='k', alpha=0.5)
whiskerprops = dict(linestyle='-', linewidth=0)


In [None]:
# Experiments whose sweep results are loaded; "animals" is currently disabled.
exp_names = [
    "svhn",
    "cifar10",
    "cifar100",
    "super_cifar100",
    "camelyon",
    # "animals",
]

# Directory that holds one "<experiment>_paper_sweep.csv" file per experiment.
analysis_dir = "/Users/Paul/research/files/analysis"

df_list = []
for exp in exp_names:
    in_path = os.path.join(analysis_dir, "{}_paper_sweep.csv".format(exp))
    df = pd.read_csv(in_path)
    # Rows without a run name or model cannot be parsed later on.
    df = df.dropna(subset=["name", "model"])
    df = df.drop_duplicates(subset=["name", "study", "model", "network", "confid"])
    # Prefix every study with its experiment so studies stay distinguishable
    # after concatenation. Series.map (instead of a row-wise DataFrame.apply)
    # also works when no rows are left after the filtering above.
    study_prefix = exp + "_"
    df = df.assign(study=df["study"].map(lambda study: study_prefix + study))
    df_list.append(df)

# ignore_index avoids repeated row labels coming from the individual files.
df = pd.concat(df_list, ignore_index=True)

In [None]:
# Number of non-null entries per column for each study.
df.groupby(by="study").count()

In [None]:
def _name_field(name, marker):
    """Return the part of `name` after the first `marker`, cut at the next "_".

    The text is taken from between the first and second occurrence of
    `marker` (or up to the end of `name`) and truncated at its first "_".
    Raises IndexError when `marker` does not occur in `name`.
    """
    return name.split(marker)[1].split("_")[0]


# Hyper-parameters are encoded in the run name; extract them into columns.
# Series.map is used instead of a row-wise DataFrame.apply so the cell also
# works on an empty frame and does not build a Series object per row.
df["backbone"] = df["name"].map(lambda name: _name_field(name, "bb"))
df["dropout"] = df["name"].map(lambda name: _name_field(name, "do"))
# The model is the leading token of the name; it replaces the CSV "model" column.
df["model"] = df["name"].map(lambda name: name.split("_")[0])
df["run"] = df["name"].map(lambda name: _name_field(name, "run"))
df["rew"] = df["name"].map(lambda name: _name_field(name, "_rew"))
# Qualify the confidence measure with the model it belongs to.
# NOTE: re-running this cell on the already processed frame prefixes "confid" again.
df["confid"] = [model + "_" + confid for model, confid in zip(df["model"], df["confid"])]
df = df.drop("model", axis=1)

In [None]:

# Metric analysed by the following cells.
metric = "aurc"
# Keep only the identifying columns plus the selected metric.
df = df.loc[:, ["study", "confid", "run", "backbone", "dropout", "rew", metric]]

In [None]:
# Columns that identify one configuration; everything else is aggregated over runs.
non_agg_columns = ["study", "confid", "backbone", "dropout", "rew"]

# Group once and derive both statistics from the per-run values. Computing
# the std after `df` has been replaced by the means would leave a single row
# per group and therefore only NaN. Selecting the metric column also keeps
# the string-valued "run" column out of the numeric aggregation.
runs_by_config = df.groupby(by=non_agg_columns)[metric]
# pandas uses the sample standard deviation (ddof=1) by default, so
# configurations with a single run get NaN.
df_std = runs_by_config.std().reset_index()
df = runs_by_config.mean().reset_index()
# Both frames come from the same grouping and thus share the same row order.
df[metric + "_std"] = df_std[metric]


In [None]:
# Build a wide table: one row per configuration, and for every study one
# column with the mean metric ("<study>") and one with its std ("<study>_std").
key_columns = ["confid", "backbone", "dropout", "rew"]
std_column = metric + "_std"

studies = df.study.unique().tolist()

# One frame per study, with the metric columns renamed after the study.
# The last study comes first because it defines the rows of the table.
study_frames = []
for s in [studies[-1]] + studies[:-1]:
    sdf = df.loc[df.study == s, key_columns + [metric, std_column]]
    study_frames.append(sdf.rename(columns={metric: s, std_column: s + "_std"}))

dff = study_frames[0]
for sdf in study_frames[1:]:
    # Match rows by configuration rather than by row label: the rows of
    # different studies never share index labels, so a plain column
    # assignment would only produce NaN. A left join keeps exactly the
    # configurations of the base study.
    dff = dff.merge(sdf, on=key_columns, how="left")

In [None]:
# Display the wide per-study table assembled in the previous cell.
dff

In [None]:
# todo: filter rows! use val tuning etc.

In [None]:
# Grid of strip plots: one row per grouping dimension, one column per metric,
# with the per-category mean drawn as a line on top of the individual points.
# NOTE(review): `data` is not defined in the cells visible here; it has to be
# created before this cell runs - confirm where it comes from.
plot_data = data # & (data["ne"].str.contains("250")) & (data["ap"]==False)]
scale = 5
sns.set_style("whitegrid")
sns.set_context("paper", font_scale=scale * 0.3)
dims = ["confid", "model", "backbone", "dropout", "rew"]
metrics = ["accuracy", "failauc", "failap_err", "aurc"]

# squeeze=False keeps `axs` two-dimensional even for a single row or column.
f, axs = plt.subplots(
    nrows=len(dims),
    ncols=len(metrics),
    figsize=(len(metrics) * scale, len(dims) * scale),
    squeeze=False,
)
for xix, dim in enumerate(dims):
    # The loop variable is deliberately not called `metric`, so the
    # notebook-level `metric` selected earlier is not overwritten.
    for yix, plot_metric in enumerate(metrics):
        ax = axs[xix, yix]
        sns.stripplot(ax=ax, x=dim, y=plot_metric, data=plot_data, s=scale * 0.8)
        # The box plot only contributes the mean line: box, caps and fliers
        # are hidden and the whiskers have zero width.
        sns.boxplot(
            ax=ax,
            x=dim,
            y=plot_metric,
            data=plot_data,
            saturation=0,
            showbox=False,
            showcaps=False,
            showfliers=False,
            whiskerprops=whiskerprops,
            showmeans=True,
            meanprops=meanprops,
            meanline=True,
        )
        # Rotate the category labels without re-setting the label texts.
        ax.tick_params(axis="x", labelrotation=90)
        # if "iid" in study and metric == "aurc":
        #     axs[xix, yix].set_ylim(4, 8)
        # if "iid" in study and metric == "failauc":
        #     axs[xix, yix].set_ylim(0.90, 0.96)
plt.tight_layout()
# plt.savefig("/Users/Paul/research/files/analysis/{}_cifar10.png".format(study))

In [None]:
# Copy every column of `data_std` whose name contains "std" into `data`.
std_columns = [col for col in data_std.columns if "std" in col]
for std_col in std_columns:
    data[std_col] = data_std[std_col]

print(data.columns)
print(data_std.columns)
print(len(data))

# Columns shown in the report, sorted by ascending AURC.
report_columns = [
    "model", "backbone", "dropout", "confid", "rew",
    "accuracy", "accuracy_std",
    "failauc", "failauc_std",
    "aurc", "aurc_std",
    "failap_suc",
    "risk@100cov", "risk@95cov", "risk@90cov", "risk@85cov", "risk@80cov",
]
data[report_columns].sort_values(by="aurc", ascending=True)

In [None]:
# Rows of the iid study with dropout "1". Study values carry the
# "<experiment>_" prefix added while loading, so an exact comparison with
# "iid_study" can never match; compare the suffix instead.
df[df.study.str.endswith("iid_study", na=False) & (df.dropout == "1")]

In [None]:
# Scratch check of the two standard-deviation conventions.
# Population standard deviation (NumPy's default, ddof=0) of three values.
population_std = np.std([0.95000, 0.94633, 1])
# Sample standard deviation (ddof=1) of two values.
sample_std = np.std([95.000, 94.633], ddof=1)
# Show both results; as a bare statement the first one would be computed
# but never displayed, because a cell only renders its last expression.
population_std, sample_std