In [93]:
# %matplotlib notebook
%matplotlib inline
from ipynb_setup import *
import matplotlib.pyplot as plt 

from pathlib import Path
from IPython.display import Markdown, display

from dataframe import get_experiments_matching, create_experiment_df, preprocess_df
from utils.pandas import filter_dataframe
from plot import DATASETS, MATCH_PATTERNS, VALUE_DICT, QUERYMETHODS, get_label_regime

import utils.eval as evaluate
from utils.eval import compute_pairwise_matrix
from plotlib.performance_plots import plot_pairwise_matrix
from scipy.stats import ttest_ind

# CIFAR-100

In [2]:
# Input directory with the experiment logs and output directory for figures.
# NOTE(review): machine-specific absolute path -- consider making it configurable.
base_path = Path("/mnt/drive_nvme2/rsync_cluster/active_learning")
save_path = Path("./plots")

# Collect all experiments, keep the rows whose "Rel. Path" matches the
# "basic-pretrained" pattern, and run the shared preprocessing on that subset.
df = create_experiment_df(base_path, DATASETS, rewrite=True)
filter_ = get_experiments_matching(df, key="Rel. Path", patterns=[".*basic-pretrained.*"])
df = preprocess_df(df[filter_], MATCH_PATTERNS, VALUE_DICT)

# DATASETS inverted (value -> key), used to relabel the "Dataset" column.
inv_dataset = {value: key for key, value in DATASETS.items()}

# Relabel the three descriptive columns in one step.
df = df.assign(
    **{
        "Dataset": df["Dataset"].map(inv_dataset),
        "Label Regime": df["Label Regime"].map(get_label_regime),
        "Query Method": df["Query Method"].map(QUERYMETHODS),
    }
)

# Restrict the table to the label regimes analysed in this section.
df = df[df["Label Regime"].isin(["low_qs-50", "low", "low_qs-2000"])]
df.columns

Index(['index', 'val_acc', 'test_acc', 'num_samples', 'Acquisition Entropy',
       'Dataset Entropy', 'version', 'Name', 'test/loss', 'test/acc', 'Path',
       'Rel. Path', 'Dataset', 'Label Regime', 'Experiment Name',
       'Query Method', 'Self-SL', 'Semi-SL', 'Training'],
      dtype='object')

In [3]:
# List the distinct experiment names remaining in `df` after the filters above.
df["Experiment Name"].unique()

array(['basic-pretrained_model-resnet_drop-0_aug-cifar_randaugment_acq-random_ep-80_freeze-False_smallhead-False',
       'basic-pretrained_model-resnet_drop-0_aug-cifar_randaugment_acq-kcentergreedy_ep-80_freeze-False_smallhead-False',
       'basic-pretrained_model-resnet_drop-0.5_aug-cifar_randaugment_acq-bald_ep-80_freeze-False_smallhead-False',
       'basic-pretrained_model-resnet_drop-0_aug-cifar_randaugment_acq-entropy_ep-80_freeze-False_smallhead-False',
       'basic-pretrained_model-resnet_drop-0_aug-cifar_randaugment_acq-badge_ep-80_freeze-False_smallhead-False'],
      dtype=object)

In [69]:
# Summary statistics and two-sample t-tests of `val` between label regimes,
# restricted to the rows with 1000 or 1500 labelled samples.
settings = []  # NOTE(review): never read in this cell -- candidate for removal
val = "test_acc"

samp_df = df[df["num_samples"].isin([1000, 1500])]

# Mean and standard deviation of `val` per (dataset, regime, method, budget),
# rounded to four decimals and multiplied by 100 (i.e. expressed in percent).
out_df = samp_df.groupby(["Dataset", "Label Regime", "Query Method", "num_samples"]).agg({val: ["mean", "std"]})
out_df = out_df[val].round(4) * 100

# ttest_ind compares exactly two samples; a third positional argument would be
# taken as `axis`. Fail with a clear message instead of a cryptic error.
regimes = samp_df["Label Regime"].unique()
if len(regimes) != 2:
    raise ValueError(
        f"t-test needs exactly two label regimes in the selection, got {list(regimes)}"
    )
# The sign of the statistic follows this order (first regime minus second).
print("t-test:", " vs. ".join(map(str, regimes)))

out_dict = {}  # NOTE(review): never filled in this cell -- candidate for removal
for qm in samp_df["Query Method"].unique():
    print(qm)
    for n_samp in samp_df["num_samples"].unique():
        print(n_samp)
        # One array of `val` scores per regime for this query method and budget.
        part_dict = {}
        for exp in regimes:
            test_df = samp_df[
                (samp_df["Label Regime"] == exp)
                & (samp_df["Query Method"] == qm)
                & (samp_df["num_samples"] == n_samp)
            ][val]
            part_dict[exp] = test_df.to_numpy()
        # NOTE(review): default ttest_ind assumes equal variances; confirm this
        # is intended rather than Welch's test (equal_var=False).
        tval = ttest_ind(*part_dict.values())
        print(tval)
        

Random
1000
Ttest_indResult(statistic=-2.8512645533602328, pvalue=0.04633653067808281)
1500
Ttest_indResult(statistic=-0.44962591288590875, pvalue=0.6762683748529865)
Core-Set
1000
Ttest_indResult(statistic=-0.8448905423330896, pvalue=0.4457455639404634)
1500
Ttest_indResult(statistic=0.6747577441163723, pvalue=0.5368208136796477)
BALD
1000
Ttest_indResult(statistic=-3.2486197636010568, pvalue=0.03141669888270467)
1500
Ttest_indResult(statistic=-10.084815245817571, pvalue=0.0005439193862747724)
Entropy
1000
Ttest_indResult(statistic=-1.0911527617084127, pvalue=0.3365267635027454)
1500
Ttest_indResult(statistic=-0.4479763976006725, pvalue=0.6773625243392601)
BADGE
1000
Ttest_indResult(statistic=1.259476712149793, pvalue=0.2763395041365433)
1500
Ttest_indResult(statistic=0.34563975782869705, pvalue=0.7470260739973795)


In [71]:
# Display the mean/std summary table (in percent) currently held in `out_df`.
out_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,std
Dataset,Label Regime,Query Method,num_samples,Unnamed: 4_level_1,Unnamed: 5_level_1
CIFAR-100,low,BADGE,1000,46.32,0.2
CIFAR-100,low,BADGE,1500,50.24,0.61
CIFAR-100,low,BALD,1000,42.31,1.71
CIFAR-100,low,BALD,1500,46.0,0.44
CIFAR-100,low,Core-Set,1000,46.02,0.74
CIFAR-100,low,Core-Set,1500,49.7,0.6
CIFAR-100,low,Entropy,1000,42.95,1.32
CIFAR-100,low,Entropy,1500,46.75,0.76
CIFAR-100,low,Random,1000,45.22,0.34
CIFAR-100,low,Random,1500,49.79,0.27


In [65]:
# Summary statistics and two-sample t-tests of `val` between label regimes,
# restricted to the rows with 2500 or 4500 labelled samples.
settings = []  # NOTE(review): never read in this cell -- candidate for removal
val = "test_acc"

samp_df = df[df["num_samples"].isin([2500, 4500])]

# Mean and standard deviation of `val` per (dataset, regime, method, budget),
# rounded to four decimals and multiplied by 100 (i.e. expressed in percent).
out_df = samp_df.groupby(["Dataset", "Label Regime", "Query Method", "num_samples"]).agg({val: ["mean", "std"]})
out_df = out_df[val].round(4) * 100

# ttest_ind compares exactly two samples; a third positional argument would be
# taken as `axis`. Fail with a clear message instead of a cryptic error.
regimes = samp_df["Label Regime"].unique()
if len(regimes) != 2:
    raise ValueError(
        f"t-test needs exactly two label regimes in the selection, got {list(regimes)}"
    )
# The sign of the statistic follows this order (first regime minus second).
print("t-test:", " vs. ".join(map(str, regimes)))

out_dict = {}  # NOTE(review): never filled in this cell -- candidate for removal
for qm in samp_df["Query Method"].unique():
    print(qm)
    for n_samp in samp_df["num_samples"].unique():
        print(n_samp)
        # One array of `val` scores per regime for this query method and budget.
        part_dict = {}
        for exp in regimes:
            test_df = samp_df[
                (samp_df["Label Regime"] == exp)
                & (samp_df["Query Method"] == qm)
                & (samp_df["num_samples"] == n_samp)
            ][val]
            part_dict[exp] = test_df.to_numpy()
        # NOTE(review): default ttest_ind assumes equal variances; confirm this
        # is intended rather than Welch's test (equal_var=False).
        tval = ttest_ind(*part_dict.values())
        print(tval)

Random
2500
Ttest_indResult(statistic=1.352207041253772, pvalue=0.2477042974761229)
4500
Ttest_indResult(statistic=-0.2020717522525327, pvalue=0.8497217654324899)
Core-Set
2500
Ttest_indResult(statistic=5.674160988446457, pvalue=0.004759623703752529)
4500
Ttest_indResult(statistic=0.9923971623878244, pvalue=0.37717747559620557)
BALD
2500
Ttest_indResult(statistic=4.675729366626695, pvalue=0.009477787481559133)
4500
Ttest_indResult(statistic=4.699304512108845, pvalue=0.00931320727806673)
Entropy
2500
Ttest_indResult(statistic=-0.61033083821728, pvalue=0.5746170004026746)
4500
Ttest_indResult(statistic=-1.0316846190717477, pvalue=0.36051252843408843)
BADGE
2500
Ttest_indResult(statistic=-0.9969902617087215, pvalue=0.37519506765113664)
4500
Ttest_indResult(statistic=-0.1690755858547518, pvalue=0.873942887964515)


In [68]:
# Print the summary table held in `out_df` as LaTeX tabular source.
print(out_df.to_latex())

\begin{tabular}{llllrr}
\toprule
          &             &        &      &   mean &   std \\
Dataset & Label Regime & Query Method & num\_samples &        &       \\
\midrule
CIFAR-100 & low & BADGE & 2500 &  54.62 &  0.60 \\
          &             &        & 4500 &  59.92 &  0.12 \\
          &             & BALD & 2500 &  50.92 &  0.30 \\
          &             &        & 4500 &  55.86 &  0.21 \\
          &             & Core-Set & 2500 &  54.72 &  0.16 \\
          &             &        & 4500 &  58.71 &  0.61 \\
          &             & Entropy & 2500 &  51.41 &  0.78 \\
          &             &        & 4500 &  57.12 &  0.53 \\
          &             & Random & 2500 &  54.71 &  0.38 \\
          &             &        & 4500 &  59.41 &  0.31 \\
          & low\_qs-2000 & BADGE & 2500 &  55.03 &  0.38 \\
          &             &        & 4500 &  59.98 &  0.60 \\
          &             & BALD & 2500 &  46.96 &  1.43 \\
          &             &        & 4500 &  52.14 &  1.3

In [63]:
# Raw array of the "mean" column of the summary table held in `out_df`.
out_df["mean"].values

array([54.62, 59.92, 50.92, 55.86, 54.72, 58.71, 51.41, 57.12, 54.71,
       59.41, 55.03, 59.98, 46.96, 52.14, 53.52, 58.28, 51.79, 57.53,
       54.38, 59.48])

# ISIC-2019

In [94]:
# Input directory with the experiment logs and output directory for figures.
# NOTE(review): user-specific absolute path -- consider making it configurable.
base_path = Path("/home/c817h/network/Cluster-Experiments/activelearning")
save_path = Path("./plots")

# Collect all experiments, keep the rows whose "Rel. Path" matches the
# "basic-pretrained" pattern, and run the shared preprocessing on that subset.
# NOTE(review): this rebinds `df`, replacing the table loaded in the CIFAR-100 section.
df = create_experiment_df(base_path, DATASETS, rewrite=True)
filter_ = get_experiments_matching(df, key="Rel. Path", patterns=[".*basic-pretrained.*"])
df = preprocess_df(df[filter_], MATCH_PATTERNS, VALUE_DICT)

# DATASETS inverted (value -> key), used to relabel the "Dataset" column.
inv_dataset = {value: key for key, value in DATASETS.items()}

# Relabel the three descriptive columns in one step.
df = df.assign(
    **{
        "Dataset": df["Dataset"].map(inv_dataset),
        "Label Regime": df["Label Regime"].map(get_label_regime),
        "Query Method": df["Query Method"].map(QUERYMETHODS),
    }
)

# Restrict the table to the label regimes analysed in this section.
df = df[df["Label Regime"].isin(["low_qs-10", "low_qs-40", "low_qs-160"])]
df.columns

Index(['index', 'val_acc', 'test_acc', 'num_samples', 'Acquisition Entropy',
       'Dataset Entropy', 'version', 'Name', 'test/loss', 'test/acc', 'Path',
       'test/auroc', 'test/av_prec', 'test/w_acc', 'test/av_f1', 'Rel. Path',
       'Dataset', 'Label Regime', 'Experiment Name', 'Query Method', 'Self-SL',
       'Semi-SL', 'Training'],
      dtype='object')

In [95]:
# Inspect the filtered experiment table currently held in `df`.
# NOTE(review): this displays the whole frame (pandas truncates the repr);
# `df.head()` would keep the stored output smaller.
df

Unnamed: 0,index,val_acc,test_acc,num_samples,Acquisition Entropy,Dataset Entropy,version,Name,test/loss,test/acc,...,test/w_acc,test/av_f1,Rel. Path,Dataset,Label Regime,Experiment Name,Query Method,Self-SL,Semi-SL,Training
1450,0,0.310,0.203095,40,1.313834,,20,basic-pretrained_model-resnet_drop-0_aug-isic_...,2.351670,0.203095,...,0.337474,0.186589,isic2019/active-isic19_low_qs-10/basic-pretrai...,ISIC-2019,low_qs-10,basic-pretrained_model-resnet_drop-0_aug-isic_...,Entropy,True,False,Self-SL Pre-Trained
1451,1,0.260,0.309855,50,1.504788,1.313834,20,basic-pretrained_model-resnet_drop-0_aug-isic_...,1.805039,0.309855,...,0.326310,0.226014,isic2019/active-isic19_low_qs-10/basic-pretrai...,ISIC-2019,low_qs-10,basic-pretrained_model-resnet_drop-0_aug-isic_...,Entropy,True,False,Self-SL Pre-Trained
1452,2,0.365,0.347915,60,1.279854,1.591093,20,basic-pretrained_model-resnet_drop-0_aug-isic_...,2.466533,0.347915,...,0.352185,0.246424,isic2019/active-isic19_low_qs-10/basic-pretrai...,ISIC-2019,low_qs-10,basic-pretrained_model-resnet_drop-0_aug-isic_...,Entropy,True,False,Self-SL Pre-Trained
1453,3,0.340,0.445989,70,0.801819,1.553032,20,basic-pretrained_model-resnet_drop-0_aug-isic_...,1.631087,0.445989,...,0.375267,0.294096,isic2019/active-isic19_low_qs-10/basic-pretrai...,ISIC-2019,low_qs-10,basic-pretrained_model-resnet_drop-0_aug-isic_...,Entropy,True,False,Self-SL Pre-Trained
1454,4,0.360,0.422773,80,1.279854,1.452860,20,basic-pretrained_model-resnet_drop-0_aug-isic_...,1.794828,0.422773,...,0.353580,0.277377,isic2019/active-isic19_low_qs-10/basic-pretrai...,ISIC-2019,low_qs-10,basic-pretrained_model-resnet_drop-0_aug-isic_...,Entropy,True,False,Self-SL Pre-Trained
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,1,0.390,0.481049,200,1.631457,1.469106,2,basic-pretrained_model-resnet_drop-0.5_aug-isi...,1.434865,0.481049,...,0.424527,0.306969,isic2019/active-isic19_low_qs-160/basic-pretra...,ISIC-2019,low_qs-160,basic-pretrained_model-resnet_drop-0.5_aug-isi...,BALD,True,False,Self-SL Pre-Trained
1956,2,0.380,0.533639,360,1.786142,1.585468,2,basic-pretrained_model-resnet_drop-0.5_aug-isi...,1.263438,0.533639,...,0.439675,0.346555,isic2019/active-isic19_low_qs-160/basic-pretra...,ISIC-2019,low_qs-160,basic-pretrained_model-resnet_drop-0.5_aug-isi...,BALD,True,False,Self-SL Pre-Trained
1957,0,0.370,0.335913,40,1.457784,,2,basic-pretrained_model-resnet_drop-0.5_aug-isi...,2.070035,0.335913,...,0.320922,0.222475,isic2019/active-isic19_low_qs-160/basic-pretra...,ISIC-2019,low_qs-160,basic-pretrained_model-resnet_drop-0.5_aug-isi...,BALD,True,False,Self-SL Pre-Trained
1958,1,0.420,0.457044,200,1.650112,1.457784,2,basic-pretrained_model-resnet_drop-0.5_aug-isi...,1.442475,0.457044,...,0.390889,0.284335,isic2019/active-isic19_low_qs-160/basic-pretra...,ISIC-2019,low_qs-160,basic-pretrained_model-resnet_drop-0.5_aug-isi...,BALD,True,False,Self-SL Pre-Trained


In [96]:
# `test/w_acc` for the "low_qs-10" and "low_qs-40" regimes at the selected
# budgets: a mean/std summary table followed by per-method t-tests.
settings = []
val = "test/w_acc"

in_budget = df["num_samples"].isin([80, 120, 160, 200, 240])
in_regime = df["Label Regime"].isin(["low_qs-10", "low_qs-40"])
samp_df = df[in_budget & in_regime]

# Aggregate `val` per group, then round to four decimals and scale by 100.
group_keys = ["Dataset", "Label Regime", "Query Method", "num_samples"]
stats = samp_df.groupby(group_keys).agg({val: ["mean", "std"]})
out_df = stats[val].round(4) * 100

out_dict = {}
regimes = samp_df["Label Regime"].unique()
for method in samp_df["Query Method"].unique():
    print(method)
    is_method = samp_df["Query Method"] == method
    for budget in samp_df["num_samples"].unique():
        print(budget)
        is_budget = samp_df["num_samples"] == budget
        # Scores of `val` per regime, in order of first appearance in samp_df.
        scores_by_regime = {
            regime: samp_df.loc[
                (samp_df["Label Regime"] == regime) & is_method & is_budget, val
            ].to_numpy()
            for regime in regimes
        }
        # The per-regime arrays are passed positionally to ttest_ind.
        result = ttest_ind(*scores_by_regime.values())
        print(result)
        

Entropy
80
Ttest_indResult(statistic=0.4995716748351085, pvalue=0.6436060636902046)
120
Ttest_indResult(statistic=-1.0082702860225325, pvalue=0.37036499140402396)
160
Ttest_indResult(statistic=-3.7436598845491424, pvalue=0.020057180904763935)
200
Ttest_indResult(statistic=-0.7415300978123583, pvalue=0.4995470877909325)
240
Ttest_indResult(statistic=-0.4232106384907112, pvalue=0.693904436607434)
Random
80
Ttest_indResult(statistic=0.7272765144088507, pvalue=0.5073391224211323)
120
Ttest_indResult(statistic=-0.04732005250906029, pvalue=0.9645265069026203)
160
Ttest_indResult(statistic=-2.9498397161861156, pvalue=0.04197605064695599)
200
Ttest_indResult(statistic=-1.4151123608342118, pvalue=0.22995514864427613)
240
Ttest_indResult(statistic=-1.9685100029150555, pvalue=0.1203748034590236)
Core-Set
80
Ttest_indResult(statistic=0.20973844734056565, pvalue=0.8441213424285187)
120
Ttest_indResult(statistic=0.21339152531410346, pvalue=0.8414567052455849)
160
Ttest_indResult(statistic=0.21433396

In [97]:
# Display the mean/std summary table (in percent) currently held in `out_df`.
out_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,std
Dataset,Label Regime,Query Method,num_samples,Unnamed: 4_level_1,Unnamed: 5_level_1
ISIC-2019,low_qs-10,BADGE,80,34.39,0.86
ISIC-2019,low_qs-10,BADGE,120,37.58,1.93
ISIC-2019,low_qs-10,BADGE,160,37.85,2.51
ISIC-2019,low_qs-10,BADGE,200,38.32,1.59
ISIC-2019,low_qs-10,BADGE,240,38.78,1.64
ISIC-2019,low_qs-10,BALD,80,38.11,1.42
ISIC-2019,low_qs-10,BALD,120,38.8,1.19
ISIC-2019,low_qs-10,BALD,160,40.4,1.76
ISIC-2019,low_qs-10,BALD,200,42.15,1.49
ISIC-2019,low_qs-10,BALD,240,42.26,0.97


In [98]:
# `test/w_acc` for the "low_qs-40" and "low_qs-160" regimes at the selected
# budgets: a mean/std summary table followed by per-method t-tests.
settings = []
val = "test/w_acc"

in_budget = df["num_samples"].isin([200, 360])
in_regime = df["Label Regime"].isin(["low_qs-40", "low_qs-160"])
samp_df = df[in_budget & in_regime]

# Aggregate `val` per group, then round to four decimals and scale by 100.
group_keys = ["Dataset", "Label Regime", "Query Method", "num_samples"]
stats = samp_df.groupby(group_keys).agg({val: ["mean", "std"]})
out_df = stats[val].round(4) * 100

out_dict = {}
regimes = samp_df["Label Regime"].unique()
for method in samp_df["Query Method"].unique():
    print(method)
    is_method = samp_df["Query Method"] == method
    for budget in samp_df["num_samples"].unique():
        print(budget)
        is_budget = samp_df["num_samples"] == budget
        # Scores of `val` per regime, in order of first appearance in samp_df.
        scores_by_regime = {
            regime: samp_df.loc[
                (samp_df["Label Regime"] == regime) & is_method & is_budget, val
            ].to_numpy()
            for regime in regimes
        }
        # The per-regime arrays are passed positionally to ttest_ind.
        result = ttest_ind(*scores_by_regime.values())
        print(result)
        

Random
200
Ttest_indResult(statistic=-0.3041153970288876, pvalue=0.7762040981455891)
360
Ttest_indResult(statistic=-0.22961606405106338, pvalue=0.8296537263006051)
Entropy
200
Ttest_indResult(statistic=1.3208126590785714, pvalue=0.2570602170308522)
360
Ttest_indResult(statistic=1.6097144888174841, pvalue=0.1827463331264351)
Core-Set
200
Ttest_indResult(statistic=0.03401247857186024, pvalue=0.9744967872190067)
360
Ttest_indResult(statistic=0.08471004369688048, pvalue=0.9365622670418251)
BADGE
200
Ttest_indResult(statistic=-0.647906275448505, pvalue=0.5523576668309075)
360
Ttest_indResult(statistic=0.01583290280264634, pvalue=0.9881259430142277)
BALD
200
Ttest_indResult(statistic=0.19755498139601513, pvalue=0.85302626360399)
360
Ttest_indResult(statistic=1.392371856847076, pvalue=0.23622123404885975)


In [99]:
# Display the mean/std summary table (in percent) currently held in `out_df`.
out_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,std
Dataset,Label Regime,Query Method,num_samples,Unnamed: 4_level_1,Unnamed: 5_level_1
ISIC-2019,low_qs-160,BADGE,200,38.11,1.07
ISIC-2019,low_qs-160,BADGE,360,42.86,1.44
ISIC-2019,low_qs-160,BALD,200,39.67,2.54
ISIC-2019,low_qs-160,BALD,360,42.83,1.05
ISIC-2019,low_qs-160,Core-Set,200,37.71,3.18
ISIC-2019,low_qs-160,Core-Set,360,39.47,1.04
ISIC-2019,low_qs-160,Entropy,200,37.8,1.46
ISIC-2019,low_qs-160,Entropy,360,40.82,0.6
ISIC-2019,low_qs-160,Random,200,40.8,2.34
ISIC-2019,low_qs-160,Random,360,42.48,1.65
