# Review Results
General notebook for reviewing results files

In [1]:
import itertools

import pandas as pd

In [2]:
# Shell scripts that declare which algorithms and datasets this batch
# is expected to cover.
algs_list_file = "../scripts/ALGS_GPU_1.sh"
datasets_list_file = "../scripts/DATASETS_A.sh"

# Substring used to select this experiment's rows from the results file.
experiment_name = "gpu-expt"

In [3]:
# parse the expected datasets and algs from the scripts


def _read_shell_array(path, marker):
    """Return the entries listed after `marker` in the script at `path`.

    Collection starts on the line after a line containing `marker` and stops
    at the first following line that contains ")". Every collected line is
    stripped of surrounding whitespace.

    Blank lines and lines starting with "#" are skipped: recording them would
    add bogus entries and inflate the list lengths that later cells use as
    denominators.

    Returns an empty list if `marker` never appears in the file.
    """
    entries = []
    started = False
    with open(path, 'r') as f:
        for line in f:
            if marker in line:
                # the marker line itself only opens the list
                started = True
                continue
            if not started:
                continue
            if ")" in line:
                break
            entry = line.strip()
            if entry and not entry.startswith("#"):
                entries.append(entry)
    return entries


# for algs, keep only the text before the first ":" of each entry
alg_list = [
    entry.split(":")[0].strip()
    for entry in _read_shell_array(algs_list_file, "MODELS_ENVS")
]
# an entry with nothing before the ":" carries no alg name
alg_list = [alg for alg in alg_list if alg]

dataset_list = _read_shell_array(datasets_list_file, "DATASETS")

print(f"found {len(alg_list)} algs in {algs_list_file}:")
for position, alg in enumerate(alg_list, start=1):
    print(position, alg)
print()
print(f"found {len(dataset_list)} datasets in {datasets_list_file}: ")
for position, dataset in enumerate(dataset_list, start=1):
    print(position, dataset)



found 5 algs in ../scripts/ALGS_GPU_1.sh:
1 XGBoost
2 CatBoost
3 MLP
4 TabNet
5 VIME

found 114 datasets in ../scripts/DATASETS_A.sh: 
1 openml__sick__3021
2 openml__kr-vs-kp__3
3 openml__letter__6
4 openml__balance-scale__11
5 openml__mfeat-factors__12
6 openml__mfeat-fourier__14
7 openml__breast-w__15
8 openml__mfeat-karhunen__16
9 openml__mfeat-morphological__18
10 openml__mfeat-zernike__22
11 openml__cmc__23
12 openml__optdigits__28
13 openml__credit-approval__29
14 openml__credit-g__31
15 openml__pendigits__32
16 openml__diabetes__37
17 openml__spambase__43
18 openml__splice__45
19 openml__tic-tac-toe__49
20 openml__vehicle__53
21 openml__electricity__219
22 openml__satimage__2074
23 openml__eucalyptus__2079
24 openml__vowel__3022
25 openml__isolet__3481
26 openml__analcatdata_authorship__3549
27 openml__analcatdata_dmft__3560
28 openml__mnist_784__3573
29 openml__pc4__3902
30 openml__pc3__3903
31 openml__jm1__3904
32 openml__kc2__3913
33 openml__kc1__3917
34 openml__pc1__3918
35 

In [4]:
# load the metadataset of results
df = pd.read_csv("../TabSurvey/metadataset.csv")

# filter by experiment name
# regex=False: experiment_name is matched as a literal substring, so any
#   regex metacharacter in a future name cannot change the match.
# na=False: rows with a missing exp_name are excluded; without it the mask
#   contains NaN and .loc refuses to index with it.
df = df.loc[df["exp_name"].str.contains(experiment_name, regex=False, na=False)]

In [6]:
# number of rows currently in df
print(df.shape[0])

144680


In [5]:
# for each alg-dataset combination, check for results in the metadataset
alg_dataset_pairs = list(itertools.product(alg_list, dataset_list))

# Count rows per (alg_name, dataset_name) in one pass over df, rather than
# building and summing a full-length boolean mask for every expected pair.
results_per_pair = df.groupby(["alg_name", "dataset_name"]).size().to_dict()

# keep track of the number of results for each pair;
# expected pairs with no rows in df get a count of 0
df_counts = pd.DataFrame(
    {
        "alg": [alg for alg, _ in alg_dataset_pairs],
        "dataset": [dataset for _, dataset in alg_dataset_pairs],
        "count": [results_per_pair.get(pair, 0) for pair in alg_dataset_pairs],
    }
)


In [7]:
# Show every alg-dataset pair that has not yet reached 300 results
# (a pair with 300 results is considered complete).
print("pairs with fewer than 300 results")
df_counts.loc[df_counts["count"].lt(300)]

pairs with fewer than 300 results


Unnamed: 0,alg,dataset,count
67,XGBoost,openml__Devnagari-Script__167121,100
68,XGBoost,openml__CIFAR_10__167124,110
73,XGBoost,openml__covertype__7593,0
79,XGBoost,openml__helena__168329,0
82,XGBoost,openml__robert__168332,130
...,...,...,...
549,VIME,openml__dionis__189355,0
550,VIME,openml__albert__189356,0
553,VIME,openml__skin-segmentation__9965,0
555,VIME,openml__philippine__190410,260


In [9]:
# Show every alg-dataset pair with exactly 0 results, i.e. pairs for which
# nothing was recorded at all (complete fail).
print("pairs with 0 results (complete fail)")
df_counts.loc[df_counts["count"].eq(0)]

pairs with 0 results (complete fail)


Unnamed: 0,alg,dataset,count
73,XGBoost,openml__covertype__7593,0
79,XGBoost,openml__helena__168329,0
93,XGBoost,openml__dionis__189355,0
128,CatBoost,openml__pendigits__32,0
138,CatBoost,openml__isolet__3481,0
181,CatBoost,openml__Devnagari-Script__167121,0
182,CatBoost,openml__CIFAR_10__167124,0
187,CatBoost,openml__covertype__7593,0
190,CatBoost,openml__shuttle__146212,0
193,CatBoost,openml__helena__168329,0


In [10]:
# for each dataset, for what fraction of algorithms are there at least 200 results?
# - ge(200) rather than > 200, so a pair with exactly 200 results counts as
#   "at least 200".
# - the mean of the boolean flags within each dataset group is the fraction of
#   that dataset's pairs meeting the threshold (a value in [0, 1], not a
#   percentage), and does not depend on an externally computed list length.
print(df_counts["count"].ge(200).groupby(df_counts["dataset"]).mean().sort_values())

dataset
openml__dionis__189355                    0.0
openml__covertype__7593                   0.0
openml__CIFAR_10__167124                  0.0
openml__Devnagari-Script__167121          0.0
openml__helena__168329                    0.0
                                         ... 
openml__wdbc__9946                        1.0
openml__haberman__42                      1.0
openml__banknote-authentication__10093    1.0
openml__jasmine__168911                   1.0
openml__wilt__146820                      1.0
Name: count, Length: 114, dtype: float64


Again, some datasets are problematic, but this is not a big issue.

In [11]:
# for each alg, for what fraction of datasets are there at least 200 results?
# - ge(200) rather than > 200, so a pair with exactly 200 results counts as
#   "at least 200".
# - the mean of the boolean flags within each alg group is the fraction of
#   that alg's pairs meeting the threshold (a value in [0, 1], not a
#   percentage), and does not depend on an externally computed list length.
print(df_counts["count"].ge(200).groupby(df_counts["alg"]).mean().sort_values())

alg
TabNet      0.622807
VIME        0.701754
MLP         0.894737
CatBoost    0.903509
XGBoost     0.947368
Name: count, dtype: float64


Nothing terrible. Great.