In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon

from mlscorecheck import auc

In [2]:
# Load the input table from the working directory. Judging by the preview
# below, each row carries a dataset name, the fold count k, acc/sens/spec/auc,
# the best_* scores, two thresholds and the class counts p and n.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns look like row indices
# left over from earlier saves -- confirm whether they are still needed.
data = pd.read_csv('raw-aggregated.csv')

In [3]:
# Preview the first rows to check which columns were loaded
data.head()

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,best_acc_orig,p,n
0,0,bupa,8,0.562368,0.855994,0.35,0.751001,0.733417,0.545687,0.87,0.26556,0.556736,0.733417,145,200
1,1,new_thyroid1,4,0.925577,1.0,0.911111,0.994483,0.962788,0.888889,0.977778,0.166667,0.333333,0.962788,35,180
2,2,haberman,3,0.637255,0.703704,0.613333,0.699095,0.738562,0.012346,1.0,0.22617,0.885074,0.738562,81,225
3,3,dermatology-6,2,0.055866,1.0,0.0,0.97071,0.980447,0.85,0.988166,0.0,0.75,0.980447,20,338
4,4,monk-2,3,0.909722,1.0,0.828947,1.0,0.979167,1.0,0.960526,0.1964,0.595344,0.979167,204,228


In [4]:
# Estimator identifiers handed over as `lower=` / `upper=` to the AUC bound
# functions in the following cells.
# NOTE: both names are rebound further down for the accuracy estimators, so the
# AUC cells must be executed before those later assignments (run top to bottom).
lower_bounds = ['min', 'rmin', 'amin', 'armin']
upper_bounds = ['max', 'amax', 'maxa']

In [5]:
def wrapper(func, **kwargs):
    """Call ``func`` with the supplied keyword arguments and return its result.

    Nothing is caught here: any exception raised by ``func`` propagates to the
    caller unchanged.
    """
    result = func(**kwargs)
    return result

In [6]:
def wrapper_debug(func, **kwargs):
    """Call ``func(**kwargs)`` without letting an ``Exception`` escape.

    Returns the value produced by ``func``; if ``func`` raises an ``Exception``
    (or a subclass), the exception message is returned as a string instead, so
    callers receive either a result or a piece of error text.
    """
    try:
        outcome = func(**kwargs)
    except Exception as err:
        # report the failure as text in place of the result
        outcome = str(err)
    return outcome

In [7]:
# Lower AUC estimates, two columns per estimator: 'auc_<lb>' is computed from
# the plain acc/sens/spec columns and 'auc_<lb>_best' from the best_* columns.
# Failures are stored as message strings because the call goes through
# wrapper_debug.
for lb in lower_bounds:
    for prefix, suffix in (('', ''), ('best_', '_best')):
        data[f'auc_{lb}{suffix}'] = data.apply(
            lambda row: wrapper_debug(
                auc.auc_lower_from_aggregated,
                scores={
                    name: row[prefix + name]
                    for name in ('acc', 'sens', 'spec')
                },
                folding={
                    'p': row['p'],
                    'n': row['n'],
                    'n_repeats': 1,
                    'n_folds': row['k'],
                    'folding': 'stratified_sklearn'
                },
                eps=1e-4,
                lower=lb
            ),
            axis=1
        )

In [8]:
# Upper AUC estimates, two columns per estimator. The un-suffixed column uses
# the plain acc/sens/spec scores, except for 'maxa', which is fed the best_*
# scores there as well; the '_best' column always uses the best_* scores.
for ub in upper_bounds:
    plain_prefix = 'best_' if ub == 'maxa' else ''
    for prefix, suffix in ((plain_prefix, ''), ('best_', '_best')):
        data[f'auc_{ub}{suffix}'] = data.apply(
            lambda row: wrapper_debug(
                auc.auc_upper_from_aggregated,
                scores={
                    name: row[prefix + name]
                    for name in ('acc', 'sens', 'spec')
                },
                folding={
                    'p': row['p'],
                    'n': row['n'],
                    'n_repeats': 1,
                    'n_folds': row['k'],
                    'folding': 'stratified_sklearn'
                },
                eps=1e-4,
                upper=ub
            ),
            axis=1
        )

In [9]:
# Rebind the estimator lists for the acc_* bound columns computed in the next
# two cells; these replace the lists that drove the AUC bound cells above.
lower_bounds = ['min', 'rmin']
upper_bounds = ['max', 'rmax']

In [10]:
def _acc_lower_for_row(row, bound):
    """Evaluate auc.acc_lower_from_aggregated for one row of ``data``.

    ``bound`` is forwarded as the ``lower`` argument. The call goes through
    wrapper_debug, so a failing estimation yields the error message string.
    """
    return wrapper_debug(
        auc.acc_lower_from_aggregated,
        scores={name: row[name] for name in ('acc', 'sens', 'spec', 'auc')},
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        lower=bound
    )


# one 'acc_<lb>' column per lower-bound estimator
for lb in lower_bounds:
    data[f'acc_{lb}'] = data.apply(
        lambda row: _acc_lower_for_row(row, lb),
        axis=1
    )

In [11]:
def _acc_upper_for_row(row, bound):
    """Evaluate auc.acc_upper_from_aggregated for one row of ``data``.

    ``bound`` is forwarded as the ``upper`` argument. The call goes through
    wrapper_debug, so a failing estimation yields the error message string.
    """
    return wrapper_debug(
        auc.acc_upper_from_aggregated,
        scores={name: row[name] for name in ('acc', 'sens', 'spec', 'auc')},
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        upper=bound
    )


# one 'acc_<ub>' column per upper-bound estimator
for ub in upper_bounds:
    data[f'acc_{ub}'] = data.apply(
        lambda row: _acc_upper_for_row(row, ub),
        axis=1
    )

In [12]:
# Rebind the estimator lists once more, this time for the max_acc_* bound
# columns computed in the next two cells.
lower_bounds = ['min']
upper_bounds = ['max', 'rmax']

In [13]:
def _max_acc_lower_inputs(row):
    """Collect the keyword arguments shared by every max-acc lower-bound call
    for one row of ``data`` (scores, folding description and eps)."""
    return {
        'scores': {
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        'folding': {
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        'eps': 1e-4
    }


# one 'max_acc_<lb>' column per lower-bound estimator; wrapper_debug stores
# the error message string for rows where the estimation raises
for lb in lower_bounds:
    data[f'max_acc_{lb}'] = data.apply(
        lambda row: wrapper_debug(
            auc.max_acc_lower_from_aggregated,
            **_max_acc_lower_inputs(row),
            lower=lb
        ),
        axis=1
    )

In [14]:
def _max_acc_upper_inputs(row):
    """Collect the keyword arguments shared by every max-acc upper-bound call
    for one row of ``data`` (scores, folding description and eps)."""
    return {
        'scores': {
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        'folding': {
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        'eps': 1e-4
    }


# one 'max_acc_<ub>' column per upper-bound estimator; wrapper_debug stores
# the error message string for rows where the estimation raises
for ub in upper_bounds:
    data[f'max_acc_{ub}'] = data.apply(
        lambda row: wrapper_debug(
            auc.max_acc_upper_from_aggregated,
            **_max_acc_upper_inputs(row),
            upper=ub
        ),
        axis=1
    )

In [15]:
# Show the rows whose AUC exceeds 0.999, together with all derived columns
data.loc[data['auc'] > 0.999]

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,...,auc_amax_best,auc_maxa,auc_maxa_best,acc_min,acc_rmin,acc_max,acc_rmax,max_acc_min,max_acc_max,max_acc_rmax
4,4,monk-2,3,0.909722,1.000000,0.828947,1.000000,0.979167,1.000000,0.960526,...,1.0,0.999138,0.999138,0.472175,0.472222,1.000000,1.000000,1.0,1.000000,1.000000
5,5,page-blocks-1-3_vs_4,9,0.059305,1.000000,0.000000,1.000000,0.997863,1.000000,0.997732,...,1.0,0.999963,0.999963,0.059297,0.059305,1.000000,1.000000,1.0,1.000000,1.000000
26,26,new_thyroid1,6,0.361772,1.000000,0.238889,0.999074,0.995370,0.972222,1.000000,...,1.0,0.999925,0.999925,0.162527,0.162698,0.999882,0.999882,1.0,0.999882,0.999882
30,30,iris0,3,0.333333,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.333299,0.333333,1.000000,1.000000,1.0,1.000000,1.000000
31,31,shuttle-c0-vs-c4,8,0.067247,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.067240,0.067247,1.000000,1.000000,0.998763,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9918,9918,new_thyroid1,10,0.162338,1.000000,0.000000,0.999074,0.972294,0.866667,0.994444,...,1.0,0.99719,0.99719,0.162151,0.162338,0.999882,0.999882,1.0,0.999882,0.999882
9943,9943,new_thyroid1,7,0.897542,1.000000,0.877582,1.000000,0.995238,0.971429,1.000000,...,1.0,0.99992,0.99992,0.162810,0.162826,1.000000,1.000000,1.0,1.000000,1.000000
9944,9944,vowel0,6,0.091094,1.000000,0.000000,0.999109,0.995941,0.966667,0.998881,...,1.0,0.999905,0.999905,0.091003,0.091094,0.999928,0.999928,0.987187,0.999928,0.999928
9972,9972,shuttle-c0-vs-c4,9,0.067248,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.067241,0.067248,1.000000,1.000000,0.998808,1.000000,1.000000


In [16]:
# Write the frame, including every bound column added above, to the working
# directory; index=False keeps the row index out of the file.
data.to_csv('processed-aggregated.csv', index=False)