In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon

from mlscorecheck import auc

In [18]:
# Load the raw aggregated scores into `data`; the cells below add the
# computed bound columns to this same frame.
data = pd.read_csv('raw-aggregated3.csv')

In [19]:
# Preview the first rows to check which columns were loaded.
data.head()

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,best_acc_orig,p,n
0,0,bupa,8,0.69556,0.663377,0.72,0.751001,0.73051,0.580409,0.84,0.42053,0.508864,0.73051,145,200
1,1,new_thyroid1,4,0.925577,1.0,0.911111,0.994483,0.962788,0.888889,0.977778,0.166667,0.333333,0.962788,35,180
2,2,haberman,3,0.653595,0.617284,0.666667,0.699095,0.738562,0.012346,1.0,0.264706,0.885074,0.738562,81,225
3,3,dermatology-6,2,0.96648,0.95,0.967456,0.97071,0.980447,0.85,0.988166,0.055866,0.75,0.980447,20,338
4,4,monk-2,3,0.974537,1.0,0.951754,1.0,0.979167,1.0,0.960526,0.472222,0.595344,0.979167,204,228


In [20]:
# Estimator names passed as `lower=` / `upper=` to the AUC bound calls in the
# following cells; each name also becomes part of an `auc_<name>` column.
lower_bounds = ['min', 'rmin', 'amin', 'armin']
upper_bounds = ['max', 'amax', 'maxa']

In [21]:
def wrapper(func, **kwargs):
    """Call ``func`` with ``kwargs`` and return the first element of its result.

    Nothing is caught here: an exception raised by ``func`` itself, or by
    indexing its return value with ``[0]``, propagates to the caller.
    """
    outcome = func(**kwargs)
    return outcome[0]

In [22]:
def wrapper_debug(func, **kwargs):
    """Call ``func`` with ``kwargs`` and return the first element of its result.

    Debugging variant: if the call or the ``[0]`` indexing raises an
    ``Exception``, the exception's message is returned as a string instead of
    being raised. A column filled through this helper can therefore hold a
    mix of computed values and error-message strings.
    """
    try:
        outcome = func(**kwargs)
        first = outcome[0]
    except Exception as error:
        return str(error)
    return first

In [23]:
# Lower AUC bounds computed from the aggregated acc / sens / spec scores.
# For every estimator name in `lower_bounds` two columns are added to `data`:
#   auc_<lb>       - from the acc / sens / spec columns
#   auc_<lb>_best  - from the best_acc / best_sens / best_spec columns
# wrapper_debug stores the exception message (a string) where a call fails.
for lb in lower_bounds:
    for score_prefix, column_suffix in (('', ''), ('best_', '_best')):

        def lower_auc_for_row(row, score_prefix=score_prefix, lb=lb):
            """Lower AUC bound (or error message) for one row of `data`."""
            scores = {
                'acc': row[f'{score_prefix}acc'],
                'sens': row[f'{score_prefix}sens'],
                'spec': row[f'{score_prefix}spec'],
            }
            folding = dict(
                p=row['p'],
                n=row['n'],
                n_repeats=1,
                n_folds=row['k'],
                folding='stratified_sklearn',
            )
            return wrapper_debug(
                auc.auc_lower_from_aggregated,
                scores=scores,
                folding=folding,
                eps=1e-4,
                lower=lb,
            )

        data[f'auc_{lb}{column_suffix}'] = data.apply(lower_auc_for_row, axis=1)

In [24]:
# Upper AUC bounds computed from the aggregated scores.
# For every estimator name in `upper_bounds` two columns are added to `data`:
#   auc_<ub>       - from acc / sens / spec, except for 'maxa', which reads
#                    the best_* columns instead
#   auc_<ub>_best  - always from best_acc / best_sens / best_spec
# As a consequence auc_maxa and auc_maxa_best are built from the same inputs.
for ub in upper_bounds:
    plain_prefix = 'best_' if ub == 'maxa' else ''
    for score_prefix, column_suffix in ((plain_prefix, ''), ('best_', '_best')):

        def upper_auc_for_row(row, score_prefix=score_prefix, ub=ub):
            """Upper AUC bound (or error message) for one row of `data`."""
            scores = {
                'acc': row[f'{score_prefix}acc'],
                'sens': row[f'{score_prefix}sens'],
                'spec': row[f'{score_prefix}spec'],
            }
            folding = dict(
                p=row['p'],
                n=row['n'],
                n_repeats=1,
                n_folds=row['k'],
                folding='stratified_sklearn',
            )
            return wrapper_debug(
                auc.auc_upper_from_aggregated,
                scores=scores,
                folding=folding,
                eps=1e-4,
                upper=ub,
            )

        data[f'auc_{ub}{column_suffix}'] = data.apply(upper_auc_for_row, axis=1)

In [25]:
# Estimator names for the accuracy-bound cells that follow (`acc_<name>`
# columns). NOTE: this rebinds the lists the AUC cells iterate over, so the
# AUC cells must have been run before this one; re-running them afterwards
# would pick up these names instead.
lower_bounds = ['min', 'rmin']
upper_bounds = ['max', 'rmax']

In [26]:
# Lower accuracy bounds: one `acc_<lb>` column per estimator name in
# `lower_bounds`, computed from each row's acc / sens / spec / auc values.
# wrapper_debug stores the exception message (a string) where a call fails.
for lb in lower_bounds:

    def lower_acc_for_row(row, lb=lb):
        """Lower accuracy bound (or error message) for one row of `data`."""
        scores = {name: row[name] for name in ('acc', 'sens', 'spec', 'auc')}
        folding = dict(
            p=row['p'],
            n=row['n'],
            n_repeats=1,
            n_folds=row['k'],
            folding='stratified_sklearn',
        )
        return wrapper_debug(
            auc.acc_lower_from_aggregated,
            scores=scores,
            folding=folding,
            eps=1e-4,
            lower=lb,
        )

    data[f'acc_{lb}'] = data.apply(lower_acc_for_row, axis=1)

In [27]:
# Upper accuracy bounds: one `acc_<ub>` column per estimator name in
# `upper_bounds`, computed from each row's acc / sens / spec / auc values.
# wrapper_debug stores the exception message (a string) where a call fails.
for ub in upper_bounds:

    def upper_acc_for_row(row, ub=ub):
        """Upper accuracy bound (or error message) for one row of `data`."""
        scores = {name: row[name] for name in ('acc', 'sens', 'spec', 'auc')}
        folding = dict(
            p=row['p'],
            n=row['n'],
            n_repeats=1,
            n_folds=row['k'],
            folding='stratified_sklearn',
        )
        return wrapper_debug(
            auc.acc_upper_from_aggregated,
            scores=scores,
            folding=folding,
            eps=1e-4,
            upper=ub,
        )

    data[f'acc_{ub}'] = data.apply(upper_acc_for_row, axis=1)

In [28]:
# Estimator names for the `max_acc_<name>` cells that follow.
# NOTE: rebinds the same two names once more; the cells above that iterate
# over `lower_bounds` / `upper_bounds` must already have been executed.
lower_bounds = ['min']
upper_bounds = ['max', 'rmax']

In [29]:
# One `max_acc_<lb>` column per estimator name in `lower_bounds`, obtained
# from auc.max_acc_lower_from_aggregated on each row's acc / sens / spec / auc.
# NOTE(review): presumably a lower bound on the maximum accuracy - confirm
# against the mlscorecheck documentation.
for lb in lower_bounds:

    def lower_max_acc_for_row(row, lb=lb):
        """`max_acc` lower bound (or error message) for one row of `data`."""
        scores = {name: row[name] for name in ('acc', 'sens', 'spec', 'auc')}
        folding = dict(
            p=row['p'],
            n=row['n'],
            n_repeats=1,
            n_folds=row['k'],
            folding='stratified_sklearn',
        )
        return wrapper_debug(
            auc.max_acc_lower_from_aggregated,
            scores=scores,
            folding=folding,
            eps=1e-4,
            lower=lb,
        )

    data[f'max_acc_{lb}'] = data.apply(lower_max_acc_for_row, axis=1)

In [30]:
# One `max_acc_<ub>` column per estimator name in `upper_bounds`, obtained
# from auc.max_acc_upper_from_aggregated on each row's acc / sens / spec / auc.
# NOTE(review): presumably an upper bound on the maximum accuracy - confirm
# against the mlscorecheck documentation.
for ub in upper_bounds:

    def upper_max_acc_for_row(row, ub=ub):
        """`max_acc` upper bound (or error message) for one row of `data`."""
        scores = {name: row[name] for name in ('acc', 'sens', 'spec', 'auc')}
        folding = dict(
            p=row['p'],
            n=row['n'],
            n_repeats=1,
            n_folds=row['k'],
            folding='stratified_sklearn',
        )
        return wrapper_debug(
            auc.max_acc_upper_from_aggregated,
            scores=scores,
            folding=folding,
            eps=1e-4,
            upper=ub,
        )

    data[f'max_acc_{ub}'] = data.apply(upper_max_acc_for_row, axis=1)

In [31]:
# Inspect the rows whose recorded AUC exceeds 0.999. In the output below the
# acc_* / max_acc_* cells of these rows hold error-message strings returned
# by wrapper_debug rather than numbers.
data[data['auc'] > 0.999]

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,...,auc_amax_best,auc_maxa,auc_maxa_best,acc_min,acc_rmin,acc_max,acc_rmax,max_acc_min,max_acc_max,max_acc_rmax
4,4,monk-2,3,0.974537,1.000000,0.951754,1.000000,0.979167,1.000000,0.960526,...,1.0,0.999138,0.999138,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
5,5,page-blocks-1-3_vs_4,9,0.940695,1.000000,0.937007,1.000000,0.997863,1.000000,0.997732,...,1.0,0.999963,0.999963,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
26,26,new_thyroid1,6,0.953571,1.000000,0.944444,0.999074,0.995370,0.972222,1.000000,...,1.0,0.999925,0.999925,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
30,30,iris0,3,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
31,31,shuttle-c0-vs-c4,8,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9919,9919,new_thyroid1,10,0.957576,1.000000,0.950000,0.999074,0.972294,0.866667,0.994444,...,1.0,0.99719,0.99719,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
9944,9944,new_thyroid1,7,0.948848,1.000000,0.938901,1.000000,0.995238,0.971429,1.000000,...,1.0,0.99992,0.99992,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
9945,9945,vowel0,6,0.959522,0.988889,0.956577,0.999109,0.995941,0.966667,0.998881,...,1.0,0.999905,0.999905,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.
9973,9973,shuttle-c0-vs-c4,9,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.


In [32]:
# Persist the frame with all bound columns added; index=False leaves the row
# index out of the file.
# NOTE(review): cells where a bound call failed contain the error message as
# text (see wrapper_debug), so those strings are written to the CSV as well.
data.to_csv('processed-aggregated3.csv', index=False)