In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon

from mlscorecheck import auc

In [2]:
# Load the aggregated raw experiment results. The path is relative, so the
# CSV is expected in the notebook's working directory.
# NOTE(review): the preview of this frame shows 'Unnamed: 0.1' and
# 'Unnamed: 0' columns, which look like row indices persisted by earlier
# saves - confirm whether they are still needed downstream.
data = pd.read_csv('raw-aggregated-ns.csv')

In [3]:
# Preview the first rows to check which columns were loaded.
data.head()

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,best_acc_orig,p,n
0,0,bupa,8,0.463993,0.992188,0.081193,0.739367,0.721921,0.569068,0.833953,0.127085,0.515278,0.721921,145,200
1,1,new_thyroid1,4,0.162736,1.0,0.0,0.99422,0.962788,0.858036,0.983081,0.0,0.333333,0.962788,35,180
2,2,monk-2,3,0.613426,0.183985,1.0,1.0,1.0,1.0,1.0,0.98,0.725,1.0,204,228
3,3,led7digit-0-2-4-6-7-8-9_vs_1,3,0.083533,1.0,0.0,0.858256,0.954848,0.704159,0.977992,0.0,0.772727,0.954848,37,406
4,4,saheart,4,0.480603,0.967434,0.220345,0.720729,0.686169,0.296875,0.894715,0.246489,0.43109,0.686169,160,302


In [4]:
# Estimator names used for the AUC bounds. Each name is passed as `lower=` /
# `upper=` to the auc.auc_*_from_aggregated calls in the next two cells and
# also becomes part of the result column name (auc_<name>, auc_<name>_best).
# NOTE: both names are rebound further down for the accuracy bounds, so the
# AUC cells must be executed before those rebinding cells (top-to-bottom order).
lower_bounds = ['min', 'rmin', 'amin', 'armin']
upper_bounds = ['max', 'amax', 'maxa']

In [5]:
def wrapper(func, **kwargs):
    """Call ``func`` with the given keyword arguments and return its result.

    Nothing is caught here: any exception raised by ``func`` propagates to
    the caller unchanged.

    Args:
        func: the callable to invoke.
        **kwargs: keyword arguments forwarded verbatim to ``func``.

    Returns:
        Whatever ``func(**kwargs)`` returns.
    """
    result = func(**kwargs)
    return result

In [6]:
def wrapper_debug(func, **kwargs):
    """Call ``func`` with keyword arguments, reporting failures as text.

    Instead of letting an error abort a row-wise ``apply``, any ``Exception``
    raised by ``func`` is converted to its message string and returned in
    place of the result. Callers therefore may receive either the regular
    return value or a ``str`` describing the failure.

    Args:
        func: the callable to invoke.
        **kwargs: keyword arguments forwarded verbatim to ``func``.

    Returns:
        ``func(**kwargs)`` on success, otherwise ``str(exception)``.
    """
    try:
        outcome = func(**kwargs)
    except Exception as error:
        # Deliberately keep going: the message is stored where the value would be.
        return str(error)
    else:
        return outcome

In [7]:
# Estimate AUC lower bounds from the aggregated (acc, sens, spec) scores.
# For every estimator name in `lower_bounds` two columns are written:
#   auc_<lb>       - computed from the acc / sens / spec columns
#   auc_<lb>_best  - computed from the best_acc / best_sens / best_spec columns
# Rows for which the estimator raises hold the exception message instead of a
# number (see wrapper_debug), so these columns may mix floats and strings.
for lb in lower_bounds:
    # ('', '') selects the plain score columns, ('best_', '_best') the best_*
    # ones; this order writes auc_<lb> before auc_<lb>_best.
    for score_prefix, column_suffix in (('', ''), ('best_', '_best')):
        data[f'auc_{lb}{column_suffix}'] = data.apply(
            lambda row: wrapper_debug(
                auc.auc_lower_from_aggregated,
                scores={
                    name: row[f'{score_prefix}{name}']
                    for name in ('acc', 'sens', 'spec')
                },
                # One repetition of a k-fold evaluation; p / n / k come from the row.
                folding={
                    'p': row['p'],
                    'n': row['n'],
                    'n_repeats': 1,
                    'n_folds': row['k'],
                    'folding': 'stratified_sklearn'
                },
                eps=1e-4,
                lower=lb
            ),
            axis=1
        )

In [8]:
# Estimate AUC upper bounds from the aggregated (acc, sens, spec) scores.
# For every estimator name in `upper_bounds` two columns are written:
#   auc_<ub>       - computed from the acc / sens / spec columns
#   auc_<ub>_best  - computed from the best_acc / best_sens / best_spec columns
# Rows for which the estimator raises hold the exception message instead of a
# number (see wrapper_debug), so these columns may mix floats and strings.
for ub in upper_bounds:
    for column_suffix in ('', '_best'):
        # Special case: the 'maxa' estimator is always fed the best_* scores,
        # so auc_maxa and auc_maxa_best are computed from identical inputs.
        use_best_scores = column_suffix == '_best' or ub == 'maxa'
        score_prefix = 'best_' if use_best_scores else ''

        data[f'auc_{ub}{column_suffix}'] = data.apply(
            lambda row: wrapper_debug(
                auc.auc_upper_from_aggregated,
                scores={
                    name: row[f'{score_prefix}{name}']
                    for name in ('acc', 'sens', 'spec')
                },
                # One repetition of a k-fold evaluation; p / n / k come from the row.
                folding={
                    'p': row['p'],
                    'n': row['n'],
                    'n_repeats': 1,
                    'n_folds': row['k'],
                    'folding': 'stratified_sklearn'
                },
                eps=1e-4,
                upper=ub
            ),
            axis=1
        )

In [9]:
# Estimator names for the accuracy bounds (columns acc_<name>).
# NOTE: this rebinds the lists that the AUC cells above iterate over; running
# those cells again after this one would use these names instead.
lower_bounds = ['min', 'rmin']
upper_bounds = ['max', 'rmax']

In [10]:
# Accuracy lower bounds: one acc_<lb> column per estimator in `lower_bounds`,
# computed row by row from the acc / sens / spec / auc columns. A failing row
# stores the exception message returned by wrapper_debug instead of a number.
for lb in lower_bounds:
    data[f'acc_{lb}'] = data.apply(
        lambda row: wrapper_debug(
            auc.acc_lower_from_aggregated,
            scores={name: row[name] for name in ('acc', 'sens', 'spec', 'auc')},
            # One repetition of a k-fold evaluation; p / n / k come from the row.
            folding=dict(
                p=row['p'],
                n=row['n'],
                n_repeats=1,
                n_folds=row['k'],
                folding='stratified_sklearn',
            ),
            eps=1e-4,
            lower=lb,
        ),
        axis=1,
    )

In [11]:
# Accuracy upper bounds: one acc_<ub> column per estimator in `upper_bounds`,
# computed row by row from the acc / sens / spec / auc columns. A failing row
# stores the exception message returned by wrapper_debug instead of a number.
for ub in upper_bounds:
    data[f'acc_{ub}'] = data.apply(
        lambda row: wrapper_debug(
            auc.acc_upper_from_aggregated,
            scores={name: row[name] for name in ('acc', 'sens', 'spec', 'auc')},
            # One repetition of a k-fold evaluation; p / n / k come from the row.
            folding=dict(
                p=row['p'],
                n=row['n'],
                n_repeats=1,
                n_folds=row['k'],
                folding='stratified_sklearn',
            ),
            eps=1e-4,
            upper=ub,
        ),
        axis=1,
    )

In [12]:
# Estimator names for the maximum-accuracy bounds (columns max_acc_<name>).
# NOTE: this rebinds the lists once more; re-running any earlier bound cell
# after this one would iterate over these names instead of its own.
lower_bounds = ['min']
upper_bounds = ['max', 'rmax']

In [13]:
# Maximum-accuracy lower bounds: one max_acc_<lb> column per estimator in
# `lower_bounds`, computed row by row from the acc / sens / spec / auc columns.
# A failing row stores the exception message returned by wrapper_debug.
for lb in lower_bounds:
    data[f'max_acc_{lb}'] = data.apply(
        lambda row: wrapper_debug(
            auc.max_acc_lower_from_aggregated,
            scores={name: row[name] for name in ('acc', 'sens', 'spec', 'auc')},
            # One repetition of a k-fold evaluation; p / n / k come from the row.
            folding=dict(
                p=row['p'],
                n=row['n'],
                n_repeats=1,
                n_folds=row['k'],
                folding='stratified_sklearn',
            ),
            eps=1e-4,
            lower=lb,
        ),
        axis=1,
    )

In [14]:
# Maximum-accuracy upper bounds: one max_acc_<ub> column per estimator in
# `upper_bounds`, computed row by row from the acc / sens / spec / auc columns.
# A failing row stores the exception message returned by wrapper_debug.
for ub in upper_bounds:
    data[f'max_acc_{ub}'] = data.apply(
        lambda row: wrapper_debug(
            auc.max_acc_upper_from_aggregated,
            scores={name: row[name] for name in ('acc', 'sens', 'spec', 'auc')},
            # One repetition of a k-fold evaluation; p / n / k come from the row.
            folding=dict(
                p=row['p'],
                n=row['n'],
                n_repeats=1,
                n_folds=row['k'],
                folding='stratified_sklearn',
            ),
            eps=1e-4,
            upper=ub,
        ),
        axis=1,
    )

In [15]:
# Inspect the rows whose reported AUC exceeds 0.999 (the near-perfect cases)
# together with all bound columns computed above.
data[data['auc'] > 0.999]

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,...,auc_amax_best,auc_maxa,auc_maxa_best,acc_min,acc_rmin,acc_max,acc_rmax,max_acc_min,max_acc_max,max_acc_rmax
2,2,monk-2,3,0.613426,0.183985,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.472175,0.472222,1.00000,1.00000,1.0,1.00000,1.00000
7,7,new_thyroid1,9,0.162842,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.162624,0.162641,1.00000,1.00000,1.0,1.00000,1.00000
40,40,monk-2,3,0.766204,1.000000,0.569882,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.472175,0.472222,1.00000,1.00000,1.0,1.00000,1.00000
42,42,shuttle-c0-vs-c4,4,0.067244,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.067243,0.067250,1.00000,1.00000,0.997506,1.00000,1.00000
44,44,dermatology-6,7,0.832310,1.000000,0.822443,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.055855,0.055861,1.00000,1.00000,1.0,1.00000,1.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9953,9953,shuttle-c0-vs-c4,10,0.067246,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.067242,0.067249,1.00000,1.00000,0.998893,1.00000,1.00000
9957,9957,new_thyroid1,8,0.162571,1.000000,0.000000,1.000000,0.990741,0.986111,0.994565,...,1.0,0.999691,0.999691,0.162731,0.162749,1.00000,1.00000,1.0,1.00000,1.00000
9980,9980,iris0,3,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.333299,0.333333,1.00000,1.00000,1.0,1.00000,1.00000
9988,9988,monk-2,7,0.858654,1.000000,0.732492,1.000000,1.000000,1.000000,1.000000,...,1.0,1.0,1.0,0.472189,0.472237,1.00000,1.00000,1.0,1.00000,1.00000


In [16]:
# Persist the frame including every derived bound column. index=False keeps
# the row index from being written as an additional column.
# NOTE: bound columns may contain exception messages (strings) for rows where
# an estimator failed, because wrapper_debug returns str(exc) in that case.
data.to_csv('processed-aggregated-ns.csv', index=False)