In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon

from mlscorecheck import auc

In [2]:
data = pd.read_csv('raw-aggregated3.csv')

In [3]:
data.head()

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,best_acc_orig,p,n,n_nodes,classifier,classifier_params
0,0,bupa,8,0.678383,0.705,0.642909,0.730965,0.704413,0.835,0.525219,0.579711,0.489203,0.704413,200,145,20.75,SVC,"{'probability': True, 'C': 0.4603054539689608}"
1,1,ecoli1,7,0.857143,0.857143,0.857143,0.955423,0.910714,0.792208,0.945946,0.229167,0.560225,0.910714,77,259,9.571429,SVC,"{'probability': True, 'C': 0.04459933938631211}"
2,2,glass0,9,0.82649,0.840278,0.799603,0.828311,0.836151,0.861111,0.785714,0.6729,0.5,0.836151,144,70,3.444444,DecisionTreeClassifier,"{'max_depth': 8, 'random_state': 5}"
3,3,vehicle0,2,0.903073,0.893375,0.934848,0.976526,0.931442,0.967559,0.814495,0.764775,0.224935,0.931442,647,199,50.0,SVC,"{'probability': True, 'C': 0.0496136065615112}"
4,4,crx,4,0.843839,0.81236,0.881757,0.914755,0.856118,0.857241,0.85473,0.546707,0.446358,0.856118,357,296,35.5,SVC,"{'probability': True, 'C': 0.2982487258875685}"


In [4]:
lower_bounds = ['min', 'rmin', 'amin', 'armin']
upper_bounds = ['max', 'amax', 'maxa']

In [5]:
def wrapper(func, **kwargs):
    #try:
        return func(**kwargs)[0]
    #except:
    #    return None

In [6]:
def wrapper_debug(func, **kwargs):
    try:
        #print(kwargs, flush=True)
        return func(**kwargs)[0]
    except Exception as exc:
        return str(exc)

In [7]:
for lb in lower_bounds:
    data[f'auc_{lb}'] = data.apply(
        lambda row:
        wrapper_debug(auc.auc_lower_from_aggregated,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        lower=lb),
        axis=1
    )

    data[f'auc_{lb}_best'] = data.apply(
        lambda row:
        wrapper_debug(auc.auc_lower_from_aggregated,
        scores={
            'acc': row['best_acc'],
            'sens': row['best_sens'],
            'spec': row['best_spec']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        lower=lb),
        axis=1
    )

In [8]:
for ub in upper_bounds:
    data[f'auc_{ub}'] = data.apply(
        lambda row:
        wrapper_debug(
        auc.auc_upper_from_aggregated,
        scores={
            'acc': row['acc'] if ub != 'maxa' else row['best_acc'],
            'sens': row['sens'] if ub != 'maxa' else row['best_sens'],
            'spec': row['spec'] if ub != 'maxa' else row['best_spec']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        upper=ub),
        axis=1
    )
    data[f'auc_{ub}_best'] = data.apply(
        lambda row:
        wrapper_debug(
        auc.auc_upper_from_aggregated,
        scores={
            'acc': row['best_acc'],
            'sens': row['best_sens'],
            'spec': row['best_spec']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        upper=ub),
        axis=1
    )

In [9]:
lower_bounds = ['min', 'rmin']
upper_bounds = ['max', 'rmax']

In [10]:
for lb in lower_bounds:
    data[f'acc_{lb}'] = data.apply(
        lambda row:
        wrapper_debug(auc.acc_lower_from_aggregated,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        lower=lb),
        axis=1
    )

In [11]:
for ub in upper_bounds:
    data[f'acc_{ub}'] = data.apply(
        lambda row:
        wrapper_debug(auc.acc_upper_from_aggregated,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        upper=ub),
        axis=1
    )

In [12]:
lower_bounds = ['min']
upper_bounds = ['max', 'rmax']

In [13]:
for lb in lower_bounds:
    data[f'max_acc_{lb}'] = data.apply(
        lambda row:
        wrapper_debug(auc.max_acc_lower_from_aggregated,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        lower=lb),
        axis=1
    )

In [14]:
for ub in upper_bounds:
    data[f'max_acc_{ub}'] = data.apply(
        lambda row:
        wrapper_debug(auc.max_acc_upper_from_aggregated,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        folding={
            'p': row['p'],
            'n': row['n'],
            'n_repeats': 1,
            'n_folds': row['k'],
            'folding': 'stratified_sklearn'
        },
        eps=1e-4,
        upper=ub),
        axis=1
    )

In [15]:
data[data['auc'] > 0.999]

Unnamed: 0.1,Unnamed: 0,dataset,k,acc,sens,spec,auc,best_acc,best_sens,best_spec,...,auc_amax_best,auc_maxa,auc_maxa_best,acc_min,acc_rmin,acc_max,acc_rmax,max_acc_min,max_acc_max,max_acc_rmax
9,9,iris0,3,1.000000,1.000000,1.0,1.000000,1.000000,1.00000,1.0,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
11,11,shuttle-c0-vs-c4,5,0.998359,0.998244,1.0,0.999976,0.998907,0.99883,1.0,...,1.0,0.999992,0.999992,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.
16,16,iris0,4,1.000000,1.000000,1.0,1.000000,1.000000,1.00000,1.0,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
25,25,shuttle-c0-vs-c4,3,1.000000,1.000000,1.0,1.000000,1.000000,1.00000,1.0,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.
27,27,shuttle-c0-vs-c4,3,0.998359,0.998240,1.0,0.999986,0.998359,0.99824,1.0,...,1.0,0.999981,0.999981,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9977,9977,dermatology-6,3,1.000000,1.000000,1.0,1.000000,1.000000,1.00000,1.0,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
9981,9981,monk-2,9,0.972222,0.947692,1.0,1.000000,1.000000,1.00000,1.0,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
9983,9983,dermatology-6,10,1.000000,1.000000,1.0,1.000000,1.000000,1.00000,1.0,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.
9988,9988,iris0,6,1.000000,1.000000,1.0,1.000000,1.000000,1.00000,1.0,...,1.0,1.0,1.0,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.


In [16]:
data.to_csv('processed-aggregated3.csv', index=False)