In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon

from mlscorecheck import auc

In [19]:
# Load the raw results table from the working directory. Later cells add
# the estimated-bound columns to this same frame in place.
data = pd.read_csv('raw-single.csv')

In [20]:
# Preview the first ten rows of the raw table.
data.head(10)

Unnamed: 0,dataset,classifier,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,p,n
0,bupa,"{'max_depth': 9, 'random_state': 5}",0.57971,0.0,1.0,0.605603,0.608696,0.586207,0.625,inf,1.0,29,40
1,vehicle0,"{'probability': True, 'C': 1.4971919355651315}",0.847059,0.868421,0.840909,0.934809,0.882353,0.842105,0.893939,0.271316,0.339899,38,132
2,yeast1,"{'probability': True, 'C': 0.5420412117184014}",0.646465,0.802326,0.582938,0.788659,0.79798,0.488372,0.924171,0.208912,0.373224,86,211
3,yeast1,"{'max_depth': 2, 'random_state': 5}",0.723906,0.685393,0.740385,0.81096,0.791246,0.426966,0.947115,0.301002,0.371073,89,208
4,page-blocks-1-3_vs_4,"{'max_depth': 7, 'random_state': 5}",0.947368,1.0,0.945652,1.0,1.0,1.0,1.0,0.08,0.85,3,92
5,CM1,"{'max_depth': 7, 'random_state': 5}",0.64,0.666667,0.635294,0.774902,0.87,0.133333,1.0,0.10377,0.403494,15,85
6,monk-2,"{'probability': True, 'C': 0.6975343728942637}",0.471264,0.0,1.0,1.0,1.0,1.0,1.0,inf,0.800915,46,41
7,page-blocks-1-3_vs_4,{'n_neighbors': 4},0.968421,0.0,1.0,0.978261,0.978947,0.333333,1.0,inf,1.0,3,92
8,wdbc,"{'probability': True, 'C': 0.5570006378295725}",0.929825,0.934783,0.926471,0.977302,0.929825,0.826087,1.0,0.277218,0.5,46,68
9,ecoli1,"{'max_depth': 8, 'random_state': 5}",0.823529,0.076923,1.0,0.981818,0.955882,0.923077,0.963636,0.913128,0.494691,13,55


In [21]:
# Column names of the raw table, before any bound columns are added.
data.columns

Index(['dataset', 'classifier', 'acc', 'sens', 'spec', 'auc', 'best_acc',
       'best_sens', 'best_spec', 'threshold', 'best_threshold', 'p', 'n'],
      dtype='object')

In [22]:
# Estimator names for the AUC bounds; the loops in the next two cells pass
# each one as `lower=` to auc.auc_lower_from or `upper=` to auc.auc_upper_from.
# NOTE: `lower_bounds` / `upper_bounds` are rebound with different contents
# further down, so the cells of this notebook must be run in order.
lower_bounds = ['min', 'rmin', 'grmin', 'amin', 'armin', 'onmin']
upper_bounds = ['max', 'amax', 'maxa']

In [23]:
def wrapper(func, **kwargs):
    """Call ``func(**kwargs)`` and never let an ordinary exception escape.

    Args:
        func: the callable to invoke.
        **kwargs: keyword arguments forwarded to ``func`` unchanged.

    Returns:
        Whatever ``func`` returns; if ``func`` raises an ``Exception``, the
        exception's message (``str(exc)``) is returned instead.
    """
    try:
        outcome = func(**kwargs)
    except Exception as error:
        # Report the failure as text so a row-wise apply keeps going; callers
        # therefore receive a string where a number was expected.
        outcome = str(error)
    return outcome

In [24]:
def _auc_lower_estimate(row, bound, prefix=''):
    """Estimate a lower AUC bound for one row of ``data``.

    ``prefix`` selects the score triplet: '' reads acc/sens/spec, 'best_'
    reads best_acc/best_sens/best_spec. The call goes through ``wrapper``,
    so a failing estimate yields the error message instead of raising.
    """
    reported = {key: row[prefix + key] for key in ('acc', 'sens', 'spec')}
    return wrapper(
        auc.auc_lower_from,
        scores=reported,
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=bound,
    )


# For each lower-bound estimator add `auc_<bound>` (from acc/sens/spec) and
# then `auc_<bound>_best` (from the best_* scores), in that column order.
for lb in lower_bounds:
    data[f'auc_{lb}'] = data.apply(
        lambda row: _auc_lower_estimate(row, lb), axis=1
    )
    data[f'auc_{lb}_best'] = data.apply(
        lambda row: _auc_lower_estimate(row, lb, prefix='best_'), axis=1
    )

In [25]:
# For each upper-bound estimator add `auc_<bound>` and then `auc_<bound>_best`.
# The `_best` column always reads the best_* scores. The plain column reads
# acc/sens/spec, except for 'maxa', which reads the best_* scores as well
# (so `auc_maxa` and `auc_maxa_best` are computed from identical inputs).
for ub in upper_bounds:
    plain_prefix = 'best_' if ub == 'maxa' else ''

    for column, prefix in (
        (f'auc_{ub}', plain_prefix),
        (f'auc_{ub}_best', 'best_'),
    ):
        data[column] = data.apply(
            lambda row: wrapper(
                auc.auc_upper_from,
                scores={
                    key: row[prefix + key] for key in ('acc', 'sens', 'spec')
                },
                p=row['p'],
                n=row['n'],
                eps=1e-4,
                upper=ub,
            ),
            axis=1,
        )

In [26]:
# Rebind the estimator lists for the accuracy bounds; the next two cells pass
# each name to auc.acc_lower_from / auc.acc_upper_from.
# NOTE(review): with 'onmax' listed here the upper-bound loop creates an
# `acc_onmax` column, yet the recorded `data.columns` / `data.head()` outputs
# further down show none - the saved outputs look stale relative to this
# cell; confirm and re-run.
lower_bounds = ['min', 'rmin']
upper_bounds = ['max', 'rmax', 'onmax']

In [27]:
# Add one `acc_<bound>` column per lower-bound estimator, computed row by row
# with auc.acc_lower_from from the acc/sens/spec/auc scores.
for lb in lower_bounds:

    def _acc_lower_estimate(row, bound=lb):
        """Lower accuracy estimate for one row (error message on failure)."""
        reported = {key: row[key] for key in ('acc', 'sens', 'spec', 'auc')}
        return wrapper(
            auc.acc_lower_from,
            scores=reported,
            p=row['p'],
            n=row['n'],
            eps=1e-4,
            lower=bound,
        )

    data[f'acc_{lb}'] = data.apply(_acc_lower_estimate, axis=1)

In [28]:
# Add one `acc_<bound>` column per upper-bound estimator, computed row by row
# with auc.acc_upper_from from the acc/sens/spec/auc scores. Failures are
# stored as the error message returned by `wrapper`.
for ub in upper_bounds:
    data[f'acc_{ub}'] = data.apply(
        lambda row: wrapper(
            auc.acc_upper_from,
            scores={
                key: row[key] for key in ('acc', 'sens', 'spec', 'auc')
            },
            p=row['p'],
            n=row['n'],
            eps=1e-4,
            upper=ub,
        ),
        axis=1,
    )

In [29]:
# Rebind the estimator lists once more, this time for the `max_acc_*` columns;
# the next two cells pass each name to auc.max_acc_lower_from /
# auc.max_acc_upper_from.
lower_bounds = ['min']
upper_bounds = ['max', 'rmax', 'onmax']

In [30]:
# Add one `max_acc_<bound>` column per lower-bound estimator, computed row by
# row with auc.max_acc_lower_from from the acc/sens/spec/auc scores.
for lb in lower_bounds:

    def _max_acc_lower_estimate(row, bound=lb):
        """Call auc.max_acc_lower_from for one row (error message on failure)."""
        call_args = {
            'scores': {
                key: row[key] for key in ('acc', 'sens', 'spec', 'auc')
            },
            'p': row['p'],
            'n': row['n'],
            'eps': 1e-4,
            'lower': bound,
        }
        return wrapper(auc.max_acc_lower_from, **call_args)

    data[f'max_acc_{lb}'] = data.apply(_max_acc_lower_estimate, axis=1)

In [31]:
# Add one `max_acc_<bound>` column per upper-bound estimator, computed row by
# row with auc.max_acc_upper_from from the acc/sens/spec/auc scores. Failures
# are stored as the error message returned by `wrapper`.
for ub in upper_bounds:
    data[f'max_acc_{ub}'] = data.apply(
        lambda row: wrapper(
            auc.max_acc_upper_from,
            scores=dict(
                acc=row['acc'],
                sens=row['sens'],
                spec=row['spec'],
                auc=row['auc'],
            ),
            p=row['p'],
            n=row['n'],
            eps=1e-4,
            upper=ub,
        ),
        axis=1,
    )

In [32]:
# Column names after the bound columns have been added by the loops above.
data.columns

Index(['dataset', 'classifier', 'acc', 'sens', 'spec', 'auc', 'best_acc',
       'best_sens', 'best_spec', 'threshold', 'best_threshold', 'p', 'n',
       'auc_min', 'auc_min_best', 'auc_rmin', 'auc_rmin_best', 'auc_grmin',
       'auc_grmin_best', 'auc_amin', 'auc_amin_best', 'auc_armin',
       'auc_armin_best', 'auc_onmin', 'auc_onmin_best', 'auc_max',
       'auc_max_best', 'auc_amax', 'auc_amax_best', 'auc_maxa',
       'auc_maxa_best', 'acc_min', 'acc_rmin', 'acc_max', 'acc_rmax',
       'max_acc_min', 'max_acc_max', 'max_acc_rmax', 'max_acc_onmax'],
      dtype='object')

In [33]:
# Preview the first rows of the augmented table.
data.head()

Unnamed: 0,dataset,classifier,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,...,auc_maxa,auc_maxa_best,acc_min,acc_rmin,acc_max,acc_rmax,max_acc_min,max_acc_max,max_acc_rmax,max_acc_onmax
0,bupa,"{'max_depth': 9, 'random_state': 5}",0.57971,0.0,1.0,0.605603,0.608696,0.586207,0.625,inf,...,0.685936,0.685936,0.254487,0.42029,0.834281,0.772955,0.57971,0.834281,0.772955,0.668562
1,vehicle0,"{'probability': True, 'C': 1.4971919355651315}",0.847059,0.868421,0.840909,0.934809,0.882353,0.842105,0.893939,0.271316,...,0.960195,0.960195,0.208935,0.223529,0.98545,0.984943,0.849453,0.98545,0.984943,0.9709
2,yeast1,"{'probability': True, 'C': 0.5420412117184014}",0.646465,0.802326,0.582938,0.788659,0.79798,0.488372,0.924171,0.208912,...,0.900903,0.900903,0.228337,0.289562,0.938832,0.930489,0.710438,0.938832,0.930489,0.877665
3,yeast1,"{'max_depth': 2, 'random_state': 5}",0.723906,0.685393,0.740385,0.81096,0.791246,0.426966,0.947115,0.301002,...,0.896275,0.896275,0.242985,0.299663,0.943382,0.936695,0.718242,0.943382,0.936695,0.886764
4,page-blocks-1-3_vs_4,"{'max_depth': 7, 'random_state': 5}",0.947368,1.0,0.945652,1.0,1.0,1.0,1.0,0.08,...,1.0,1.0,0.031576,0.031579,1.0,1.0,0.997527,1.0,1.0,1.0


In [34]:
# Write the augmented table to disk; index=False keeps the row index out of
# the file. Any error messages returned by `wrapper` are written as text in
# the corresponding bound columns.
data.to_csv('processed-single.csv', index=False)