In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon

from mlscorecheck import auc

In [19]:
data = pd.read_csv('raw-single3.csv')

In [20]:
data[:10]

Unnamed: 0,dataset,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,p,n,n_nodes
0,bupa,0.637681,0.6,0.689655,0.715086,0.724638,0.875,0.517241,0.57971,0.571429,40,29,8
1,dermatology-6,0.944444,0.941176,1.0,1.0,1.0,1.0,1.0,0.944056,0.90644,68,4,12
2,glass0,0.674419,0.655172,0.714286,0.80665,0.767442,0.896552,0.5,0.672515,0.375,29,14,9
3,yeast1,0.750842,0.805687,0.616279,0.808332,0.784512,0.886256,0.534884,0.711036,0.635724,211,86,92
4,iris0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.333333,1.0,10,20,3
5,ecoli1,0.911765,0.903846,0.9375,0.971755,0.911765,0.903846,0.9375,0.772388,0.833333,52,16,8
6,ionosphere,0.84507,0.6,0.978261,0.937391,0.929577,0.92,0.934783,0.360714,0.125,25,46,9
7,saheart,0.666667,0.672131,0.65625,0.727459,0.752688,0.852459,0.5625,0.653117,0.55,61,32,8
8,wdbc,0.877193,0.880952,0.875,0.956349,0.903509,0.809524,0.958333,0.373626,0.588016,42,72,22
9,appendicitis,0.727273,0.722222,0.75,0.819444,0.909091,0.944444,0.75,0.797619,0.5,18,4,4


In [21]:
data.columns

Index(['dataset', 'acc', 'sens', 'spec', 'auc', 'best_acc', 'best_sens',
       'best_spec', 'threshold', 'best_threshold', 'p', 'n', 'n_nodes'],
      dtype='object')

In [22]:
lower_bounds = ['min', 'rmin', 'grmin', 'amin', 'armin', 'onmin']
#lower_bounds = ['min']
upper_bounds = ['max', 'amax', 'maxa']
#upper_bounds = ['max']

In [23]:
def wrapper(func, **kwargs):
    try:
        return func(**kwargs)[0]
    except Exception as exc:
        return str(exc)

In [24]:
for lb in lower_bounds:
    data[f'auc_{lb}'] = data.apply(
        lambda row:
        wrapper(auc.auc_lower_from,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=lb),
        axis=1
    )

    data[f'auc_{lb}_best'] = data.apply(
        lambda row:
        wrapper(auc.auc_lower_from,
        scores={
            'acc': row['best_acc'],
            'sens': row['best_sens'],
            'spec': row['best_spec']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=lb),
        axis=1
    )

In [25]:
for ub in upper_bounds:
    data[f'auc_{ub}'] = data.apply(
        lambda row:
        wrapper(
        auc.auc_upper_from,
        scores={
            'acc': row['acc'] if ub != 'maxa' else row['best_acc'],
            'sens': row['sens'] if ub != 'maxa' else row['best_sens'],
            'spec': row['spec'] if ub != 'maxa' else row['best_spec']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        upper=ub),
        axis=1
    )

    data[f'auc_{ub}_best'] = data.apply(
        lambda row:
        wrapper(
        auc.auc_upper_from,
        scores={
            'acc': row['best_acc'],
            'sens': row['best_sens'],
            'spec': row['best_spec']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        upper=ub),
        axis=1
    )

In [26]:
lower_bounds = ['min', 'rmin']
upper_bounds = ['max', 'rmax', 'onmax']

In [27]:
for lb in lower_bounds:
    data[f'acc_{lb}'] = data.apply(
        lambda row:
        wrapper(auc.acc_lower_from,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=lb),
        axis=1
    )

In [28]:
for ub in upper_bounds:
    data[f'acc_{ub}'] = data.apply(
        lambda row:
        wrapper(auc.acc_upper_from,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        upper=ub),
        axis=1
    )

In [29]:
lower_bounds = ['min']
upper_bounds = ['max', 'rmax', 'onmax']

In [30]:
for lb in lower_bounds:
    data[f'max_acc_{lb}'] = data.apply(
        lambda row:
        wrapper(auc.max_acc_lower_from,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=lb),
        axis=1
    )

In [31]:
for ub in upper_bounds:
    data[f'max_acc_{ub}'] = data.apply(
        lambda row:
        wrapper(auc.max_acc_upper_from,
        scores={
            'acc': row['acc'],
            'sens': row['sens'],
            'spec': row['spec'],
            'auc': row['auc']
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        upper=ub),
        axis=1
    )

In [32]:
data.columns

Index(['dataset', 'acc', 'sens', 'spec', 'auc', 'best_acc', 'best_sens',
       'best_spec', 'threshold', 'best_threshold', 'p', 'n', 'n_nodes',
       'auc_min', 'auc_min_best', 'auc_rmin', 'auc_rmin_best', 'auc_grmin',
       'auc_grmin_best', 'auc_amin', 'auc_amin_best', 'auc_armin',
       'auc_armin_best', 'auc_onmin', 'auc_onmin_best', 'auc_max',
       'auc_max_best', 'auc_amax', 'auc_amax_best', 'auc_maxa',
       'auc_maxa_best', 'acc_min', 'acc_rmin', 'acc_max', 'acc_rmax',
       'acc_onmax', 'max_acc_min', 'max_acc_max', 'max_acc_rmax',
       'max_acc_onmax'],
      dtype='object')

In [33]:
data.head()

Unnamed: 0,dataset,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,...,auc_maxa_best,acc_min,acc_rmin,acc_max,acc_rmax,acc_onmax,max_acc_min,max_acc_max,max_acc_rmax,max_acc_onmax
0,bupa,0.637681,0.6,0.689655,0.715086,0.724638,0.875,0.517241,0.57971,0.571429,...,0.84451,'float' object is not subscriptable,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,0.627327,0.880296,0.855432,0.760591
1,dermatology-6,0.944444,0.941176,1.0,1.0,1.0,1.0,1.0,0.944056,0.90644,...,1.0,'float' object is not subscriptable,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,0.996761,1.0,1.0,1.0
2,glass0,0.674419,0.655172,0.714286,0.80665,0.767442,0.896552,0.5,0.672515,0.375,...,0.876953,'float' object is not subscriptable,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,0.70853,0.937081,0.929434,0.874163
3,yeast1,0.750842,0.805687,0.616279,0.808332,0.784512,0.886256,0.534884,0.711036,0.635724,...,0.887242,'float' object is not subscriptable,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,0.71911,0.944529,0.937862,0.889059
4,iris0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.333333,1.0,...,1.0,'float' object is not subscriptable,'float' object is not subscriptable,'float' object is not subscriptable,invalid index to scalar variable.,'float' object is not subscriptable,0.993333,1.0,1.0,1.0


In [34]:
data.to_csv('processed-single3.csv', index=False)