In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon

from mlscorecheck import auc

In [2]:
# Load the raw results table. As the preview in the next cell shows, each row
# carries a dataset name, the scores acc/sens/spec/auc, their best_*
# counterparts, two thresholds, and the counts p and n
# (presumably positive/negative sample counts -- TODO confirm).
data = pd.read_csv('raw-single.csv')

In [3]:
# Preview the first rows of the freshly loaded table.
data.head()

Unnamed: 0,dataset,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,p,n
0,bupa,0.5942,0.0345,1.0,0.610345,0.6812,0.4483,0.85,0.206719,0.520119,29,40
1,new_thyroid1,0.9535,0.9091,0.9688,0.93892,0.9535,0.9091,0.9688,0.089821,1.0,11,32
2,yeast1,0.7037,0.0,0.9952,0.783525,0.7912,0.4943,0.9143,0.486638,0.465209,87,210
3,iris0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.080741,1.0,8,22
4,new_thyroid1,0.907,0.6364,1.0,1.0,1.0,1.0,1.0,0.879937,0.5,11,32


In [4]:
# List the column names that the bound computations read from.
data.columns

Index(['dataset', 'acc', 'sens', 'spec', 'auc', 'best_acc', 'best_sens',
       'best_spec', 'threshold', 'best_threshold', 'p', 'n'],
      dtype='object')

In [5]:
# Names of the AUC bound types to evaluate; each one is passed as the
# `lower=` / `upper=` argument of auc.auc_lower_from / auc.auc_upper_from.
# NOTE: both names are reassigned further down for the accuracy bounds, so
# re-run this cell before re-running the AUC bound cells.
lower_bounds = ['min', 'rmin', 'grmin', 'amin', 'armin']
upper_bounds = ['max', 'amax', 'maxa']

In [6]:
def wrapper(func, **kwargs):
    """Call ``func(**kwargs)`` without letting an exception escape.

    Args:
        func: the callable to invoke.
        **kwargs: keyword arguments forwarded to ``func`` unchanged.

    Returns:
        Whatever ``func`` returns, or, if ``func`` raises any ``Exception``,
        the text of that exception (``str(exc)``).
    """
    try:
        outcome = func(**kwargs)
    except Exception as err:
        # Keep the failure reason in place of the value instead of aborting.
        outcome = str(err)
    return outcome

In [7]:
def _auc_lower_row(row, kind, columns):
    """Lower AUC bound of type `kind` for a single row of `data`.

    `columns` names the (accuracy, sensitivity, specificity) columns to read.
    The call goes through `wrapper`, so a failing computation yields the
    error message text instead of raising.
    """
    acc_col, sens_col, spec_col = columns
    return wrapper(
        auc.auc_lower_from,
        scores={
            'acc': row[acc_col],
            'sens': row[sens_col],
            'spec': row[spec_col],
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=kind,
    )


# For every lower-bound type add two columns: `auc_<type>` computed from the
# plain scores and `auc_<type>_best` computed from the best_* scores.
for lb in lower_bounds:
    data[f'auc_{lb}'] = data.apply(
        lambda row: _auc_lower_row(row, lb, ('acc', 'sens', 'spec')),
        axis=1,
    )
    data[f'auc_{lb}_best'] = data.apply(
        lambda row: _auc_lower_row(
            row, lb, ('best_acc', 'best_sens', 'best_spec')
        ),
        axis=1,
    )

In [8]:
def _auc_upper_row(row, kind, columns):
    """Upper AUC bound of type `kind` for a single row of `data`.

    `columns` names the (accuracy, sensitivity, specificity) columns to read.
    The call goes through `wrapper`, so a failing computation yields the
    error message text instead of raising.
    """
    acc_col, sens_col, spec_col = columns
    return wrapper(
        auc.auc_upper_from,
        scores={
            'acc': row[acc_col],
            'sens': row[sens_col],
            'spec': row[spec_col],
        },
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        upper=kind,
    )


# For every upper-bound type add `auc_<type>` and `auc_<type>_best`.
for ub in upper_bounds:
    # The 'maxa' bound reads the best_* scores even for the plain column, so
    # `auc_maxa` and `auc_maxa_best` are computed from identical inputs
    # (presumably because that bound is defined on the best-accuracy
    # scores -- TODO confirm against the mlscorecheck documentation).
    if ub != 'maxa':
        plain_columns = ('acc', 'sens', 'spec')
    else:
        plain_columns = ('best_acc', 'best_sens', 'best_spec')

    data[f'auc_{ub}'] = data.apply(
        lambda row: _auc_upper_row(row, ub, plain_columns),
        axis=1,
    )
    data[f'auc_{ub}_best'] = data.apply(
        lambda row: _auc_upper_row(
            row, ub, ('best_acc', 'best_sens', 'best_spec')
        ),
        axis=1,
    )

In [9]:
# Bound types for the accuracy bounds computed in the next two cells (passed
# as `lower=` / `upper=` to auc.acc_lower_from / auc.acc_upper_from).
# These assignments overwrite the AUC bound lists defined earlier.
lower_bounds = ['min', 'rmin']
upper_bounds = ['max', 'rmax']

In [10]:
def _acc_lower_row(row, kind):
    """Lower accuracy bound of type `kind` for a single row of `data`.

    Reads the row's acc, sens, spec and auc scores together with p and n.
    The call goes through `wrapper`, so a failing computation yields the
    error message text instead of raising.
    """
    row_scores = {
        'acc': row['acc'],
        'sens': row['sens'],
        'spec': row['spec'],
        'auc': row['auc'],
    }
    return wrapper(
        auc.acc_lower_from,
        scores=row_scores,
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=kind,
    )


# One `acc_<type>` column per lower-bound type.
for lb in lower_bounds:
    data[f'acc_{lb}'] = data.apply(
        lambda row: _acc_lower_row(row, lb),
        axis=1,
    )

In [11]:
def _acc_upper_row(row, kind):
    """Upper accuracy bound of type `kind` for a single row of `data`.

    Reads the row's acc, sens, spec and auc scores together with p and n.
    The call goes through `wrapper`, so a failing computation yields the
    error message text instead of raising.
    """
    row_scores = {
        'acc': row['acc'],
        'sens': row['sens'],
        'spec': row['spec'],
        'auc': row['auc'],
    }
    return wrapper(
        auc.acc_upper_from,
        scores=row_scores,
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        upper=kind,
    )


# One `acc_<type>` column per upper-bound type.
for ub in upper_bounds:
    data[f'acc_{ub}'] = data.apply(
        lambda row: _acc_upper_row(row, ub),
        axis=1,
    )

In [12]:
# Bound types for the maximum-accuracy bounds computed in the next two cells
# (passed as `lower=` / `upper=` to auc.max_acc_lower_from /
# auc.max_acc_upper_from). These assignments overwrite the previous lists.
lower_bounds = ['min']
upper_bounds = ['max', 'rmax']

In [13]:
def _max_acc_lower_row(row, kind):
    """Lower maximum-accuracy bound of type `kind` for a single row of `data`.

    Reads the row's acc, sens, spec and auc scores together with p and n.
    The call goes through `wrapper`, so a failing computation yields the
    error message text instead of raising.
    """
    row_scores = {
        'acc': row['acc'],
        'sens': row['sens'],
        'spec': row['spec'],
        'auc': row['auc'],
    }
    return wrapper(
        auc.max_acc_lower_from,
        scores=row_scores,
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        lower=kind,
    )


# One `max_acc_<type>` column per lower-bound type.
for lb in lower_bounds:
    data[f'max_acc_{lb}'] = data.apply(
        lambda row: _max_acc_lower_row(row, lb),
        axis=1,
    )

In [14]:
def _max_acc_upper_row(row, kind):
    """Upper maximum-accuracy bound of type `kind` for a single row of `data`.

    Reads the row's acc, sens, spec and auc scores together with p and n.
    The call goes through `wrapper`, so a failing computation yields the
    error message text instead of raising.
    """
    row_scores = {
        'acc': row['acc'],
        'sens': row['sens'],
        'spec': row['spec'],
        'auc': row['auc'],
    }
    return wrapper(
        auc.max_acc_upper_from,
        scores=row_scores,
        p=row['p'],
        n=row['n'],
        eps=1e-4,
        upper=kind,
    )


# One `max_acc_<type>` column per upper-bound type.
for ub in upper_bounds:
    data[f'max_acc_{ub}'] = data.apply(
        lambda row: _max_acc_upper_row(row, ub),
        axis=1,
    )

In [15]:
# Preview the table again, now with the computed bound columns appended.
data.head()

Unnamed: 0,dataset,acc,sens,spec,auc,best_acc,best_sens,best_spec,threshold,best_threshold,...,auc_amax_best,auc_maxa,auc_maxa_best,acc_min,acc_rmin,acc_max,acc_rmax,max_acc_min,max_acc_max,max_acc_rmax
0,bupa,0.5942,0.0345,1.0,0.610345,0.6812,0.4483,0.85,0.206719,0.520119,...,1.0,0.791563,0.791563,0.25648,0.42029,0.836274,0.777242,0.57971,0.836274,0.777242
1,new_thyroid1,0.9535,0.9091,0.9688,0.93892,0.9535,0.9091,0.9688,0.089821,1.0,...,1.0,0.994345,0.994345,0.240163,0.255814,0.984401,0.983894,0.847377,0.984401,0.983894
2,yeast1,0.7037,0.0,0.9952,0.783525,0.7912,0.4943,0.9143,0.486638,0.465209,...,1.0,0.894855,0.894855,0.229488,0.292929,0.936617,0.927693,0.707071,0.936617,0.927693
3,iris0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.080741,1.0,...,1.0,1.0,1.0,0.26664,0.266667,1.0,1.0,0.993746,1.0,1.0
4,new_thyroid1,0.907,0.6364,1.0,1.0,1.0,1.0,1.0,0.879937,0.5,...,1.0,1.0,1.0,0.255788,0.255814,1.0,1.0,0.99383,1.0,1.0


In [81]:
# Write the table with all bound columns to disk; index=False leaves the row
# index out of the file. Cells where a bound computation failed hold the
# error message text returned by `wrapper`, and are written as such.
data.to_csv('processed-single.csv', index=False)