In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error

In [3]:
# Load the results table that every later cell reads from.
data = pd.read_csv(filepath_or_buffer='single-processed.csv')

In [4]:
# Show the column labels of the loaded frame.
data.columns

Index(['dataset', 'acc', 'sens', 'spec', 'auc', 'best_acc', 'best_sens',
       'best_spec', 'threshold', 'best_threshold', 'p', 'n', 'auc_min',
       'auc_min_best', 'auc_rmin', 'auc_rmin_best', 'auc_grmin',
       'auc_grmin_best', 'auc_amin', 'auc_amin_best', 'auc_armin',
       'auc_armin_best', 'auc_max', 'auc_max_best', 'auc_amax',
       'auc_amax_best', 'auc_maxa', 'auc_maxa_best', 'acc_min', 'acc_rmin',
       'acc_max', 'acc_rmax', 'max_acc_min', 'max_acc_max', 'max_acc_rmax'],
      dtype='object')

In [5]:
def convert(x):
    """Coerce ``x`` to ``float``, returning ``None`` when that is not possible.

    Parameters
    ----------
    x : object
        Any value; it is handed to ``float()`` unchanged.

    Returns
    -------
    float or None
        ``float(x)`` on success, ``None`` if ``float()`` raises
        ``TypeError``, ``ValueError`` or ``OverflowError``.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Only genuine conversion failures are swallowed. A bare ``except``
        # would also trap KeyboardInterrupt/SystemExit and mask unrelated bugs.
        return None

In [6]:
# Each derived column is the arithmetic mean of two existing columns.
# Both inputs go through convert() first, so entries float() cannot parse
# come back as None instead of raising.
midpoint_sources = {
    'auc_min_max': ('auc_min', 'auc_max'),
    'auc_rmin_max': ('auc_rmin', 'auc_max'),
    'auc_min_max_best': ('auc_min_best', 'auc_max_best'),
    'auc_rmin_max_best': ('auc_rmin_best', 'auc_max_best'),
    'auc_min_maxa_best': ('auc_min_best', 'auc_maxa_best'),
    'auc_rmin_maxa_best': ('auc_rmin_best', 'auc_maxa_best'),
    'max_acc_min_max': ('max_acc_min', 'max_acc_max'),
    'max_acc_min_rmax': ('max_acc_min', 'max_acc_rmax'),
}

# Dict order is insertion order, so the new columns are appended in the
# same sequence as listed above.
for target, (first, second) in midpoint_sources.items():
    data[target] = (data[first].apply(convert) + data[second].apply(convert)) / 2.0

In [7]:
# Keep only rows where 'auc' and both midpoint columns are non-null.
tmp = data.loc[:, ['auc', 'auc_min_max', 'auc_rmin_max']].dropna(how='any')

In [8]:
# R^2 of tmp['auc_min_max'] and tmp['auc_rmin_max'], each scored against tmp['auc'].
tuple(
    r2_score(y_true=tmp['auc'], y_pred=tmp[name])
    for name in ('auc_min_max', 'auc_rmin_max')
)

(-2.7113151861036804, -0.05887983370408478)

In [9]:
# MAPE of tmp['auc_min_max'] and tmp['auc_rmin_max'], each scored against tmp['auc'].
tuple(
    mean_absolute_percentage_error(y_true=tmp['auc'], y_pred=tmp[name])
    for name in ('auc_min_max', 'auc_rmin_max')
)

(np.float64(0.24128938580760967), np.float64(0.13752523993517546))

In [10]:
# Keep only rows where 'auc' and both '*_max_best' midpoint columns are non-null.
tmp = data.loc[:, ['auc', 'auc_min_max_best', 'auc_rmin_max_best']].dropna(how='any')

In [11]:
# R^2 of tmp['auc_min_max_best'] and tmp['auc_rmin_max_best'], each scored against tmp['auc'].
tuple(
    r2_score(y_true=tmp['auc'], y_pred=tmp[name])
    for name in ('auc_min_max_best', 'auc_rmin_max_best')
)

(0.8128207007728161, 0.7860444673986366)

In [12]:
# MAPE of tmp['auc_min_max_best'] and tmp['auc_rmin_max_best'], each scored against tmp['auc'].
tuple(
    mean_absolute_percentage_error(y_true=tmp['auc'], y_pred=tmp[name])
    for name in ('auc_min_max_best', 'auc_rmin_max_best')
)

(np.float64(0.04980966004096219), np.float64(0.061537418141244644))

In [13]:
# Keep only rows where 'auc' and both '*_maxa_best' midpoint columns are non-null.
tmp = data.loc[:, ['auc', 'auc_min_maxa_best', 'auc_rmin_maxa_best']].dropna(how='any')

In [14]:
# R^2 of tmp['auc_min_maxa_best'] and tmp['auc_rmin_maxa_best'], each scored against tmp['auc'].
tuple(
    r2_score(y_true=tmp['auc'], y_pred=tmp[name])
    for name in ('auc_min_maxa_best', 'auc_rmin_maxa_best')
)

(0.6217198875097397, 0.8559622658707935)

In [15]:
# MAPE of tmp['auc_min_maxa_best'] and tmp['auc_rmin_maxa_best'], each scored against tmp['auc'].
tuple(
    mean_absolute_percentage_error(y_true=tmp['auc'], y_pred=tmp[name])
    for name in ('auc_min_maxa_best', 'auc_rmin_maxa_best')
)

(np.float64(0.0685958445382244), np.float64(0.040742849269262534))

In [16]:
# Keep only rows where 'best_acc' and both max-accuracy midpoint columns are non-null.
tmp = data.loc[:, ['best_acc', 'max_acc_min_max', 'max_acc_min_rmax']].dropna(how='any')

In [17]:
# R^2 of tmp['max_acc_min_max'] and tmp['max_acc_min_rmax'], each scored against tmp['best_acc'].
tuple(
    r2_score(y_true=tmp['best_acc'], y_pred=tmp[name])
    for name in ('max_acc_min_max', 'max_acc_min_rmax')
)

(0.8560097676679841, 0.8947964043208955)

In [19]:
# MAPE of tmp['max_acc_min_max'] and tmp['max_acc_min_rmax'], each scored against tmp['best_acc'].
tuple(
    mean_absolute_percentage_error(y_true=tmp['best_acc'], y_pred=tmp[name])
    for name in ('max_acc_min_max', 'max_acc_min_rmax')
)

(np.float64(0.03860364510822232), np.float64(0.033065305922178496))