## Dependencies

In [2]:
import os
import numpy as np
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta as rd
from scipy.stats import ranksums

In [38]:
# Load the experiment results. Each row carries a `label` string of the form
# "<method>_<dataset>_<sample>", which is split into three columns below.
df = pd.read_excel('resultados.xlsx')

# Two dataset names contain underscores themselves; rewrite them with hyphens
# so that splitting the label on '_' yields exactly three parts.
# NOTE(review): 'lendinc_club' is kept exactly as spelled in the original cell;
# presumably it matches the spelling used in the spreadsheet -- confirm.
df['label'] = (
    df['label']
    .str.replace('give_me_some_credit', 'give-me-some-credit', regex=False)
    .str.replace('lendinc_club', 'lending-club', regex=False)
)

# Vectorised split into one column per label part (replaces the slow
# `.map(split).apply(pd.Series)` idiom). Rows with a missing part are dropped
# from `parts`, so after index alignment they end up with NaN in all three
# new columns -- the same outcome the original expression produced.
parts = df['label'].str.split('_', expand=True).dropna()
df[['method', 'dataset', 'sample']] = parts

## Overall Performance

### AUC

In [39]:
# Per-method summary (min / mean / max / std) of the `roc` column on the
# validation sample, printed as a LaTeX table.
auc = (
    df.loc[df['sample'] == 'validate']
      .pivot_table(index='method', values='roc', aggfunc=['min', 'mean', 'max', 'std'])
      .reset_index()
)
# Flatten the (aggfunc, value) column MultiIndex into display headers; the
# order follows the `aggfunc` list above. 'Maximum' fixes the former
# 'Máximum' typo in the table header.
auc.columns = ['Method', 'Minimum', 'Mean', 'Maximum', 'Std. Dev.']
print(auc.to_latex(index=False))

\begin{tabular}{lrrrr}
\toprule
Method &  Minimum &     Mean &  Máximum &  Std. Dev. \\
\midrule
   dcc & 0.587827 & 0.769409 & 0.917901 &   0.082920 \\
   dec & 0.607170 & 0.768938 & 0.923817 &   0.084425 \\
   gau & 0.500000 & 0.634785 & 0.763336 &   0.065052 \\
   kme & 0.448506 & 0.575113 & 0.753337 &   0.074439 \\
   qua & 0.588716 & 0.754714 & 0.919462 &   0.086572 \\
   uni & 0.493665 & 0.529869 & 0.694079 &   0.054815 \\
\bottomrule
\end{tabular}



  print(auc.to_latex(index=False))


### KS

In [40]:
# Per-method summary (min / mean / max / std) of the `ks` column on the
# validation sample, printed as a LaTeX table.
ks = (
    df.loc[df['sample'] == 'validate']
      .pivot_table(index='method', values='ks', aggfunc=['min', 'mean', 'max', 'std'])
      .reset_index()
)
# Flatten the (aggfunc, value) column MultiIndex into display headers; the
# order follows the `aggfunc` list above. 'Maximum' fixes the former
# 'Máximum' typo in the table header.
ks.columns = ['Method', 'Minimum', 'Mean', 'Maximum', 'Std. Dev.']
print(ks.to_latex(index=False))

\begin{tabular}{lrrrr}
\toprule
Method &  Minimum &     Mean &  Máximum &  Std. Dev. \\
\midrule
   dcc & 0.008236 & 0.389404 & 0.706700 &   0.174004 \\
   dec & 0.003397 & 0.387594 & 0.705212 &   0.171343 \\
   gau & 0.000000 & 0.182928 & 0.381886 &   0.109808 \\
   kme & 0.000000 & 0.118286 & 0.462393 &   0.123398 \\
   qua & 0.002725 & 0.364638 & 0.679718 &   0.169512 \\
   uni & 0.000000 & 0.038043 & 0.295606 &   0.078135 \\
\bottomrule
\end{tabular}



  print(ks.to_latex(index=False))


### CPU Time

In [58]:
# Per-method summary (min / mean / max / std) of `completion_time` on the
# validation sample, divided by 1000 and rounded to 4 decimals.
# NOTE(review): the /1000 presumably converts milliseconds to seconds --
# confirm the unit of `completion_time` in the spreadsheet.
cpu = (
    df.loc[df['sample'] == 'validate']
      .pivot_table(index='method',
                   values='completion_time',
                   aggfunc=['min', 'mean', 'max', 'std'])
      .div(1000)
      .round(4)
      .reset_index()
)
# Flatten the (aggfunc, value) column MultiIndex into display headers; the
# order follows the `aggfunc` list above. 'Maximum' fixes the former
# 'Máximum' typo in the table header.
cpu.columns = ['Method', 'Minimum', 'Mean', 'Maximum', 'Std. Dev.']
print(cpu.to_latex(index=False))

\begin{tabular}{lrrrr}
\toprule
Method &  Minimum &   Mean &  Máximum &  Std. Dev. \\
\midrule
   dcc &   0.0669 & 1.1309 &   6.3363 &     1.9117 \\
   dec &   0.1519 & 4.1158 &  23.6292 &     7.4300 \\
   gau &   0.0703 & 1.1783 &   6.3997 &     1.9848 \\
   kme &   0.0695 & 1.1852 &   6.4033 &     1.9869 \\
   qua &   0.0675 & 1.1519 &   6.2736 &     1.9435 \\
   uni &   0.0660 & 1.1114 &   6.3558 &     1.9364 \\
\bottomrule
\end{tabular}



  print(cpu.to_latex(index=False))


## Wilcoxon Rank-Sum Test

### AUC

In [48]:
# Validation-sample rows only, re-indexed from 0; shared by the cells below.
is_validate = df['sample'].eq('validate')
aux = df[is_validate].reset_index(drop=True)

In [50]:
# Rank-sum contrasts on `roc`: each of the first two methods against every
# remaining method. groupby iterates groups in sorted key order, so the
# positional slices pick 'dcc' and 'dec' as the reference methods in the
# recorded output.
# The per-method series are built once here instead of being rebuilt on every
# outer-loop iteration.
roc_by_method = list(aux.groupby('method')['roc'])

l = []
for m1, d1 in roc_by_method[:2]:
    for m2, d2 in roc_by_method[2:]:
        res = ranksums(d1, d2)
        l.append([f'{m1} vs {m2}', res.statistic, res.pvalue])

# NOTE(review): after round(6), very small p-values are printed as 0.000000.
print(pd.DataFrame(l, columns=['Contrast', 'Wilcoxon', 'p-value']).round(6).to_latex(index=False))

\begin{tabular}{lrr}
\toprule
  Contrast &  Wilcoxon &  p-value \\
\midrule
dcc vs gau & 14.512486 & 0.000000 \\
dcc vs kme & 16.554207 & 0.000000 \\
dcc vs qua &  2.046219 & 0.040735 \\
dcc vs uni & 17.636926 & 0.000000 \\
dec vs gau & 14.352028 & 0.000000 \\
dec vs kme & 16.472478 & 0.000000 \\
dec vs qua &  1.927000 & 0.053980 \\
dec vs uni & 17.593437 & 0.000000 \\
\bottomrule
\end{tabular}



  print(pd.DataFrame(l,columns=['Contrast','Wilcoxon','p-value']).round(6).to_latex(index=False))


### KS

In [51]:
# Rank-sum contrasts on `ks`: each of the first two methods against every
# remaining method. groupby iterates groups in sorted key order, so the
# positional slices pick 'dcc' and 'dec' as the reference methods in the
# recorded output.
# The per-method series are built once here instead of being rebuilt on every
# outer-loop iteration.
ks_by_method = list(aux.groupby('method')['ks'])

l = []
for m1, d1 in ks_by_method[:2]:
    for m2, d2 in ks_by_method[2:]:
        res = ranksums(d1, d2)
        l.append([f'{m1} vs {m2}', res.statistic, res.pvalue])

# NOTE(review): after round(6), very small p-values are printed as 0.000000.
print(pd.DataFrame(l, columns=['Contrast', 'Wilcoxon', 'p-value']).round(6).to_latex(index=False))

\begin{tabular}{lrr}
\toprule
  Contrast &  Wilcoxon &  p-value \\
\midrule
dcc vs gau & 12.912401 & 0.000000 \\
dcc vs kme & 14.747175 & 0.000000 \\
dcc vs qua &  1.729052 & 0.083800 \\
dcc vs uni & 17.184043 & 0.000000 \\
dec vs gau & 12.691958 & 0.000000 \\
dec vs kme & 14.601338 & 0.000000 \\
dec vs qua &  1.471493 & 0.141158 \\
dec vs uni & 17.157050 & 0.000000 \\
\bottomrule
\end{tabular}



  print(pd.DataFrame(l,columns=['Contrast','Wilcoxon','p-value']).round(6).to_latex(index=False))


## Winning Method on Each Dataset

### AUC

In [52]:
# Mean `roc` per dataset (rows) and method (columns) on the validation sample.
mean_roc = aux.pivot_table(values='roc', index='dataset', columns=['method'], aggfunc='mean')

# `winner` is the method whose column holds the row-wise maximum.
ind = mean_roc.assign(winner=mean_roc.idxmax(axis=1))

latex_table = ind.round(4).to_latex()
print(latex_table)

\begin{tabular}{lrrrrrrl}
\toprule
method &     dcc &     dec &     gau &     kme &     qua &     uni & winner \\
dataset             &         &         &         &         &         &         &        \\
\midrule
australian          &  0.8484 &  0.8442 &  0.6145 &  0.5478 &  0.8365 &  0.5326 &    dcc \\
farmers             &  0.6600 &  0.6686 &  0.6436 &  0.5220 &  0.6601 &  0.5000 &    dec \\
german              &  0.6732 &  0.6776 &  0.6389 &  0.6103 &  0.6634 &  0.5074 &    dec \\
give-me-some-credit &  0.8351 &  0.8395 &  0.6633 &  0.6549 &  0.8058 &  0.5000 &    dec \\
hmeq                &  0.9059 &  0.9103 &  0.6389 &  0.5080 &  0.9024 &  0.5073 &    dec \\
japan               &  0.8441 &  0.8423 &  0.5867 &  0.5000 &  0.8369 &  0.5000 &    dcc \\
lending-club        &  0.6857 &  0.6681 &  0.6719 &  0.6738 &  0.6600 &  0.6725 &    dcc \\
mexico              &  0.7629 &  0.7616 &  0.7313 &  0.6681 &  0.7112 &  0.5000 &    dcc \\
mortgage            &  0.7070 &  0.6966 &  0.6460

  print(ind.round(4).to_latex())


### KS

In [53]:
# Mean `ks` per dataset (rows) and method (columns) on the validation sample.
mean_ks = aux.pivot_table(values='ks', index='dataset', columns=['method'], aggfunc='mean')

# `winner` is the method whose column holds the row-wise maximum.
ind = mean_ks.assign(winner=mean_ks.idxmax(axis=1))

latex_table = ind.round(4).to_latex()
print(latex_table)

\begin{tabular}{lrrrrrrl}
\toprule
method &     dcc &     dec &     gau &     kme &     qua &     uni & winner \\
dataset             &         &         &         &         &         &         &        \\
\midrule
australian          &  0.5682 &  0.5598 &  0.1804 &  0.0908 &  0.5471 &  0.0632 &    dcc \\
farmers             &  0.2672 &  0.2636 &  0.2538 &  0.0415 &  0.2669 &  0.0000 &    dcc \\
german              &  0.2638 &  0.2650 &  0.2244 &  0.1743 &  0.2454 &  0.0116 &    dec \\
give-me-some-credit &  0.4874 &  0.4950 &  0.1834 &  0.2962 &  0.4494 &  0.0000 &    dec \\
hmeq                &  0.6730 &  0.6710 &  0.1980 &  0.0108 &  0.6425 &  0.0100 &    dcc \\
japan               &  0.5670 &  0.5580 &  0.1514 &  0.0000 &  0.5420 &  0.0000 &    dcc \\
lending-club        &  0.2638 &  0.2443 &  0.2428 &  0.2429 &  0.2313 &  0.2391 &    dcc \\
mexico              &  0.3972 &  0.3915 &  0.3450 &  0.2495 &  0.3019 &  0.0000 &    dcc \\
mortgage            &  0.0533 &  0.0663 &  0.0131

  print(ind.round(4).to_latex())
