In [1]:
import pandas as pd
import os
import numpy as np
import scipy.stats as stats

# Raise pandas' display limits to 500 columns / 500 rows.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [2]:
# Results CSV that the next cell loads into `results`.
# NOTE(review): going by the file names this is presumably the "rich" variant of
# the run stored at PATH_ORG — confirm.
PATH = 'ml/results_rf_290322_rich_fix_imp.csv'

In [3]:
# Read the results table; the first CSV column becomes the index.
results = pd.read_csv(PATH, index_col=0)

# Distinct chunk sizes in order of first appearance in the file
# (the comparison loops further down skip element 0 of this array).
chunk_sizes = results['chunk_size'].unique()

# Keep only rows whose restriction is 'complete', then drop columns that are
# entirely NaN for those rows.
complete = (
    results
    .loc[results['restriction'].eq('complete')]
    .dropna(axis='columns', how='all')
)

In [4]:
# Results CSV that the next cell loads into `results_org`; every `*_org`
# variable in this notebook is derived from it.
PATH_ORG = 'ml/results_rf_290322_fix_imp.csv'

In [5]:
# Read the second results table; the first CSV column becomes the index.
results_org = pd.read_csv(PATH_ORG, index_col=0)

# Keep only rows whose restriction is 'complete', then drop columns that are
# entirely NaN for those rows.
complete_org = (
    results_org
    .loc[results_org['restriction'].eq('complete')]
    .dropna(axis='columns', how='all')
)

## WF

In [6]:
# Rows with modality 'morphological' from `complete_org` and from `complete`.
wf_res_org = complete_org.loc[complete_org['modality'].eq('morphological')]
wf_res = complete.loc[complete['modality'].eq('morphological')]

In [7]:
# One AUC Series per chunk size, in the same order as `chunk_sizes`.
wf_aucs_org = [wf_res_org.loc[wf_res_org['chunk_size'] == cs, 'auc'] for cs in chunk_sizes]
wf_aucs = [wf_res.loc[wf_res['chunk_size'] == cs, 'auc'] for cs in chunk_sizes]

# Wilcoxon signed-rank test per chunk size; the first chunk size is skipped.
# NOTE(review): this is a paired test — it assumes the i-th AUC in both samples
# belongs to the same model/seed; confirm the two CSVs are row-aligned.
for cs_aucs_org, cs_aucs, cs in list(zip(wf_aucs_org, wf_aucs, chunk_sizes))[1:]:
    print(f"comparing CS={cs} for WF models:", stats.wilcoxon(cs_aucs, cs_aucs_org))

comparing CS=25.0 for WF models: WilcoxonResult(statistic=268.5, pvalue=0.8300003060118832)
comparing CS=50.0 for WF models: WilcoxonResult(statistic=179.0, pvalue=0.4048283906073369)
comparing CS=100.0 for WF models: WilcoxonResult(statistic=193.5, pvalue=0.8286770064402925)
comparing CS=200.0 for WF models: WilcoxonResult(statistic=176.5, pvalue=0.37450971027243074)
comparing CS=400.0 for WF models: WilcoxonResult(statistic=125.0, pvalue=0.31253191263840885)
comparing CS=800.0 for WF models: WilcoxonResult(statistic=176.5, pvalue=0.7638478637050132)
comparing CS=1600.0 for WF models: WilcoxonResult(statistic=126.0, pvalue=0.20855280071591642)


## Spatial

In [8]:
# Rows with modality 'spatial' from `complete_org` and from `complete`.
spat_res_org = complete_org.loc[complete_org['modality'].eq('spatial')]
spat_res = complete.loc[complete['modality'].eq('spatial')]

In [9]:
# One AUC Series per chunk size, in the same order as `chunk_sizes`.
spat_aucs_org = [spat_res_org.loc[spat_res_org['chunk_size'] == cs, 'auc'] for cs in chunk_sizes]
spat_aucs = [spat_res.loc[spat_res['chunk_size'] == cs, 'auc'] for cs in chunk_sizes]

# Wilcoxon signed-rank test per chunk size; the first chunk size is skipped.
# NOTE(review): this is a paired test — it assumes the i-th AUC in both samples
# belongs to the same model/seed; confirm the two CSVs are row-aligned.
for cs_aucs_org, cs_aucs, cs in list(zip(spat_aucs_org, spat_aucs, chunk_sizes))[1:]:
    print(f"comparing CS={cs} for Spatial models:", stats.wilcoxon(cs_aucs, cs_aucs_org))

comparing CS=25.0 for Spatial models: WilcoxonResult(statistic=16.0, pvalue=1.976977338482904e-09)
comparing CS=50.0 for Spatial models: WilcoxonResult(statistic=19.0, pvalue=2.363501764395746e-09)
comparing CS=100.0 for Spatial models: WilcoxonResult(statistic=7.0, pvalue=1.1543224529009006e-09)
comparing CS=200.0 for Spatial models: WilcoxonResult(statistic=8.0, pvalue=1.226241494446336e-09)
comparing CS=400.0 for Spatial models: WilcoxonResult(statistic=32.0, pvalue=5.0628144200557475e-09)
comparing CS=800.0 for Spatial models: WilcoxonResult(statistic=144.0, pvalue=1.8989320193738752e-06)
comparing CS=1600.0 for Spatial models: WilcoxonResult(statistic=47.0, pvalue=1.1957750927807627e-08)


## Spike-timing

In [10]:
# Rows with modality 'temporal' from `complete_org` and from `complete`.
temp_res_org = complete_org.loc[complete_org['modality'].eq('temporal')]
temp_res = complete.loc[complete['modality'].eq('temporal')]

In [11]:
# One AUC Series per chunk size, in the same order as `chunk_sizes`.
temp_aucs_org = [temp_res_org.loc[temp_res_org['chunk_size'] == cs, 'auc'] for cs in chunk_sizes]
temp_aucs = [temp_res.loc[temp_res['chunk_size'] == cs, 'auc'] for cs in chunk_sizes]

# Wilcoxon signed-rank test per chunk size; the first chunk size is skipped.
# NOTE(review): this is a paired test — it assumes the i-th AUC in both samples
# belongs to the same model/seed; confirm the two CSVs are row-aligned.
for cs_aucs_org, cs_aucs, cs in list(zip(temp_aucs_org, temp_aucs, chunk_sizes))[1:]:
    print(f"comparing CS={cs} for Temporal models:", stats.wilcoxon(cs_aucs, cs_aucs_org))

comparing CS=25.0 for Temporal models: WilcoxonResult(statistic=547.0, pvalue=0.5146799316825909)
comparing CS=50.0 for Temporal models: WilcoxonResult(statistic=367.5, pvalue=0.0091454817621786)
comparing CS=100.0 for Temporal models: WilcoxonResult(statistic=196.5, pvalue=2.0684711186592426e-05)
comparing CS=200.0 for Temporal models: WilcoxonResult(statistic=261.5, pvalue=0.0002836388075439085)
comparing CS=400.0 for Temporal models: WilcoxonResult(statistic=374.0, pvalue=0.01766731164478056)
comparing CS=800.0 for Temporal models: WilcoxonResult(statistic=229.0, pvalue=0.00013620905285348335)
comparing CS=1600.0 for Temporal models: WilcoxonResult(statistic=324.5, pvalue=0.004170584989423545)


## Cross-comparisons

In [12]:
# Temporal-modality AUCs: chunk size 800 from `complete_org` vs. chunk size 1600
# from `complete`.
# NOTE(review): per the printed label these are presumably the best chunk size
# of each method — confirm. The test is paired, so both samples must have the
# same length and row order.
# Each sample is selected with one combined mask instead of rebinding the same
# name first to a DataFrame and then to a Series.
temp_org_800 = complete_org.loc[
    (complete_org.chunk_size == 800) & (complete_org.modality == 'temporal'), 'auc'
]
temp_1600 = complete.loc[
    (complete.chunk_size == 1600) & (complete.modality == 'temporal'), 'auc'
]

# The label has no placeholders, so it is a plain string rather than an f-string.
print("comparing best with different methods for Temporal models:", stats.wilcoxon(temp_org_800, temp_1600))

comparing best with different methods for Temporal models: WilcoxonResult(statistic=315.0, pvalue=0.008409296147927934)


In [13]:
# Morphological-modality AUCs: chunk size 800 from `complete_org` vs. chunk size
# 50 from `complete`.
# NOTE(review): per the printed label these are presumably the best chunk size
# of each method — confirm. The test is paired, so both samples must have the
# same length and row order.
# Each sample is selected with one combined mask instead of rebinding the same
# name first to a DataFrame and then to a Series.
wf_org_800 = complete_org.loc[
    (complete_org.chunk_size == 800) & (complete_org.modality == 'morphological'), 'auc'
]
wf_50 = complete.loc[
    (complete.chunk_size == 50) & (complete.modality == 'morphological'), 'auc'
]

# The label has no placeholders, so it is a plain string rather than an f-string.
print("comparing best with different methods for WF models:", stats.wilcoxon(wf_org_800, wf_50))

comparing best with different methods for WF models: WilcoxonResult(statistic=97.0, pvalue=0.027051180513157976)


In [14]:
# Spatial-modality AUCs at chunk size 25 from `complete_org` and from `complete`.
# This is the same pair of samples as the CS=25 row of the per-chunk-size
# spatial loop, with the two arguments swapped.
# Each sample is selected with one combined mask instead of rebinding the same
# name first to a DataFrame and then to a Series.
spat_org_25 = complete_org.loc[
    (complete_org.chunk_size == 25) & (complete_org.modality == 'spatial'), 'auc'
]
spat_25 = complete.loc[
    (complete.chunk_size == 25) & (complete.modality == 'spatial'), 'auc'
]

# The label has no placeholders, so it is a plain string rather than an f-string.
print("comparing best with different methods for Spatial models:", stats.wilcoxon(spat_org_25, spat_25))

comparing best with different methods for Spatial models: WilcoxonResult(statistic=16.0, pvalue=1.976977338482904e-09)


In [15]:
PATH_SPAT = 'ml/results_rf_spatial_combined.csv'
results_spat = pd.read_csv(PATH_SPAT, index_col=0)
complete_spat = results_spat[results_spat.restriction == 'complete']
spat_org_5 = complete_spat[complete_spat.chunk_size == 5]
spat_org_5 = spat_org_5[spat_org_5.modality == 'spatial'].auc

print(f"comparing best with different methods for Spatial models:", stats.wilcoxon(spat_org_5, spat_25))

comparing best with different methods for Spatial models: WilcoxonResult(statistic=309.0, pvalue=0.0015182316754984479)
