In [1]:
import sys
import pandas as pd
import numpy as np
import scipy.stats as stats

from constants import MORPHOLOGICAL, TEMPORAL, SPATIAL, feature_names_org

In [2]:
NUM_MOMENTS = 5

In [3]:
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500

In [4]:
imps = np.load('ml/raw_imps_rf_290322_fix_imp.npy')

PATH = 'ml/results_rf_combined.csv'
BASE = 'ml/results_rf_combined_chance_balanced.csv'

df = pd.read_csv(PATH, index_col=0)
df = df[df.restriction == 'complete']
 
df_base = pd.read_csv(BASE, index_col=0)
df_base = df_base[df_base.restriction == 'complete']

In [5]:
def get_family_imp(inds, arr):
    arr_m = abs(arr[:, :, inds].sum(axis=2))
    fam_imps = arr_m[~np.isnan(arr_m)].reshape((arr.shape[0], -1)).mean(axis=1)
    return fam_imps

def names2inds(d_names, n2i_map):
    d_inds = dict()
    for key in d_names:
        temp_inds = [n2i_map.index(name) for name in d_names[key]]
        d_inds[key] = temp_inds
    return d_inds

## Spatial

In [6]:
spatial_df = df[df.chunk_size == 25]
spatial_df = spatial_df[spatial_df.modality == 'spatial']
spatial_imps = imps[1::3 * 8,:, :]

spatial_df = spatial_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df.columns if c not in keep]
spatial_df = spatial_df.drop(columns=drop)

mapper = {f'test feature {i+1}': feature_names_org[i] for i in SPATIAL[:-1]}
spatial_df = spatial_df.rename(columns=mapper)

In [7]:
spatial_df.describe()

Unnamed: 0,spatial_dispersion_count,spatial_dispersion_sd,spatial_dispersion_area,dep_red,dep_sd,fzc_red,fzc_sd,szc_red,szc_sd,dep_graph_avg_speed,dep_graph_slowest_path,dep_graph_fastest_path,fzc_graph_avg_speed,fzc_graph_slowest_path,fzc_graph_fastest_path,szc_graph_avg_speed,szc_graph_slowest_path,szc_graph_fastest_path
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.010597,0.008769,0.015903,0.008623,0.010461,0.093386,0.093971,0.046867,0.054813,0.009334,0.006745,0.006446,0.043129,0.070034,0.016277,0.008944,0.009149,0.012821
std,0.007165,0.003439,0.007768,0.003255,0.005452,0.014655,0.011776,0.012663,0.014172,0.006525,0.00343,0.003376,0.015471,0.015385,0.008726,0.00448,0.003992,0.004259
min,0.003306,0.003943,0.005543,0.003672,0.004822,0.050078,0.072533,0.021463,0.030867,0.003212,0.002799,0.00185,0.025149,0.038316,0.003593,0.00415,0.004045,0.004048
25%,0.00583,0.00612,0.010754,0.00649,0.007331,0.087186,0.085608,0.03821,0.045757,0.005041,0.00434,0.004305,0.032354,0.061739,0.010828,0.006671,0.006221,0.010089
50%,0.007385,0.007939,0.014594,0.007598,0.008601,0.094302,0.092813,0.046257,0.05179,0.007054,0.00604,0.005818,0.038067,0.067532,0.013763,0.008142,0.007994,0.012858
75%,0.013248,0.010526,0.019377,0.010353,0.013045,0.103443,0.099495,0.051385,0.062802,0.010943,0.008179,0.007621,0.050572,0.077274,0.019078,0.009912,0.011371,0.015563
max,0.035924,0.017058,0.038834,0.01797,0.03841,0.124568,0.131919,0.081064,0.094841,0.032707,0.021218,0.017895,0.095951,0.132784,0.05022,0.028586,0.020914,0.022248


In [8]:
spatial_df_base = df_base[df_base.chunk_size == 25]
spatial_df_base = spatial_df_base[spatial_df_base.modality == 'spatial']

keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df_base.columns if c not in keep]

spatial_df_base = spatial_df_base.drop(columns=drop)
spatial_df_base = spatial_df_base.dropna(how='all', axis=1)
spatial_df_base = spatial_df_base.rename(columns=mapper)

for col in spatial_df.columns:
    col_test = spatial_df[col].to_numpy()
    col_base = spatial_df_base[col].to_numpy()
    
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")
    
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test spatial_dispersion_count column is 0.007385278881142177 [0.005830300306185667, 0.013247545002678135]
Median of base spatial_dispersion_count column is 0.0005085125731559398 [0.00025476331162468756, 0.002184745969530443]
Mann-Whitney statistical test results for feature spatial_dispersion_count are p-value=8.577698544001558e-13 (statistic=2274.0)

Median of test spatial_dispersion_sd column is 0.00793888756873824 [0.006120196826501106, 0.010526203513153587]
Median of base spatial_dispersion_sd column is 0.0022057422917316927 [0.0011497706952628218, 0.006013396484274025]
Mann-Whitney statistical test results for feature spatial_dispersion_sd are p-value=3.166345535198973e-07 (statistic=1973.0)

Median of test spatial_dispersion_area column is 0.014593587699502092 [0.010753841789731763, 0.019377106645616246]
Median of base spatial_dispersion_area column is 0.0022386692066633336 [0.0012226790997811208, 0.008110199321025875]
Mann-Whitney statistical test results for feature s

In [9]:
spatial_families = {'value-based': ['spatial_dispersion_count', 'spatial_dispersion_sd', 'spatial_dispersion_area'],
                  'time-based': ['dep_red', 'dep_sd','fzc_red', 'fzc_sd', 'szc_red', 'szc_sd'],
                  'graph-based': ['dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path',
                           'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path',
                           'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path']}

for fam in spatial_families:
    spatial_df[fam] = spatial_df[spatial_families[fam]].sum(axis=1)

In [10]:
spatial_fams_df = spatial_df[[fam for fam in spatial_families]]
spatial_fams_df.describe()

Unnamed: 0,value-based,time-based,graph-based
count,50.0,50.0,50.0
mean,0.035269,0.308121,0.18288
std,0.01306,0.023019,0.023998
min,0.016339,0.258881,0.151834
25%,0.026359,0.291898,0.167348
50%,0.033955,0.308193,0.181321
75%,0.040824,0.322498,0.19184
max,0.076998,0.354294,0.289127


In [11]:
spatial_families_inds = names2inds(spatial_families, feature_names_org)

for fam in spatial_families_inds:
    spatial_df[f'{fam}_up'] = get_family_imp(spatial_families_inds[fam], spatial_imps)

In [12]:
spatial_fams_df_up = spatial_df[[f'{fam}_up' for fam in spatial_families]]
spatial_fams_df_up.describe()

Unnamed: 0,value-based_up,time-based_up,graph-based_up
count,50.0,50.0,50.0
mean,0.075151,0.164002,0.096058
std,0.021829,0.009599,0.006775
min,0.044938,0.143776,0.076157
25%,0.054165,0.157057,0.091588
50%,0.07938,0.164823,0.096806
75%,0.092889,0.171067,0.101301
max,0.1191,0.194071,0.106372


In [13]:
value_based = spatial_fams_df['value-based'].to_numpy()
time_based =  spatial_fams_df['time-based'].to_numpy()
graph_based = spatial_fams_df['graph-based'].to_numpy()
statistic, p_val = stats.wilcoxon(value_based, time_based)
print(f"Wilcoxon statistical test results for spd vs time lag are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(value_based, graph_based)
print(f"Wilcoxon statistical test results for spd vs graph are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(time_based, graph_based)
print(f"Wilcoxon statistical test results for time lag vs graph are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for spd vs time lag are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for spd vs graph are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for time lag vs graph are p-value=8.031090907046913e-10 (statistic=1.0)


In [14]:
value_based = spatial_fams_df_up['value-based_up'].to_numpy()
time_based =  spatial_fams_df_up['time-based_up'].to_numpy()
graph_based = spatial_fams_df_up['graph-based_up'].to_numpy()
statistic, p_val = stats.wilcoxon(value_based, time_based)
print(f"Wilcoxon statistical test results for spd vs time lag are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(value_based, graph_based)
print(f"Wilcoxon statistical test results for spd vs graph are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(time_based, graph_based)
print(f"Wilcoxon statistical test results for time lag vs graph are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for spd vs time lag are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for spd vs graph are p-value=4.798864940250759e-07 (statistic=116.0)
Wilcoxon statistical test results for time lag vs graph are p-value=7.556929455863566e-10 (statistic=0.0)


In [15]:
event_families = {'dep': ['dep_red', 'dep_sd', 'dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path'],
                  'fzc': ['fzc_red', 'fzc_sd', 'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path'],
                  'szc': ['szc_red', 'szc_sd', 'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path']}

for fam in event_families:
    spatial_df[fam] = spatial_df[event_families[fam]].sum(axis=1)

In [16]:
event_fams_df = spatial_df[[fam for fam in event_families]]
event_fams_df.describe()

Unnamed: 0,dep,fzc,szc
count,50.0,50.0,50.0
mean,0.041609,0.316798,0.132595
std,0.015197,0.020687,0.025716
min,0.022546,0.273651,0.084552
25%,0.032991,0.305296,0.117167
50%,0.037608,0.319206,0.124575
75%,0.047179,0.328623,0.146419
max,0.117066,0.378619,0.204646


In [17]:
event_families_inds = names2inds(event_families, feature_names_org)

for fam in event_families_inds:
    spatial_df[f'{fam}_up'] = get_family_imp(event_families_inds[fam], spatial_imps)

In [18]:
event_fams_df_up = spatial_df[[f'{fam}_up' for fam in event_families]]
event_fams_df_up.describe()

Unnamed: 0,dep_up,fzc_up,szc_up
count,50.0,50.0,50.0
mean,0.07489,0.182132,0.086041
std,0.012627,0.013347,0.009059
min,0.052014,0.147968,0.065222
25%,0.066025,0.173161,0.080218
50%,0.070922,0.184014,0.086162
75%,0.085853,0.190945,0.092067
max,0.106351,0.212879,0.10696


In [19]:
dep_event = event_fams_df['dep'].to_numpy()
fzc_event =  event_fams_df['fzc'].to_numpy()
szc_event = event_fams_df['szc'].to_numpy()
statistic, p_val = stats.wilcoxon(dep_event, fzc_event)
print(f"Wilcoxon statistical test results for dep vs fzc are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(dep_event, szc_event)
print(f"Wilcoxon statistical test results for dep vs szc are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(fzc_event, szc_event)
print(f"Wilcoxon statistical test results for fzc vs szc are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for dep vs fzc are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for dep vs szc are p-value=8.031090907046913e-10 (statistic=1.0)
Wilcoxon statistical test results for fzc vs szc are p-value=7.556929455863566e-10 (statistic=0.0)


In [20]:
dep_event = event_fams_df_up['dep_up'].to_numpy()
fzc_event = event_fams_df_up['fzc_up'].to_numpy()
szc_event = event_fams_df_up['szc_up'].to_numpy()
statistic, p_val = stats.wilcoxon(dep_event, fzc_event)
print(f"Wilcoxon statistical test results for dep vs fzc are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(dep_event, szc_event)
print(f"Wilcoxon statistical test results for dep vs szc are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(fzc_event, szc_event)
print(f"Wilcoxon statistical test results for fzc vs szc are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for dep vs fzc are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for dep vs szc are p-value=0.00012943824374041834 (statistic=241.0)
Wilcoxon statistical test results for fzc vs szc are p-value=7.556929455863566e-10 (statistic=0.0)


## Temporal

In [21]:
temporal_df = df[df.chunk_size == 1600]
temporal_df = temporal_df[temporal_df.modality == 'temporal']
temporal_imps = imps[14::3 * 8,:, :]

temporal_df = temporal_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in TEMPORAL[:-1]]
drop = [c for c in temporal_df.columns if c not in keep]
temporal_df = temporal_df.drop(columns=drop)
mapper = {f'test feature {i+1}': feature_names_org[i] for i in TEMPORAL[:-1]}
temporal_df = temporal_df.rename(columns=mapper)

In [22]:
temporal_df.describe()

Unnamed: 0,firing_rate,d_kl_start,d_kl_mid,jump,psd_center,der_psd_center,rise_time,unif_dist
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.079174,0.028272,0.186695,0.030599,0.017198,0.008981,0.030526,0.138036
std,0.020456,0.012518,0.028007,0.008131,0.006765,0.002544,0.008078,0.024749
min,0.050198,0.003675,0.121779,0.01144,0.005801,0.004189,0.009781,0.095522
25%,0.062962,0.020611,0.171837,0.026109,0.012938,0.006913,0.026266,0.118112
50%,0.076606,0.025474,0.186821,0.029114,0.016402,0.009021,0.030584,0.135142
75%,0.089417,0.035227,0.199467,0.032415,0.019494,0.010943,0.035349,0.157266
max,0.139811,0.059195,0.255799,0.056082,0.039176,0.014735,0.054503,0.196062


In [23]:
temporal_df_base = df_base[df_base.chunk_size == 1600]
temporal_df_base = temporal_df_base[temporal_df_base.modality == 'temporal']

temporal_df_base = temporal_df_base.dropna(how='all', axis=1)
temporal_df_base = temporal_df_base.drop(columns=drop)
temporal_df_base = temporal_df_base.rename(columns=mapper)

for col in temporal_df.columns:
    col_test = temporal_df[col].to_numpy()
    col_base = temporal_df_base[col].to_numpy()
    
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")
    
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test firing_rate column is 0.07660628883976234 [0.06296248006769566, 0.0894171318889741]
Median of base firing_rate column is 0.017801644329628392 [0.012707424934842998, 0.0337395552599446]
Mann-Whitney statistical test results for feature firing_rate are p-value=7.762066810476817e-16 (statistic=2407.0)

Median of test d_kl_start column is 0.025473617569475256 [0.020611167566853734, 0.03522693911372335]
Median of base d_kl_start column is 0.02189852082348713 [0.011827566550182541, 0.03290514648208806]
Mann-Whitney statistical test results for feature d_kl_start are p-value=0.03464520362062903 (statistic=1514.0)

Median of test d_kl_mid column is 0.1868213948624144 [0.1718374246539367, 0.19946749408257894]
Median of base d_kl_mid column is 0.020968910350341666 [0.013342541138720354, 0.03355964368240139]
Mann-Whitney statistical test results for feature d_kl_mid are p-value=3.533035965194466e-18 (statistic=2500.0)

Median of test jump column is 0.029113604473170608 [0.026108648

In [24]:
temporal_families = {'short': ['d_kl_start', 'unif_dist', 'rise_time'],
                     'long': ['d_kl_mid', 'jump'],
                     'wb': ['firing_rate', 'psd_center', 'der_psd_center']}

for fam in temporal_families:
    temporal_df[fam] = temporal_df[temporal_families[fam]].sum(axis=1)

In [25]:
temporal_fams_df = temporal_df[[fam for fam in temporal_families]]
temporal_fams_df.describe()

Unnamed: 0,short,long,wb
count,50.0,50.0,50.0
mean,0.196834,0.217294,0.105354
std,0.028725,0.025193,0.021199
min,0.142361,0.154215,0.072959
25%,0.173669,0.204327,0.090433
50%,0.192911,0.219731,0.105259
75%,0.21952,0.227453,0.113957
max,0.25514,0.27765,0.158148


In [26]:
temporal_families_inds = names2inds(temporal_families, feature_names_org)

for fam in temporal_families_inds:
    temporal_df[f'{fam}_up'] = get_family_imp(temporal_families_inds[fam], temporal_imps)

In [27]:
temporal_fams_df_up = temporal_df[[f'{fam}_up' for fam in temporal_families]]
temporal_fams_df_up.describe()

Unnamed: 0,short_up,long_up,wb_up
count,50.0,50.0,50.0
mean,0.202765,0.195539,0.093321
std,0.017966,0.014671,0.016183
min,0.165181,0.162849,0.044558
25%,0.192906,0.185664,0.084381
50%,0.202095,0.1957,0.092916
75%,0.211044,0.205718,0.107837
max,0.245569,0.229826,0.124536


In [28]:
short = temporal_fams_df['short'].to_numpy()
long =  temporal_fams_df['long'].to_numpy()
wb = temporal_fams_df['wb'].to_numpy()
statistic, p_val = stats.wilcoxon(short, long)
print(f"Wilcoxon statistical test results for short vs long are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(short, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(long, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for short vs long are p-value=0.006205800908562291 (statistic=354.0)
Wilcoxon statistical test results for short vs wb are p-value=8.53422673646545e-10 (statistic=2.0)
Wilcoxon statistical test results for short vs wb are p-value=7.556929455863566e-10 (statistic=0.0)


In [29]:
short = temporal_fams_df_up['short_up'].to_numpy()
long =  temporal_fams_df_up['long_up'].to_numpy()
wb = temporal_fams_df_up['wb_up'].to_numpy()
statistic, p_val = stats.wilcoxon(short, long)
print(f"Wilcoxon statistical test results for short vs long are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(short, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(long, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for short vs long are p-value=0.12841263682125154 (statistic=480.0)
Wilcoxon statistical test results for short vs wb are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for short vs wb are p-value=7.556929455863566e-10 (statistic=0.0)


## WF (Morphological)

In [30]:
morph_df = df[df.chunk_size == 50]
morph_df = morph_df[morph_df.modality == 'morphological']
morph_imps = imps[22::3 * 8,:, :]

morph_df = morph_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in MORPHOLOGICAL[:-1]]
drop = [c for c in morph_df.columns if c not in keep]
morph_df = morph_df.drop(columns=drop)
mapper = {f'test feature {i+1}': feature_names_org[i] for i in MORPHOLOGICAL[:-1]}
morph_df = morph_df.rename(columns=mapper)

In [31]:
morph_df.describe()

Unnamed: 0,break_measure,fwhm,get_acc,max_speed,peak2peak,trough2peak,rise_coef,smile_cry
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.003893,0.006283,0.128688,0.010599,0.103076,0.243773,0.006613,0.013425
std,0.004841,0.004801,0.049221,0.004453,0.028846,0.032315,0.006649,0.004921
min,0.000746,0.001492,0.041221,0.002396,0.048737,0.166116,0.000638,0.004688
25%,0.002014,0.003567,0.099331,0.006613,0.08277,0.231012,0.003589,0.010698
50%,0.002844,0.004832,0.117248,0.010904,0.108555,0.247573,0.004814,0.013057
75%,0.00425,0.00748,0.154144,0.013966,0.120835,0.26051,0.008101,0.015307
max,0.035268,0.025898,0.244667,0.019687,0.171023,0.306495,0.042035,0.027637


In [32]:
morph_df_base = df_base[df_base.chunk_size == 50]
morph_df_base = morph_df_base[morph_df_base.modality == 'morphological']

morph_df_base = morph_df_base.dropna(how='all', axis=1)
morph_df_base = morph_df_base.drop(columns=drop)
morph_df_base = morph_df_base.rename(columns=mapper)

for col in morph_df.columns:
    col_test = morph_df[col].to_numpy()
    col_base = morph_df_base[col].to_numpy()
    
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")
    
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test break_measure column is 0.0028440496726888243 [0.0020136104851318395, 0.004249580224414094]
Median of base break_measure column is 0.005326804235759501 [0.002846373332937295, 0.022079929645976465]
Mann-Whitney statistical test results for feature break_measure are p-value=0.9999261925054945 (statistic=700.0)

Median of test fwhm column is 0.004831626777964015 [0.0035670169185989154, 0.007479833582739025]
Median of base fwhm column is 0.004369140344333035 [0.0024219236419720717, 0.016319556779816183]
Mann-Whitney statistical test results for feature fwhm are p-value=0.5261085790178511 (statistic=1241.0)

Median of test get_acc column is 0.11724804673665778 [0.09933110131114307, 0.15414351551508793]
Median of base get_acc column is 0.008542269568742019 [0.0027837015662190937, 0.020303042172106926]
Mann-Whitney statistical test results for feature get_acc are p-value=7.248001579130404e-18 (statistic=2488.0)

Median of test max_speed column is 0.010904462131404412 [0.0066125

In [33]:
morph_families = {'WF': ['trough2peak', 'peak2peak', 'fwhm', 'rise_coef'],
                  'first': ['max_speed'],
                  'second': ['break_measure', 'smile_cry', 'get_acc']}

for fam in morph_families:
    morph_df[fam] = morph_df[morph_families[fam]].sum(axis=1)

In [34]:
morph_fams_df = morph_df[[fam for fam in morph_families]]
morph_fams_df.describe()

Unnamed: 0,WF,first,second
count,50.0,50.0,50.0
mean,0.359745,0.010599,0.146005
std,0.045255,0.004453,0.050794
min,0.25447,0.002396,0.053587
25%,0.342647,0.006613,0.117115
50%,0.367611,0.010904,0.133842
75%,0.381678,0.013966,0.169328
max,0.45129,0.019687,0.26767


In [35]:
morph_families_inds = names2inds(morph_families, feature_names_org)

for fam in morph_families:
    morph_df[f'{fam}_up'] = get_family_imp(morph_families_inds[fam], morph_imps)

In [36]:
morph_fams_df_up = morph_df[[f'{fam}_up' for fam in morph_families]]
morph_fams_df_up.describe()

Unnamed: 0,WF_up,first_up,second_up
count,50.0,50.0,50.0
mean,0.353532,0.009399,0.133076
std,0.026929,0.004087,0.026671
min,0.268245,0.002093,0.059716
25%,0.340342,0.006481,0.120665
50%,0.356177,0.00833,0.13097
75%,0.367601,0.011358,0.145856
max,0.408368,0.020086,0.206803


In [37]:
org = morph_fams_df['WF'].to_numpy()
first =  morph_fams_df['first'].to_numpy()
second = morph_fams_df['second'].to_numpy()
statistic, p_val = stats.wilcoxon(org, first)
print(f"Wilcoxon statistical test results for WF vs first are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(org, second)
print(f"Wilcoxon statistical test results for WF vs second are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for WF vs first are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for WF vs second are p-value=8.031090907046913e-10 (statistic=1.0)


In [38]:
org = morph_fams_df_up['WF_up'].to_numpy()
first =  morph_fams_df_up['first_up'].to_numpy()
second = morph_fams_df_up['second_up'].to_numpy()
statistic, p_val = stats.wilcoxon(org, first)
print(f"Wilcoxon statistical test results for WF vs first are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(org, second)
print(f"Wilcoxon statistical test results for WF vs second are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for WF vs first are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for WF vs second are p-value=7.556929455863566e-10 (statistic=0.0)


## Moments

In [39]:
PATH = 'ml/results_rf_moments.csv'
BASE = 'ml/results_rf_moments_chance_balanced.csv'

df = pd.read_csv(PATH, index_col=0)
df = df[df.restriction == 'complete']
 
df_base = pd.read_csv(BASE, index_col=0)
df_base = df_base[df_base.restriction == 'complete']

In [40]:
moments_names = ['Original', 'Mean', 'SD', 'Q25', 'Median', 'Q75']

### Spatial

In [41]:
spatial_df = df[df.chunk_size == 25]
spatial_df = spatial_df[spatial_df.modality == 'spatial']

keep = [f'test feature {i+1}' for i in (np.arange(NUM_MOMENTS + 1))]
drop = [c for c in spatial_df.columns if c not in keep]
spatial_df = spatial_df.drop(columns=drop)

mapper = {f'test feature {i+1}': moments_names[i] for i in np.arange(NUM_MOMENTS + 1)}
spatial_df = spatial_df.rename(columns=mapper)

In [42]:
spatial_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.003166,0.05162,0.162757,0.067698,0.038598,0.060491
std,0.002939,0.009687,0.017315,0.014564,0.008582,0.013721
min,0.000779,0.030074,0.130779,0.039486,0.021126,0.032141
25%,0.001624,0.045045,0.147828,0.057077,0.033168,0.051889
50%,0.002438,0.05201,0.162075,0.067352,0.037001,0.059945
75%,0.003106,0.0579,0.173901,0.075903,0.043856,0.06851
max,0.016771,0.073967,0.200541,0.120412,0.066409,0.093201


In [43]:
spatial_df_base = df_base[df_base.chunk_size == 25]
spatial_df_base = spatial_df_base[spatial_df_base.modality == 'spatial']

spatial_df_base = spatial_df_base.drop(columns=drop)
spatial_df_base = spatial_df_base.dropna(how='all', axis=1)
spatial_df_base = spatial_df_base.rename(columns=mapper)

for col in spatial_df.columns:
    col_test = spatial_df[col].to_numpy()
    col_base = spatial_df_base[col].to_numpy()
    
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")
    
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.0024379321464613954 [0.0016241253911328513, 0.0031055695932484108]
Median of base Original column is 0.010337783240916646 [0.005428529615957661, 0.023136359055328445]
Mann-Whitney statistical test results for feature Original are p-value=0.9999999999960985 (statistic=258.0)

Median of test Mean column is 0.05201005082433725 [0.045045248894187706, 0.05789950900895412]
Median of base Mean column is 0.0036999208163549504 [0.002084875672954233, 0.00818752087381185]
Mann-Whitney statistical test results for feature Mean are p-value=6.060044905437509e-18 (statistic=2491.0)

Median of test SD column is 0.16207471437127263 [0.14782810729137247, 0.17390097158750917]
Median of base SD column is 0.003659140704237971 [0.0023178400525044286, 0.012008006509250494]
Mann-Whitney statistical test results for feature SD are p-value=3.533035965194466e-18 (statistic=2500.0)

Median of test Q25 column is 0.06735180638014514 [0.05707747809787165, 0.07590309042328994]
Medi

In [44]:
for col in spatial_df.columns:
    if col == 'Original':
        continue
    col_test = spatial_df[col].to_numpy()
    col_test_original = spatial_df['Original'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature SD are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



In [45]:
for col in spatial_df.columns:
    if col == 'SD':
        continue
    col_test = spatial_df[col].to_numpy()
    col_test_sd = spatial_df['SD'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Original are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



### Spike-timing

In [46]:
temporal_df = df[df.chunk_size == 1600]
temporal_df = temporal_df[temporal_df.modality == 'temporal']

keep = [f'test feature {i+1}' for i in (np.arange(NUM_MOMENTS + 1))]
drop = [c for c in temporal_df.columns if c not in keep]
temporal_df = temporal_df.drop(columns=drop)

mapper = {f'test feature {i+1}': moments_names[i] for i in np.arange(NUM_MOMENTS + 1)}
temporal_df = temporal_df.rename(columns=mapper)

In [47]:
temporal_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.051028,0.088946,0.030351,0.097965,0.093147,0.10498
std,0.013718,0.015345,0.006604,0.018389,0.021444,0.020827
min,0.021799,0.030305,0.012879,0.0187,0.040656,0.072726
25%,0.042237,0.079957,0.026717,0.089166,0.084197,0.091208
50%,0.049169,0.090793,0.031512,0.100306,0.089799,0.098839
75%,0.059225,0.099308,0.033688,0.108278,0.101627,0.117467
max,0.100189,0.126585,0.04655,0.138924,0.190443,0.159987


In [48]:
temporal_df_base = df_base[df_base.chunk_size == 1600]
temporal_df_base = temporal_df_base[temporal_df_base.modality == 'temporal']

temporal_df_base = temporal_df_base.drop(columns=drop)
temporal_df_base = temporal_df_base.dropna(how='all', axis=1)
temporal_df_base = temporal_df_base.rename(columns=mapper)

for col in temporal_df.columns:
    col_test = temporal_df[col].to_numpy()
    col_base = temporal_df_base[col].to_numpy()
    
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")
    
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.049168597585393284 [0.04223717851421639, 0.05922549293105055]
Median of base Original column is 0.0702281743168738 [0.032731743358026893, 0.10602249069231215]
Mann-Whitney statistical test results for feature Original are p-value=0.9693740088704476 (statistic=979.0)

Median of test Mean column is 0.09079308870575285 [0.07995700909357352, 0.0993076709215153]
Median of base Mean column is 0.018867166137777187 [0.010543452345292408, 0.026230177147372684]
Mann-Whitney statistical test results for feature Mean are p-value=9.536222832167321e-17 (statistic=2444.0)

Median of test SD column is 0.03151209343431375 [0.02671693814152817, 0.03368750736152391]
Median of base SD column is 0.0237637917505148 [0.012374683373946052, 0.03538959157805905]
Mann-Whitney statistical test results for feature SD are p-value=0.007839915983387659 (statistic=1601.0)

Median of test Q25 column is 0.1003064311280634 [0.08916622808666558, 0.10827794174549525]
Median of base Q25 c

In [49]:
for col in temporal_df.columns:
    if col == 'Original':
        continue
    col_test = temporal_df[col].to_numpy()
    col_test_original = temporal_df['Original'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Mean are p-value=1.5112130159282838e-15 (statistic=105.0)

Mann-Whitney statistical test results for feature SD are p-value=4.242867088798065e-14 (statistic=167.0)

Mann-Whitney statistical test results for feature Q25 are p-value=4.432167636937588e-16 (statistic=83.0)

Mann-Whitney statistical test results for feature Median are p-value=1.884188348687208e-15 (statistic=109.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.102916316646988e-17 (statistic=30.0)



In [50]:
for col in temporal_df.columns:
    if col == 'SD':
        continue
    col_test = temporal_df[col].to_numpy()
    col_test_sd = temporal_df['SD'].to_numpy()
        
    statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Original are p-value=4.242867088798065e-14 (statistic=167.0)

Mann-Whitney statistical test results for feature Mean are p-value=2.102916316646988e-17 (statistic=30.0)

Mann-Whitney statistical test results for feature Q25 are p-value=5.3516117196336476e-17 (statistic=46.0)

Mann-Whitney statistical test results for feature Median are p-value=4.230977759222343e-18 (statistic=3.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



### Waveform

In [51]:
wf_df = df[df.chunk_size == 50]
wf_df = wf_df[wf_df.modality == 'morphological']

keep = [f'test feature {i+1}' for i in (np.arange(NUM_MOMENTS + 1))]
drop = [c for c in wf_df.columns if c not in keep]
wf_df = wf_df.drop(columns=drop)

mapper = {f'test feature {i+1}': moments_names[i] for i in np.arange(NUM_MOMENTS + 1)}
wf_df = wf_df.rename(columns=mapper)

In [52]:
wf_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.023439,0.117799,0.036055,0.119735,0.101871,0.087564
std,0.008672,0.027136,0.015113,0.033335,0.02656,0.024249
min,0.002561,0.075096,0.016546,0.051519,0.036409,0.019827
25%,0.018188,0.100066,0.027242,0.102397,0.088383,0.071054
50%,0.022379,0.116444,0.033952,0.117159,0.100804,0.088183
75%,0.030186,0.127203,0.04106,0.136353,0.112628,0.100758
max,0.043224,0.205601,0.096509,0.232663,0.2025,0.154632


In [53]:
wf_df_base = df_base[df_base.chunk_size == 50]
wf_df_base = wf_df_base[wf_df_base.modality == 'morphological']

wf_df_base = wf_df_base.drop(columns=drop)
wf_df_base = wf_df_base.dropna(how='all', axis=1)
wf_df_base = wf_df_base.rename(columns=mapper)

for col in wf_df.columns:
    col_test = wf_df[col].to_numpy()
    col_base = wf_df_base[col].to_numpy()
    
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")
    
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.02237877001679314 [0.018188308743880364, 0.03018636968651768]
Median of base Original column is 0.015412859323332417 [0.006643367017660434, 0.07153374519172542]
Mann-Whitney statistical test results for feature Original are p-value=0.14341643221315398 (statistic=1405.0)

Median of test Mean column is 0.11644411591219558 [0.10006621703299277, 0.12720349345496607]
Median of base Mean column is 0.006428797657842369 [0.0028005027826177305, 0.0164218672523151]
Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=2500.0)

Median of test SD column is 0.03395225415721932 [0.0272417678041492, 0.04106005209374072]
Median of base SD column is 0.008452899647953545 [0.003568247727194304, 0.017628916892953532]
Mann-Whitney statistical test results for feature SD are p-value=1.628023156627252e-12 (statistic=2261.0)

Median of test Q25 column is 0.11715856826107693 [0.10239658339503573, 0.1363534815643678]
Median of bas

In [54]:
for col in wf_df.columns:
    if col == 'Original':
        continue
    col_test = wf_df[col].to_numpy()
    col_test_original = wf_df['Original'].to_numpy()
        
    statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature SD are p-value=1.2691302843148297e-06 (statistic=567.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.752027398590286e-18 (statistic=1.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.2301221981921627e-17 (statistic=31.0)



In [55]:
for col in wf_df.columns:
    if col == 'SD':
        continue
    col_test = wf_df[col].to_numpy()
    col_test_sd = wf_df['SD'].to_numpy()
        
    statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Original are p-value=1.2691302843148297e-06 (statistic=567.0)

Mann-Whitney statistical test results for feature Mean are p-value=5.37706962437211e-18 (statistic=7.0)

Mann-Whitney statistical test results for feature Q25 are p-value=1.1658290633802667e-17 (statistic=20.0)

Mann-Whitney statistical test results for feature Median are p-value=6.746610677097982e-17 (statistic=50.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.766871909311284e-15 (statistic=116.0)



## Events

In [56]:
PATH = 'ml/results_rf_events.csv'
BASE = 'ml/results_rf_events_chance_balanced.csv'

df = pd.read_csv(PATH, index_col=0)
df = df[df.restriction == 'complete']
 
df_base = pd.read_csv(BASE, index_col=0)
df_base = df_base[df_base.restriction == 'complete']

In [57]:
events_names = ['FMC', 'NEG', 'SZC']

In [58]:
events_df = df[df.chunk_size == 25]

keep = [f'test feature {i+1}' for i in np.arange(len(events_names))]
drop = [c for c in events_df.columns if c not in keep]
events_df = events_df.drop(columns=drop)

mapper = {f'test feature {i+1}': events_names[i] for i in np.arange(len(events_names))}
events_df = events_df.rename(columns=mapper)

In [59]:
events_df.describe()

Unnamed: 0,FMC,NEG,SZC
count,50.0,50.0,50.0
mean,0.266625,0.031404,0.101886
std,0.026182,0.013472,0.02124
min,0.217505,0.016626,0.061487
25%,0.249482,0.023593,0.088098
50%,0.264015,0.028365,0.098318
75%,0.290299,0.036144,0.112656
max,0.314895,0.101495,0.155026


In [60]:
events_df_base = df_base[df_base.chunk_size == 25]

events_df_base = events_df_base.drop(columns=drop)
events_df_base = events_df_base.dropna(how='all', axis=1)
events_df_base = events_df_base.rename(columns=mapper)

for col in events_df.columns:
    col_test = events_df[col].to_numpy()
    col_base = events_df_base[col].to_numpy()
    
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")
    
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test FMC column is 0.2640145777772147 [0.24948210271719043, 0.29029947622793995]
Median of base FMC column is 0.006939251611798837 [0.0037688925330259795, 0.018811990547760008]
Mann-Whitney statistical test results for feature FMC are p-value=3.533035965194466e-18 (statistic=2500.0)

Median of test NEG column is 0.02836510355834099 [0.02359318104111803, 0.03614357998901153]
Median of base NEG column is 0.006933434570057179 [0.004029121926769381, 0.018035967647840784]
Mann-Whitney statistical test results for feature NEG are p-value=6.082243657963085e-08 (statistic=2018.0)

Median of test SZC column is 0.09831774002650608 [0.08809822171523954, 0.11265576619549275]
Median of base SZC column is 0.007594685801818775 [0.004218328372723387, 0.01570504964726462]
Mann-Whitney statistical test results for feature SZC are p-value=6.009327146507533e-17 (statistic=2452.0)

