In [1]:
import sys
import pandas as pd
import numpy as np
import scipy.stats as stats

from constants import MORPHOLOGICAL, TEMPORAL, SPATIAL, feature_names_org

In [2]:
# Layout constants used when slicing the importance array and the result tables.
NUM_MOMENTS, NUM_CHUNKS, NUM_MODALITIES = 5, 8, 3

# Chunk size -> positional index.
CHUNKS_MAP = {
    0: 0,
    25: 1,
    50: 2,
    100: 3,
    200: 4,
    400: 5,
    800: 6,
    1600: 7,
}

In [3]:
# Raise pandas' display limits so wide/long summary tables are rendered in full.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [4]:
# Importance array loaded from disk; it is sliced per modality further down.
imps = np.load('ml/raw_imps_rf_290322_fix_imp.npy')

PATH = 'ml/results_rf_combined.csv'
BASE = 'ml/shuffle_results/060922_shuffles_combined.csv'

# Results table, limited to rows whose `restriction` is 'complete'.
df = pd.read_csv(PATH, index_col=0)
df = df.loc[df['restriction'] == 'complete']

# Baseline table (read from the shuffle-results file), same row filter.
df_base = pd.read_csv(BASE, index_col=0)
df_base = df_base.loc[df_base['restriction'] == 'complete']

In [5]:
def get_family_imp(inds, arr):
    """Return one importance value per leading-axis entry for a feature family.

    The entries of ``arr`` selected by ``inds`` along the last axis are summed,
    the absolute value of that sum is taken, and the result is averaged over
    the middle axis while ignoring NaN entries.

    Parameters
    ----------
    inds : sequence of int
        Positions along the last axis of ``arr`` that belong to the family.
    arr : numpy.ndarray
        Three-dimensional array (it is indexed as ``arr[:, :, inds]``).

    Returns
    -------
    numpy.ndarray
        1-D array of length ``arr.shape[0]``. A row whose summed entries are
        all NaN yields NaN (numpy emits a RuntimeWarning in that case).
    """
    fam_sum = np.abs(arr[:, :, inds].sum(axis=2))
    # Average row by row. Masking out NaNs on the flattened array and reshaping
    # back only works when every row has the same number of NaNs; otherwise
    # values from one row leak into another (or the reshape fails outright).
    return np.nanmean(fam_sum, axis=1)

def names2inds(d_names, n2i_map):
    """Translate each family's feature names into their positions in ``n2i_map``.

    Parameters
    ----------
    d_names : dict
        Maps a family key to an iterable of feature names.
    n2i_map : sequence
        Ordered feature names; must provide ``.index(name)``.

    Returns
    -------
    dict
        Same keys as ``d_names``; each value is the list of positions of the
        family's names within ``n2i_map``, in the order the names were given.

    Raises
    ------
    ValueError
        If a name is not present in ``n2i_map`` (raised by ``.index``).
    """
    return {
        family: [n2i_map.index(feature) for feature in d_names[family]]
        for family in d_names
    }

## Spatial

In [6]:
# Rows of the results table for the spatial modality at chunk size 25.
spatial_df = df[(df.chunk_size == 25) & (df.modality == 'spatial')]
# Every (NUM_CHUNKS * NUM_MODALITIES)-th entry of `imps`, starting at the index of chunk size 25.
spatial_imps = imps[CHUNKS_MAP[25]::NUM_CHUNKS * NUM_MODALITIES, :, :]

# Discard columns that are empty for this selection before deciding what to keep.
spatial_df = spatial_df.dropna(how='all', axis=1)

# Keep only the 'test feature N' columns of the spatial features
# (the last entry of SPATIAL is excluded), then give them readable names.
keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df.columns if c not in keep]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in SPATIAL[:-1]}
spatial_df = spatial_df.drop(columns=drop).rename(columns=mapper)

In [7]:
# Summary statistics of the renamed spatial feature columns.
spatial_df.describe()

Unnamed: 0,spatial_dispersion_count,spatial_dispersion_sd,spatial_dispersion_area,dep_red,dep_sd,fzc_red,fzc_sd,szc_red,szc_sd,dep_graph_avg_speed,dep_graph_slowest_path,dep_graph_fastest_path,fzc_graph_avg_speed,fzc_graph_slowest_path,fzc_graph_fastest_path,szc_graph_avg_speed,szc_graph_slowest_path,szc_graph_fastest_path
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.010597,0.008769,0.015903,0.008623,0.010461,0.093386,0.093971,0.046867,0.054813,0.009334,0.006745,0.006446,0.043129,0.070034,0.016277,0.008944,0.009149,0.012821
std,0.007165,0.003439,0.007768,0.003255,0.005452,0.014655,0.011776,0.012663,0.014172,0.006525,0.00343,0.003376,0.015471,0.015385,0.008726,0.00448,0.003992,0.004259
min,0.003306,0.003943,0.005543,0.003672,0.004822,0.050078,0.072533,0.021463,0.030867,0.003212,0.002799,0.00185,0.025149,0.038316,0.003593,0.00415,0.004045,0.004048
25%,0.00583,0.00612,0.010754,0.00649,0.007331,0.087186,0.085608,0.03821,0.045757,0.005041,0.00434,0.004305,0.032354,0.061739,0.010828,0.006671,0.006221,0.010089
50%,0.007385,0.007939,0.014594,0.007598,0.008601,0.094302,0.092813,0.046257,0.05179,0.007054,0.00604,0.005818,0.038067,0.067532,0.013763,0.008142,0.007994,0.012858
75%,0.013248,0.010526,0.019377,0.010353,0.013045,0.103443,0.099495,0.051385,0.062802,0.010943,0.008179,0.007621,0.050572,0.077274,0.019078,0.009912,0.011371,0.015563
max,0.035924,0.017058,0.038834,0.01797,0.03841,0.124568,0.131919,0.081064,0.094841,0.032707,0.021218,0.017895,0.095951,0.132784,0.05022,0.028586,0.020914,0.022248


In [8]:
# Baseline rows for the same selection (spatial modality, chunk size 25).
spatial_df_base = df_base[(df_base.chunk_size == 25) & (df_base.modality == 'spatial')]

keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df_base.columns if c not in keep]

# `mapper` is the rename table built when `spatial_df` was prepared.
spatial_df_base = (
    spatial_df_base
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

for col in spatial_df.columns:
    col_test = spatial_df[col].to_numpy()
    col_base = spatial_df_base[col].to_numpy()

    # Requested order is median, lower quartile, upper quartile.
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Share of baseline values strictly above the observed median, with an add-one correction.
    n_above = (col_base > np.median(col_test)).sum()
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")

    # One-sided Mann-Whitney U: observed values greater than baseline values.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test spatial_dispersion_count column is 0.007385278881142177 [0.005830300306185667, 0.013247545002678135]
Median of base spatial_dispersion_count column is 0.001103438165438971 [0.00047327505233591555, 0.005248950989290943]
permutation test result for feature spatial_dispersion_count is p-value=0.16883116883116883
Mann-Whitney statistical test results for feature spatial_dispersion_count are p-value=1.5320176847937912e-17 (statistic=42671.0)

Median of test spatial_dispersion_sd column is 0.00793888756873824 [0.006120196826501106, 0.010526203513153587]
Median of base spatial_dispersion_sd column is 0.0028872505556767937 [0.0014671421776484932, 0.012076567976981053]
permutation test result for feature spatial_dispersion_sd is p-value=0.3196803196803197
Mann-Whitney statistical test results for feature spatial_dispersion_sd are p-value=2.410373283429868e-06 (statistic=34569.0)

Median of test spatial_dispersion_area column is 0.014593587699502092 [0.010753841789731763, 0.019377

In [9]:
# Spatial features grouped into families; keys are the family names.
spatial_families = {
    'value-based': [
        'spatial_dispersion_count',
        'spatial_dispersion_sd',
        'spatial_dispersion_area',
    ],
    'time-based': [
        'dep_red', 'dep_sd',
        'fzc_red', 'fzc_sd',
        'szc_red', 'szc_sd',
    ],
    'graph-based': [
        'dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path',
        'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path',
        'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path',
    ],
}

In [10]:
# Resolve family member names to their positions in `feature_names_org`.
spatial_families_inds = names2inds(spatial_families, feature_names_org)

# Add one '<family>_up' column per family to `spatial_df`.
for fam, fam_inds in spatial_families_inds.items():
    spatial_df[f'{fam}_up'] = get_family_imp(fam_inds, spatial_imps)

In [11]:
# Just the per-family '<family>_up' columns, in the order the families are declared.
spatial_fams_df_up = spatial_df.loc[:, [f'{fam}_up' for fam in spatial_families]]
spatial_fams_df_up.describe()

Unnamed: 0,value-based_up,time-based_up,graph-based_up
count,50.0,50.0,50.0
mean,0.075151,0.164002,0.096058
std,0.021829,0.009599,0.006775
min,0.044938,0.143776,0.076157
25%,0.054165,0.157057,0.091588
50%,0.07938,0.164823,0.096806
75%,0.092889,0.171067,0.101301
max,0.1191,0.194071,0.106372


In [12]:
# Pull the three family columns out as arrays.
value_based, time_based, graph_based = (
    spatial_fams_df_up[name].to_numpy()
    for name in ('value-based_up', 'time-based_up', 'graph-based_up')
)

# Pairwise Wilcoxon signed-rank tests between the families.
statistic, p_val = stats.wilcoxon(value_based, time_based)
print(f"Wilcoxon statistical test results for spd vs time lag are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(value_based, graph_based)
print(f"Wilcoxon statistical test results for spd vs graph are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(time_based, graph_based)
print(f"Wilcoxon statistical test results for time lag vs graph are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for spd vs time lag are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for spd vs graph are p-value=4.798864940250759e-07 (statistic=116.0)
Wilcoxon statistical test results for time lag vs graph are p-value=7.556929455863566e-10 (statistic=0.0)


In [13]:
# The same spatial features regrouped by their name prefix (dep / fzc / szc).
event_families = {
    'dep': [
        'dep_red', 'dep_sd',
        'dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path',
    ],
    'fzc': [
        'fzc_red', 'fzc_sd',
        'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path',
    ],
    'szc': [
        'szc_red', 'szc_sd',
        'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path',
    ],
}

In [14]:
# Resolve family member names to their positions in `feature_names_org`.
event_families_inds = names2inds(event_families, feature_names_org)

# Add one '<family>_up' column per event family to `spatial_df`.
for fam, fam_inds in event_families_inds.items():
    spatial_df[f'{fam}_up'] = get_family_imp(fam_inds, spatial_imps)

In [15]:
# Just the per-event '<family>_up' columns, in the order the families are declared.
event_fams_df_up = spatial_df.loc[:, [f'{fam}_up' for fam in event_families]]
event_fams_df_up.describe()

Unnamed: 0,dep_up,fzc_up,szc_up
count,50.0,50.0,50.0
mean,0.07489,0.182132,0.086041
std,0.012627,0.013347,0.009059
min,0.052014,0.147968,0.065222
25%,0.066025,0.173161,0.080218
50%,0.070922,0.184014,0.086162
75%,0.085853,0.190945,0.092067
max,0.106351,0.212879,0.10696


In [16]:
# Pull the three event-family columns out as arrays.
dep_event, fzc_event, szc_event = (
    event_fams_df_up[name].to_numpy()
    for name in ('dep_up', 'fzc_up', 'szc_up')
)

# Pairwise Wilcoxon signed-rank tests between the event families.
statistic, p_val = stats.wilcoxon(dep_event, fzc_event)
print(f"Wilcoxon statistical test results for dep vs fzc are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(dep_event, szc_event)
print(f"Wilcoxon statistical test results for dep vs szc are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(fzc_event, szc_event)
print(f"Wilcoxon statistical test results for fzc vs szc are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for dep vs fzc are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for dep vs szc are p-value=0.00012943824374041834 (statistic=241.0)
Wilcoxon statistical test results for fzc vs szc are p-value=7.556929455863566e-10 (statistic=0.0)


## Temporal

In [17]:
# Rows of the results table for the temporal modality at chunk size 1600.
temporal_df = df[(df.chunk_size == 1600) & (df.modality == 'temporal')]
# NOTE(review): the start offset is NUM_CHUNKS - 1 + CHUNKS_MAP[1600], whereas the
# spatial selection starts at the bare CHUNKS_MAP value - confirm this matches the
# row layout of `imps`.
temporal_imps = imps[NUM_CHUNKS - 1 + CHUNKS_MAP[1600]::NUM_CHUNKS * NUM_MODALITIES, :, :]

# Discard columns that are empty for this selection before deciding what to keep.
temporal_df = temporal_df.dropna(how='all', axis=1)

# Keep only the 'test feature N' columns of the temporal features
# (the last entry of TEMPORAL is excluded), then give them readable names.
keep = [f'test feature {i+1}' for i in TEMPORAL[:-1]]
drop = [c for c in temporal_df.columns if c not in keep]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in TEMPORAL[:-1]}
temporal_df = temporal_df.drop(columns=drop).rename(columns=mapper)

In [18]:
# Summary statistics of the renamed temporal feature columns.
temporal_df.describe()

Unnamed: 0,firing_rate,d_kl_start,d_kl_mid,jump,psd_center,der_psd_center,rise_time,unif_dist
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.079174,0.028272,0.186695,0.030599,0.017198,0.008981,0.030526,0.138036
std,0.020456,0.012518,0.028007,0.008131,0.006765,0.002544,0.008078,0.024749
min,0.050198,0.003675,0.121779,0.01144,0.005801,0.004189,0.009781,0.095522
25%,0.062962,0.020611,0.171837,0.026109,0.012938,0.006913,0.026266,0.118112
50%,0.076606,0.025474,0.186821,0.029114,0.016402,0.009021,0.030584,0.135142
75%,0.089417,0.035227,0.199467,0.032415,0.019494,0.010943,0.035349,0.157266
max,0.139811,0.059195,0.255799,0.056082,0.039176,0.014735,0.054503,0.196062


In [19]:
# Baseline rows for the same selection (temporal modality, chunk size 1600).
temporal_df_base = df_base[(df_base.chunk_size == 1600) & (df_base.modality == 'temporal')]

# `drop` and `mapper` are reused from the cell that prepared `temporal_df`.
temporal_df_base = (
    temporal_df_base
    .dropna(how='all', axis=1)
    .drop(columns=drop)
    .rename(columns=mapper)
)

for col in temporal_df.columns:
    col_test = temporal_df[col].to_numpy()
    col_base = temporal_df_base[col].to_numpy()

    # Requested order is median, lower quartile, upper quartile.
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Share of baseline values strictly above the observed median, with an add-one correction.
    n_above = (col_base > np.median(col_test)).sum()
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")

    # One-sided Mann-Whitney U: observed values greater than baseline values.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test firing_rate column is 0.07660628883976234 [0.06296248006769566, 0.0894171318889741]
Median of base firing_rate column is 0.018227757983275417 [0.010200397202313315, 0.030189876773322445]
permutation test result for feature firing_rate is p-value=0.03796203796203796
Mann-Whitney statistical test results for feature firing_rate are p-value=1.0545246031286143e-27 (statistic=47695.0)

Median of test d_kl_start column is 0.025473617569475256 [0.020611167566853734, 0.03522693911372335]
Median of base d_kl_start column is 0.018271731702568426 [0.010397586000082375, 0.03372508954062936]
permutation test result for feature d_kl_start is p-value=0.35864135864135865
Mann-Whitney statistical test results for feature d_kl_start are p-value=0.0011850384886989292 (statistic=31361.0)

Median of test d_kl_mid column is 0.1868213948624144 [0.1718374246539367, 0.19946749408257894]
Median of base d_kl_mid column is 0.018140445680699967 [0.009724587008314713, 0.031128209446907626]
permutatio

In [20]:
# Temporal features grouped into families; keys are the family names.
temporal_families = {
    'short': [
        'd_kl_start',
        'unif_dist',
        'rise_time',
    ],
    'long': [
        'd_kl_mid',
        'jump',
    ],
    'wb': [
        'firing_rate',
        'psd_center',
        'der_psd_center',
    ],
}

In [21]:
# Resolve family member names to their positions in `feature_names_org`.
temporal_families_inds = names2inds(temporal_families, feature_names_org)

# Add one '<family>_up' column per family to `temporal_df`.
for fam, fam_inds in temporal_families_inds.items():
    temporal_df[f'{fam}_up'] = get_family_imp(fam_inds, temporal_imps)

In [22]:
# Just the per-family '<family>_up' columns, in the order the families are declared.
temporal_fams_df_up = temporal_df.loc[:, [f'{fam}_up' for fam in temporal_families]]
temporal_fams_df_up.describe()

Unnamed: 0,short_up,long_up,wb_up
count,50.0,50.0,50.0
mean,0.202765,0.195539,0.093321
std,0.017966,0.014671,0.016183
min,0.165181,0.162849,0.044558
25%,0.192906,0.185664,0.084381
50%,0.202095,0.1957,0.092916
75%,0.211044,0.205718,0.107837
max,0.245569,0.229826,0.124536


In [23]:
# Per-family importance columns as arrays.
short = temporal_fams_df_up['short_up'].to_numpy()
long = temporal_fams_df_up['long_up'].to_numpy()
wb = temporal_fams_df_up['wb_up'].to_numpy()

# Pairwise Wilcoxon signed-rank tests between the temporal families.
statistic, p_val = stats.wilcoxon(short, long)
print(f"Wilcoxon statistical test results for short vs long are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(short, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(long, wb)
# This comparison is long vs wb; the label previously repeated "short vs wb".
print(f"Wilcoxon statistical test results for long vs wb are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for short vs long are p-value=0.12841263682125154 (statistic=480.0)
Wilcoxon statistical test results for short vs wb are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for short vs wb are p-value=7.556929455863566e-10 (statistic=0.0)


## Waveform (Morphological)

In [24]:
# Rows of the results table for the morphological modality at chunk size 50.
morph_df = df[(df.chunk_size == 50) & (df.modality == 'morphological')]
# NOTE(review): the start offset is 2 * NUM_CHUNKS - 1 + CHUNKS_MAP[50], whereas the
# spatial selection starts at the bare CHUNKS_MAP value - confirm this matches the
# row layout of `imps`.
morph_imps = imps[2 * NUM_CHUNKS - 1 + CHUNKS_MAP[50]::NUM_CHUNKS * NUM_MODALITIES, :, :]

# Discard columns that are empty for this selection before deciding what to keep.
morph_df = morph_df.dropna(how='all', axis=1)

# Keep only the 'test feature N' columns of the morphological features
# (the last entry of MORPHOLOGICAL is excluded), then give them readable names.
keep = [f'test feature {i+1}' for i in MORPHOLOGICAL[:-1]]
drop = [c for c in morph_df.columns if c not in keep]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in MORPHOLOGICAL[:-1]}
morph_df = morph_df.drop(columns=drop).rename(columns=mapper)

In [25]:
# Summary statistics of the renamed morphological feature columns.
morph_df.describe()

Unnamed: 0,break_measure,fwhm,get_acc,max_speed,peak2peak,trough2peak,rise_coef,smile_cry
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.003893,0.006283,0.128688,0.010599,0.103076,0.243773,0.006613,0.013425
std,0.004841,0.004801,0.049221,0.004453,0.028846,0.032315,0.006649,0.004921
min,0.000746,0.001492,0.041221,0.002396,0.048737,0.166116,0.000638,0.004688
25%,0.002014,0.003567,0.099331,0.006613,0.08277,0.231012,0.003589,0.010698
50%,0.002844,0.004832,0.117248,0.010904,0.108555,0.247573,0.004814,0.013057
75%,0.00425,0.00748,0.154144,0.013966,0.120835,0.26051,0.008101,0.015307
max,0.035268,0.025898,0.244667,0.019687,0.171023,0.306495,0.042035,0.027637


In [26]:
# Baseline rows for the same selection (morphological modality, chunk size 50).
morph_df_base = df_base[(df_base.chunk_size == 50) & (df_base.modality == 'morphological')]

# `drop` and `mapper` are reused from the cell that prepared `morph_df`.
morph_df_base = (
    morph_df_base
    .dropna(how='all', axis=1)
    .drop(columns=drop)
    .rename(columns=mapper)
)

for col in morph_df.columns:
    col_test = morph_df[col].to_numpy()
    col_base = morph_df_base[col].to_numpy()

    # Requested order is median, lower quartile, upper quartile.
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Share of baseline values strictly above the observed median, with an add-one correction.
    n_above = (col_base > np.median(col_test)).sum()
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")

    # One-sided Mann-Whitney U: observed values greater than baseline values.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test break_measure column is 0.0028440496726888243 [0.0020136104851318395, 0.004249580224414094]
Median of base break_measure column is 0.009240827385650009 [0.004062466282103498, 0.025323666137223377]
permutation test result for feature break_measure is p-value=0.8381618381618382
Mann-Whitney statistical test results for feature break_measure are p-value=0.9999999999998883 (statistic=9653.0)

Median of test fwhm column is 0.004831626777964015 [0.0035670169185989154, 0.007479833582739025]
Median of base fwhm column is 0.0065137007662879175 [0.0027638878136018313, 0.017682259300179973]
permutation test result for feature fwhm is p-value=0.5874125874125874
Mann-Whitney statistical test results for feature fwhm are p-value=0.9515767897470878 (statistic=21526.0)

Median of test get_acc column is 0.11724804673665778 [0.09933110131114307, 0.15414351551508793]
Median of base get_acc column is 0.009126770281606453 [0.003979373029393301, 0.02428652149704536]
permutation test result fo

In [27]:
# Morphological features grouped into families; keys are the family names.
morph_families = {
    'WF': [
        'trough2peak',
        'peak2peak',
        'fwhm',
        'rise_coef',
    ],
    'first': [
        'max_speed',
    ],
    'second': [
        'break_measure',
        'smile_cry',
        'get_acc',
    ],
}

In [28]:
# Resolve family member names to their positions in `feature_names_org`.
morph_families_inds = names2inds(morph_families, feature_names_org)

# Add one '<family>_up' column per family to `morph_df`.
for fam, fam_inds in morph_families_inds.items():
    morph_df[f'{fam}_up'] = get_family_imp(fam_inds, morph_imps)

In [29]:
# Just the per-family '<family>_up' columns, in the order the families are declared.
morph_fams_df_up = morph_df.loc[:, [f'{fam}_up' for fam in morph_families]]
morph_fams_df_up.describe()

Unnamed: 0,WF_up,first_up,second_up
count,50.0,50.0,50.0
mean,0.344,0.01068,0.146866
std,0.014829,0.004842,0.018319
min,0.309962,0.005298,0.107491
25%,0.335933,0.007825,0.137348
50%,0.344277,0.009138,0.145472
75%,0.3519,0.011226,0.154933
max,0.38157,0.026005,0.195758


In [30]:
# Pull the three family columns out as arrays.
org, first, second = (
    morph_fams_df_up[name].to_numpy()
    for name in ('WF_up', 'first_up', 'second_up')
)

# Wilcoxon signed-rank tests of the WF family against each of the other two.
statistic, p_val = stats.wilcoxon(org, first)
print(f"Wilcoxon statistical test results for WF vs first are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(org, second)
print(f"Wilcoxon statistical test results for WF vs second are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for WF vs first are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for WF vs second are p-value=7.556929455863566e-10 (statistic=0.0)


## Moments

In [31]:
# From here on `df` / `df_base` refer to the moments tables instead of the combined ones.
PATH = 'ml/results_rf_moments.csv'
BASE = 'ml/shuffle_results/060922_shuffles_moments.csv'

# Results table, limited to rows whose `restriction` is 'complete'.
df = pd.read_csv(PATH, index_col=0)
df = df.loc[df['restriction'] == 'complete']

# Baseline table (read from the shuffle-results file), same row filter.
df_base = pd.read_csv(BASE, index_col=0)
df_base = df_base.loc[df_base['restriction'] == 'complete']

In [32]:
# Display names for the first NUM_MOMENTS + 1 'test feature N' columns of the
# moments tables; position i labels 'test feature {i+1}'.
moments_names = ['Original', 'Mean', 'SD', 'Q25', 'Median', 'Q75']

### Spatial

In [33]:
# Rows of the moments table for the spatial modality at chunk size 25.
spatial_df = df[(df.chunk_size == 25) & (df.modality == 'spatial')]

# Keep the first NUM_MOMENTS + 1 'test feature N' columns and label them with `moments_names`.
keep = [f'test feature {i+1}' for i in range(NUM_MOMENTS + 1)]
drop = [c for c in spatial_df.columns if c not in keep]
mapper = {f'test feature {i+1}': moments_names[i] for i in range(NUM_MOMENTS + 1)}
spatial_df = spatial_df.drop(columns=drop).rename(columns=mapper)

In [34]:
# Summary statistics of the spatial moment columns.
spatial_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.003166,0.05162,0.162757,0.067698,0.038598,0.060491
std,0.002939,0.009687,0.017315,0.014564,0.008582,0.013721
min,0.000779,0.030074,0.130779,0.039486,0.021126,0.032141
25%,0.001624,0.045045,0.147828,0.057077,0.033168,0.051889
50%,0.002438,0.05201,0.162075,0.067352,0.037001,0.059945
75%,0.003106,0.0579,0.173901,0.075903,0.043856,0.06851
max,0.016771,0.073967,0.200541,0.120412,0.066409,0.093201


In [35]:
# Baseline rows for the same selection (spatial modality, chunk size 25).
spatial_df_base = df_base[df_base.chunk_size == 25]
spatial_df_base = spatial_df_base[spatial_df_base.modality == 'spatial']

# `drop` and `mapper` are reused from the cell that prepared `spatial_df`.
spatial_df_base = spatial_df_base.drop(columns=drop)
spatial_df_base = spatial_df_base.dropna(how='all', axis=1)
spatial_df_base = spatial_df_base.rename(columns=mapper)

for col in spatial_df.columns:
    col_test = spatial_df[col].to_numpy()
    col_base = spatial_df_base[col].to_numpy()

    # Requested order is median, lower quartile, upper quartile.
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Share of baseline values strictly above the observed median, with an add-one correction.
    p_val = (1 + (col_base > np.median(col_test)).sum()) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")
    # One-sided Mann-Whitney U against the baseline. The temporal and waveform
    # moments cells run this test and this cell's recorded output contains its
    # lines, so it is restored here.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.0024379321464613954 [0.0016241253911328513, 0.0031055695932484108]
Median of base Original column is 0.013201296567550942 [0.006692550888206405, 0.08651180730011124]
permutation test result for feature Original is p-value=0.977022977022977
Mann-Whitney statistical test results for feature Original are p-value=1.0 (statistic=3639.0)

Median of test Mean column is 0.05201005082433725 [0.045045248894187706, 0.05789950900895412]
Median of base Mean column is 0.004507693973554847 [0.002392087681368283, 0.014452731550075303]
permutation test result for feature Mean is p-value=0.026973026973026972
Mann-Whitney statistical test results for feature Mean are p-value=1.7826384756247957e-29 (statistic=48463.0)

Median of test SD column is 0.16207471437127263 [0.14782810729137247, 0.17390097158750917]
Median of base SD column is 0.004951755443815934 [0.0025024244222704474, 0.015921328569108655]
permutation test result for feature SD is p-value=0.00099900099900099

In [36]:
# Mann-Whitney U (default alternative) of every other column against 'Original'.
for col in spatial_df.columns:
    if col != 'Original':
        col_test = spatial_df[col].to_numpy()
        col_test_original = spatial_df['Original'].to_numpy()

        statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
        print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
        print()

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature SD are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



In [37]:
# Mann-Whitney U (default alternative) of every other column against 'SD'.
for col in spatial_df.columns:
    if col != 'SD':
        col_test = spatial_df[col].to_numpy()
        col_test_sd = spatial_df['SD'].to_numpy()

        statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
        print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
        print()

Mann-Whitney statistical test results for feature Original are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



### Spike-timing

In [38]:
# Rows of the moments table for the temporal modality at chunk size 1600.
temporal_df = df[(df.chunk_size == 1600) & (df.modality == 'temporal')]

# Keep the first NUM_MOMENTS + 1 'test feature N' columns and label them with `moments_names`.
keep = [f'test feature {i+1}' for i in range(NUM_MOMENTS + 1)]
drop = [c for c in temporal_df.columns if c not in keep]
mapper = {f'test feature {i+1}': moments_names[i] for i in range(NUM_MOMENTS + 1)}
temporal_df = temporal_df.drop(columns=drop).rename(columns=mapper)

In [39]:
# Summary statistics of the temporal moment columns.
temporal_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.051028,0.088946,0.030351,0.097965,0.093147,0.10498
std,0.013718,0.015345,0.006604,0.018389,0.021444,0.020827
min,0.021799,0.030305,0.012879,0.0187,0.040656,0.072726
25%,0.042237,0.079957,0.026717,0.089166,0.084197,0.091208
50%,0.049169,0.090793,0.031512,0.100306,0.089799,0.098839
75%,0.059225,0.099308,0.033688,0.108278,0.101627,0.117467
max,0.100189,0.126585,0.04655,0.138924,0.190443,0.159987


In [40]:
# Baseline rows for the same selection (temporal modality, chunk size 1600).
temporal_df_base = df_base[(df_base.chunk_size == 1600) & (df_base.modality == 'temporal')]

# `drop` and `mapper` are reused from the cell that prepared `temporal_df`.
temporal_df_base = (
    temporal_df_base
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

for col in temporal_df.columns:
    col_test = temporal_df[col].to_numpy()
    col_base = temporal_df_base[col].to_numpy()

    # Requested order is median, lower quartile, upper quartile.
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Share of baseline values strictly above the observed median, with an add-one correction.
    n_above = (col_base > np.median(col_test)).sum()
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")

    # One-sided Mann-Whitney U: observed values greater than baseline values.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.049168597585393284 [0.04223717851421639, 0.05922549293105055]
Median of base Original column is 0.053502595011525815 [0.02704454280242085, 0.10156395746475844]
permutation test result for feature Original is p-value=0.5324675324675324
Mann-Whitney statistical test results for feature Original are p-value=0.7222186599695475 (statistic=23767.0)

Median of test Mean column is 0.09079308870575285 [0.07995700909357352, 0.0993076709215153]
Median of base Mean column is 0.015207569231723467 [0.00886960664687627, 0.026736048962166636]
permutation test result for feature Mean is p-value=0.00999000999000999
Mann-Whitney statistical test results for feature Mean are p-value=3.601000410925404e-31 (statistic=49175.0)

Median of test SD column is 0.03151209343431375 [0.02671693814152817, 0.03368750736152391]
Median of base SD column is 0.018598056779681352 [0.010986925447914716, 0.032985411057906006]
permutation test result for feature SD is p-value=0.264735264735

In [41]:
# Mann-Whitney U (default alternative) of every other column against 'Original'.
for col in temporal_df.columns:
    if col != 'Original':
        col_test = temporal_df[col].to_numpy()
        col_test_original = temporal_df['Original'].to_numpy()

        statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
        print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
        print()

Mann-Whitney statistical test results for feature Mean are p-value=1.5112130159282838e-15 (statistic=105.0)

Mann-Whitney statistical test results for feature SD are p-value=4.242867088798065e-14 (statistic=167.0)

Mann-Whitney statistical test results for feature Q25 are p-value=4.432167636937588e-16 (statistic=83.0)

Mann-Whitney statistical test results for feature Median are p-value=1.884188348687208e-15 (statistic=109.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.102916316646988e-17 (statistic=30.0)



In [42]:
# Mann-Whitney U (default alternative) of every other column against 'SD'.
for col in temporal_df.columns:
    if col != 'SD':
        col_test = temporal_df[col].to_numpy()
        col_test_sd = temporal_df['SD'].to_numpy()

        statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
        print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
        print()

Mann-Whitney statistical test results for feature Original are p-value=4.242867088798065e-14 (statistic=167.0)

Mann-Whitney statistical test results for feature Mean are p-value=2.102916316646988e-17 (statistic=30.0)

Mann-Whitney statistical test results for feature Q25 are p-value=5.3516117196336476e-17 (statistic=46.0)

Mann-Whitney statistical test results for feature Median are p-value=4.230977759222343e-18 (statistic=3.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



### Waveform

In [43]:
# Rows of the moments table for the morphological modality at chunk size 50.
wf_df = df[(df.chunk_size == 50) & (df.modality == 'morphological')]

# Keep the first NUM_MOMENTS + 1 'test feature N' columns and label them with `moments_names`.
keep = [f'test feature {i+1}' for i in range(NUM_MOMENTS + 1)]
drop = [c for c in wf_df.columns if c not in keep]
mapper = {f'test feature {i+1}': moments_names[i] for i in range(NUM_MOMENTS + 1)}
wf_df = wf_df.drop(columns=drop).rename(columns=mapper)

In [44]:
# Summary statistics of the waveform moment columns.
wf_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.023439,0.117799,0.036055,0.119735,0.101871,0.087564
std,0.008672,0.027136,0.015113,0.033335,0.02656,0.024249
min,0.002561,0.075096,0.016546,0.051519,0.036409,0.019827
25%,0.018188,0.100066,0.027242,0.102397,0.088383,0.071054
50%,0.022379,0.116444,0.033952,0.117159,0.100804,0.088183
75%,0.030186,0.127203,0.04106,0.136353,0.112628,0.100758
max,0.043224,0.205601,0.096509,0.232663,0.2025,0.154632


In [45]:
# Baseline rows for the same selection (morphological modality, chunk size 50).
wf_df_base = df_base[(df_base.chunk_size == 50) & (df_base.modality == 'morphological')]

# `drop` and `mapper` are reused from the cell that prepared `wf_df`.
wf_df_base = (
    wf_df_base
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

for col in wf_df.columns:
    col_test = wf_df[col].to_numpy()
    col_base = wf_df_base[col].to_numpy()

    # Requested order is median, lower quartile, upper quartile.
    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Share of baseline values strictly above the observed median, with an add-one correction.
    n_above = (col_base > np.median(col_test)).sum()
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")

    # One-sided Mann-Whitney U: observed values greater than baseline values.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.02237877001679314 [0.018188308743880364, 0.03018636968651768]
Median of base Original column is 0.022678770922253528 [0.009400770941035674, 0.0932632790429471]
permutation test result for feature Original is p-value=0.5054945054945055
Mann-Whitney statistical test results for feature Original are p-value=0.7214169026685012 (statistic=23772.0)

Median of test Mean column is 0.11644411591219558 [0.10006621703299277, 0.12720349345496607]
Median of base Mean column is 0.007549348264375953 [0.0040876629751419175, 0.01883236928471674]
permutation test result for feature Mean is p-value=0.000999000999000999
Mann-Whitney statistical test results for feature Mean are p-value=3.96069172755048e-33 (statistic=49973.0)

Median of test SD column is 0.03395225415721932 [0.0272417678041492, 0.04106005209374072]
Median of base SD column is 0.008815783052774927 [0.004399584474178203, 0.020964493520139207]
permutation test result for feature SD is p-value=0.13686313686

In [46]:
# Mann-Whitney U test of every wf_df column against the 'Original' column.
# No `alternative` is passed, so SciPy's default applies (two-sided in current releases).
for col in wf_df.columns:
    if col != 'Original':
        statistic, p_val = stats.mannwhitneyu(
            wf_df[col].to_numpy(),
            wf_df['Original'].to_numpy(),
        )
        # One result line followed by a blank separator line.
        print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})", end="\n\n")

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature SD are p-value=1.2691302843148297e-06 (statistic=567.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.752027398590286e-18 (statistic=1.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.2301221981921627e-17 (statistic=31.0)



In [47]:
# Mann-Whitney U test of every wf_df column against the 'SD' column.
# No `alternative` is passed, so SciPy's default applies (two-sided in current releases).
for col in wf_df.columns:
    if col != 'SD':
        statistic, p_val = stats.mannwhitneyu(
            wf_df[col].to_numpy(),
            wf_df['SD'].to_numpy(),
        )
        # One result line followed by a blank separator line.
        print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})", end="\n\n")

Mann-Whitney statistical test results for feature Original are p-value=1.2691302843148297e-06 (statistic=567.0)

Mann-Whitney statistical test results for feature Mean are p-value=5.37706962437211e-18 (statistic=7.0)

Mann-Whitney statistical test results for feature Q25 are p-value=1.1658290633802667e-17 (statistic=20.0)

Mann-Whitney statistical test results for feature Median are p-value=6.746610677097982e-17 (statistic=50.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.766871909311284e-15 (statistic=116.0)



## Events

In [48]:
PATH = 'ml/results_rf_events.csv'
BASE = 'ml/shuffle_results/060922_shuffles_events.csv'

# Read both CSV files (first column becomes the index) and keep only the rows
# whose `restriction` equals 'complete'. This rebinds the notebook-wide df / df_base.
df = pd.read_csv(PATH, index_col=0).loc[lambda table: table.restriction == 'complete']
df_base = pd.read_csv(BASE, index_col=0).loc[lambda table: table.restriction == 'complete']

In [49]:
# Display names assigned, in this order, to the columns 'test feature 1'..'test feature 3'
# when the events frames are renamed.
# NOTE(review): the saved describe() output in this notebook labels the third column 'SZC',
# whereas this list says 'SMC' — confirm the intended label and re-run the cells.
events_names = ['FMC', 'NEG', 'SMC']

In [50]:
# Column renaming: 'test feature 1..N' -> the corresponding entry of events_names.
mapper = {f'test feature {i+1}': name for i, name in enumerate(events_names)}
keep = list(mapper)

# Every other column of the results table is discarded.
drop = [c for c in df.columns if c not in keep]

# Rows recorded at chunk size 25, reduced to the kept columns and renamed.
events_df = df[df.chunk_size == 25].drop(columns=drop).rename(columns=mapper)

In [51]:
# Per-column summary statistics of events_df (count, mean, std, min, quartiles, max).
events_df.describe()

Unnamed: 0,FMC,NEG,SZC
count,50.0,50.0,50.0
mean,0.266625,0.031404,0.101886
std,0.026182,0.013472,0.02124
min,0.217505,0.016626,0.061487
25%,0.249482,0.023593,0.088098
50%,0.264015,0.028365,0.098318
75%,0.290299,0.036144,0.112656
max,0.314895,0.101495,0.155026


In [52]:
# Baseline rows at chunk size 25, reduced with the same `drop` list and renamed
# with the same `mapper` that are currently in scope.
events_df_base = (
    df_base[df_base.chunk_size == 25]
    .drop(columns=drop)
    .dropna(how='all', axis=1)   # discard columns that are entirely NaN
    .rename(columns=mapper)
)

# Per column: print median [Q25, Q75] for test and baseline values, then two p-values.
for col in events_df.columns:
    col_test, col_base = events_df[col].to_numpy(), events_df_base[col].to_numpy()

    test_prec25, test_median, test_prec75 = np.percentile(col_test, [25, 50, 75])
    base_prec25, base_median, base_prec75 = np.percentile(col_base, [25, 50, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Empirical p-value: number of baseline values strictly above the test median,
    # with 1 added to both the numerator and the denominator.
    n_above = (col_base > np.median(col_test)).sum()
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")

    # One-sided Mann-Whitney U: alternative is that test values are greater than baseline.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test FMC column is 0.2640145777772147 [0.24948210271719043, 0.29029947622793995]
Median of base FMC column is 0.009126464982540054 [0.004637739780587026, 0.03716680295713735]
permutation test result for feature FMC is p-value=0.000999000999000999
Mann-Whitney statistical test results for feature FMC are p-value=3.391594415816857e-33 (statistic=50000.0)

Median of test NEG column is 0.02836510355834099 [0.02359318104111803, 0.03614357998901153]
Median of base NEG column is 0.00844442097572114 [0.004522662895609392, 0.03598541527226375]
permutation test result for feature NEG is p-value=0.2967032967032967
Mann-Whitney statistical test results for feature NEG are p-value=5.618707881463946e-08 (statistic=36103.0)

Median of test SZC column is 0.09831774002650608 [0.08809822171523954, 0.11265576619549275]
Median of base SZC column is 0.009075366668642653 [0.0047039643926075105, 0.038915203448341165]
permutation test result for feature SZC is p-value=0.11188811188811189
Mann-Whitne

## Spatial families

In [63]:
PATH = 'ml/results_rf_families.csv'
BASE = 'ml/shuffle_results/060922_shuffles_families.csv'

# Results table: first CSV column is the index; keep only rows whose
# `restriction` equals 'complete'. (Read once — the cell previously repeated
# this read-and-filter step twice in a row.)
df = pd.read_csv(PATH, index_col=0)
df = df[df.restriction == 'complete']

# Baseline table, loaded and filtered the same way.
df_base = pd.read_csv(BASE, index_col=0)
df_base = df_base[df_base.restriction == 'complete']

In [64]:
# Display names assigned, in this order, to the columns 'test feature 1'..'test feature 3'
# when the family frames are renamed.
# NOTE: the identifier is misspelled ("familiy"); it is kept as-is because the
# following cells reference it under this name.
familiy_names = ['value-based', 'time-based', 'graph-based']

In [65]:
# Column renaming: 'test feature 1..N' -> the corresponding entry of familiy_names.
mapper = {f'test feature {i+1}': name for i, name in enumerate(familiy_names)}
keep = list(mapper)

# Every other column of the results table is discarded.
drop = [c for c in df.columns if c not in keep]

# Rows recorded at chunk size 25, reduced to the kept columns and renamed.
family_df = df[df.chunk_size == 25].drop(columns=drop).rename(columns=mapper)

In [66]:
# Per-column summary statistics of family_df (count, mean, std, min, quartiles, max).
family_df.describe()

Unnamed: 0,value-based,time-based,graph-based
count,50.0,50.0,50.0
mean,0.029807,0.246163,0.114888
std,0.01337,0.02655,0.018129
min,0.010543,0.201887,0.082147
25%,0.020788,0.229097,0.101922
50%,0.028101,0.246022,0.110769
75%,0.036145,0.266558,0.129163
max,0.071799,0.295116,0.166109


In [68]:
# Baseline rows at chunk size 25, reduced with the same `drop` list and renamed
# with the same `mapper` that are currently in scope.
family_df_base = (
    df_base[df_base.chunk_size == 25]
    .drop(columns=drop)
    .dropna(how='all', axis=1)   # discard columns that are entirely NaN
    .rename(columns=mapper)
)

# Per column: print median [Q25, Q75] for test and baseline values, then two p-values.
for col in family_df.columns:
    col_test, col_base = family_df[col].to_numpy(), family_df_base[col].to_numpy()

    test_prec25, test_median, test_prec75 = np.percentile(col_test, [25, 50, 75])
    base_prec25, base_median, base_prec75 = np.percentile(col_base, [25, 50, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    # Empirical p-value: number of baseline values strictly above the test median,
    # with 1 added to both the numerator and the denominator.
    n_above = (col_base > np.median(col_test)).sum()
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val}")

    # One-sided Mann-Whitney U: alternative is that test values are greater than baseline.
    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test value-based column is 0.02810104182781731 [0.02078799775908482, 0.03614521310918066]
Median of base value-based column is 0.005555251597678045 [0.0029301770734055073, 0.02253699536279422]
permutation test result for feature value-based is p-value=0.19080919080919082
Mann-Whitney statistical test results for feature value-based are p-value=6.997842776115285e-13 (statistic=39825.0)

Median of test time-based column is 0.24602169094216159 [0.22909706824150636, 0.26655822293180825]
Median of base time-based column is 0.01042183245340556 [0.005438440337279404, 0.04352825954852603]
permutation test result for feature time-based is p-value=0.000999000999000999
Mann-Whitney statistical test results for feature time-based are p-value=3.391594415816857e-33 (statistic=50000.0)

Median of test graph-based column is 0.11076902332655776 [0.10192201526215235, 0.12916319898746015]
Median of base graph-based column is 0.013385833194657198 [0.007098017791588949, 0.06253447813955756]
permu