In [1]:
import sys
import pandas as pd
import numpy as np
import scipy.stats as stats

from constants import MORPHOLOGICAL, TEMPORAL, SPATIAL, feature_names_org

In [2]:
# Number of moment columns used in the "Moments" section; that section selects
# NUM_MOMENTS + 1 feature columns (the extra one is labelled 'Original').
NUM_MOMENTS = 5

In [3]:
# Raise pandas' display limits so wide/long tables are shown without truncation
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [None]:
# Input files: the results table (PATH) and its chance-balanced counterpart (BASE)
PATH = 'ml/results_rf_combined.csv'
BASE = 'ml/results_rf_combined_chance_balanced.csv'

# Importance array used later by get_family_imp
imps = np.load('ml/raw_imps_rf_290322_fix_imp.npy')

# Only rows whose restriction is 'complete' are analysed
df = pd.read_csv(PATH, index_col=0).loc[lambda frame: frame.restriction == 'complete']
df_base = pd.read_csv(BASE, index_col=0).loc[lambda frame: frame.restriction == 'complete']

In [None]:
def get_family_imp(inds, arr):
    """Return the mean absolute summed value of a feature family per leading entry.

    Parameters
    ----------
    inds : sequence of int
        Positions along the last axis of ``arr`` that make up the family.
    arr : np.ndarray
        3-D array; the family members are selected along its last axis.

    Returns
    -------
    np.ndarray
        1-D array of length ``arr.shape[0]``: for each leading entry, the mean
        over the second axis of ``|sum over inds|``, ignoring NaN entries.
    """
    # Sum the family members, then take the magnitude -> shape (arr.shape[0], arr.shape[1])
    arr_m = np.abs(arr[:, :, inds].sum(axis=2))
    # Row-wise NaN-aware mean. Filtering NaNs with a boolean mask and reshaping
    # only works when every row has the same number of NaNs; otherwise values
    # end up in the wrong row (or the reshape fails).
    fam_imps = np.nanmean(arr_m, axis=1)
    return fam_imps

def names2inds(d_names, n2i_map):
    """Convert a mapping of family -> names into a mapping of family -> indices.

    Each name is replaced by its position in ``n2i_map`` (via ``.index``), so a
    name missing from ``n2i_map`` raises ``ValueError``.
    """
    return {
        family: [n2i_map.index(member) for member in d_names[family]]
        for family in d_names
    }

## Spatial

In [None]:
# Rows of the spatial modality at chunk size 25
spatial_df = df[(df.chunk_size == 25) & (df.modality == 'spatial')]
# NOTE(review): the 1::3*8 stride presumably picks the entries of `imps` that
# correspond to these rows - confirm against the code that wrote the array.
spatial_imps = imps[1::3 * 8, :, :]

# Remove columns with no values, then keep only the spatial feature columns
spatial_df = spatial_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df.columns if c not in keep]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in SPATIAL[:-1]}

# Swap the generic 'test feature N' labels for the feature names
spatial_df = spatial_df.drop(columns=drop).rename(columns=mapper)

In [None]:
# Summary statistics of the renamed spatial feature columns
spatial_df.describe()

In [None]:
# Same row selection, applied to the baseline table
spatial_df_base = df_base[(df_base.chunk_size == 25) & (df_base.modality == 'spatial')]

keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df_base.columns if c not in keep]

spatial_df_base = (
    spatial_df_base
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# Per feature: median [Q25, Q75] of both tables and a one-sided Mann-Whitney U
# test (alternative: test values are greater than baseline values)
for col in spatial_df:
    col_test = spatial_df[col].to_numpy()
    col_base = spatial_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

In [None]:
# Spatial feature names grouped into three families
spatial_families = {
    'value-based': ['spatial_dispersion_count', 'spatial_dispersion_sd', 'spatial_dispersion_area'],
    'time-based': ['dep_red', 'dep_sd', 'fzc_red', 'fzc_sd', 'szc_red', 'szc_sd'],
    'graph-based': [
        'dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path',
        'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path',
        'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path',
    ],
}

# One new column per family: the row-wise sum of its member columns
for fam, members in spatial_families.items():
    spatial_df[fam] = spatial_df[members].sum(axis=1)

In [None]:
# Keep only the family-level sum columns and summarise them
spatial_fams_df = spatial_df[list(spatial_families)]
spatial_fams_df.describe()

In [None]:
# Positions of each family's members within feature_names_org
spatial_families_inds = names2inds(spatial_families, feature_names_org)

# '<family>_up' columns: family value computed from the importance array
for fam, fam_inds in spatial_families_inds.items():
    spatial_df[f'{fam}_up'] = get_family_imp(fam_inds, spatial_imps)

In [None]:
# Keep only the '<family>_up' columns and summarise them
up_columns = [f'{fam}_up' for fam in spatial_families]
spatial_fams_df_up = spatial_df[up_columns]
spatial_fams_df_up.describe()

In [None]:
# Family-level sums as arrays
value_based, time_based, graph_based = (
    spatial_fams_df[name].to_numpy() for name in ('value-based', 'time-based', 'graph-based')
)

# Wilcoxon signed-rank test for each pair of families
# (the printed labels use "spd" for value-based and "time lag" for time-based)
statistic, p_val = stats.wilcoxon(value_based, time_based)
print(f"Wilcoxon statistical test results for spd vs time lag are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(value_based, graph_based)
print(f"Wilcoxon statistical test results for spd vs graph are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(time_based, graph_based)
print(f"Wilcoxon statistical test results for time lag vs graph are p-value={p_val} (statistic={statistic})")

In [None]:
# '<family>_up' columns as arrays
value_based, time_based, graph_based = (
    spatial_fams_df_up[name].to_numpy()
    for name in ('value-based_up', 'time-based_up', 'graph-based_up')
)

# Wilcoxon signed-rank test for each pair of families
# (the printed labels use "spd" for value-based and "time lag" for time-based)
statistic, p_val = stats.wilcoxon(value_based, time_based)
print(f"Wilcoxon statistical test results for spd vs time lag are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(value_based, graph_based)
print(f"Wilcoxon statistical test results for spd vs graph are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(time_based, graph_based)
print(f"Wilcoxon statistical test results for time lag vs graph are p-value={p_val} (statistic={statistic})")

In [None]:
# The same time/graph features regrouped by their name prefix (dep / fzc / szc)
event_families = {
    'dep': ['dep_red', 'dep_sd', 'dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path'],
    'fzc': ['fzc_red', 'fzc_sd', 'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path'],
    'szc': ['szc_red', 'szc_sd', 'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path'],
}

# One new column per group: the row-wise sum of its member columns
for fam, members in event_families.items():
    spatial_df[fam] = spatial_df[members].sum(axis=1)

In [None]:
# Keep only the per-event sum columns and summarise them
event_fams_df = spatial_df[list(event_families)]
event_fams_df.describe()

In [None]:
# Positions of each group's members within feature_names_org
event_families_inds = names2inds(event_families, feature_names_org)

# '<group>_up' columns: group value computed from the importance array
for fam, fam_inds in event_families_inds.items():
    spatial_df[f'{fam}_up'] = get_family_imp(fam_inds, spatial_imps)

In [None]:
# Keep only the '<group>_up' columns and summarise them
up_columns = [f'{fam}_up' for fam in event_families]
event_fams_df_up = spatial_df[up_columns]
event_fams_df_up.describe()

In [None]:
# Per-event sums as arrays
dep_event, fzc_event, szc_event = (
    event_fams_df[name].to_numpy() for name in ('dep', 'fzc', 'szc')
)

# Wilcoxon signed-rank test for each pair of event groups
statistic, p_val = stats.wilcoxon(dep_event, fzc_event)
print(f"Wilcoxon statistical test results for dep vs fzc are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(dep_event, szc_event)
print(f"Wilcoxon statistical test results for dep vs szc are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(fzc_event, szc_event)
print(f"Wilcoxon statistical test results for fzc vs szc are p-value={p_val} (statistic={statistic})")

In [None]:
# '<group>_up' columns as arrays
dep_event, fzc_event, szc_event = (
    event_fams_df_up[name].to_numpy() for name in ('dep_up', 'fzc_up', 'szc_up')
)

# Wilcoxon signed-rank test for each pair of event groups
statistic, p_val = stats.wilcoxon(dep_event, fzc_event)
print(f"Wilcoxon statistical test results for dep vs fzc are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(dep_event, szc_event)
print(f"Wilcoxon statistical test results for dep vs szc are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(fzc_event, szc_event)
print(f"Wilcoxon statistical test results for fzc vs szc are p-value={p_val} (statistic={statistic})")

## Temporal

In [None]:
# Rows of the temporal modality at chunk size 1600
temporal_df = df[(df.chunk_size == 1600) & (df.modality == 'temporal')]
# NOTE(review): the 14::3*8 stride presumably picks the entries of `imps` that
# correspond to these rows - confirm against the code that wrote the array.
temporal_imps = imps[14::3 * 8, :, :]

# Remove columns with no values, then keep only the temporal feature columns
temporal_df = temporal_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in TEMPORAL[:-1]]
drop = [c for c in temporal_df.columns if c not in keep]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in TEMPORAL[:-1]}

# Swap the generic 'test feature N' labels for the feature names
temporal_df = temporal_df.drop(columns=drop).rename(columns=mapper)

In [None]:
# Summary statistics of the renamed temporal feature columns
temporal_df.describe()

In [None]:
# Baseline rows for the temporal modality at chunk size 1600
temporal_df_base = df_base[df_base.chunk_size == 1600]
temporal_df_base = temporal_df_base[temporal_df_base.modality == 'temporal']

# Build the drop list from the baseline frame's own columns (as the spatial
# section does). Reusing the list derived from temporal_df after its dropna
# can raise KeyError when the two frames do not lose the same all-NaN columns.
keep = [f'test feature {i+1}' for i in TEMPORAL[:-1]]
drop = [c for c in temporal_df_base.columns if c not in keep]

temporal_df_base = temporal_df_base.drop(columns=drop)
temporal_df_base = temporal_df_base.dropna(how='all', axis=1)
temporal_df_base = temporal_df_base.rename(columns=mapper)

# Per feature: median [Q25, Q75] of both tables and a one-sided Mann-Whitney U
# test (alternative: test values are greater than baseline values)
for col in temporal_df.columns:
    col_test = temporal_df[col].to_numpy()
    col_base = temporal_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

In [None]:
# Temporal feature names grouped into three families
temporal_families = {
    'short': ['d_kl_start', 'unif_dist', 'rise_time'],
    'long': ['d_kl_mid', 'jump'],
    'wb': ['firing_rate', 'psd_center', 'der_psd_center'],
}

# One new column per family: the row-wise sum of its member columns
for fam, members in temporal_families.items():
    temporal_df[fam] = temporal_df[members].sum(axis=1)

In [None]:
# Keep only the family-level sum columns and summarise them
temporal_fams_df = temporal_df[list(temporal_families)]
temporal_fams_df.describe()

In [None]:
# Positions of each family's members within feature_names_org
temporal_families_inds = names2inds(temporal_families, feature_names_org)

# '<family>_up' columns: family value computed from the importance array
for fam, fam_inds in temporal_families_inds.items():
    temporal_df[f'{fam}_up'] = get_family_imp(fam_inds, temporal_imps)

In [None]:
# Keep only the '<family>_up' columns and summarise them
up_columns = [f'{fam}_up' for fam in temporal_families]
temporal_fams_df_up = temporal_df[up_columns]
temporal_fams_df_up.describe()

In [None]:
# Family-level sums as arrays
short = temporal_fams_df['short'].to_numpy()
long = temporal_fams_df['long'].to_numpy()
wb = temporal_fams_df['wb'].to_numpy()

# Wilcoxon signed-rank test for each pair of families
statistic, p_val = stats.wilcoxon(short, long)
print(f"Wilcoxon statistical test results for short vs long are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(short, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(long, wb)
# This comparison is long vs wb; the label previously repeated "short vs wb"
print(f"Wilcoxon statistical test results for long vs wb are p-value={p_val} (statistic={statistic})")

In [None]:
# '<family>_up' columns as arrays
short = temporal_fams_df_up['short_up'].to_numpy()
long = temporal_fams_df_up['long_up'].to_numpy()
wb = temporal_fams_df_up['wb_up'].to_numpy()

# Wilcoxon signed-rank test for each pair of families
statistic, p_val = stats.wilcoxon(short, long)
print(f"Wilcoxon statistical test results for short vs long are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(short, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(long, wb)
# This comparison is long vs wb; the label previously repeated "short vs wb"
print(f"Wilcoxon statistical test results for long vs wb are p-value={p_val} (statistic={statistic})")

## WF (Morphological)

In [None]:
# Rows of the morphological modality at chunk size 50
morph_df = df[(df.chunk_size == 50) & (df.modality == 'morphological')]
# NOTE(review): the 22::3*8 stride presumably picks the entries of `imps` that
# correspond to these rows - confirm against the code that wrote the array.
morph_imps = imps[22::3 * 8, :, :]

# Remove columns with no values, then keep only the morphological feature columns
morph_df = morph_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in MORPHOLOGICAL[:-1]]
drop = [c for c in morph_df.columns if c not in keep]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in MORPHOLOGICAL[:-1]}

# Swap the generic 'test feature N' labels for the feature names
morph_df = morph_df.drop(columns=drop).rename(columns=mapper)

In [None]:
# Summary statistics of the renamed morphological feature columns
morph_df.describe()

In [None]:
# Baseline rows for the morphological modality at chunk size 50
morph_df_base = df_base[df_base.chunk_size == 50]
morph_df_base = morph_df_base[morph_df_base.modality == 'morphological']

# Build the drop list from the baseline frame's own columns (as the spatial
# section does). Reusing the list derived from morph_df after its dropna can
# raise KeyError when the two frames do not lose the same all-NaN columns.
keep = [f'test feature {i+1}' for i in MORPHOLOGICAL[:-1]]
drop = [c for c in morph_df_base.columns if c not in keep]

morph_df_base = morph_df_base.drop(columns=drop)
morph_df_base = morph_df_base.dropna(how='all', axis=1)
morph_df_base = morph_df_base.rename(columns=mapper)

# Per feature: median [Q25, Q75] of both tables and a one-sided Mann-Whitney U
# test (alternative: test values are greater than baseline values)
for col in morph_df.columns:
    col_test = morph_df[col].to_numpy()
    col_base = morph_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

In [None]:
# Morphological feature names grouped into three families
morph_families = {
    'WF': ['trough2peak', 'peak2peak', 'fwhm', 'rise_coef'],
    'first': ['max_speed'],
    'second': ['break_measure', 'smile_cry', 'get_acc'],
}

# One new column per family: the row-wise sum of its member columns
for fam, members in morph_families.items():
    morph_df[fam] = morph_df[members].sum(axis=1)

In [None]:
# Keep only the family-level sum columns and summarise them
morph_fams_df = morph_df[list(morph_families)]
morph_fams_df.describe()

In [None]:
# Positions of each family's members within feature_names_org
morph_families_inds = names2inds(morph_families, feature_names_org)

# '<family>_up' columns: family value computed from the importance array
for fam, fam_inds in morph_families_inds.items():
    morph_df[f'{fam}_up'] = get_family_imp(fam_inds, morph_imps)

In [None]:
# Keep only the '<family>_up' columns and summarise them
up_columns = [f'{fam}_up' for fam in morph_families]
morph_fams_df_up = morph_df[up_columns]
morph_fams_df_up.describe()

In [None]:
# Family-level sums as arrays
org, first, second = (
    morph_fams_df[name].to_numpy() for name in ('WF', 'first', 'second')
)

# Wilcoxon signed-rank test of the WF family against each of the other two
statistic, p_val = stats.wilcoxon(org, first)
print(f"Wilcoxon statistical test results for WF vs first are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(org, second)
print(f"Wilcoxon statistical test results for WF vs second are p-value={p_val} (statistic={statistic})")

In [None]:
# '<family>_up' columns as arrays
org, first, second = (
    morph_fams_df_up[name].to_numpy() for name in ('WF_up', 'first_up', 'second_up')
)

# Wilcoxon signed-rank test of the WF family against each of the other two
statistic, p_val = stats.wilcoxon(org, first)
print(f"Wilcoxon statistical test results for WF vs first are p-value={p_val} (statistic={statistic})")

statistic, p_val = stats.wilcoxon(org, second)
print(f"Wilcoxon statistical test results for WF vs second are p-value={p_val} (statistic={statistic})")

## Moments

In [4]:
# Moments results table (PATH) and its chance-balanced counterpart (BASE).
# Note: this rebinds `df` / `df_base`, replacing the tables loaded earlier.
PATH = 'ml/results_rf_moments.csv'
BASE = 'ml/results_rf_moments_chance_balanced.csv'

# Only rows whose restriction is 'complete' are analysed
df = pd.read_csv(PATH, index_col=0).loc[lambda frame: frame.restriction == 'complete']
df_base = pd.read_csv(BASE, index_col=0).loc[lambda frame: frame.restriction == 'complete']

In [5]:
# Display names given to 'test feature 1' .. 'test feature 6' in the cells below
# (position i names 'test feature {i+1}').
moments_names = ['Original', 'Mean', 'SD', 'Q25', 'Median', 'Q75']

### Spatial

In [17]:
# Rows of the spatial modality at chunk size 25
spatial_df = df[(df.chunk_size == 25) & (df.modality == 'spatial')]

# Keep the first NUM_MOMENTS + 1 'test feature N' columns and name them
keep = [f'test feature {i+1}' for i in range(NUM_MOMENTS + 1)]
drop = [c for c in spatial_df.columns if c not in keep]
mapper = {f'test feature {i+1}': moments_names[i] for i in range(NUM_MOMENTS + 1)}

spatial_df = spatial_df.drop(columns=drop).rename(columns=mapper)

In [18]:
# Summary statistics of the renamed moment columns (spatial modality)
spatial_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.003166,0.05162,0.162757,0.067698,0.038598,0.060491
std,0.002939,0.009687,0.017315,0.014564,0.008582,0.013721
min,0.000779,0.030074,0.130779,0.039486,0.021126,0.032141
25%,0.001624,0.045045,0.147828,0.057077,0.033168,0.051889
50%,0.002438,0.05201,0.162075,0.067352,0.037001,0.059945
75%,0.003106,0.0579,0.173901,0.075903,0.043856,0.06851
max,0.016771,0.073967,0.200541,0.120412,0.066409,0.093201


In [19]:
# Baseline rows for the same selection, reduced to the same renamed columns
# (`drop` and `mapper` come from the cell that prepared spatial_df)
spatial_df_base = (
    df_base[(df_base.chunk_size == 25) & (df_base.modality == 'spatial')]
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# Per column: median [Q25, Q75] of both tables and a one-sided Mann-Whitney U
# test (alternative: test values are greater than baseline values)
for col in spatial_df:
    col_test = spatial_df[col].to_numpy()
    col_base = spatial_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.0024379321464613954 [0.0016241253911328513, 0.0031055695932484108]
Median of base Original column is 0.010337783240916646 [0.005428529615957661, 0.023136359055328445]
Mann-Whitney statistical test results for feature Original are p-value=0.9999999999960985 (statistic=258.0)

Median of test Mean column is 0.05201005082433725 [0.045045248894187706, 0.05789950900895412]
Median of base Mean column is 0.0036999208163549504 [0.002084875672954233, 0.00818752087381185]
Mann-Whitney statistical test results for feature Mean are p-value=6.060044905437509e-18 (statistic=2491.0)

Median of test SD column is 0.16207471437127263 [0.14782810729137247, 0.17390097158750917]
Median of base SD column is 0.003659140704237971 [0.0023178400525044286, 0.012008006509250494]
Mann-Whitney statistical test results for feature SD are p-value=3.533035965194466e-18 (statistic=2500.0)

Median of test Q25 column is 0.06735180638014514 [0.05707747809787165, 0.07590309042328994]
Medi

In [20]:
# Mann-Whitney U test of every moment column against the 'Original' column
for col in [name for name in spatial_df.columns if name != 'Original']:
    col_test = spatial_df[col].to_numpy()
    col_test_original = spatial_df['Original'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature SD are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



In [21]:
# Mann-Whitney U test of every other column against the 'SD' column
for col in [name for name in spatial_df.columns if name != 'SD']:
    col_test = spatial_df[col].to_numpy()
    col_test_sd = spatial_df['SD'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Original are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



### Spike-timing

In [11]:
# Rows of the temporal modality at chunk size 1600
temporal_df = df[(df.chunk_size == 1600) & (df.modality == 'temporal')]

# Keep the first NUM_MOMENTS + 1 'test feature N' columns and name them
keep = [f'test feature {i+1}' for i in range(NUM_MOMENTS + 1)]
drop = [c for c in temporal_df.columns if c not in keep]
mapper = {f'test feature {i+1}': moments_names[i] for i in range(NUM_MOMENTS + 1)}

temporal_df = temporal_df.drop(columns=drop).rename(columns=mapper)

In [12]:
# Summary statistics of the renamed moment columns (temporal modality)
temporal_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.051028,0.088946,0.030351,0.097965,0.093147,0.10498
std,0.013718,0.015345,0.006604,0.018389,0.021444,0.020827
min,0.021799,0.030305,0.012879,0.0187,0.040656,0.072726
25%,0.042237,0.079957,0.026717,0.089166,0.084197,0.091208
50%,0.049169,0.090793,0.031512,0.100306,0.089799,0.098839
75%,0.059225,0.099308,0.033688,0.108278,0.101627,0.117467
max,0.100189,0.126585,0.04655,0.138924,0.190443,0.159987


In [13]:
# Baseline rows for the same selection, reduced to the same renamed columns
# (`drop` and `mapper` come from the cell that prepared temporal_df)
temporal_df_base = (
    df_base[(df_base.chunk_size == 1600) & (df_base.modality == 'temporal')]
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# Per column: median [Q25, Q75] of both tables and a one-sided Mann-Whitney U
# test (alternative: test values are greater than baseline values)
for col in temporal_df:
    col_test = temporal_df[col].to_numpy()
    col_base = temporal_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.049168597585393284 [0.04223717851421639, 0.05922549293105055]
Median of base Original column is 0.0702281743168738 [0.032731743358026893, 0.10602249069231215]
Mann-Whitney statistical test results for feature Original are p-value=0.9693740088704476 (statistic=979.0)

Median of test Mean column is 0.09079308870575285 [0.07995700909357352, 0.0993076709215153]
Median of base Mean column is 0.018867166137777187 [0.010543452345292408, 0.026230177147372684]
Mann-Whitney statistical test results for feature Mean are p-value=9.536222832167321e-17 (statistic=2444.0)

Median of test SD column is 0.03151209343431375 [0.02671693814152817, 0.03368750736152391]
Median of base SD column is 0.0237637917505148 [0.012374683373946052, 0.03538959157805905]
Mann-Whitney statistical test results for feature SD are p-value=0.007839915983387659 (statistic=1601.0)

Median of test Q25 column is 0.1003064311280634 [0.08916622808666558, 0.10827794174549525]
Median of base Q25 c

In [14]:
# Mann-Whitney U test of every moment column against the 'Original' column
for col in [name for name in temporal_df.columns if name != 'Original']:
    col_test = temporal_df[col].to_numpy()
    col_test_original = temporal_df['Original'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Mean are p-value=1.5112130159282838e-15 (statistic=105.0)

Mann-Whitney statistical test results for feature SD are p-value=4.242867088798065e-14 (statistic=167.0)

Mann-Whitney statistical test results for feature Q25 are p-value=4.432167636937588e-16 (statistic=83.0)

Mann-Whitney statistical test results for feature Median are p-value=1.884188348687208e-15 (statistic=109.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.102916316646988e-17 (statistic=30.0)



In [16]:
# Mann-Whitney U test of every other column against the 'SD' column
for col in [name for name in temporal_df.columns if name != 'SD']:
    col_test = temporal_df[col].to_numpy()
    col_test_sd = temporal_df['SD'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Original are p-value=4.242867088798065e-14 (statistic=167.0)

Mann-Whitney statistical test results for feature Mean are p-value=2.102916316646988e-17 (statistic=30.0)

Mann-Whitney statistical test results for feature Q25 are p-value=5.3516117196336476e-17 (statistic=46.0)

Mann-Whitney statistical test results for feature Median are p-value=4.230977759222343e-18 (statistic=3.0)

Mann-Whitney statistical test results for feature Q75 are p-value=3.533035965194466e-18 (statistic=0.0)



### Waveform

In [6]:
# Rows of the morphological modality at chunk size 50
wf_df = df[(df.chunk_size == 50) & (df.modality == 'morphological')]

# Keep the first NUM_MOMENTS + 1 'test feature N' columns and name them
keep = [f'test feature {i+1}' for i in range(NUM_MOMENTS + 1)]
drop = [c for c in wf_df.columns if c not in keep]
mapper = {f'test feature {i+1}': moments_names[i] for i in range(NUM_MOMENTS + 1)}

wf_df = wf_df.drop(columns=drop).rename(columns=mapper)

In [7]:
# Summary statistics of the renamed moment columns (morphological modality)
wf_df.describe()

Unnamed: 0,Original,Mean,SD,Q25,Median,Q75
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.023439,0.117799,0.036055,0.119735,0.101871,0.087564
std,0.008672,0.027136,0.015113,0.033335,0.02656,0.024249
min,0.002561,0.075096,0.016546,0.051519,0.036409,0.019827
25%,0.018188,0.100066,0.027242,0.102397,0.088383,0.071054
50%,0.022379,0.116444,0.033952,0.117159,0.100804,0.088183
75%,0.030186,0.127203,0.04106,0.136353,0.112628,0.100758
max,0.043224,0.205601,0.096509,0.232663,0.2025,0.154632


In [8]:
# Baseline rows for the same selection, reduced to the same renamed columns
# (`drop` and `mapper` come from the cell that prepared wf_df)
wf_df_base = (
    df_base[(df_base.chunk_size == 50) & (df_base.modality == 'morphological')]
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# Per column: median [Q25, Q75] of both tables and a one-sided Mann-Whitney U
# test (alternative: test values are greater than baseline values)
for col in wf_df:
    col_test = wf_df[col].to_numpy()
    col_base = wf_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test Original column is 0.02237877001679314 [0.018188308743880364, 0.03018636968651768]
Median of base Original column is 0.015412859323332417 [0.006643367017660434, 0.07153374519172542]
Mann-Whitney statistical test results for feature Original are p-value=0.14341643221315398 (statistic=1405.0)

Median of test Mean column is 0.11644411591219558 [0.10006621703299277, 0.12720349345496607]
Median of base Mean column is 0.006428797657842369 [0.0028005027826177305, 0.0164218672523151]
Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=2500.0)

Median of test SD column is 0.03395225415721932 [0.0272417678041492, 0.04106005209374072]
Median of base SD column is 0.008452899647953545 [0.003568247727194304, 0.017628916892953532]
Mann-Whitney statistical test results for feature SD are p-value=1.628023156627252e-12 (statistic=2261.0)

Median of test Q25 column is 0.11715856826107693 [0.10239658339503573, 0.1363534815643678]
Median of bas

In [9]:
# Mann-Whitney U test of every moment column against the 'Original' column
for col in [name for name in wf_df.columns if name != 'Original']:
    col_test = wf_df[col].to_numpy()
    col_test_original = wf_df['Original'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_original)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Mean are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature SD are p-value=1.2691302843148297e-06 (statistic=567.0)

Mann-Whitney statistical test results for feature Q25 are p-value=3.533035965194466e-18 (statistic=0.0)

Mann-Whitney statistical test results for feature Median are p-value=3.752027398590286e-18 (statistic=1.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.2301221981921627e-17 (statistic=31.0)



In [10]:
# Mann-Whitney U test of every other column against the 'SD' column
for col in [name for name in wf_df.columns if name != 'SD']:
    col_test = wf_df[col].to_numpy()
    col_test_sd = wf_df['SD'].to_numpy()

    statistic, p_val = stats.mannwhitneyu(col_test, col_test_sd)
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Mann-Whitney statistical test results for feature Original are p-value=1.2691302843148297e-06 (statistic=567.0)

Mann-Whitney statistical test results for feature Mean are p-value=5.37706962437211e-18 (statistic=7.0)

Mann-Whitney statistical test results for feature Q25 are p-value=1.1658290633802667e-17 (statistic=20.0)

Mann-Whitney statistical test results for feature Median are p-value=6.746610677097982e-17 (statistic=50.0)

Mann-Whitney statistical test results for feature Q75 are p-value=2.766871909311284e-15 (statistic=116.0)



## Events

In [None]:
# Events results table (PATH) and its chance-balanced counterpart (BASE).
# Note: this rebinds `df` / `df_base`, replacing the tables loaded earlier.
PATH = 'ml/results_rf_events.csv'
BASE = 'ml/results_rf_events_chance_balanced.csv'

# Only rows whose restriction is 'complete' are analysed
df = pd.read_csv(PATH, index_col=0).loc[lambda frame: frame.restriction == 'complete']
df_base = pd.read_csv(BASE, index_col=0).loc[lambda frame: frame.restriction == 'complete']

In [None]:
# Display names given to 'test feature 1' .. 'test feature 3' in the cells below
# (position i names 'test feature {i+1}').
events_names = ['FMC', 'NEG', 'SZC']

In [None]:
# Rows at chunk size 25 (no modality filter is applied here)
events_df = df[df.chunk_size == 25]

# Keep one 'test feature N' column per entry of events_names and name it
keep = [f'test feature {i+1}' for i in range(len(events_names))]
drop = [c for c in events_df.columns if c not in keep]
mapper = {f'test feature {i+1}': events_names[i] for i in range(len(events_names))}

events_df = events_df.drop(columns=drop).rename(columns=mapper)

In [None]:
# Summary statistics of the renamed event columns
events_df.describe()

In [None]:
# Baseline rows at chunk size 25, reduced to the same renamed columns
# (`drop` and `mapper` come from the cell that prepared events_df)
events_df_base = (
    df_base[df_base.chunk_size == 25]
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# Per column: median [Q25, Q75] of both tables and a one-sided Mann-Whitney U
# test (alternative: test values are greater than baseline values)
for col in events_df:
    col_test = events_df[col].to_numpy()
    col_base = events_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()