In [1]:
import sys

import numpy as np
import pandas as pd
import scipy.stats as stats

from constants import (MORPHOLOGICAL, SPATIAL, SPATIAL_R, TEMPORAL,
                       feature_names_org, feature_names_rich)

In [2]:
# Raise pandas' display limits so wide/long tables (e.g. describe() output)
# are rendered in full instead of being truncated.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [3]:
# Load the saved raw importance array. Later cells slice it with three axes
# (imps[k::3 * 8, :, :]) and reduce over the last axis by feature index.
# NOTE(review): the exact axis meaning is not visible here — confirm against the
# script that wrote this file.
imps = np.load('ml/raw_imps_rf_290322_rich_fix_imp.npy')

In [4]:
PATH = 'ml/results_rf_290322_rich_fix_imp.csv'
BASE = 'ml/results_rf_280722_rich_chance_balanced_fix_imp.csv'

# Read the test and baseline result tables (first CSV column is the index)
# and keep only the rows whose `restriction` column equals 'complete'.
df = pd.read_csv(PATH, index_col=0).loc[lambda frame: frame.restriction == 'complete']
df_base = pd.read_csv(BASE, index_col=0).loc[lambda frame: frame.restriction == 'complete']

In [5]:
def get_family_imp(inds, arr):
    """Return one aggregated importance value per entry along ``arr``'s first axis.

    The features selected by ``inds`` on the last axis are summed, the absolute
    value of that sum is taken, and the result is averaged over the remaining
    axis (or axes) while ignoring NaN entries.

    Parameters
    ----------
    inds : sequence of int
        Positions along axis 2 of ``arr`` that make up the feature family.
    arr : np.ndarray
        Array indexed as ``arr[:, :, inds]`` (at least three axes).

    Returns
    -------
    np.ndarray
        1-D array of length ``arr.shape[0]``. An entry is NaN when every value
        contributing to it is NaN.
    """
    # |sum over the family's features|; a NaN in any member makes that entry NaN.
    family_abs = np.abs(arr[:, :, inds].sum(axis=2))
    # Keep one row per first-axis entry so the NaN handling stays row-local.
    # The previous "filter non-NaN, then reshape" approach shifted values across
    # rows whenever rows contained different numbers of NaNs.
    per_row = family_abs.reshape((arr.shape[0], -1))
    return np.nanmean(per_row, axis=1)

def names2inds(d_names, n2i_map):
    """Translate each group's feature names into their positions in ``n2i_map``.

    ``d_names`` maps a group key to a list of names; the result maps the same
    keys to lists of ``n2i_map.index(name)`` values (first occurrence). A name
    missing from ``n2i_map`` raises ``ValueError`` from ``.index``.
    """
    return {
        group: [n2i_map.index(feature) for feature in members]
        for group, members in d_names.items()
    }

## Spatial

In [6]:
# Rows of the test results for the spatial modality at chunk size 25.
spatial_mask = (df.chunk_size == 25) & (df.modality == 'spatial')
spatial_df = df[spatial_mask].dropna(how='all', axis=1)

# Matching slice of the raw importances:
# offset 1 selects chunk size 25; stride 3*8 = number of modalities x chunk sizes.
spatial_imps = imps[1::3 * 8, :, :]

# 'test feature <i+1>' columns hold the importance of feature i; keep only the
# spatial ones (SPATIAL_R without its last entry) and give them readable names.
mapper = {f'test feature {i+1}': feature_names_rich[i] for i in SPATIAL_R[:-1]}
keep = list(mapper)
drop = [c for c in spatial_df.columns if c not in keep]

spatial_df = spatial_df.drop(columns=drop).rename(columns=mapper)

In [7]:
# Summary statistics of every retained (renamed) spatial feature column.
spatial_df.describe()

Unnamed: 0,spatial_dispersion_count,spatial_dispersion_count_avg,spatial_dispersion_count_std,spatial_dispersion_count_q25,spatial_dispersion_count_q50,spatial_dispersion_count_q75,spatial_dispersion_sd,spatial_dispersion_sd_avg,spatial_dispersion_sd_std,spatial_dispersion_sd_q25,spatial_dispersion_sd_q50,spatial_dispersion_sd_q75,spatial_dispersion_area,spatial_dispersion_area_avg,spatial_dispersion_area_std,spatial_dispersion_area_q25,spatial_dispersion_area_q50,spatial_dispersion_area_q75,dep_red,dep_red_avg,dep_red_std,dep_red_q25,dep_red_q50,dep_red_q75,dep_sd,dep_sd_avg,dep_sd_std,dep_sd_q25,dep_sd_q50,dep_sd_q75,fzc_red,fzc_red_avg,fzc_red_std,fzc_red_q25,fzc_red_q50,fzc_red_q75,fzc_sd,fzc_sd_avg,fzc_sd_std,fzc_sd_q25,fzc_sd_q50,fzc_sd_q75,szc_red,szc_red_avg,szc_red_std,szc_red_q25,szc_red_q50,szc_red_q75,szc_sd,szc_sd_avg,szc_sd_std,szc_sd_q25,szc_sd_q50,szc_sd_q75,dep_graph_avg_speed,dep_graph_avg_speed_avg,dep_graph_avg_speed_std,dep_graph_avg_speed_q25,dep_graph_avg_speed_q50,dep_graph_avg_speed_q75,dep_graph_slowest_path,dep_graph_slowest_path_avg,dep_graph_slowest_path_std,dep_graph_slowest_path_q25,dep_graph_slowest_path_q50,dep_graph_slowest_path_q75,dep_graph_fastest_path,dep_graph_fastest_path_avg,dep_graph_fastest_path_std,dep_graph_fastest_path_q25,dep_graph_fastest_path_q50,dep_graph_fastest_path_q75,fzc_graph_avg_speed,fzc_graph_avg_speed_avg,fzc_graph_avg_speed_std,fzc_graph_avg_speed_q25,fzc_graph_avg_speed_q50,fzc_graph_avg_speed_q75,fzc_graph_slowest_path,fzc_graph_slowest_path_avg,fzc_graph_slowest_path_std,fzc_graph_slowest_path_q25,fzc_graph_slowest_path_q50,fzc_graph_slowest_path_q75,fzc_graph_fastest_path,fzc_graph_fastest_path_avg,fzc_graph_fastest_path_std,fzc_graph_fastest_path_q25,fzc_graph_fastest_path_q50,fzc_graph_fastest_path_q75,szc_graph_avg_speed,szc_graph_avg_speed_avg,szc_graph_avg_speed_std,szc_graph_avg_speed_q25,szc_graph_avg_speed_q50,szc_graph_avg_speed_q75,szc_graph_slowest_path,szc_graph_slowest_path_avg,szc_graph_s
lowest_path_std,szc_graph_slowest_path_q25,szc_graph_slowest_path_q50,szc_graph_slowest_path_q75,szc_graph_fastest_path,szc_graph_fastest_path_avg,szc_graph_fastest_path_std,szc_graph_fastest_path_q25,szc_graph_fastest_path_q50,szc_graph_fastest_path_q75
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.00039,0.003728,0.003696,0.001302,0.00234,0.001459,0.000342,0.001832403,0.004222,0.002294,0.001947,0.001817,0.0007598577,0.003835,0.00492,0.004012,0.003369,0.00339,0.000191,0.002339,0.002836,0.001912387,0.002087,0.002509,0.000116,0.003117,0.003078,0.00213,0.002434,0.002919,0.001142,0.02315,0.042092,0.002837,0.007981,0.026951,0.0005563559,0.015174,0.055448,0.002043,0.006443,0.02764,2.352374e-05,0.005412,0.043005,0.001380066,0.001799,0.003378957,2.688042e-05,0.00565,0.046152,0.001689,0.003313,0.006837,2.560955e-05,0.001923626,0.003134,0.00193,0.001989,0.003384,2.466576e-05,0.002341,0.001702,0.00199,0.00134902,0.001459,1.359509e-05,0.001855035,0.002754,0.001586898,0.001509,0.001625,2.079603e-05,0.008513,0.007514,0.016522,0.010703,0.006305,0.001336326,0.004923,0.008723,0.052917,0.012671,0.00415,1.028648e-05,0.003467,0.004618,0.005243,0.004141,0.002675,5.925002e-06,0.002206,0.003417,0.001883,0.001956227,0.002152,1.278284e-05,0.002599,0.002049,0.003318,0.002187,0.002079,1.149386e-05,0.003258,0.005139,0.001818,0.001705,0.002898
std,0.000391,0.002581,0.005272,0.000573,0.001608,0.002241,0.000303,0.00104334,0.00272,0.001918,0.001397,0.001252,0.001134302,0.002526,0.002419,0.002146,0.0017,0.002078,0.000643,0.001486,0.001199,0.001684267,0.001461,0.001604,0.000197,0.001842,0.00207,0.001655,0.001107,0.001991,0.002209,0.00772,0.010135,0.002664,0.004571,0.010584,0.001476418,0.007233,0.011233,0.001266,0.003152,0.008228,7.727973e-05,0.003529,0.011968,0.0006083233,0.001212,0.002611662,9.307292e-05,0.002585,0.013226,0.000849,0.004017,0.005421,5.054911e-05,0.00170312,0.002895,0.002052,0.001865,0.003983,4.506324e-05,0.002132,0.001298,0.001999,0.0009296628,0.00094,4.356462e-05,0.00208003,0.001877,0.002450325,0.000755,0.001854,8.907123e-05,0.005878,0.002806,0.006108,0.010061,0.003852,0.002324264,0.003735,0.0067,0.013492,0.005613,0.002184,2.177416e-05,0.00322,0.003981,0.002832,0.00194,0.001896,2.29572e-05,0.001153,0.001601,0.00175,0.003106832,0.001913,5.560078e-05,0.001637,0.001271,0.002097,0.001044,0.001316,5.19615e-05,0.0026,0.002764,0.00086,0.000941,0.001996
min,0.0,0.000363,7.1e-05,0.000204,0.0,0.0,0.0,9.953189e-17,0.000522,0.000721,0.0,0.000327,2.0819869999999997e-19,0.000659,0.002428,0.000999,9.9e-05,0.000113,0.0,0.000512,0.001133,6.439576e-19,0.000449,0.000602,0.0,0.000265,0.001418,0.000421,0.000533,0.000298,0.0,0.008192,0.022713,0.000667,0.000256,0.010919,0.0,0.001219,0.031275,0.00023,0.001461,0.012501,0.0,0.001424,0.019145,1.433832e-19,0.000251,3.031504e-18,0.0,0.000962,0.025149,8.1e-05,0.000464,0.000139,0.0,5.061644e-18,0.00058,0.000164,0.000384,6.7e-05,0.0,0.000445,0.0,0.0,9.238179000000001e-17,0.000379,0.0,5.809425999999999e-19,0.000245,3.1299040000000005e-17,0.000381,0.000437,1.886438e-20,0.001652,0.001802,0.00479,0.001853,0.002189,5.223098e-20,0.000784,0.001147,0.025764,0.001316,0.000904,0.0,4.7e-05,0.000392,0.000293,0.001273,0.000505,0.0,0.0,0.000287,0.000127,1.0691720000000001e-17,0.0003,0.0,0.000509,0.000836,0.000892,0.000922,0.000336,0.0,0.000149,0.000379,0.0,0.0,0.000757
25%,0.000133,0.002023,0.001114,0.000986,0.001332,0.000579,0.00019,0.001265911,0.002244,0.001418,0.001026,0.001175,0.0003317012,0.002235,0.003206,0.002755,0.002174,0.001951,5e-06,0.001452,0.001927,0.0009339859,0.001158,0.001506,9e-06,0.001925,0.002026,0.001206,0.001709,0.001809,1.6e-05,0.016729,0.036248,0.001384,0.00421,0.017792,7.490526e-18,0.010214,0.048541,0.001212,0.004728,0.022662,9.748970999999999e-19,0.003321,0.034782,0.0009527924,0.000984,0.002005604,7.028437e-18,0.003564,0.036602,0.001109,0.001906,0.004042,1.0819040000000001e-17,0.0009997053,0.001303,0.00092,0.001046,0.00105,8.976305e-18,0.001189,0.001022,0.00117,0.0008647243,0.000822,2.650156e-18,0.001073935,0.001557,0.0008825413,0.000995,0.000965,1.879332e-18,0.004793,0.005575,0.01344,0.005609,0.003421,1.186682e-16,0.002661,0.004662,0.046181,0.009525,0.002576,1.019268e-18,0.001783,0.002007,0.003464,0.002712,0.001502,1.339717e-18,0.001343,0.002368,0.001036,0.0009051969,0.001287,3.4609860000000002e-18,0.001324,0.001172,0.002299,0.001448,0.001099,1.258517e-18,0.001994,0.003065,0.001288,0.0012,0.001878
50%,0.000318,0.002673,0.001644,0.00122,0.00167,0.000745,0.000277,0.001509154,0.003598,0.001715,0.001421,0.001515,0.0004635175,0.003269,0.004232,0.00382,0.003122,0.002937,2.2e-05,0.002112,0.002489,0.001466133,0.001674,0.002174,6.1e-05,0.002567,0.002557,0.001694,0.002225,0.00241,0.00095,0.023164,0.041553,0.001957,0.007434,0.025678,1.34294e-05,0.014732,0.052116,0.001797,0.00599,0.027326,2.466302e-17,0.004617,0.042921,0.001358767,0.001446,0.00281411,1.126323e-06,0.005781,0.043977,0.001451,0.002515,0.006054,3.261422e-07,0.001555087,0.001814,0.001393,0.001498,0.001718,3.45211e-06,0.001785,0.001337,0.001489,0.001115658,0.001211,5.618989e-17,0.001406978,0.002159,0.001055934,0.001263,0.001316,3.145187e-17,0.006637,0.007201,0.015182,0.007956,0.005137,0.0008078884,0.004103,0.006184,0.052613,0.011656,0.003763,5.223247e-07,0.002319,0.003175,0.004804,0.003893,0.002051,6.890974e-18,0.001939,0.003168,0.001582,0.001333654,0.001735,4.6266410000000003e-17,0.002034,0.001773,0.002875,0.001882,0.001911,8.269959e-18,0.002679,0.004576,0.001737,0.001568,0.002566
75%,0.000487,0.004916,0.003034,0.001587,0.003321,0.00133,0.000444,0.002229918,0.00528,0.002212,0.002481,0.002071,0.0007100605,0.004399,0.00542,0.004843,0.004143,0.004336,0.000123,0.002722,0.00381,0.002540012,0.002356,0.002755,0.000124,0.003606,0.003414,0.002514,0.003055,0.003324,0.001273,0.029122,0.045044,0.003018,0.010808,0.033916,0.0008226008,0.019208,0.063261,0.00242,0.007712,0.034228,8.208596e-06,0.006236,0.049217,0.001871031,0.002131,0.00407449,1.475048e-05,0.007392,0.054104,0.002136,0.003232,0.008323,2.885695e-05,0.002354098,0.0038,0.002086,0.002171,0.003234,2.310699e-05,0.002502,0.001954,0.002063,0.001626496,0.001763,3.673489e-06,0.001987107,0.003084,0.001571598,0.00194,0.001661,2.561733e-06,0.009274,0.00888,0.017236,0.012861,0.007054,0.001614562,0.005962,0.011679,0.057993,0.016083,0.005267,1.27448e-05,0.003823,0.005594,0.006355,0.005062,0.002948,1.092123e-16,0.002865,0.004115,0.002047,0.00186892,0.002415,3.978372e-06,0.003301,0.00234,0.003618,0.002801,0.002694,4.430976e-07,0.004271,0.006902,0.002218,0.002129,0.003052
max,0.00216,0.012163,0.027822,0.002828,0.007844,0.011734,0.00203,0.005523371,0.015059,0.009883,0.007304,0.008647,0.00751319,0.012745,0.013369,0.012033,0.007846,0.009665,0.004514,0.008395,0.005761,0.01116442,0.008488,0.007496,0.001053,0.008927,0.014841,0.008783,0.005819,0.010402,0.015474,0.042697,0.066387,0.014575,0.018559,0.056436,0.009949219,0.038556,0.084025,0.006316,0.01517,0.053746,0.0004042245,0.018496,0.071193,0.002564699,0.005957,0.01478225,0.0006176172,0.011607,0.077842,0.004177,0.028923,0.037725,0.0002444214,0.01132837,0.011356,0.013296,0.01153,0.020153,0.0001773155,0.012889,0.00733,0.013358,0.005919712,0.00554,0.0002486362,0.01477562,0.008598,0.01754776,0.003688,0.013836,0.0005685855,0.03133,0.01539,0.039498,0.066871,0.01972,0.01185919,0.022521,0.028246,0.106835,0.030897,0.009267,0.0001220313,0.019959,0.020985,0.017556,0.008775,0.010366,0.0001459333,0.005995,0.007089,0.010989,0.02168711,0.013902,0.000390342,0.006733,0.007406,0.013016,0.005396,0.008343,0.0003517158,0.017911,0.012631,0.003923,0.005174,0.013897


In [8]:
# Original (non-rich) spatial feature names: SPATIAL without its last entry.
spatial_org_fets = np.asarray(feature_names_org)[SPATIAL[:-1]]

# Each original feature together with its five derived statistics forms one group.
group_suffixes = ('', '_avg', '_std', '_q25', '_q50', '_q75')
spatial_fet_groups = {
    f: [f'{f}{suffix}' for suffix in group_suffixes]
    for f in spatial_org_fets
}
spatial_fet_groups_inds = names2inds(spatial_fet_groups, feature_names_rich)

# One '<feature>_group' column per original feature, aggregated from the raw importances.
for g, group_inds in spatial_fet_groups_inds.items():
    spatial_df[f'{g}_group'] = get_family_imp(group_inds, spatial_imps)

In [9]:
# Baseline rows matching spatial_df: chunk size 25, spatial modality.
base_spatial_mask = (df_base.chunk_size == 25) & (df_base.modality == 'spatial')
spatial_df_base = df_base[base_spatial_mask]

# The baseline is selected with the SPATIAL index list and named via feature_names_org.
mapper = {f'test feature {i+1}': feature_names_org[i] for i in SPATIAL[:-1]}
keep = list(mapper)
drop = [c for c in spatial_df_base.columns if c not in keep]

spatial_df_base = (
    spatial_df_base
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# For every baseline feature, compare the grouped test importance against the
# baseline importance: report median [Q1, Q3] of both and a one-sided
# Mann-Whitney U test (alternative: test > base).
for col in spatial_df_base.columns:
    col_test = spatial_df[f'{col}_group'].to_numpy()
    col_base = spatial_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])
    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

Median of test spatial_dispersion_count column is 0.007385278881142177 [0.005830300306185667, 0.013247545002678133]
Median of base spatial_dispersion_count column is 0.00013240994771486982 [5.727999896298165e-05, 0.0006574698975188818]
Mann-Whitney statistical test results for feature spatial_dispersion_count are p-value=1.3891867423411367e-17 (statistic=2477.0)

Median of test spatial_dispersion_sd column is 0.00793888756873824 [0.006120196826501106, 0.010526203513153587]
Median of base spatial_dispersion_sd column is 0.0001359900340699868 [3.6115251207962976e-05, 0.0007753269059267046]
Mann-Whitney statistical test results for feature spatial_dispersion_sd are p-value=3.505250453807833e-18 (statistic=2500.0)

Median of test spatial_dispersion_area column is 0.014593587699502092 [0.010753841789731763, 0.019377106645616246]
Median of base spatial_dispersion_area column is 0.0001115908851734788 [7.812230960324466e-06, 0.0009614168650640237]
Mann-Whitney statistical test results for feat

In [13]:
# Base feature names of each spatial family.
spatial_families_temp = {
    'value-based': ['spatial_dispersion_count', 'spatial_dispersion_sd', 'spatial_dispersion_area'],
    'time-based': ['dep_red', 'dep_sd', 'fzc_red', 'fzc_sd', 'szc_red', 'szc_sd'],
    'graph-based': ['dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path',
                    'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path',
                    'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path'],
}

# Expand every base name into itself plus its five derived-statistic variants,
# keeping the features in order and the variants grouped per feature.
family_suffixes = ('', '_avg', '_std', '_q25', '_q50', '_q75')
spatial_families = {
    key: [f'{f}{suffix}' for f in members for suffix in family_suffixes]
    for key, members in spatial_families_temp.items()
}

In [15]:
# Positions of every family member within feature_names_rich, keyed by family name.
spatial_families_inds = names2inds(spatial_families, feature_names_rich)

In [16]:
# Add one column per spatial family holding its aggregated raw importance.
for fam, fam_inds in spatial_families_inds.items():
    spatial_df[fam] = get_family_imp(fam_inds, spatial_imps)

In [17]:
# Family-level importances only, summarised side by side.
spatial_fams_df = spatial_df[list(spatial_families)]
spatial_fams_df.describe()

Unnamed: 0,value-based,time-based,graph-based
count,50.0,50.0,50.0
mean,0.029807,0.246163,0.114888
std,0.01337,0.02655,0.018129
min,0.010543,0.201887,0.082147
25%,0.020788,0.229097,0.101922
50%,0.028101,0.246022,0.110769
75%,0.036145,0.266558,0.129163
max,0.071799,0.295116,0.166109


In [18]:
# Paired Wilcoxon signed-rank tests between every pair of spatial families.
value_based, time_based, graph_based = (
    spatial_fams_df[name].to_numpy()
    for name in ('value-based', 'time-based', 'graph-based')
)

family_pairs = (
    ('spd vs time lag', value_based, time_based),
    ('spd vs graph', value_based, graph_based),
    ('time lag vs graph', time_based, graph_based),
)
for pair_label, first_sample, second_sample in family_pairs:
    statistic, p_val = stats.wilcoxon(first_sample, second_sample)
    print(f"Wilcoxon statistical test results for {pair_label} are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for spd vs time lag are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for spd vs graph are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for time lag vs graph are p-value=7.556929455863566e-10 (statistic=0.0)


In [19]:
# Base feature names grouped by the event prefix they carry (dep / fzc / szc).
event_families_temp = {
    'dep': ['dep_red', 'dep_sd', 'dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path'],
    'fzc': ['fzc_red', 'fzc_sd', 'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path'],
    'szc': ['szc_red', 'szc_sd', 'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path'],
}

# Expand every base name into itself plus its five derived-statistic variants.
event_suffixes = ('', '_avg', '_std', '_q25', '_q50', '_q75')
spatial_events = {
    key: [f'{f}{suffix}' for f in members for suffix in event_suffixes]
    for key, members in event_families_temp.items()
}

In [20]:
# Positions of every event-family member within feature_names_rich, keyed by event name.
spatial_events_inds = names2inds(spatial_events, feature_names_rich)

In [21]:
# Add one column per event family holding its aggregated raw importance.
for e, event_inds in spatial_events_inds.items():
    spatial_df[e] = get_family_imp(event_inds, spatial_imps)

In [23]:
# Event-level importances only, summarised side by side.
event_fams_df = spatial_df[list(spatial_events_inds)]
event_fams_df.describe()

Unnamed: 0,dep,fzc,szc
count,50.0,50.0,50.0
mean,0.031404,0.266625,0.101886
std,0.013472,0.026182,0.02124
min,0.016626,0.217505,0.061487
25%,0.023593,0.249482,0.088098
50%,0.028365,0.264015,0.098318
75%,0.036144,0.290299,0.112656
max,0.101495,0.314895,0.155026


In [24]:
# Paired Wilcoxon signed-rank tests between every pair of event families.
dep_event, fzc_event, szc_event = (
    event_fams_df[name].to_numpy() for name in ('dep', 'fzc', 'szc')
)

event_pairs = (
    ('dep vs fzc', dep_event, fzc_event),
    ('dep vs szc', dep_event, szc_event),
    ('fzc vs szc', fzc_event, szc_event),
)
for pair_label, first_sample, second_sample in event_pairs:
    statistic, p_val = stats.wilcoxon(first_sample, second_sample)
    print(f"Wilcoxon statistical test results for {pair_label} are p-value={p_val} (statistic={statistic})")

Wilcoxon statistical test results for dep vs fzc are p-value=7.556929455863566e-10 (statistic=0.0)
Wilcoxon statistical test results for dep vs szc are p-value=9.06805772149548e-10 (statistic=3.0)
Wilcoxon statistical test results for fzc vs szc are p-value=7.556929455863566e-10 (statistic=0.0)


## Temporal

In [None]:
# Rows of the test results for the temporal modality at chunk size 800.
temporal_mask = (df.chunk_size == 800) & (df.modality == 'temporal')
temporal_df = df[temporal_mask].dropna(how='all', axis=1)

# Matching slice of the raw importances (offset 14, stride 3*8).
# NOTE(review): the offset presumably encodes the (modality, chunk size) slot — confirm.
temporal_imps = imps[14::3 * 8, :, :]

# Keep only the temporal 'test feature <i+1>' columns and give them readable names.
mapper = {f'test feature {i+1}': feature_names_org[i] for i in TEMPORAL[:-1]}
keep = list(mapper)
drop = [c for c in temporal_df.columns if c not in keep]
temporal_df = temporal_df.drop(columns=drop).rename(columns=mapper)

In [None]:
# Summary statistics of every retained (renamed) temporal feature column.
temporal_df.describe()

In [None]:
# Baseline rows matching temporal_df: chunk size 800, temporal modality.
temporal_df_base = df_base[df_base.chunk_size == 800]
temporal_df_base = temporal_df_base[temporal_df_base.modality == 'temporal']
temporal_df_base = temporal_df_base.dropna(how='all', axis=1)

# Build the keep/drop/rename tables from the baseline frame itself (as the
# spatial baseline cell does). Reusing the `drop` list computed for temporal_df
# fails with KeyError, or leaves stray columns, whenever the two frames do not
# end up with the same columns after dropna.
mapper = {f'test feature {i+1}': feature_names_org[i] for i in TEMPORAL[:-1]}
keep = list(mapper)
drop = [c for c in temporal_df_base.columns if c not in keep]
temporal_df_base = temporal_df_base.drop(columns=drop).rename(columns=mapper)

# Compare only columns present in both frames; this also keeps the cell
# re-runnable after family columns have been added to temporal_df.
shared_cols = [c for c in temporal_df.columns if c in temporal_df_base.columns]

# Per feature: median [Q1, Q3] of test and baseline importances plus a
# one-sided Mann-Whitney U test (alternative: test > base).
for col in shared_cols:
    col_test = temporal_df[col].to_numpy()
    col_base = temporal_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

In [None]:
# Temporal feature families, by column name in temporal_df.
temporal_families = {
    'short': ['d_kl_start', 'unif_dist', 'rise_time'],
    'long': ['d_kl_mid', 'jump'],
    'wb': ['firing_rate', 'psd_center', 'der_psd_center'],
}

# Family importance = row-wise sum of its member columns.
for fam, members in temporal_families.items():
    temporal_df[fam] = temporal_df[members].sum(axis=1)

In [None]:
# Summed family importances only, summarised side by side.
temporal_fams_df = temporal_df[list(temporal_families)]
temporal_fams_df.describe()

In [None]:
# Hard-coded positions along the last axis of temporal_imps for each family.
# NOTE(review): presumably these match the feature names in temporal_families — verify.
temporal_families_inds = {
    'short': [27, 33, 32],
    'long': [28, 29],
    'wb': [26, 30, 31],
}

# '<family>_up' columns: family importance recomputed from the raw importance array.
for fam, fam_inds in temporal_families_inds.items():
    temporal_df[f'{fam}_up'] = get_family_imp(fam_inds, temporal_imps)

In [None]:
# The '<family>_up' importances only, summarised side by side.
temporal_fams_df_up = temporal_df[[fam + '_up' for fam in temporal_families]]
temporal_fams_df_up.describe()

In [None]:
# Paired Wilcoxon signed-rank tests between the summed temporal family importances.
short = temporal_fams_df['short'].to_numpy()
long = temporal_fams_df['long'].to_numpy()
wb = temporal_fams_df['wb'].to_numpy()
statistic, p_val = stats.wilcoxon(short, long)
print(f"Wilcoxon statistical test results for short vs long are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(short, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")
# This comparison is long vs wb; the label previously repeated "short vs wb".
statistic, p_val = stats.wilcoxon(long, wb)
print(f"Wilcoxon statistical test results for long vs wb are p-value={p_val} (statistic={statistic})")

In [None]:
# Same paired Wilcoxon tests, on the '<family>_up' importances.
short = temporal_fams_df_up['short_up'].to_numpy()
long = temporal_fams_df_up['long_up'].to_numpy()
wb = temporal_fams_df_up['wb_up'].to_numpy()
statistic, p_val = stats.wilcoxon(short, long)
print(f"Wilcoxon statistical test results for short vs long are p-value={p_val} (statistic={statistic})")
statistic, p_val = stats.wilcoxon(short, wb)
print(f"Wilcoxon statistical test results for short vs wb are p-value={p_val} (statistic={statistic})")
# This comparison is long vs wb; the label previously repeated "short vs wb".
statistic, p_val = stats.wilcoxon(long, wb)
print(f"Wilcoxon statistical test results for long vs wb are p-value={p_val} (statistic={statistic})")

## WF (Morphological)

In [None]:
# Rows of the test results for the morphological modality at chunk size 800.
morph_mask = (df.chunk_size == 800) & (df.modality == 'morphological')
morph_df = df[morph_mask].dropna(how='all', axis=1)

# Matching slice of the raw importances (offset 22, stride 3*8).
# NOTE(review): the offset presumably encodes the (modality, chunk size) slot — confirm.
morph_imps = imps[22::3 * 8, :, :]

# Keep only the morphological 'test feature <i+1>' columns and give them readable names.
mapper = {f'test feature {i+1}': feature_names_org[i] for i in MORPHOLOGICAL[:-1]}
keep = list(mapper)
drop = [c for c in morph_df.columns if c not in keep]
morph_df = morph_df.drop(columns=drop).rename(columns=mapper)

In [None]:
# Summary statistics of every retained (renamed) morphological feature column.
morph_df.describe()

In [None]:
# Baseline rows matching morph_df: chunk size 800, morphological modality.
morph_df_base = df_base[df_base.chunk_size == 800]
morph_df_base = morph_df_base[morph_df_base.modality == 'morphological']
morph_df_base = morph_df_base.dropna(how='all', axis=1)

# Build the keep/drop/rename tables from the baseline frame itself (as the
# spatial baseline cell does). Reusing the `drop` list computed for morph_df
# fails with KeyError, or leaves stray columns, whenever the two frames do not
# end up with the same columns after dropna.
mapper = {f'test feature {i+1}': feature_names_org[i] for i in MORPHOLOGICAL[:-1]}
keep = list(mapper)
drop = [c for c in morph_df_base.columns if c not in keep]
morph_df_base = morph_df_base.drop(columns=drop).rename(columns=mapper)

# Compare only columns present in both frames; this also keeps the cell
# re-runnable after family columns have been added to morph_df.
shared_cols = [c for c in morph_df.columns if c in morph_df_base.columns]

# Per feature: median [Q1, Q3] of test and baseline importances plus a
# one-sided Mann-Whitney U test (alternative: test > base).
for col in shared_cols:
    col_test = morph_df[col].to_numpy()
    col_base = morph_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median} [{test_prec25}, {test_prec75}]")
    print(f"Median of base {col} column is {base_median} [{base_prec25}, {base_prec75}]")

    statistic, p_val = stats.mannwhitneyu(col_test, col_base, alternative='greater')
    print(f"Mann-Whitney statistical test results for feature {col} are p-value={p_val} (statistic={statistic})")
    print()

In [None]:
# Morphological feature families, by column name in morph_df.
morph_families = {
    'WF': ['trough2peak', 'peak2peak', 'fwhm', 'rise_coef'],
    'first': ['max_speed'],
    'second': ['break_measure', 'smile_cry', 'get_acc'],
}

# Family importance = row-wise sum of its member columns.
for fam, members in morph_families.items():
    morph_df[fam] = morph_df[members].sum(axis=1)

In [None]:
# Summed family importances only, summarised side by side.
morph_fams_df = morph_df[list(morph_families)]
morph_fams_df.describe()

In [None]:
# Hard-coded positions along the last axis of morph_imps for each family.
# NOTE(review): presumably these match the feature names in morph_families — verify.
morph_families_inds = {'WF': [23, 22, 19, 24],
                  'first': [21],
                  'second': [18, 25, 20]}

# '<family>_up' columns: family importance recomputed from the raw importance
# array, mirroring the temporal section. Previously this loop re-summed the
# CSV columns, so every '<family>_up' column merely duplicated '<family>' and
# morph_families_inds / morph_imps were never used.
for fam in morph_families_inds:
    morph_df[f'{fam}_up'] = get_family_imp(morph_families_inds[fam], morph_imps)

In [None]:
# The '<family>_up' importances only, summarised side by side.
morph_fams_df_up = morph_df[[fam + '_up' for fam in morph_families]]
morph_fams_df_up.describe()

In [None]:
# Paired Wilcoxon signed-rank tests of the WF family against the other two
# morphological families (summed importances).
org, first, second = (
    morph_fams_df[name].to_numpy() for name in ('WF', 'first', 'second')
)

for pair_label, other_family in (('WF vs first', first), ('WF vs second', second)):
    statistic, p_val = stats.wilcoxon(org, other_family)
    print(f"Wilcoxon statistical test results for {pair_label} are p-value={p_val} (statistic={statistic})")

In [None]:
# Same paired Wilcoxon tests, on the '<family>_up' importances.
org, first, second = (
    morph_fams_df_up[name].to_numpy() for name in ('WF_up', 'first_up', 'second_up')
)

for pair_label, other_family in (('WF vs first', first), ('WF vs second', second)):
    statistic, p_val = stats.wilcoxon(org, other_family)
    print(f"Wilcoxon statistical test results for {pair_label} are p-value={p_val} (statistic={statistic})")