In [1]:
import sys
import pandas as pd
import numpy as np
import scipy.stats as stats

from constants import MORPHOLOGICAL, TEMPORAL, SPATIAL, feature_names

In [2]:
pd.options.display.max_columns = 500

In [3]:
PATH = 'ml/results_rf_hz.csv'
df = pd.read_csv(PATH, index_col=0)
df = df[df.restriction == 'complete']
df = df[df.chunk_size == 0]

## Spatial

In [4]:
spatial_df = df[df.modality == 'spatial']
spatial_df = spatial_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df.columns if c not in keep]
spatial_df = spatial_df.drop(columns=drop)
mapper = {f'test feature {i+1}': feature_names[i] for i in SPATIAL[:-1]}
spatial_df = spatial_df.rename(columns=mapper)

In [5]:
spatial_df.describe()

Unnamed: 0,spatial_dispersion_count,spatial_dispersion_sd,spatial_dispersion_area,dep_red,dep_sd,fzc_red,fzc_sd,szc_red,szc_sd,dep_graph_avg_speed,dep_graph_slowest_path,dep_graph_fastest_path,fzc_graph_avg_speed,fzc_graph_slowest_path,fzc_graph_fastest_path,szc_graph_avg_speed,szc_graph_slowest_path,szc_graph_fastest_path
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.007460705,0.021502,0.013216,0.036798,0.033934,0.070409,0.049256,0.012235,0.02043,0.019329,0.026951,0.010663,0.020359,0.067222,0.015832,0.010466,0.014564,0.009711
std,0.002874207,0.00557,0.004301,0.008025,0.007949,0.00931,0.008011,0.003959,0.004315,0.00495,0.006498,0.003275,0.005612,0.011071,0.004127,0.003688,0.00391,0.003294
min,3.4673620000000002e-18,0.012711,0.003369,0.018712,0.018339,0.052389,0.032462,0.00697,0.007383,0.0082,0.014807,0.005536,0.0,0.043462,0.010302,0.005217,0.00817,0.003666
25%,0.005556667,0.01741,0.010157,0.032403,0.028837,0.063998,0.045384,0.010214,0.017571,0.015998,0.022776,0.008507,0.017089,0.059083,0.012994,0.007638,0.01171,0.007753
50%,0.00701924,0.020264,0.012656,0.036792,0.032293,0.069485,0.048974,0.011452,0.020824,0.018449,0.025385,0.010111,0.019983,0.067174,0.015523,0.010018,0.01402,0.008995
75%,0.009462748,0.025794,0.016044,0.040609,0.038409,0.075215,0.051585,0.013772,0.02349,0.022323,0.030548,0.011696,0.023023,0.073723,0.017597,0.011982,0.017187,0.010903
max,0.01513678,0.03569,0.023387,0.059422,0.054565,0.094669,0.0795,0.032943,0.031434,0.031176,0.047921,0.023777,0.035535,0.094947,0.034662,0.022758,0.024454,0.020234


In [6]:
spatial_families = {'SPD': ['spatial_dispersion_count', 'spatial_dispersion_sd', 'spatial_dispersion_area'],
                  'time_lag': ['dep_red', 'dep_sd','fzc_red', 'fzc_sd', 'szc_red', 'szc_sd'],
                  'graph': ['dep_graph_avg_speed', 'dep_graph_slowest_path', 'dep_graph_fastest_path',
                           'fzc_graph_avg_speed', 'fzc_graph_slowest_path', 'fzc_graph_fastest_path',
                           'szc_graph_avg_speed', 'szc_graph_slowest_path', 'szc_graph_fastest_path']}

for fam in spatial_families:
    spatial_df[fam] = spatial_df[spatial_families[fam]].sum(axis=1)

In [7]:
spatial_fams_df = spatial_df[[fam for fam in spatial_families]]
spatial_fams_df.describe()

Unnamed: 0,SPD,time_lag,graph
count,50.0,50.0,50.0
mean,0.042178,0.223061,0.195098
std,0.007592,0.013204,0.015551
min,0.02895,0.199556,0.156321
25%,0.035856,0.214047,0.185133
50%,0.043245,0.220397,0.194534
75%,0.046667,0.232097,0.205769
max,0.059295,0.255182,0.246854


## Temporal

In [8]:
temporal_df = df[df.modality == 'temporal']
temporal_df = temporal_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in TEMPORAL[:-1]]
drop = [c for c in temporal_df.columns if c not in keep]
temporal_df = temporal_df.drop(columns=drop)
mapper = {f'test feature {i+1}': feature_names[i] for i in TEMPORAL[:-1]}
temporal_df = temporal_df.rename(columns=mapper)

In [9]:
temporal_df.describe()

Unnamed: 0,firing_rate,d_kl_start,d_kl_mid,jump,psd_center,der_psd_center,rise_time,unif_dist
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.07841,0.114191,0.054212,0.000861,0.017362,0.013128,0.088735,0.113322
std,0.014432,0.018815,0.014178,0.001106,0.004629,0.004045,0.019497,0.015673
min,0.051019,0.066516,0.018122,0.0,0.00921,0.003429,0.045241,0.077929
25%,0.068785,0.103649,0.047197,0.000264,0.014259,0.01063,0.073781,0.103518
50%,0.078812,0.115335,0.050477,0.000635,0.016785,0.012963,0.088805,0.112535
75%,0.088014,0.122699,0.058284,0.000963,0.019207,0.014861,0.10034,0.128808
max,0.118653,0.195819,0.100935,0.007013,0.02956,0.023491,0.138626,0.141673


In [10]:
temporal_families = {'short': ['d_kl_start', 'unif_dist', 'rise_time'],
                     'long': ['d_kl_mid', 'jump'],
                     'general': ['firing_rate', 'psd_center', 'der_psd_center']}

for fam in temporal_families:
    temporal_df[fam] = temporal_df[temporal_families[fam]].sum(axis=1)

In [11]:
temporal_fams_df = temporal_df[[fam for fam in temporal_families]]
temporal_fams_df.describe()

Unnamed: 0,short,long,general
count,50.0,50.0,50.0
mean,0.316248,0.055074,0.1089
std,0.015872,0.014086,0.017818
min,0.276406,0.018122,0.072554
25%,0.307437,0.047646,0.097849
50%,0.317981,0.051142,0.107544
75%,0.32446,0.059118,0.121442
max,0.356141,0.100935,0.156264


## WF (Morphological)

In [12]:
morph_df = df[df.modality == 'morphological']
morph_df = morph_df.dropna(how='all', axis=1)
keep = [f'test feature {i+1}' for i in MORPHOLOGICAL[:-1]]
drop = [c for c in morph_df.columns if c not in keep]
morph_df = morph_df.drop(columns=drop)
mapper = {f'test feature {i+1}': feature_names[i] for i in MORPHOLOGICAL[:-1]}
morph_df = morph_df.rename(columns=mapper)

In [13]:
morph_df.describe()

Unnamed: 0,break_measure,fwhm,get_acc,max_speed,peak2peak,trough2peak,rise_coef,smile_cry
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.008521,0.035224,0.101185,0.009032865,0.097699,0.190561,0.01137827,0.035826
std,0.003545,0.017608,0.030634,0.004271006,0.022694,0.039112,0.004846682,0.012157
min,0.00062,0.003057,0.032343,4.0365680000000004e-17,0.025971,0.115236,1.912679e-17,0.005013
25%,0.006373,0.026276,0.083422,0.006592689,0.08753,0.163373,0.008671741,0.029316
50%,0.009037,0.033434,0.09802,0.009046122,0.098019,0.19232,0.01082665,0.036026
75%,0.01069,0.037255,0.115389,0.01077517,0.109689,0.217798,0.01450952,0.042025
max,0.015998,0.119573,0.205952,0.02382298,0.157351,0.321972,0.02576309,0.058694


In [14]:
morph_families = {'WF': ['trough2peak', 'peak2peak', 'fwhm', 'rise_coef'],
                  'first': ['max_speed'],
                  'second': ['break_measure', 'smile_cry']}

for fam in morph_families:
    morph_df[fam] = morph_df[morph_families[fam]].sum(axis=1)

In [15]:
morph_fams_df = morph_df[[fam for fam in morph_families]]
morph_fams_df.describe()

Unnamed: 0,WF,first,second
count,50.0,50.0,50.0
mean,0.334862,0.009032865,0.044347
std,0.039035,0.004271006,0.012493
min,0.257241,4.0365680000000004e-17,0.019598
25%,0.305781,0.006592689,0.03815
50%,0.338627,0.009046122,0.043668
75%,0.361445,0.01077517,0.052857
max,0.414542,0.02382298,0.066311
