# Feature importance analysis 
### The information computed here is presented in the _Results_ chapter, under the second (_Waveform-based and spike-timing features allow near-perfect classification of PYR and PV cells_) and third (_Transforming multi-channel spike waveforms to event-based delta-like functions removes all waveform-based information and allows extracting purely spatial features_) subsections and in _Tables 1-3_. It is also presented in _Figure 5_.

In [1]:
from notebooks_constants import SRC_PATH
import sys
sys.path.insert(0, SRC_PATH)

import sys
import pandas as pd
import numpy as np
import scipy.stats as stats

from constants import WAVEFORM, SPIKE_TIMING, SPATIAL, feature_names_org
from constants import BEST_WF_CHUNK , BEST_ST_CHUNK, BEST_SPATIAL_CHUNK

from paths import MAIN_RES, BASE_RES, FAMS_RES, BASE_FAMS_RES, EVENTS_RES, BASE_EVENTS_RES

In [2]:
# Locations of the results table ("test") and of its baseline counterpart.
PATH, BASE = MAIN_RES, BASE_RES

# Both tables are read the same way: the first CSV column becomes the index.
df, df_base = (pd.read_csv(csv_path, index_col=0) for csv_path in (PATH, BASE))

## Spatial

In [3]:
# Keep the rows for the best spatial chunk size and the 'spatial' modality,
# then discard columns that hold no values at all for those rows.
spatial_df = df.loc[
    (df.chunk_size == BEST_SPATIAL_CHUNK) & (df.modality == 'spatial')
].dropna(axis=1, how='all')

# 'test feature N' columns to retain (N is the 1-based feature index; the
# last entry of SPATIAL is skipped), and their human-readable names.
keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in SPATIAL[:-1]}

# Everything outside `keep` is removed before renaming.
drop = [c for c in spatial_df.columns if c not in keep]
spatial_df = spatial_df.drop(columns=drop).rename(columns=mapper)

In [4]:
# Baseline rows for the same chunk size / modality as `spatial_df`.
spatial_df_base = df_base.loc[
    (df_base.chunk_size == BEST_SPATIAL_CHUNK) & (df_base.modality == 'spatial')
]

# Retain only the spatial 'test feature N' columns, drop columns that are
# entirely empty, and rename with the mapper built for `spatial_df`.
keep = [f'test feature {i+1}' for i in SPATIAL[:-1]]
drop = [c for c in spatial_df_base.columns if c not in keep]
spatial_df_base = (
    spatial_df_base
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# For every feature: print median [25th, 75th percentile] of the test and
# baseline values, then an empirical p-value defined as
# (1 + #baseline values above the test median) / (1 + #baseline values).
for col in spatial_df.columns:
    col_test = spatial_df[col].to_numpy()
    col_base = spatial_df_base[col].to_numpy()

    test_stats = np.percentile(col_test, [50, 25, 75])
    base_stats = np.percentile(col_base, [50, 25, 75])
    for label, (median, prec25, prec75) in (("test", test_stats), ("base", base_stats)):
        print(f"Median of {label} {col} column is {median: .3g} [{prec25: .3g}, {prec75: .3g}]")

    n_above = np.sum(col_base > np.median(col_test))
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val: .3g}")
    print()

Median of test SPD_Count column is  0.00739 [ 0.00583,  0.0132]
Median of base SPD_Count column is  0.0011 [ 0.000473,  0.00525]
permutation test result for feature SPD_Count is p-value= 0.169

Median of test SPD_SD column is  0.00794 [ 0.00612,  0.0105]
Median of base SPD_SD column is  0.00289 [ 0.00147,  0.0121]
permutation test result for feature SPD_SD is p-value= 0.32

Median of test SPD_Area column is  0.0146 [ 0.0108,  0.0194]
Median of base SPD_Area column is  0.00289 [ 0.00138,  0.0118]
permutation test result for feature SPD_Area is p-value= 0.195

Median of test NEG_Time-lag_SS column is  0.0076 [ 0.00649,  0.0104]
Median of base NEG_Time-lag_SS column is  0.00268 [ 0.00125,  0.0107]
permutation test result for feature NEG_Time-lag_SS is p-value= 0.318

Median of test NEG_Time-lag_SD column is  0.0086 [ 0.00733,  0.013]
Median of base NEG_Time-lag_SD column is  0.00248 [ 0.00123,  0.0104]
permutation test result for feature NEG_Time-lag_SD is p-value= 0.297

Median of test F

## Temporal

In [5]:
# Keep the rows for the best spike-timing chunk size and the 'spike-timing'
# modality, then discard columns that hold no values at all for those rows.
st_df = df.loc[
    (df.chunk_size == BEST_ST_CHUNK) & (df.modality == 'spike-timing')
].dropna(axis=1, how='all')

# 'test feature N' columns to retain (N is the 1-based feature index; the
# last entry of SPIKE_TIMING is skipped), and their human-readable names.
keep = [f'test feature {i+1}' for i in SPIKE_TIMING[:-1]]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in SPIKE_TIMING[:-1]}

# Everything outside `keep` is removed before renaming.
drop = [c for c in st_df.columns if c not in keep]
st_df = st_df.drop(columns=drop).rename(columns=mapper)

In [6]:
# Baseline rows for the same chunk size / modality as `st_df`.
st_df_base = df_base[df_base.chunk_size == BEST_ST_CHUNK]
st_df_base = st_df_base[st_df_base.modality == 'spike-timing']

st_df_base = st_df_base.dropna(how='all', axis=1)

# Build the drop list from the baseline frame's own columns instead of
# reusing the list computed for `st_df`: after `dropna` the two frames need
# not share the same columns, and dropping a missing label raises KeyError.
keep = [f'test feature {i+1}' for i in SPIKE_TIMING[:-1]]
drop = [c for c in st_df_base.columns if c not in keep]
st_df_base = st_df_base.drop(columns=drop)
st_df_base = st_df_base.rename(columns=mapper)

# For every feature: print median [25th, 75th percentile] of the test and
# baseline values, then an empirical p-value defined as
# (1 + #baseline values above the test median) / (1 + #baseline values).
for col in st_df.columns:
    col_test = st_df[col].to_numpy()
    col_base = st_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median: .3g} [{test_prec25: .3g}, {test_prec75: .3g}]")
    print(f"Median of base {col} column is {base_median: .3g} [{base_prec25: .3g}, {base_prec75: .3g}]")

    p_val = (1 + (col_base > np.median(col_test)).sum()) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val: .3g}")
    print()

Median of test Firing_rate column is  0.0766 [ 0.063,  0.0894]
Median of base Firing_rate column is  0.0182 [ 0.0102,  0.0302]
permutation test result for feature Firing_rate is p-value= 0.038

Median of test D_KL_short column is  0.0255 [ 0.0206,  0.0352]
Median of base D_KL_short column is  0.0183 [ 0.0104,  0.0337]
permutation test result for feature D_KL_short is p-value= 0.359

Median of test D_KL_long column is  0.187 [ 0.172,  0.199]
Median of base D_KL_long column is  0.0181 [ 0.00972,  0.0311]
permutation test result for feature D_KL_long is p-value= 0.000999

Median of test Jump_index column is  0.0291 [ 0.0261,  0.0324]
Median of base Jump_index column is  0.0187 [ 0.0105,  0.0335]
permutation test result for feature Jump_index is p-value= 0.311

Median of test PSD_center column is  0.0164 [ 0.0129,  0.0195]
Median of base PSD_center column is  0.0194 [ 0.011,  0.0329]
permutation test result for feature PSD_center is p-value= 0.561

Median of test PSD'_center column is  0.0

## Waveform

In [7]:
# Keep the rows for the best waveform chunk size and the 'waveform'
# modality, then discard columns that hold no values at all for those rows.
wf_df = df.loc[
    (df.chunk_size == BEST_WF_CHUNK) & (df.modality == 'waveform')
].dropna(axis=1, how='all')

# 'test feature N' columns to retain (N is the 1-based feature index; the
# last entry of WAVEFORM is skipped), and their human-readable names.
keep = [f'test feature {i+1}' for i in WAVEFORM[:-1]]
mapper = {f'test feature {i+1}': feature_names_org[i] for i in WAVEFORM[:-1]}

# Everything outside `keep` is removed before renaming.
drop = [c for c in wf_df.columns if c not in keep]
wf_df = wf_df.drop(columns=drop).rename(columns=mapper)

In [8]:
# Baseline rows for the same chunk size / modality as `wf_df`.
wf_df_base = df_base[df_base.chunk_size == BEST_WF_CHUNK]
wf_df_base = wf_df_base[wf_df_base.modality == 'waveform']

wf_df_base = wf_df_base.dropna(how='all', axis=1)

# Build the drop list from the baseline frame's own columns instead of
# reusing the list computed for `wf_df`: after `dropna` the two frames need
# not share the same columns, and dropping a missing label raises KeyError.
keep = [f'test feature {i+1}' for i in WAVEFORM[:-1]]
drop = [c for c in wf_df_base.columns if c not in keep]
wf_df_base = wf_df_base.drop(columns=drop)
wf_df_base = wf_df_base.rename(columns=mapper)

# For every feature: print median [25th, 75th percentile] of the test and
# baseline values, then an empirical p-value defined as
# (1 + #baseline values above the test median) / (1 + #baseline values).
for col in wf_df.columns:
    col_test = wf_df[col].to_numpy()
    col_base = wf_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median: .3g} [{test_prec25: .3g}, {test_prec75: .3g}]")
    print(f"Median of base {col} column is {base_median: .3g} [{base_prec25: .3g}, {base_prec75: .3g}]")

    p_val = (1 + (col_base > np.median(col_test)).sum()) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val: .3g}")
    print()

Median of test Break_measure column is  0.00284 [ 0.00201,  0.00425]
Median of base Break_measure column is  0.00924 [ 0.00406,  0.0253]
permutation test result for feature Break_measure is p-value= 0.838

Median of test FWHM column is  0.00483 [ 0.00357,  0.00748]
Median of base FWHM column is  0.00651 [ 0.00276,  0.0177]
permutation test result for feature FWHM is p-value= 0.587

Median of test Acceleration column is  0.117 [ 0.0993,  0.154]
Median of base Acceleration column is  0.00913 [ 0.00398,  0.0243]
permutation test result for feature Acceleration is p-value= 0.002

Median of test Max_speed column is  0.0109 [ 0.00661,  0.014]
Median of base Max_speed column is  0.00742 [ 0.00326,  0.0196]
permutation test result for feature Max_speed is p-value= 0.415

Median of test TTP_magnitude column is  0.109 [ 0.0828,  0.121]
Median of base TTP_magnitude column is  0.0101 [ 0.00429,  0.0256]
permutation test result for feature TTP_magnitude is p-value= 0.003

Median of test TTP_duratio

## Events

In [9]:
# Locations of the events results table and of its baseline counterpart.
PATH, BASE = EVENTS_RES, BASE_EVENTS_RES

# Both tables are read the same way: the first CSV column becomes the index.
df, df_base = (pd.read_csv(csv_path, index_col=0) for csv_path in (PATH, BASE))

In [10]:
# Display names for the event columns; position k (0-based) names the
# 'test feature k+1' column.
events_names = [
    "FMC",
    "NEG",
    "SMC",
]

In [11]:
# Rows of the events results at the best spatial chunk size.
events_df = df.loc[df.chunk_size == BEST_SPATIAL_CHUNK]

# Map 'test feature 1..K' onto the event names, in order.
mapper = {
    f'test feature {position}': event
    for position, event in enumerate(events_names, start=1)
}
keep = list(mapper)

# Remove every column that is not one of the mapped feature columns, then
# rename the survivors to their event names.
drop = [c for c in events_df.columns if c not in keep]
events_df = events_df.drop(columns=drop).rename(columns=mapper)

In [12]:
# Select the baseline rows with the same chunk size used for `events_df`
# (BEST_SPATIAL_CHUNK) rather than a hard-coded literal, so the test and
# baseline values compared below always come from the same chunk size.
events_df_base = df_base[df_base.chunk_size == BEST_SPATIAL_CHUNK]

# Remove the non-feature columns, drop columns that are entirely empty, and
# rename the feature columns to the event names.
events_df_base = events_df_base.drop(columns=drop)
events_df_base = events_df_base.dropna(how='all', axis=1)
events_df_base = events_df_base.rename(columns=mapper)

# For every event group: print median [25th, 75th percentile] of the test
# and baseline values, then an empirical p-value defined as
# (1 + #baseline values above the test median) / (1 + #baseline values).
for col in events_df.columns:
    col_test = events_df[col].to_numpy()
    col_base = events_df_base[col].to_numpy()

    test_median, test_prec25, test_prec75 = np.percentile(col_test, [50, 25, 75])
    base_median, base_prec25, base_prec75 = np.percentile(col_base, [50, 25, 75])

    print(f"Median of test {col} column is {test_median: .3g} [{test_prec25: .3g}, {test_prec75: .3g}]")
    print(f"Median of base {col} column is {base_median: .3g} [{base_prec25: .3g}, {base_prec75: .3g}]")

    p_val = (1 + (col_base > np.median(col_test)).sum()) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val: .3g}")
    print()

Median of test FMC column is  0.264 [ 0.249,  0.29]
Median of base FMC column is  0.00913 [ 0.00464,  0.0372]
permutation test result for feature FMC is p-value= 0.000999

Median of test NEG column is  0.0284 [ 0.0236,  0.0361]
Median of base NEG column is  0.00844 [ 0.00452,  0.036]
permutation test result for feature NEG is p-value= 0.297

Median of test SMC column is  0.0983 [ 0.0881,  0.113]
Median of base SMC column is  0.00908 [ 0.0047,  0.0389]
permutation test result for feature SMC is p-value= 0.112



## Spatial families

In [13]:
# Locations of the spatial-families results table and of its baseline.
PATH = FAMS_RES
BASE = BASE_FAMS_RES

# Read each table once, using the first CSV column as the index.
df = pd.read_csv(PATH, index_col=0)
df_base = pd.read_csv(BASE, index_col=0)


In [14]:
# Display names for the family columns; position k (0-based) names the
# 'test feature k+1' column.
familiy_names = [
    "value-based",
    "time-based",
    "graph-based",
]

In [15]:
# Rows of the family results at the best spatial chunk size.
family_df = df.loc[df.chunk_size == BEST_SPATIAL_CHUNK]

# Map 'test feature 1..K' onto the family names, in order.
mapper = {
    f'test feature {position}': family
    for position, family in enumerate(familiy_names, start=1)
}
keep = list(mapper)

# Remove every column that is not one of the mapped feature columns, then
# rename the survivors to their family names.
drop = [c for c in family_df.columns if c not in keep]
family_df = family_df.drop(columns=drop).rename(columns=mapper)

In [16]:
# Baseline rows at the best spatial chunk size, reduced to the family
# feature columns, with entirely empty columns removed, then renamed.
family_df_base = (
    df_base.loc[df_base.chunk_size == BEST_SPATIAL_CHUNK]
    .drop(columns=drop)
    .dropna(how='all', axis=1)
    .rename(columns=mapper)
)

# For every family: print median [25th, 75th percentile] of the test and
# baseline values, then an empirical p-value defined as
# (1 + #baseline values above the test median) / (1 + #baseline values).
for col in family_df.columns:
    col_test = family_df[col].to_numpy()
    col_base = family_df_base[col].to_numpy()

    test_stats = np.percentile(col_test, [50, 25, 75])
    base_stats = np.percentile(col_base, [50, 25, 75])
    for label, (median, prec25, prec75) in (("test", test_stats), ("base", base_stats)):
        print(f"Median of {label} {col} column is {median: .3g} [{prec25: .3g}, {prec75: .3g}]")

    n_above = np.sum(col_base > np.median(col_test))
    p_val = (1 + n_above) / (1 + len(col_base))
    print(f"permutation test result for feature {col} is p-value={p_val: .3g}")
    print()

Median of test value-based column is  0.0281 [ 0.0208,  0.0361]
Median of base value-based column is  0.00556 [ 0.00293,  0.0225]
permutation test result for feature value-based is p-value= 0.191

Median of test time-based column is  0.246 [ 0.229,  0.267]
Median of base time-based column is  0.0104 [ 0.00544,  0.0435]
permutation test result for feature time-based is p-value= 0.000999

Median of test graph-based column is  0.111 [ 0.102,  0.129]
Median of base graph-based column is  0.0134 [ 0.0071,  0.0625]
permutation test result for feature graph-based is p-value= 0.135



#### Additional statistical comparisons applying the Kruskal-Wallis test were conducted in Matlab. Those were simply executed for each spatial feature family, and for each spatial event group. To perform the comparisons in Matlab use: 
[p, tbl, stats] = kruskalwallis(mat)

c = multcompare(stats)