# Region analysis
### The notebook compares the performance of models trained on a single region when tested on the training region and on the non-trained-upon region. The information extracted here is presented in the _Results_ chapter (_Spatial models generalize poorer than waveform models but better than spike-timing models_) and in _Figure 6_.
#### Make sure that the following three parameters of `constants.py` are updated based on the outputs of the `chunking_results` notebook: `BEST_WF_CHUNK`, `BEST_ST_CHUNK`, and `BEST_SPATIAL_CHUNK`.

In [1]:
from notebooks_constants import SRC_PATH
import sys
sys.path.insert(0, SRC_PATH)

import pandas as pd
import numpy as np
import scipy.stats as stats
import scipy.io as io

# Raise pandas' display limits so result tables are shown in full (up to 500
# rows and 500 columns) instead of being truncated.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

from constants import BEST_WF_CHUNK, BEST_ST_CHUNK, BEST_SPATIAL_CHUNK
from paths import REGION_CA1_RES, REGION_NCX_RES

In [2]:
# Results file analyzed by the single-region cells that follow (it is the file
# read into `results`). To analyze the other region, comment out the active
# assignment and uncomment the alternative, then re-run the cells below.
PATH = REGION_CA1_RES
#PATH = REGION_NCX_RES

In [3]:
# Read the results table of the selected region; `chunk_size` is converted to
# a numeric column and downcast to the smallest integer dtype that can hold it.
results = (
    pd.read_csv(PATH, index_col=0)
    .assign(chunk_size=lambda frame: pd.to_numeric(frame.chunk_size, downcast='integer'))
)

# Columns that contain no values at all are dropped before any analysis.
complete = results.dropna(axis=1, how='all')
grouped_complete = complete.groupby(['modality', 'chunk_size'])

# Quartiles of both AUC columns for every (modality, chunk size) group.
grouped_complete[['auc', 'auc2']].quantile(q=[0.25, 0.5, 0.75])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,auc,auc2
modality,chunk_size,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
spatial,25,0.25,0.80502,0.934211
spatial,25,0.5,0.829615,0.966165
spatial,25,0.75,0.858012,0.979323
spike-timing,1600,0.25,0.76534,0.981673
spike-timing,1600,0.5,0.784229,0.991541
spike-timing,1600,0.75,0.807683,0.994361
waveform,50,0.25,0.981237,0.961466
waveform,50,0.5,0.981998,0.99718
waveform,50,0.75,0.986815,1.0


In [4]:
# Per modality, compare the same-region AUCs (column `auc2`) with the
# other-region AUCs (column `auc`), using every row of that modality.
wf_rows = complete.loc[complete.modality == 'waveform']
wf_same, wf_other = wf_rows.auc2.to_numpy(), wf_rows.auc.to_numpy()

st_rows = complete.loc[complete.modality == 'spike-timing']
st_same, st_other = st_rows.auc2.to_numpy(), st_rows.auc.to_numpy()

spat_rows = complete.loc[complete.modality == 'spatial']
spat_same, spat_other = spat_rows.auc2.to_numpy(), spat_rows.auc.to_numpy()

# `alternative` is pinned explicitly instead of relying on the SciPy default:
# SciPy < 1.7 defaulted to a deprecated mode that reports half of the two-sided
# p-value, whereas newer releases default to 'two-sided'. Pinning it makes the
# printed p-values independent of the installed SciPy version.
for modality_label, same_aucs, other_aucs in (
    ('waveform', wf_same, wf_other),
    ('spike-timing', st_same, st_other),
    ('spatial', spat_same, spat_other),
):
    statistic, p_val = stats.mannwhitneyu(same_aucs, other_aucs, alternative='two-sided')
    print(f"Mann-Whitney statistical test results for {modality_label} are p-value={p_val: .3g}")
    print()

Mann-Whitney statistical test results for waveform are p-value= 0.00314

Mann-Whitney statistical test results for spike-timing are p-value= 3.47e-18

Mann-Whitney statistical test results for spatial are p-value= 4.21e-18



In [5]:
# For each modality keep only the rows obtained with that modality's best chunk
# size, then extract the other-region (`auc`) and same-region (`auc2`) scores
# as NumPy arrays.
cs_wf = complete.loc[(complete.chunk_size == BEST_WF_CHUNK) & (complete.modality == 'waveform')]
wf_aucs_other, wf_aucs_same = cs_wf.auc.to_numpy(), cs_wf.auc2.to_numpy()

cs_st = complete.loc[(complete.chunk_size == BEST_ST_CHUNK) & (complete.modality == 'spike-timing')]
st_aucs_other, st_aucs_same = cs_st.auc.to_numpy(), cs_st.auc2.to_numpy()

cs_spat = complete.loc[(complete.chunk_size == BEST_SPATIAL_CHUNK) & (complete.modality == 'spatial')]
spat_aucs_other, spat_aucs_same = cs_spat.auc.to_numpy(), cs_spat.auc2.to_numpy()

In [6]:
# Drop in AUC when moving from the same-region to the other-region test set,
# expressed as a percentage of the same-region AUC (one value per row).
wf_diff = 100 * (wf_aucs_same - wf_aucs_other) / wf_aucs_same
st_diff = 100 * (st_aucs_same - st_aucs_other) / st_aucs_same
spat_diff = 100 * (spat_aucs_same - spat_aucs_other) / spat_aucs_same

# Median and inter-quartile range of the drop per modality. The last line
# carries an extra newline to separate this report from the test results.
for column_label, diffs, line_suffix in (
    ('waveform', wf_diff, ''),
    ('spike-timing', st_diff, ''),
    ('spatial', spat_diff, '\n'),
):
    q25, median, q75 = np.percentile(diffs, [25, 50, 75])
    print(f"Median of {column_label} column is {median: .3f} [{q25: .3f}, {q75: .3f}]%{line_suffix}")

# One-sided Wilcoxon signed-rank test per modality (alternative='greater',
# i.e. the drop is tested for being greater than zero). The label fragments
# reproduce the wording of each printed line exactly.
for label_fragment, diffs in (
    ('for wf are', wf_diff),
    ('for temporal are', st_diff),
    ('for spatial', spat_diff),
):
    statistic, p_val = stats.wilcoxon(diffs, alternative='greater')
    print(f"Wilcoxon statistical test results {label_fragment} p-value={p_val: .3g}")

Median of waveform column is  0.811 [-1.992,  1.741]%
Median of spike-timing column is  19.386 [ 18.027,  22.318]%
Median of spatial column is  13.068 [ 9.420,  16.515]%

Wilcoxon statistical test results for wf are p-value= 0.506
Wilcoxon statistical test results for temporal are p-value= 3.78e-10
Wilcoxon statistical test results for spatial p-value= 3.78e-10


In [7]:
# Wilcoxon signed-rank test of (AUC - 0.5) for every modality, first for the
# inter-region (other-region) AUCs and then for the intra-region (same-region)
# AUCs; 0.5 is the AUC expected from chance-level classification.
reference_auc = 0.5

for region_kind, aucs_by_modality in (
    ('inter', (('waveform', wf_aucs_other),
               ('spike-timing', st_aucs_other),
               ('spatial', spat_aucs_other))),
    ('intra', (('waveform', wf_aucs_same),
               ('spike-timing', st_aucs_same),
               ('spatial', spat_aucs_same))),
):
    for modality_label, aucs in aucs_by_modality:
        statistic, p_val = stats.wilcoxon(aucs - reference_auc)
        print(f"Wilcoxon statistical test results for {modality_label} {region_kind}-region auc are p-value={p_val: .3g}")
    # Blank line after each group of three results.
    print()

Wilcoxon statistical test results for waveform inter-region auc are p-value= 7.43e-10
Wilcoxon statistical test results for spike-timing inter-region auc are p-value= 7.56e-10
Wilcoxon statistical test results for spatial inter-region auc are p-value= 7.54e-10

Wilcoxon statistical test results for waveform intra-region auc are p-value= 5.26e-10
Wilcoxon statistical test results for spike-timing intra-region auc are p-value= 7.42e-10
Wilcoxon statistical test results for spatial intra-region auc are p-value= 7.54e-10



## Compare generalizability

In [8]:
# Results files of both regions; unlike `PATH` above, both are loaded so the
# two regions can be compared against each other.
PATH_CA1 = REGION_CA1_RES
PATH_NCX = REGION_NCX_RES

In [9]:
# Load the results table of each region (NCX first, then CA1) and discard
# columns that contain no values at all.
results_ncx, results_ca1 = (
    pd.read_csv(region_path, index_col=0).dropna(axis=1, how='all')
    for region_path in (PATH_NCX, PATH_CA1)
)

In [10]:
# Percent drop from same-region AUC (`auc2`) to other-region AUC (`auc`) for
# the NCX results file, per modality, restricted to each modality's best chunk
# size. The results feed the CA1-vs-NCX comparison.
#
# Every intermediate carries an `_ncx` suffix so that this cell does not
# overwrite the single-region variables (`cs_wf`, `wf_aucs_same`, ...) that the
# single-region cells earlier in the notebook define and read; re-running those
# cells after this one therefore still uses the data they were written for.
cs_wf_ncx = results_ncx.loc[
    (results_ncx.chunk_size == BEST_WF_CHUNK) & (results_ncx.modality == 'waveform')
]
wf_aucs_other_ncx = cs_wf_ncx.auc.to_numpy()
wf_aucs_same_ncx = cs_wf_ncx.auc2.to_numpy()

cs_st_ncx = results_ncx.loc[
    (results_ncx.chunk_size == BEST_ST_CHUNK) & (results_ncx.modality == 'spike-timing')
]
st_aucs_other_ncx = cs_st_ncx.auc.to_numpy()
st_aucs_same_ncx = cs_st_ncx.auc2.to_numpy()

cs_spat_ncx = results_ncx.loc[
    (results_ncx.chunk_size == BEST_SPATIAL_CHUNK) & (results_ncx.modality == 'spatial')
]
spat_aucs_other_ncx = cs_spat_ncx.auc.to_numpy()
spat_aucs_same_ncx = cs_spat_ncx.auc2.to_numpy()

wf_diff_ncx = 100 * (wf_aucs_same_ncx - wf_aucs_other_ncx) / wf_aucs_same_ncx
st_diff_ncx = 100 * (st_aucs_same_ncx - st_aucs_other_ncx) / st_aucs_same_ncx
spat_diff_ncx = 100 * (spat_aucs_same_ncx - spat_aucs_other_ncx) / spat_aucs_same_ncx

In [11]:
# Percent drop from same-region AUC (`auc2`) to other-region AUC (`auc`) for
# the CA1 results file, per modality, restricted to each modality's best chunk
# size. The results feed the CA1-vs-NCX comparison.
#
# Every intermediate carries a `_ca1` suffix so that this cell does not
# overwrite the single-region variables (`cs_wf`, `wf_aucs_same`, ...) that the
# single-region cells earlier in the notebook define and read; re-running those
# cells after this one therefore still uses the data they were written for.
cs_wf_ca1 = results_ca1.loc[
    (results_ca1.chunk_size == BEST_WF_CHUNK) & (results_ca1.modality == 'waveform')
]
wf_aucs_other_ca1 = cs_wf_ca1.auc.to_numpy()
wf_aucs_same_ca1 = cs_wf_ca1.auc2.to_numpy()

cs_st_ca1 = results_ca1.loc[
    (results_ca1.chunk_size == BEST_ST_CHUNK) & (results_ca1.modality == 'spike-timing')
]
st_aucs_other_ca1 = cs_st_ca1.auc.to_numpy()
st_aucs_same_ca1 = cs_st_ca1.auc2.to_numpy()

cs_spat_ca1 = results_ca1.loc[
    (results_ca1.chunk_size == BEST_SPATIAL_CHUNK) & (results_ca1.modality == 'spatial')
]
spat_aucs_other_ca1 = cs_spat_ca1.auc.to_numpy()
spat_aucs_same_ca1 = cs_spat_ca1.auc2.to_numpy()

wf_diff_ca1 = 100 * (wf_aucs_same_ca1 - wf_aucs_other_ca1) / wf_aucs_same_ca1
st_diff_ca1 = 100 * (st_aucs_same_ca1 - st_aucs_other_ca1) / st_aucs_same_ca1
spat_diff_ca1 = 100 * (spat_aucs_same_ca1 - spat_aucs_other_ca1) / spat_aucs_same_ca1

In [12]:
# Per modality, compare the percent AUC drop of the CA1 results with that of
# the NCX results.
#
# `alternative` is pinned explicitly instead of relying on the SciPy default:
# SciPy < 1.7 defaulted to a deprecated mode that reports half of the two-sided
# p-value, whereas newer releases default to 'two-sided'. Pinning it makes the
# printed p-values independent of the installed SciPy version.
for modality_label, diff_ca1, diff_ncx in (
    ('waveform', wf_diff_ca1, wf_diff_ncx),
    ('spike-timing', st_diff_ca1, st_diff_ncx),
    ('spatial', spat_diff_ca1, spat_diff_ncx),
):
    statistic, p_val = stats.mannwhitneyu(diff_ca1, diff_ncx, alternative='two-sided')
    print(f"Mann-Whitney statistical test results for {modality_label} are p-value={p_val: .3g}")

Mann-Whitney statistical test results for waveform are p-value= 0.066
Mann-Whitney statistical test results for spike-timing are p-value= 2.23e-17
Mann-Whitney statistical test results for spatial are p-value= 2.53e-12


#### Additional statistical comparisons using the Kruskal-Wallis test were conducted in Matlab. For each training region, the test was run across all modalities twice: once on the performance on the non-trained-upon-region test set and once on the diff values. To perform the comparisons in Matlab use:
```matlab
[p, tbl, stats] = kruskalwallis(mat)

c = multcompare(stats)
```