# Table S1

In [1]:
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import scipy.stats as ss

from neuroinference.coupling_utils import read_coupling_coefs
from neuroinference.utils import (calculate_selection_ratio,
                                  cohens_d)
                            
%matplotlib inline

In [2]:
# Root directory holding the coupling fit results. expanduser('~') honours
# $HOME when it is set and falls back to the platform's notion of the home
# directory otherwise, instead of raising KeyError when HOME is missing.
base_path = os.path.join(os.path.expanduser('~'), 'fits/uoineuro/coupling')

In [3]:
# AC: a single ECoG results file located under base_path.
ac_results_paths = [
    os.path.join(base_path, relative_path)
    for relative_path in ('ecog/ecog_coupling.h5',)
]
n_ac_fits = len(ac_results_paths)

# Baseline and UoI coupling coefficients, requested with linear=True and
# poisson=False.
ac_baseline_ccs, ac_uoi_ccs = read_coupling_coefs(
    ac_results_paths,
    linear=True,
    poisson=False,
)

# Read-only HDF5 handles; they are left open because later cells index
# into them directly.
ac_results = [h5py.File(path, 'r') for path in ac_results_paths]

In [4]:
# V1: three pvc11 results files (monkey1 through monkey3) under base_path.
v1_results_paths = [
    os.path.join(base_path, relative_path)
    for relative_path in ('pvc11/pvc11_monkey1_coupling.h5',
                          'pvc11/pvc11_monkey2_coupling.h5',
                          'pvc11/pvc11_monkey3_coupling.h5')
]
n_v1_fits = len(v1_results_paths)

# Baseline and UoI coupling coefficients, requested with linear=False and
# poisson=True.
v1_baseline_ccs, v1_uoi_ccs = read_coupling_coefs(
    v1_results_paths,
    linear=False,
    poisson=True,
)

# Read-only HDF5 handles; they are left open because later cells index
# into them directly.
v1_results = [h5py.File(path, 'r') for path in v1_results_paths]

In [5]:
# M1: three nhp "indy" session results files under base_path.
m1_results_paths = [
    os.path.join(base_path, relative_path)
    for relative_path in ('nhp/nhp_indy_20160407_02_coupling.h5',
                          'nhp/nhp_indy_20160411_01_coupling.h5',
                          'nhp/nhp_indy_20160411_02_coupling.h5')
]
n_m1_fits = len(m1_results_paths)

# Baseline and UoI coupling coefficients, requested with linear=False.
# NOTE(review): `poisson` is not passed here (the V1 cell passes
# poisson=True), so the helper's default applies -- confirm that default
# selects the Poisson fits.
m1_baseline_ccs, m1_uoi_ccs = read_coupling_coefs(
    m1_results_paths,
    linear=False,
)

# Read-only HDF5 handles; they are left open because later cells index
# into them directly.
m1_results = [h5py.File(path, 'r') for path in m1_results_paths]

## Dataset Details

In [6]:
# Report the shape of the 'lasso/Y' dataset stored with each AC fit,
# read as (samples, units).
print('AC')
print('---------------')
for dataset_number, results_file in enumerate(ac_results, start=1):
    response_shape = results_file['lasso/Y'].shape
    sample_count, unit_count = response_shape
    print('Dataset ', dataset_number)
    print('Number of samples: ', sample_count)
    print('Number of units: ', unit_count)
    print('---------------')
print('Note: 3 units were not used due to faulty channels')

AC
---------------
Dataset  1
Number of samples:  4200
Number of units:  128
---------------
Note: 3 units were not used due to faulty channels


In [7]:
# Report the shape of the 'lasso/Y' dataset stored with each V1 fit,
# read as (samples, units).
print('V1')
print('---------------')
for dataset_number, results_file in enumerate(v1_results, start=1):
    response_shape = results_file['lasso/Y'].shape
    sample_count, unit_count = response_shape
    print('Dataset ', dataset_number)
    print('Number of samples: ', sample_count)
    print('Number of units: ', unit_count)
    print('---------------')

V1
---------------
Dataset  1
Number of samples:  2400
Number of units:  106
---------------
Dataset  2
Number of samples:  2400
Number of units:  88
---------------
Dataset  3
Number of samples:  2400
Number of units:  112
---------------


In [8]:
# Report the shape of the 'lasso/Y' dataset stored with each M1 fit,
# read as (samples, units).
print('M1')
print('---------------')
for dataset_number, results_file in enumerate(m1_results, start=1):
    response_shape = results_file['lasso/Y'].shape
    sample_count, unit_count = response_shape
    print('Dataset ', dataset_number)
    print('Number of samples: ', sample_count)
    print('Number of units: ', unit_count)
    print('---------------')

M1
---------------
Dataset  1
Number of samples:  4089
Number of units:  136
---------------
Dataset  2
Number of samples:  4767
Number of units:  146
---------------
Dataset  3
Number of samples:  4400
Number of units:  147
---------------


## Selection Ratio

In [9]:
# Selection ratios for every AC fit, baseline and UoI.
ac_baseline_srs = list(map(calculate_selection_ratio, ac_baseline_ccs))
ac_uoi_srs = list(map(calculate_selection_ratio, ac_uoi_ccs))

print('AC')
print('---------------')
for dataset_number, (baseline_sr, uoi_sr) in enumerate(
        zip(ac_baseline_srs, ac_uoi_srs), start=1):
    # Summary statistics: median, with half the interquartile range as spread.
    baseline_median = np.median(baseline_sr)
    uoi_median = np.median(uoi_sr)
    baseline_spread = ss.iqr(baseline_sr) / 2
    uoi_spread = ss.iqr(uoi_sr) / 2

    print('Dataset ', dataset_number)
    print('Baseline: ', format(baseline_median, '0.4f'),
          '+/-', format(baseline_spread, '0.4f'))
    print('UoI: ', format(uoi_median, '0.4f'),
          '+/-', format(uoi_spread, '0.4f'))
    print('Reduction in features: ', baseline_median / uoi_median)
    # One-sided paired test with alternative='greater' (baseline vs. UoI).
    print('p-value: ',
          ss.wilcoxon(baseline_sr, uoi_sr, alternative='greater').pvalue)
    print("Cohen's d: ", cohens_d(baseline_sr, uoi_sr))
    print('---------------')

AC
---------------
Dataset  1
Baseline:  0.2992 +/- 0.0679
UoI:  0.1339 +/- 0.0285
Reduction in features:  2.235294117647059
p-value:  5.440169988567301e-23
Cohen's d:  1.7496657199225092
---------------


In [10]:
# Selection ratios for every V1 fit, baseline and UoI.
v1_baseline_srs = list(map(calculate_selection_ratio, v1_baseline_ccs))
v1_uoi_srs = list(map(calculate_selection_ratio, v1_uoi_ccs))

print('V1')
print('---------------')
for dataset_number, (baseline_sr, uoi_sr) in enumerate(
        zip(v1_baseline_srs, v1_uoi_srs), start=1):
    # Summary statistics: median, with half the interquartile range as spread.
    baseline_median = np.median(baseline_sr)
    uoi_median = np.median(uoi_sr)
    baseline_spread = ss.iqr(baseline_sr) / 2
    uoi_spread = ss.iqr(uoi_sr) / 2

    print('Dataset ', dataset_number)
    print('Baseline: ', format(baseline_median, '0.4f'),
          '+/-', format(baseline_spread, '0.4f'))
    print('UoI: ', format(uoi_median, '0.4f'),
          '+/-', format(uoi_spread, '0.4f'))
    print('Reduction in features: ', baseline_median / uoi_median)
    # One-sided paired test with alternative='greater' (baseline vs. UoI).
    print('p-value: ',
          ss.wilcoxon(baseline_sr, uoi_sr, alternative='greater').pvalue)
    print("Cohen's d: ", cohens_d(baseline_sr, uoi_sr))
    print('---------------')

V1
---------------
Dataset  1
Baseline:  0.5905 +/- 0.1071
UoI:  0.2667 +/- 0.0667
Reduction in features:  2.2142857142857144
p-value:  2.039433446953991e-19
Cohen's d:  2.260925169928966
---------------
Dataset  2
Baseline:  0.6609 +/- 0.1710
UoI:  0.2586 +/- 0.1149
Reduction in features:  2.5555555555555554
p-value:  1.922729679230268e-16
Cohen's d:  1.8711483851770927
---------------
Dataset  3
Baseline:  0.5946 +/- 0.1002
UoI:  0.2793 +/- 0.0732
Reduction in features:  2.1290322580645165
p-value:  2.0343190660141697e-20
Cohen's d:  2.572482453165233
---------------


In [11]:
# Selection ratios for every M1 fit, baseline and UoI.
m1_baseline_srs = list(map(calculate_selection_ratio, m1_baseline_ccs))
m1_uoi_srs = list(map(calculate_selection_ratio, m1_uoi_ccs))

print('M1')
print('---------------')
for dataset_number, (baseline_sr, uoi_sr) in enumerate(
        zip(m1_baseline_srs, m1_uoi_srs), start=1):
    # Summary statistics: median, with half the interquartile range as spread.
    baseline_median = np.median(baseline_sr)
    uoi_median = np.median(uoi_sr)
    baseline_spread = ss.iqr(baseline_sr) / 2
    uoi_spread = ss.iqr(uoi_sr) / 2

    print('Dataset ', dataset_number)
    print('Baseline: ', format(baseline_median, '0.4f'),
          '+/-', format(baseline_spread, '0.4f'))
    print('UoI: ', format(uoi_median, '0.4f'),
          '+/-', format(uoi_spread, '0.4f'))
    print('Reduction in features: ', baseline_median / uoi_median)
    # One-sided paired test with alternative='greater' (baseline vs. UoI).
    print('p-value: ',
          ss.wilcoxon(baseline_sr, uoi_sr, alternative='greater').pvalue)
    print("Cohen's d: ", cohens_d(baseline_sr, uoi_sr))
    print('---------------')

M1
---------------
Dataset  1
Baseline:  0.4333 +/- 0.1222
UoI:  0.0741 +/- 0.0380
Reduction in features:  5.8500000000000005
p-value:  3.334541525940367e-24
Cohen's d:  2.489856015080524
---------------
Dataset  2
Baseline:  0.4552 +/- 0.1198
UoI:  0.0828 +/- 0.0414
Reduction in features:  5.5
p-value:  3.439136319482036e-25
Cohen's d:  2.374615090034304
---------------
Dataset  3
Baseline:  0.4589 +/- 0.1062
UoI:  0.0890 +/- 0.0411
Reduction in features:  5.153846153846154
p-value:  1.1034971078766716e-25
Cohen's d:  2.5801521951484627
---------------


## Predictive Performance

In [12]:
# Test-set R^2 for each AC fit, reduced with a median over axis 0
# (presumably the cross-validation folds -- TODO confirm).
ac_baseline_pp = []
ac_uoi_pp = []
for results_file in ac_results:
    ac_baseline_pp.append(
        np.median(results_file['lasso/r2s_test'][:], axis=0))
    ac_uoi_pp.append(
        np.median(results_file['uoi_lasso_bic/r2s_test'][:], axis=0))

print('AC')
print('---------------')
for dataset_number, (baseline_r2, uoi_r2) in enumerate(
        zip(ac_baseline_pp, ac_uoi_pp), start=1):
    # Median, with half the interquartile range as spread.
    baseline_summary = (format(np.median(baseline_r2), '0.2f'),
                        format(ss.iqr(baseline_r2) / 2, '0.2f'))
    uoi_summary = (format(np.median(uoi_r2), '0.2f'),
                   format(ss.iqr(uoi_r2) / 2, '0.2f'))

    print('Dataset ', dataset_number)
    print('Baseline: ', baseline_summary[0], '+/-', baseline_summary[1])
    print('UoI: ', uoi_summary[0], '+/-', uoi_summary[1])
    print('Median Difference: ',
          format(np.median(baseline_r2 - uoi_r2), '0.2f'))
    # One-sided paired test with alternative='greater' (baseline vs. UoI).
    print('p-value: ',
          ss.wilcoxon(x=baseline_r2, y=uoi_r2, alternative='greater').pvalue)
    print("Cohen's d: ", cohens_d(baseline_r2, uoi_r2))
    print('---------------')

AC
---------------
Dataset  1
Baseline:  0.98 +/- 0.02
UoI:  0.98 +/- 0.02
Median Difference:  0.00
p-value:  0.0005070182068858921
Cohen's d:  0.0049282371808172095
---------------


In [13]:
# Test-set deviance for each V1 fit, reduced with a median over axis 0
# (presumably the cross-validation folds -- TODO confirm).
v1_baseline_pp = [
    np.median(results_file['glmnet_poisson/deviances_test'][:], axis=0)
    for results_file in v1_results
]
v1_uoi_pp = [
    np.median(results_file['uoi_poisson_log/deviances_test'][:], axis=0)
    for results_file in v1_results
]

print('V1')
print('---------------')
for dataset_number, (baseline_dev, uoi_dev) in enumerate(
        zip(v1_baseline_pp, v1_uoi_pp), start=1):
    # Median, with half the interquartile range as spread.
    baseline_summary = (format(np.median(baseline_dev), '0.2f'),
                        format(ss.iqr(baseline_dev) / 2, '0.2f'))
    uoi_summary = (format(np.median(uoi_dev), '0.2f'),
                   format(ss.iqr(uoi_dev) / 2, '0.2f'))

    print('Dataset ', dataset_number)
    print('Baseline: ', baseline_summary[0], '+/-', baseline_summary[1])
    print('UoI: ', uoi_summary[0], '+/-', uoi_summary[1])
    print('Median Difference: ',
          format(np.median(baseline_dev - uoi_dev), '0.2f'))
    # One-sided paired test with alternative='less' (baseline vs. UoI).
    print('p-value: ',
          ss.wilcoxon(x=baseline_dev, y=uoi_dev, alternative='less').pvalue)
    print("Cohen's d: ", cohens_d(baseline_dev, uoi_dev))
    print('---------------')

V1
---------------
Dataset  1
Baseline:  202.89 +/- 34.69
UoI:  205.06 +/- 35.52
Median Difference:  -2.99
p-value:  3.078441458594466e-16
Cohen's d:  -0.045805755435526584
---------------
Dataset  2
Baseline:  165.69 +/- 43.52
UoI:  167.87 +/- 43.40
Median Difference:  -1.97
p-value:  1.7385837090928222e-12
Cohen's d:  -0.03256344320526021
---------------
Dataset  3
Baseline:  212.86 +/- 41.48
UoI:  217.40 +/- 41.20
Median Difference:  -3.31
p-value:  7.827677519817016e-19
Cohen's d:  -0.04658866498012063
---------------


In [15]:
# Test-set deviance for each M1 fit, reduced with a median over axis 0
# (presumably the cross-validation folds -- TODO confirm).
m1_baseline_pp = [
    np.median(results_file['glmnet_poisson/deviances_test'][:], axis=0)
    for results_file in m1_results
]
m1_uoi_pp = [
    np.median(results_file['uoi_poisson_log/deviances_test'][:], axis=0)
    for results_file in m1_results
]


print('M1')
print('---------------')
for dataset_number, (baseline_raw, uoi_raw) in enumerate(
        zip(m1_baseline_pp, m1_uoi_pp), start=1):
    # nan_to_num swaps NaN for 0 and +/-inf for large finite values so the
    # statistics below never see non-finite entries.
    # NOTE(review): zero-filled entries still take part in the medians and
    # in the paired test -- confirm this is preferred over dropping them.
    baseline_dev = np.nan_to_num(baseline_raw)
    uoi_dev = np.nan_to_num(uoi_raw)

    # Median, with half the interquartile range as spread.
    baseline_summary = (format(np.median(baseline_dev), '0.2f'),
                        format(ss.iqr(baseline_dev) / 2, '0.2f'))
    uoi_summary = (format(np.median(uoi_dev), '0.2f'),
                   format(ss.iqr(uoi_dev) / 2, '0.2f'))

    print('Dataset ', dataset_number)
    print('Baseline: ', baseline_summary[0], '+/-', baseline_summary[1])
    print('UoI: ', uoi_summary[0], '+/-', uoi_summary[1])
    print('Median Difference: ',
          format(np.median(baseline_dev - uoi_dev), '0.2f'))
    # One-sided paired test with alternative='less' (baseline vs. UoI).
    print('p-value: ',
          ss.wilcoxon(x=baseline_dev, y=uoi_dev, alternative='less').pvalue)
    print("Cohen's d: ", cohens_d(baseline_dev, uoi_dev))
    print('---------------')

M1
---------------
Dataset  1
Baseline:  231.66 +/- 64.56
UoI:  233.28 +/- 64.58
Median Difference:  -2.47
p-value:  9.842495405296869e-24
Cohen's d:  -0.030129136430232135
---------------
Dataset  2
Baseline:  263.01 +/- 60.03
UoI:  266.26 +/- 58.64
Median Difference:  -2.27
p-value:  1.4861628764062627e-23
Cohen's d:  -0.02505348742578228
---------------
Dataset  3
Baseline:  248.21 +/- 56.89
UoI:  249.80 +/- 57.66
Median Difference:  -2.72
p-value:  1.8285777741230783e-25
Cohen's d:  -0.029846250699989963
---------------


## Bayesian Information Criterion

In [16]:
# BIC values for each AC fit, reduced with a median over axis 0
# (presumably the cross-validation folds -- TODO confirm).
ac_baseline_bics = []
ac_uoi_bics = []
for results_file in ac_results:
    ac_baseline_bics.append(
        np.median(results_file['lasso/bics'][:], axis=0))
    ac_uoi_bics.append(
        np.median(results_file['uoi_lasso_bic/bics'][:], axis=0))

print('AC')
print('---------------')
for dataset_number, (baseline_bic, uoi_bic) in enumerate(
        zip(ac_baseline_bics, ac_uoi_bics), start=1):
    # Median, with half the interquartile range as spread.
    baseline_summary = (format(np.median(baseline_bic), '0.2f'),
                        format(ss.iqr(baseline_bic) / 2, '0.2f'))
    uoi_summary = (format(np.median(uoi_bic), '0.2f'),
                   format(ss.iqr(uoi_bic) / 2, '0.2f'))

    print('Dataset ', dataset_number)
    print('Baseline: ', baseline_summary[0], '+/-', baseline_summary[1])
    print('UoI: ', uoi_summary[0], '+/-', uoi_summary[1])
    print('Median Difference: ',
          format(np.median(baseline_bic - uoi_bic), '0.2f'))
    print('---------------')

AC
---------------
Dataset  1
Baseline:  -6239.96 +/- 1884.42
UoI:  -6490.60 +/- 1859.09
Median Difference:  170.01
---------------


In [17]:
# BIC values for each V1 fit, reduced with a median over axis 0
# (presumably the cross-validation folds -- TODO confirm).
v1_baseline_bics = [
    np.median(results_file['glmnet_poisson/bics'][:], axis=0)
    for results_file in v1_results
]
v1_uoi_bics = [
    np.median(results_file['uoi_poisson_log/bics'][:], axis=0)
    for results_file in v1_results
]

print('V1')
print('---------------')
for dataset_number, (baseline_bic, uoi_bic) in enumerate(
        zip(v1_baseline_bics, v1_uoi_bics), start=1):
    # Median, with half the interquartile range as spread.
    baseline_summary = (format(np.median(baseline_bic), '0.2f'),
                        format(ss.iqr(baseline_bic) / 2, '0.2f'))
    uoi_summary = (format(np.median(uoi_bic), '0.2f'),
                   format(ss.iqr(uoi_bic) / 2, '0.2f'))

    print('Dataset ', dataset_number)
    print('Baseline: ', baseline_summary[0], '+/-', baseline_summary[1])
    print('UoI: ', uoi_summary[0], '+/-', uoi_summary[1])
    print('Median Difference: ',
          format(np.median(baseline_bic - uoi_bic), '0.2f'))
    print('---------------')

V1
---------------
Dataset  1
Baseline:  -15349.34 +/- 20946.67
UoI:  -15516.19 +/- 20970.74
Median Difference:  149.14
---------------
Dataset  2
Baseline:  -2864.02 +/- 11046.87
UoI:  -3007.10 +/- 11087.03
Median Difference:  131.25
---------------
Dataset  3
Baseline:  -35164.55 +/- 34069.33
UoI:  -35309.01 +/- 34066.44
Median Difference:  161.33
---------------


In [18]:
# BIC values for each M1 fit, reduced with a median over axis 0
# (presumably the cross-validation folds -- TODO confirm).
mc_baseline_bics = [
    np.median(results_file['glmnet_poisson/bics'][:], axis=0)
    for results_file in m1_results
]
mc_uoi_bics = [
    np.median(results_file['uoi_poisson_log/bics'][:], axis=0)
    for results_file in m1_results
]

# NOTE(review): this report differs from the AC and V1 BIC cells -- dataset
# labels are 0-based, the effect size is mean(difference) / std(baseline)
# rather than the imported cohens_d helper, and the Wilcoxon test is
# two-sided. Confirm which convention the table should follow before
# harmonising, since doing so changes the printed numbers.
for dataset, (baseline_bic, uoi_bic) in enumerate(zip(mc_baseline_bics, mc_uoi_bics)):
    bic_gap = baseline_bic - uoi_bic
    # Median, with half the interquartile range as spread.
    baseline_summary = (format(np.median(baseline_bic), '0.2f'),
                        format(ss.iqr(baseline_bic) / 2, '0.2f'))
    uoi_summary = (format(np.median(uoi_bic), '0.2f'),
                   format(ss.iqr(uoi_bic) / 2, '0.2f'))

    print('Dataset %s:' % dataset)
    print('-----------')
    print('Baseline BIC: ', baseline_summary[0], '+/-', baseline_summary[1])
    print('UoI BIC: ', uoi_summary[0], '+/-', uoi_summary[1])
    print('Median del BIC: ', format(np.median(bic_gap), '0.2f'))
    print('Wilcoxon Sign Rank Test: ',
          ss.wilcoxon(x=baseline_bic, y=uoi_bic))
    print('Cohens D: ', np.mean(bic_gap) / np.std(baseline_bic))
    print('')

Dataset 0:
-----------
Baseline BIC:  480.98 +/- 124.05
UoI BIC:  321.75 +/- 59.49
Median del BIC:  162.17
Wilcoxon Sign Rank Test:  WilcoxonResult(statistic=422.0, pvalue=3.535529653947251e-20)
Cohens D:  0.7248418316419467

Dataset 1:
-----------
Baseline BIC:  542.79 +/- 150.17
UoI BIC:  354.80 +/- 75.35
Median del BIC:  186.10
Wilcoxon Sign Rank Test:  WilcoxonResult(statistic=355.0, pvalue=2.948033801789911e-22)
Cohens D:  0.7246654657934493

Dataset 2:
-----------
Baseline BIC:  564.84 +/- 120.39
UoI BIC:  362.11 +/- 67.42
Median del BIC:  190.53
Wilcoxon Sign Rank Test:  WilcoxonResult(statistic=180.0, pvalue=6.058087006705419e-24)
Cohens D:  0.8389497875457887



## Non-zero Parameters

In [19]:
# Pool the coupling coefficients of every AC fit into one flat vector per
# method.
ac_baseline_ccs_all = np.concatenate(
    [ac_baseline_ccs[fit].ravel() for fit in range(n_ac_fits)])
ac_uoi_ccs_all = np.concatenate(
    [ac_uoi_ccs[fit].ravel() for fit in range(n_ac_fits)])

# Keep only the coefficients that are not exactly zero.
ac_baseline_nz_ccs = ac_baseline_ccs_all[np.nonzero(ac_baseline_ccs_all)]
ac_uoi_nz_ccs = ac_uoi_ccs_all[np.nonzero(ac_uoi_ccs_all)]

# Two-sample Kolmogorov-Smirnov test between the two sets of non-zero
# coefficients.
print('AC')
print('---------------')
print('KS 2-sample test: ',
      ss.ks_2samp(ac_baseline_nz_ccs, ac_uoi_nz_ccs).pvalue)

AC
---------------
KS 2-sample test:  5.88596274373403e-69


In [22]:
# Pool the coupling coefficients of every V1 fit into one flat vector per
# method.
v1_baseline_ccs_all = np.concatenate(
    [v1_baseline_ccs[fit].ravel() for fit in range(n_v1_fits)])
v1_uoi_ccs_all = np.concatenate(
    [v1_uoi_ccs[fit].ravel() for fit in range(n_v1_fits)])

# Keep only the coefficients that are not exactly zero.
v1_baseline_nz_ccs = v1_baseline_ccs_all[np.nonzero(v1_baseline_ccs_all)]
v1_uoi_nz_ccs = v1_uoi_ccs_all[np.nonzero(v1_uoi_ccs_all)]

# Two-sample Kolmogorov-Smirnov test between the two sets of non-zero
# coefficients.
print('V1')
print('---------------')
print('KS 2-sample test: ',
      ss.ks_2samp(v1_baseline_nz_ccs, v1_uoi_nz_ccs).pvalue)

V1
---------------
KS 2-sample test:  6.013057238389484e-293


In [23]:
# Pool the coupling coefficients of every M1 fit into one flat vector per
# method. The loop bound is the M1 fit count (n_m1_fits), not the V1 count,
# so the pooling stays correct if the two regions ever have a different
# number of datasets.
m1_baseline_ccs_all = np.concatenate(
    [m1_baseline_ccs[idx].ravel()
     for idx in range(n_m1_fits)])
m1_uoi_ccs_all = np.concatenate(
    [m1_uoi_ccs[idx].ravel()
     for idx in range(n_m1_fits)])

# Keep only the coefficients that are not exactly zero.
m1_baseline_nz_ccs = m1_baseline_ccs_all[m1_baseline_ccs_all != 0]
m1_uoi_nz_ccs = m1_uoi_ccs_all[m1_uoi_ccs_all != 0]

# Two-sample Kolmogorov-Smirnov test between the two sets of non-zero
# coefficients.
print('M1')
print('---------------')
print('KS 2-sample test: ', ss.ks_2samp(m1_baseline_nz_ccs, m1_uoi_nz_ccs).pvalue)

M1
---------------
KS 2-sample test:  0.0
