In [2]:
import scipy.stats
import numpy as np
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.sandbox.stats.multicomp import get_tukey_pvalue
import joblib
import pandas as pd

In [12]:
# Load the decoding results that every later cell in this notebook indexes into.
# NOTE(review): `data='key'` is not a documented `pd.read_hdf` parameter (the
# documented selector is `key=`); presumably the file holds a single object so
# the key is inferred. Confirm against the file's stored keys before changing.
decoding_results=pd.read_hdf('../data/Figure5Revision.h5', data='key')

In [13]:
def get_data(decoding_results, task, decoders, methods, window, modality, num_neurons):
    """Gather per-run mean absolute values for several decoder/method pairs.

    For every ``(decoder, method)`` pair (taken pairwise from ``decoders`` and
    ``methods``) the entry
    ``decoding_results[task][decoder][f'{modality}_{method}_{window}'][num_neurons]``
    is reduced to one value per run, and each value is labelled with
    ``f'{modality}_{method}_{window}_{decoder}'``.

    Args:
        decoding_results: Nested mapping indexed as
            ``[task][decoder][key][num_neurons]``.
        task: First-level key into ``decoding_results``.
        decoders: Iterable of decoder names, paired element-wise with ``methods``.
        methods: Iterable of method names. Methods whose name contains
            ``'joint'`` are treated as storing, per run, a sequence indexed by
            modality (position 0 for ``'ca'``, position 1 for ``'np'``).
        window: Window label used to build the lookup key.
        modality: ``'ca'`` or ``'np'``.
        num_neurons: Key selecting the entry for one neuron count.

    Returns:
        A tuple ``(values, labels)`` of two 1-D arrays of equal length: the
        concatenated per-run means and the matching group labels.

    Raises:
        ValueError: If ``modality`` is neither ``'ca'`` nor ``'np'``.
    """
    # Validate up front: previously an unknown modality left `index` unbound
    # and only failed later with an UnboundLocalError or an opaque KeyError.
    modality_index = {'ca': 0, 'np': 1}
    if modality not in modality_index:
        raise ValueError(f"modality must be 'ca' or 'np', got {modality!r}")
    index = modality_index[modality]

    accs = []
    keys = []
    for decoder, method in zip(decoders, methods):
        key = f'{modality}_{method}_{window}'
        runs = decoding_results[task][decoder][key][num_neurons]
        if 'joint' in method:
            # Joint entries hold one item per modality for each run; pick ours.
            acc = np.array([np.abs(run[index]).mean() for run in runs])
        else:
            # Average over the last axis, leaving one value per run.
            acc = np.abs(np.array(runs)).mean(axis=-1)
        accs.append(acc)
        keys.append([f'{key}_{decoder}'] * len(acc))
    return np.concatenate(accs), np.concatenate(keys)

    
def concat_neurons(decoding_results, task, decoder, method, window, modality):
    """Pool per-run mean absolute values across every stored neuron count.

    Looks up ``decoding_results[task][decoder][f'{modality}_{method}_{window}']``
    and, for each key of that mapping (in its iteration order), reduces the
    entry to one value per run, then concatenates everything into one array.

    Args:
        decoding_results: Nested mapping indexed as ``[task][decoder][key][n]``.
        task: First-level key into ``decoding_results``.
        decoder: Decoder name (second-level key).
        method: Method name. If it contains ``'joint'``, each run is treated as
            a sequence indexed by modality (position 0 for ``'ca'``, position 1
            for ``'np'``).
        window: Window label used to build the lookup key.
        modality: ``'ca'`` or ``'np'``.

    Returns:
        A 1-D array with the per-run means for all neuron counts, concatenated.

    Raises:
        ValueError: If ``modality`` is neither ``'ca'`` nor ``'np'``.
    """
    # Validate up front: previously an unknown modality left `index` unbound
    # and only failed later with an UnboundLocalError or an opaque KeyError.
    modality_index = {'ca': 0, 'np': 1}
    if modality not in modality_index:
        raise ValueError(f"modality must be 'ca' or 'np', got {modality!r}")
    index = modality_index[modality]

    key = f'{modality}_{method}_{window}'
    per_count = decoding_results[task][decoder][key]
    is_joint = 'joint' in method  # loop-invariant, so decide once

    accs = []
    for n in per_count.keys():
        runs = per_count[n]
        if is_joint:
            # Joint entries hold one item per modality for each run; pick ours.
            accs.append([np.abs(run[index]).mean() for run in runs])
        else:
            # Average over the last axis, leaving one value per run.
            accs.append(np.abs(np.array(runs)).mean(axis=-1))
    return np.concatenate(accs)

## ANOVA for CEBRA, CEBRA-joint, baseline 330 ms (10 frame window):

In [14]:
# One-way ANOVA over four groups for the 330 window, 'np' modality; each group
# pools values across all stored neuron counts via concat_neurons.
np_total_stats = scipy.stats.f_oneway(
    *(
        concat_neurons(decoding_results, 'frame_err', decoder, method, '330', 'np')
        for decoder, method in (
            ('knn', 'cebra'),
            ('knn', 'cebra_joint'),
            ('knn', 'baseline'),
            ('bayes', 'baseline'),
        )
    )
)

print(f'NP total stats \n {np_total_stats}')

NP total stats 
 F_onewayResult(statistic=0.5478740874841249, pvalue=0.6501446364163558)


## ANOVA for CEBRA, CEBRA-joint, baseline 33 ms (1 frame window):

In [15]:
# One-way ANOVA over four groups for the 33 window, 'np' modality; each group
# pools values across all stored neuron counts via concat_neurons.
np_total_stats = scipy.stats.f_oneway(
    *(
        concat_neurons(decoding_results, 'frame_err', decoder, method, '33', 'np')
        for decoder, method in (
            ('knn', 'cebra'),
            ('knn', 'cebra_joint'),
            ('knn', 'baseline'),
            ('bayes', 'baseline'),
        )
    )
)

print(f'NP total stats \n {np_total_stats}')

NP total stats 
 F_onewayResult(statistic=7.255789505462228, pvalue=0.00012189571069103389)


## Post-hoc Tukey HSD comparison of CEBRA, CEBRA-joint, and baselines for each number of neurons

In [16]:
# Pairwise Tukey HSD between the four decoder/method groups, run separately
# for each neuron count (330 window, 'np' modality).
num_neurons = [10, 30, 50, 100, 200, 400, 600, 800, 900, 1000]
for i in num_neurons:
    print(f'For {i} neurons from np recording (330ms):')

    np_data, np_keys = get_data(
        decoding_results,
        task='frame_err',
        decoders=['knn', 'knn', 'knn', 'bayes'],
        methods=['cebra', 'cebra_joint', 'baseline', 'baseline'],
        window='330',
        modality='np',
        num_neurons=i,
    )

    stats = pairwise_tukeyhsd(endog=np_data.flatten(), groups=np_keys)
    print(stats)

For 10 neurons from np recording (330ms):
                Multiple Comparison of Means - Tukey HSD, FWER=0.05                 
        group1                group2         meandiff p-adj   lower    upper  reject
------------------------------------------------------------------------------------
np_baseline_330_bayes    np_baseline_330_knn  30.3086 0.1054  -4.9411 65.5583  False
np_baseline_330_bayes       np_cebra_330_knn  25.0236 0.2179 -10.2261 60.2733  False
np_baseline_330_bayes np_cebra_joint_330_knn  46.3276 0.0083  11.0779 81.5773   True
  np_baseline_330_knn       np_cebra_330_knn   -5.285 0.9726 -40.5347 29.9647  False
  np_baseline_330_knn np_cebra_joint_330_knn   16.019 0.5759 -19.2307 51.2687  False
     np_cebra_330_knn np_cebra_joint_330_knn   21.304 0.3417 -13.9457 56.5537  False
------------------------------------------------------------------------------------
For 30 neurons from np recording (330ms):
                Multiple Comparison of Means - Tukey HSD, FWER=0.0

                Multiple Comparison of Means - Tukey HSD, FWER=0.05                 
        group1                group2         meandiff p-adj   lower    upper  reject
------------------------------------------------------------------------------------
np_baseline_330_bayes    np_baseline_330_knn   1.8628 0.5277  -1.9939  5.7195  False
np_baseline_330_bayes       np_cebra_330_knn  -6.3068 0.0013 -10.1635 -2.4501   True
np_baseline_330_bayes np_cebra_joint_330_knn  -6.4403 0.0011  -10.297 -2.5836   True
  np_baseline_330_knn       np_cebra_330_knn  -8.1696 0.0001 -12.0263 -4.3129   True
  np_baseline_330_knn np_cebra_joint_330_knn  -8.3031 0.0001 -12.1597 -4.4464   True
     np_cebra_330_knn np_cebra_joint_330_knn  -0.1334 0.9996  -3.9901  3.7232  False
------------------------------------------------------------------------------------


In [17]:
# Pairwise Tukey HSD between the four decoder/method groups, run separately
# for each neuron count (33 window, 'np' modality).
num_neurons = [10, 30, 50, 100, 200, 400, 600, 800, 900, 1000]
for i in num_neurons:
    print(f'For {i} neurons from np recording (33ms):')

    np_data, np_keys = get_data(
        decoding_results,
        task='frame_err',
        decoders=['knn', 'knn', 'knn', 'bayes'],
        methods=['cebra', 'cebra_joint', 'baseline', 'baseline'],
        window='33',
        modality='np',
        num_neurons=i,
    )

    stats = pairwise_tukeyhsd(endog=np_data.flatten(), groups=np_keys)
    print(stats)

For 10 neurons from np recording (33ms):
                Multiple Comparison of Means - Tukey HSD, FWER=0.05                
       group1                group2        meandiff p-adj   lower    upper   reject
-----------------------------------------------------------------------------------
np_baseline_33_bayes    np_baseline_33_knn  46.1798 0.0064  12.2479  80.1117   True
np_baseline_33_bayes       np_cebra_33_knn  -1.9331 0.9984  -35.865  31.9988  False
np_baseline_33_bayes np_cebra_joint_33_knn    -3.71  0.989 -37.6419  30.2219  False
  np_baseline_33_knn       np_cebra_33_knn -48.1129 0.0046 -82.0448  -14.181   True
  np_baseline_33_knn np_cebra_joint_33_knn -49.8898 0.0034 -83.8217 -15.9579   True
     np_cebra_33_knn np_cebra_joint_33_knn  -1.7769 0.9987 -35.7088   32.155  False
-----------------------------------------------------------------------------------
For 30 neurons from np recording (33ms):
               Multiple Comparison of Means - Tukey HSD, FWER=0.05            

                Multiple Comparison of Means - Tukey HSD, FWER=0.05                
       group1                group2        meandiff p-adj   lower    upper   reject
-----------------------------------------------------------------------------------
np_baseline_33_bayes    np_baseline_33_knn  22.5131 0.0006   9.8963  35.1299   True
np_baseline_33_bayes       np_cebra_33_knn -48.7858    0.0 -61.4026  -36.169   True
np_baseline_33_bayes np_cebra_joint_33_knn -57.2296    0.0 -69.8464 -44.6127   True
  np_baseline_33_knn       np_cebra_33_knn -71.2989    0.0 -83.9157 -58.6821   True
  np_baseline_33_knn np_cebra_joint_33_knn -79.7427    0.0 -92.3595 -67.1258   True
     np_cebra_33_knn np_cebra_joint_33_knn  -8.4438  0.261 -21.0606    4.173  False
-----------------------------------------------------------------------------------
