In [4]:
from os import path, makedirs
from itertools import product
import numpy as np
import pandas as pd
import glob

from brainpipe.system import study
import statsmodels.api as sm
from statsmodels.formula.api import ols, gls, wls
from mne.stats import fdr_correction, bonferroni_correction
from utils import rename_elecs

In [6]:
"""
Unpaired t-tests (unequal variances) computed electrode by electrode on TPS
values, with uncorrected, max-stat permutation, FDR and Bonferroni p-values.
All subjects are gathered in a single csv file per frequency band.
>>> Learning effect in the data during encoding
"""
# NOTE(review): `ttest_ind`, `ttest_perm` and `nperm` are used below but are
# neither defined in this cell nor imported in the import cell. They must be
# provided by another cell before this one runs on a fresh kernel.

st = study('Olfacto')
exp = 'Enc'#'Enc'
perf, meth = 'wth','WTH'
conds, subjects = ['high','low'],['VACJ','SEMC','LEFC','PIRJ','FERJ','CHAF']
freqs = ['theta']
path_pow = path.join(st.path, 'feature/TPSim_'+exp+'_By_Odor_By_Cond/TPS_by_odor/by_odor_{}/')
# Resolve the directory placeholder once. Joining the raw template into the
# file templates left 5 placeholders for 4 arguments, so every `.format` call
# on `filename` / `df_name` raised an IndexError.
path_perf = path_pow.format(perf)
filename = path.join(path_perf, 'TPS_pears_{}_{}_{}_{}.npz')
# NOTE(review): the output is still named 'LReg_Learning' although it stores
# t-test results; kept unchanged so existing readers of the file still find it.
df_name = path.join(path_perf, '{}_LReg_Learning_'+meth+'_{}_{}_{}.csv') #su, conds0, conds1, freq

rois_sel = ['aHC','MFG','ACC','IFG','Amg','pPirT','PHG','Ins_olf','OFC_olf','SFG']

# Column order of the output csv.
# NOTE(review): 'tps_<cond>' columns hold the mean of tps0 (one odor file) and
# of tps1 (both conditions concatenated), not one value per condition.
col_names = ['subjects','labels','channels','x','y','z',
             'elecs_num', 'tps_'+conds[0], 'tps_'+conds[1], 'Tvals', 'unc_p', 'max_p',
             'fdr_p', 'bonf_p']


def _load_tps(npz_path):
    """Return the 'tps' array stored in `npz_path` and close the archive."""
    with np.load(npz_path) as npz:
        return npz['tps']


for freq in freqs:
    # One list of per-file arrays per output column; stacked once at the end
    # instead of re-allocating every accumulator on each file.
    cols = {name: [] for name in col_names}

    for su in subjects:
        # Sorted so that the row order of the csv is reproducible across runs.
        files = sorted(glob.glob(path_perf+'TPS_pears_'+su+'_odor*_'+freq+'.npz'))
        if not files:
            continue

        # The comparison data only depends on the subject, so load it once
        # per subject rather than once per odor file.
        # NOTE(review): `perf` is the string 'wth', so perf[1] is the single
        # character 't' -- confirm this is the intended file-name field.
        tps1_full = np.concatenate((_load_tps(filename.format(su,conds[0],perf[1],freq)),
                                    _load_tps(filename.format(su,conds[1],perf[1],freq))),
                                   axis=-1)

        for f in files:
            # allow_pickle is required by the original loader call; these are
            # locally generated files, do not point this at untrusted data.
            with np.load(f,allow_pickle=True) as mat0:
                keys = mat0.files
                tps0 = mat0['tps']
                labels, channels = mat0['label'], mat0['channel']
                xyz = mat0['xyz']
            x, y, z = xyz[:,0], xyz[:,1], xyz[:,2]
            print(np.mean(tps0),np.mean(tps1_full))
            print (su,keys, 'TPS shape: ', tps0.shape, tps1_full.shape)

            #rename electrodes labels and select only specific electrodes
            labels_new = rename_elecs(labels,x,y,z)
            idx_sel = [i for i,lab in enumerate(labels_new) if lab in rois_sel]
            nelecs = len(idx_sel)
            if nelecs == 0:
                # No electrode in the selected ROIs: the statistics below are
                # undefined on empty input and there is nothing to store.
                continue
            labels, channels = labels_new[idx_sel], channels[idx_sel]
            x, y, z = x[idx_sel], y[idx_sel], z[idx_sel]

            #compute stats Ttests-unpaired
            tps0 = tps0[idx_sel,:].swapaxes(0,1) #ntrials x nelecs
            tps1 = tps1_full[idx_sel,:].swapaxes(0,1) #ntrials x nelecs
            Tvals, unc_p = ttest_ind(tps0, tps1, equal_var=False)
            Tvals2, pvals = ttest_perm(tps0, tps1, n_perm=nperm, correction='maxstat',
                                  two_tailed=False, paired=False, equal_var=False, n_jobs=-1)
            _, p_fdr = fdr_correction(unc_p)
            _, p_bf = bonferroni_correction(unc_p)

            #Fill the csv columns with elec infos and stats
            per_file = [np.array([su]*nelecs), labels, channels, x, y, z,
                        np.arange(nelecs), np.mean(tps0, axis=0), np.mean(tps1, axis=0),
                        Tvals, unc_p, pvals, p_fdr, p_bf]
            for name, values in zip(col_names, per_file):
                cols[name].append(values)

    # Build the frame column by column so numeric columns keep their dtype
    # instead of being cast to strings in one mixed-type matrix.
    df = pd.DataFrame({name: (np.hstack(chunks) if chunks else np.array([]))
                       for name, chunks in cols.items()}, columns=col_names)
    print(df.shape)
    df.to_csv(df_name.format('All_subjects',conds[0],conds[1],freq),index=False)


-> Olfacto loaded
['/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/feature/TPSim_Enc_By_Odor_By_Cond/TPS_by_odor/by_odor_wth/TPS_pears_VACJ_odor_10_theta.npz', '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/feature/TPSim_Enc_By_Odor_By_Cond/TPS_by_odor/by_odor_wth/TPS_pears_VACJ_odor_11_theta.npz', '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/feature/TPSim_Enc_By_Odor_By_Cond/TPS_by_odor/by_odor_wth/TPS_pears_VACJ_odor_12_theta.npz', '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/feature/TPSim_Enc_By_Odor_By_Cond/TPS_by_odor/by_odor_wth/TPS_pears_VACJ_odor_13_theta.npz', '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/feature/TPSim_Enc_By_Odor_By_Cond/TPS_by_odor/by_odor_wth/TPS_pears_VACJ_odor_14_theta.npz', '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/feature/TPSim_Enc_By_Odor_By_Cond/TPS_by_odor/by_odor_wth/TPS_pears_VACJ_odor_15_theta.npz', '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/feature/TPSim_Enc_By_Odor_By_Co

ZeroDivisionError: division by zero

In [197]:
from collections import Counter

# Summarise, per ROI, how many subjects show significant electrodes in each
# direction of the T statistic, for several significance thresholds.
# Relies on `st` (the study object) created in an earlier cell.

fold = 'Ret'
perf, meth = 'all', 'WTH_BTW'
conds = ['high','low']
olf_regions = ['Amg','pPirT','OFC_olf','Ins_olf']
# A ROI is reported when at least `min_subjects` subjects contribute, or at
# least `min_subjects_olf` subjects when the ROI is one of `olf_regions`.
min_subjects, min_subjects_olf = 3, 2

save_path = path.join(st.path, 'feature/TPSim_'+fold+'_By_Odor_By_Cond/TPS_by_odor/')
#save_path = path.join(st.path, 'classified/TPSim_classif_'+fold+'_by_cond_dissim/')
df_name = path.join(save_path, '{}_Ttests_'+meth+'_{}_{}_{}.csv') #su, conds0, conds1, freq
#df_name = path.join(save_path, '{}_Ttests_{}_{}_{}.csv') #su, conds0, conds1, freq

df = pd.read_csv(df_name.format('All_subjects',conds[0], conds[1],'theta'))
print('Initial df shape', df.shape)

thrs = [0.05, 0.01, 0.001]
corrections = ['bonf_p']
plot_cols = ['subjects','labels','channels','tps_'+conds[0],'tps_'+conds[1]]

for th, corr in product(thrs,corrections):
    # Explicit copy: a column is added below, and writing into a slice of
    # `df` triggers pandas' SettingWithCopyWarning.
    df_sel = df.loc[df[corr]<th].copy()
    # Positive T -> 'separation'; everything else (including 0 and NaN) -> 'completion'.
    df_sel['sign'] = np.where(df_sel['Tvals'] > 0, 'separation', 'completion')
    print('\n stats at p < ',th, 'correction : ',corr, df_sel.shape)
    # Labels of the electrodes whose corrected p-value is above the threshold.
    print(Counter(df['labels'].loc[df[corr]>th]))

    for roi in np.unique(df_sel['labels']):
        df_roi = df_sel.loc[df_sel['labels']==roi]
        # Same report for both directions, completion first then separation.
        for sign in ('completion','separation'):
            df_sign = df_roi.loc[df_roi['sign']==sign]
            n_subjects = df_sign['subjects'].nunique()

            if (n_subjects >= min_subjects) or (n_subjects >= min_subjects_olf and roi in olf_regions):
                print(roi, 'NB of subjects with '+sign,n_subjects,' subjects')
                df_plot = df_sign[plot_cols]
                print('#electrodes in total >>> ',df_plot.shape[0])


Initial df shape (232, 14)

 stats at p <  0.05 correction :  bonf_p (219, 15)
Counter({'OFC_olf': 3, 'SFG': 3, 'ACC': 2, 'Ins_olf': 2, 'IFG': 1, 'pPirT': 1, 'MFG': 1})
ACC NB of subjects with separation 4  subjects
#electrodes in total >>>  11
Amg NB of subjects with separation 2  subjects
#electrodes in total >>>  5
IFG NB of subjects with separation 5  subjects
#electrodes in total >>>  31
Ins_olf NB of subjects with separation 2  subjects
#electrodes in total >>>  7
MFG NB of subjects with separation 4  subjects
#electrodes in total >>>  36
OFC_olf NB of subjects with separation 4  subjects
#electrodes in total >>>  16
PHG NB of subjects with separation 3  subjects
#electrodes in total >>>  18
SFG NB of subjects with separation 4  subjects
#electrodes in total >>>  55
aHC NB of subjects with separation 5  subjects
#electrodes in total >>>  35
pPirT NB of subjects with separation 3  subjects
#electrodes in total >>>  5

 stats at p <  0.01 correction :  bonf_p (206, 15)
Counter({'OF