### Concatenate RAW data as a function of condition

In [2]:
import numpy as np
from brainpipe.system import study
from utils import odor_groups_wgth as dict_ #odor_groups_3wgth
from os.path import join, exists
from os import makedirs
from itertools import product

# Merge the per-odor RAW files of each subject into one file per condition.
# `dict_` (imported from utils) is iterated as {subject: {condition: [odor ids]}}.
st = study('Olfacto')
phase = 'E'
PATH = join(st.path, 'database/')
PATH_OD = join(PATH, 'Encoding_By_Odor/')
od_name = join(PATH_OD, '{}_odor_{}_bipo_all_noWM_physFT.npz')
PATH_COND = join(PATH, 'Encoding_By_Cond_v=1_elecs=all/')
save_name = join(PATH_COND, '{}_odor_{}_{}.npz')

# exist_ok avoids the check-then-create race of `if not exists(...)`.
makedirs(PATH_COND, exist_ok=True)

for su in dict_:
    for cond in dict_[su]:
        print('>>> processing:', su, cond)
        # `None` (not an empty array) marks "nothing loaded yet", so a first
        # odor file that happens to hold zero trials is not mistaken for it.
        data, dict_pow = None, None
        for od in dict_[su][cond]:
            # Read every field while the archive is open, then close it.
            with np.load(od_name.format(su, od), allow_pickle=True) as mat:
                dict_pow = {key: mat[key] for key in mat.files}
            x_od = dict_pow['x']
            # Odors are stacked along the last axis of 'x'.
            data = x_od if data is None else np.concatenate((data, x_od), axis=-1)
            print(su, od, cond, dict_pow['channels'].shape, data.shape)
        if dict_pow is None:
            # No odor listed for this condition: there is nothing to save, and
            # we must not reuse the fields of a previously processed file.
            print('>>> skipped (no odor listed):', su, cond)
            continue
        # All non-'x' fields are taken from the last odor file that was loaded.
        dict_pow['x'] = data
        np.savez(save_name.format(su, cond, phase), **dict_pow)

-> Olfacto loaded
>>> processing: CHAF low
CHAF 1 low (61,) (61, 3584, 1)
CHAF 2 low (61,) (61, 3584, 4)
CHAF 4 low (61,) (61, 3584, 5)
CHAF 5 low (61,) (61, 3584, 6)
>>> processing: CHAF high
CHAF 3 high (61,) (61, 3584, 3)
CHAF 8 high (61,) (61, 3584, 6)
CHAF 7 high (61,) (61, 3584, 10)
CHAF 9 high (61,) (61, 3584, 15)
>>> processing: VACJ low
VACJ 11 low (39,) (39, 3584, 3)
VACJ 14 low (39,) (39, 3584, 6)
VACJ 12 low (39,) (39, 3584, 8)
VACJ 10 low (39,) (39, 3584, 11)
>>> processing: VACJ high
VACJ 15 high (39,) (39, 3584, 2)
VACJ 17 high (39,) (39, 3584, 4)
VACJ 16 high (39,) (39, 3584, 7)
VACJ 13 high (39,) (39, 3584, 11)
>>> processing: SEMC low
SEMC 7 low (53,) (53, 3584, 4)
SEMC 10 low (53,) (53, 3584, 11)
SEMC 11 low (53,) (53, 3584, 15)
SEMC 12 low (53,) (53, 3584, 21)
SEMC 13 low (53,) (53, 3584, 31)
>>> processing: SEMC high
SEMC 5 high (53,) (53, 3584, 3)
SEMC 8 high (53,) (53, 3584, 6)
SEMC 9 high (53,) (53, 3584, 9)
>>> processing: PIRJ low
PIRJ 1 low (24,) (24, 3584, 7

### Compute TPSim for RAW data

In [4]:
from itertools import combinations
from scipy import stats
"""
Compute TPSim by combining all odors from each CONDITION
"""
exp = 'Enc'

# --- input / output locations ------------------------------------------------
st = study('Olfacto')
path_data = join(st.path, 'database/Encoding_By_Cond_v=1_elecs=all/')
# Input file pattern: filled with (subject, condition); suffix is exp's first letter.
pow_file = join(path_data, '{}_odor_{}_' + exp[0] + '.npz')
# Output folder pattern: filled with the experiment name.
pathsave = join(st.path, 'feature/TPSim_3groups_{}/TPS_RAW_btw_v=1_elecs=all/')
# Output file pattern: filled with (experiment, subject, condition).
savename = join(pathsave, 'TPS_pears_{}_{}_btw.npz')

# Create the output folder for this experiment if it is missing.
_out_dir = pathsave.format(exp)
if not exists(_out_dir):
    makedirs(_out_dir)

# --- analysis parameters -----------------------------------------------------
subjects = ['PIRJ', 'CHAF', 'FERJ', 'VACJ', 'SEMC', 'LEFC']
conds = ['low', 'mid', 'high']
# [start, stop) sample indices kept along the time axis;
# original author's note: "from -1s to +2s in frames".
to_take = [1024, 2560]

def tpsim_by_cond(su,cond):
    """Compute and save between-trial pattern distances for one subject/condition.

    Loads ``pow_file.format(su, cond)``, keeps the samples
    ``to_take[0]:to_take[1]`` along axis 1 of ``'x'`` and, for every index
    along axis 0 and every pair of indices along axis 2, stores
    ``1 - Pearson r`` between the two time courses.

    Parameters
    ----------
    su : str
        Subject identifier used to fill the file name patterns.
    cond : str
        Condition name used to fill the file name patterns.

    Returns
    -------
    None
        The result is written to ``savename.format(exp, su, cond)`` with the
        keys ``'tps'`` (n_elecs x n_pairs), ``'label'``, ``'channel'`` and
        ``'xyz'``. Pairs are ordered as in ``itertools.combinations``.

    Notes
    -----
    Relies on the module-level names ``pow_file``, ``to_take``, ``savename``
    and ``exp``.
    """
    # Read everything we need once, then release the archive handle.
    with np.load(pow_file.format(su,cond),allow_pickle=True) as mat:
        x_full = mat['x']
        labels, channels, xyz = mat['Mai_RL'], mat['channels'], mat['xyz']
    print(su,cond,x_full.shape)
    data = x_full[:,to_take[0]:to_take[1],:]
    nelecs, _, ntrials = data.shape
    # The pair list is the same for every electrode: build it a single time.
    pairs = list(combinations(range(ntrials), 2))
    tps_su = np.zeros((nelecs,len(pairs)))
    for elec in range(nelecs):
        x0 = data[elec]
        for i, (t0, t1) in enumerate(pairs):
            R, _ = stats.pearsonr(x0[:,t0],x0[:,t1])
            # <<<<<<< HERE TO CHANGE FOR DISTANCE COMPUTATIONS
            tps_su[elec,i] = 1 - R
    print(su,cond,'TPSim',tps_su.shape,'initial data',data.shape)
    out = {'tps':tps_su, 'label':labels, 'channel':channels, 'xyz':xyz}
    np.savez(savename.format(exp,su,cond),**out)

# Run the TPSim computation for every subject, looping over the conditions
# inside each subject (same visiting order as product(subjects, conds)).
for su in subjects:
    for cond in conds:
        tpsim_by_cond(su, cond)
# Parallel alternative, currently disabled:
# Parallel(n_jobs=-1)(delayed(
#     tpsim_by_cond)(su,cond) for su,cond in product(subjects,conds))

-> Olfacto loaded
PIRJ low (24, 3584, 14)
PIRJ low TPSim (24, 91) initial data (24, 1536, 14)
PIRJ mid (24, 3584, 9)
PIRJ mid TPSim (24, 36) initial data (24, 1536, 9)
PIRJ high (24, 3584, 2)
PIRJ high TPSim (24, 1) initial data (24, 1536, 2)
CHAF low (61, 3584, 5)
CHAF low TPSim (61, 10) initial data (61, 1536, 5)
CHAF mid (61, 3584, 4)
CHAF mid TPSim (61, 6) initial data (61, 1536, 4)
CHAF high (61, 3584, 12)
CHAF high TPSim (61, 66) initial data (61, 1536, 12)
FERJ low (32, 3584, 16)
FERJ low TPSim (32, 120) initial data (32, 1536, 16)
FERJ mid (32, 3584, 6)
FERJ mid TPSim (32, 15) initial data (32, 1536, 6)
FERJ high (32, 3584, 9)
FERJ high TPSim (32, 36) initial data (32, 1536, 9)
VACJ low (39, 3584, 9)
VACJ low TPSim (39, 36) initial data (39, 1536, 9)
VACJ mid (39, 3584, 4)
VACJ mid TPSim (39, 6) initial data (39, 1536, 4)
VACJ high (39, 3584, 9)
VACJ high TPSim (39, 36) initial data (39, 1536, 9)
SEMC low (53, 3584, 21)
SEMC low TPSim (53, 210) initial data (53, 1536, 21)
SEMC 

### Compute Linear Regression analyses and summarize stats

In [5]:
from utils import rename_elecs
import statsmodels.api as sm
from mne.stats import fdr_correction, bonferroni_correction
import pandas as pd
"""
Correlate TPSim with behavioral variables // Memory group
BY ELECTRODE and Plot summary (for all included electrodes)
"""

# Per-electrode statistics relating TPSim distances to the memory group, for the
# electrodes whose renamed label belongs to `rois_sel`; one CSV for all subjects.
exps = ['Enc']
meth, conds, stat = 'RAW_btw', ['low','mid','high'], 'LinReg'
subjects = ['CHAF','VACJ','PIRJ','SEMC','FERJ','LEFC']
rois_sel = ['aHC','MFG','ACC','IFG','Amg','pPirT','PHG','Ins_olf',
            'OFC_olf','SFG']
# Numeric regressor value assigned to every TPSim sample of a condition.
dict_perf = {'low':1, 'mid':2, 'high':3}
st = study('Olfacto')

for exp in exps:
    ###############################################################################
    path_tps = join(st.path, 'feature/TPSim_3groups_'+exp+'/')
    tps_form = join(path_tps, 'TPS_'+meth+'_v=1_elecs=all/TPS_pears_{}_{}_btw.npz')
    df_path = join(path_tps, 'LinReg_stats_RAW_v=1_elecs=all/')
    df_name = join(df_path, '{}_ols_'+meth+'_RAW_{}.csv') #su, conds0, conds1, freq
    ###############################################################################
    makedirs(df_path, exist_ok=True)

    # One DataFrame per subject; concatenated after the loop. Building typed
    # columns avoids coercing every value to a string, which a single
    # mixed-dtype np.concatenate does.
    su_frames = []

    for su in subjects:
        # Load electrode info from the first condition file, rename the labels
        # and keep only the electrodes located in the selected ROIs.
        with np.load(tps_form.format(su,conds[0]),allow_pickle=True) as mat:
            labels, channels, xyz = mat['label'], mat['channel'], mat['xyz']
        x, y, z = xyz[:,0], xyz[:,1], xyz[:,2]
        labels_new = rename_elecs(labels,x,y,z)
        idx_sel = [i for i,lab in enumerate(labels_new) if lab in rois_sel]
        nelecs = len(idx_sel)
        if nelecs == 0:
            # Nothing to test for this subject; it contributes no rows.
            continue
        labels, channels = labels_new[idx_sel], channels[idx_sel]
        x, y, z = x[idx_sel], y[idx_sel], z[idx_sel]

        if stat == 'LinReg':
            # TPSim of the selected electrodes, one (nelecs x npairs) array per condition.
            tps_by_cond = []
            for cond in conds:
                with np.load(tps_form.format(su,cond)) as mat:
                    tps_by_cond.append(mat['tps'][idx_sel])
            # Mean TPSim per electrode and condition (one column per condition).
            tps_su = np.column_stack([tps.mean(axis=1) for tps in tps_by_cond])
            # Pool all conditions; each sample is paired with its condition score.
            all_tps = np.concatenate(tps_by_cond,axis=1)
            all_scores = np.concatenate(
                [np.full(tps.shape[1], dict_perf[cond])
                 for cond, tps in zip(conds, tps_by_cond)], axis=0)
            # The design matrix is identical for every electrode of the subject.
            X = sm.add_constant(all_scores)
            T, unc_p = [], []
            for elec in range(nelecs):
                #Tval,pval = stats.kendalltau(all_tps[elec],all_scores)
                model_ols = sm.OLS(np.array(all_tps[elec]),X).fit()
                # Index 1 is the score regressor (index 0 is the constant).
                T.append(np.round(model_ols.tvalues[1],3))
                unc_p.append(model_ols.pvalues[1])
        elif stat == 'Ttest':
            # Unpaired Welch t-test between the first two conditions.
            with np.load(tps_form.format(su,conds[0])) as mat:
                tps0 = mat['tps'][idx_sel]
            with np.load(tps_form.format(su,conds[1])) as mat:
                tps1 = mat['tps'][idx_sel]
            tps_su = np.column_stack((tps0.mean(axis=1), tps1.mean(axis=1)))
            # ttest_ind works along axis 0, hence ntrials x nelecs inputs.
            # `ttest_ind` was never imported; use the scipy `stats` module that
            # the TPSim cell imports (`from scipy import stats`).
            T, unc_p = stats.ttest_ind(tps0.swapaxes(0,1), tps1.swapaxes(0,1),
                                       equal_var=False)
            print(T.shape, unc_p.shape)
        else:
            raise ValueError("stat must be 'LinReg' or 'Ttest', got %r" % (stat,))

        # Multiple-comparison corrections are applied within each subject.
        _, p_fdr = fdr_correction(unc_p)
        _, p_bf = bonferroni_correction(unc_p)

        su_df = pd.DataFrame({
            'subjects': [su]*nelecs,
            'labels': labels,
            'channels': channels,
            'x': x, 'y': y, 'z': z,
            'elecs_num': np.arange(nelecs),
        })
        # One tps column per condition actually summarised (3 for LinReg,
        # 2 for Ttest) instead of three hard-coded column names.
        for c, cond in enumerate(conds[:tps_su.shape[1]]):
            su_df['tps_'+cond] = tps_su[:,c]
        su_df['Tvals'] = np.asarray(T)
        su_df['unc_p'] = np.asarray(unc_p)
        su_df['fdr_p'] = p_fdr
        su_df['bonf_p'] = p_bf
        su_frames.append(su_df)

    df = pd.concat(su_frames, ignore_index=True) if su_frames else pd.DataFrame()
    print(df.shape)
    df.to_csv(df_name.format('All_subjects',stat),index=False)

-> Olfacto loaded
(236, 14)


In [6]:
from collections import Counter

# Summarise the per-electrode OLS table: keep electrodes below the p threshold,
# tag them by the sign of the T value, and export each (ROI, sign) group that
# is found in at least `min_subjects` different subjects.
st = study('Olfacto')
exp, stat = 'Enc', 'LinReg'#'Enc'
meth = 'btw'
thrs = [0.05]
corrections = ['fdr_p']
min_subjects = 3

##################################################################################
path_pow = join(st.path, 'feature/TPSim_3groups_'+exp+'/LinReg_stats_RAW_v=1_elecs=all/')
df_name = join(path_pow, 'All_subjects_ols_RAW_btw_RAW_LinReg.csv') #su, conds0, conds1, freq
df_stat_save = join(path_pow, 'Bilan_{}_OLS_'+meth+'_{}_{}_{}_{}_{}_{}.csv')
df_stat_all = join(path_pow, 'Bilan_{}_OLS_'+meth+'_{}_{}_{}_{}.csv')  # not used in this cell
##################################################################################

df = pd.read_csv(df_name)
print('Initial df shape', df.shape, meth, exp, stat)

# (value written in the 'sign' column, short tag used in the output file name)
sign_tags = [('completion', 'compl'), ('separation', 'sep')]

for th, corr in product(thrs,corrections):
    # .copy(): we add a column below; writing to a .loc slice of `df` triggers
    # pandas' SettingWithCopyWarning and may write to a temporary copy.
    df_sel = df.loc[df[corr]<th].copy()
    # T > 0 is tagged 'separation'; everything else (T <= 0 or NaN) 'completion'.
    df_sel['sign'] = ['separation' if t > 0 else 'completion' for t in df_sel['Tvals']]
    print('\n',meth,'stats at p < ',th, 'correction : ',corr, df_sel.shape)
    print(df_sel)
    rois = np.unique(df_sel['labels'])
    for roi in rois:
        df_roi = df_sel.loc[df_sel['labels']==roi]
        for sign, tag in sign_tags:
            df_plot = df_roi.loc[df_roi['sign']==sign]
            # Number of distinct subjects contributing an electrode.
            n_subjects = df_plot['subjects'].nunique()
            if n_subjects < min_subjects:
                continue
            print(roi, 'NB of subjects with '+sign,n_subjects,' subjects')
            print('#electrodes in total >>> ',df_plot.shape[0])
            df_plot.to_csv(df_stat_save.format('All_subjects','RAW','mem_groups',
                                               tag,roi,corr+str(th),stat))

-> Olfacto loaded
Initial df shape (236, 14) btw Enc LinReg

 btw stats at p <  0.05 correction :  fdr_p (3, 15)
    subjects   labels channels      x      y      z  elecs_num   tps_low  \
211     LEFC      aHC    b3-b2  34.55 -20.30 -10.85          2  1.003046   
223     LEFC      MFG   k10-k9  40.35  46.55  12.60         14  1.013134   
230     LEFC  OFC_olf    o7-o6  28.95  38.65 -13.55         21  1.022978   

      tps_mid  tps_high  Tvals     unc_p    fdr_p    bonf_p        sign  
211  0.974159  0.954550 -3.075  0.002219  0.02478  0.059924  completion  
223  1.011890  0.917893 -4.376  0.000015  0.00040  0.000400  completion  
230  0.967591  0.951690 -3.009  0.002753  0.02478  0.074339  completion  
