In [1]:
from os import makedirs
from os.path import join, exists
from itertools import product
import numpy as np
import pandas as pd

from scipy import stats
import statsmodels.api as sm

from brainpipe.system import study
from mne.stats import fdr_correction, bonferroni_correction
from utils import rename_elecs, odors_su_score, odor_list_su
print(odor_list_su('Early_Late'))

{'CHAF': [7, 8, 9, 2, 3], 'LEFC': [1, 2, 3, 4, 14, 15, 16, 17], 'PIRJ': [4, 9, 1, 18, 6, 7], 'VACJ': [14, 15, 16, 17, 10, 11, 12, 13], 'SEMC': [10, 11, 12, 13, 5, 7, 8, 9], 'FERJ': [16, 17, 5, 7, 12, 13, 2, 1]}


In [2]:
"""
Correlate TPSim with behavioral variables (RT, Perf)
BY ELECTRODE and Plot summary (for all included electrodes)
"""

exp = 'Enc'
freqs = ['theta'] #'1_alpha', '2_beta','3_gamma', high_gamma
meth = 'btw'
scorings = ['Pleasantness','Familiarity']
rois_sel = ['aHC','MFG','ACC','IFG','Amg','pPirT','PHG','Ins_olf',
            'OFC_olf','SFG']
###############################################################################
st = study('Olfacto')
path_tps = join(st.path, 'feature/TPSim_3groups_Enc/')
PATH = '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/1bis_OE_BaseSam/JPlailly201306_seeg_ALS/behavior/'
#filename = join(path_tps, 'by_odor_{}_v=1_elecs=all/TPS_pears_{}_odor{}_{}_{}.npz')
filename = join(path_tps, 'by_odor_{}_thgh_time_v=1_elecs=all/TPS_pears_learn_{}_{}_{}_{}_2gr.npz')
df_path = join(path_tps, 'correlations_perf_ratings/')
df_name = join(df_path, '{}_ols_'+meth+'_{}_{}_late.csv') #su, conds0, conds1, freq
###############################################################################
if not exists(df_path):
    makedirs(df_path)
df_odor = pd.read_excel(join(PATH,'Recap_Odeurs_Evaluations.xlsx'),sheet_name='Final')
   
for scoring in scorings:
    dict_r = {}
    for i,od in enumerate(df_odor['od_num']):
        dict_r[int(od[1:])] = df_odor[scoring][i]
    #odors_su = odors_su_score(scoring,meth)
    for freq in freqs:
        subjects_c, elecs_c, labels_c = np.array([]), np.array([]), np.array([])
        channels_c, x_c, y_c, z_c = np.array([]), np.array([]), np.array([]), np.array([])
        T_vals_c, p_vals_c = np.array([]), np.array([])
        p_fdr_c, p_bf_c = np.array([]), np.array([])
        
        for su in odor_list_su('Early_Late'):
            all_tps, all_scores = [], []
            for od in odor_list_su('Early_Late')[su]:
                mat = np.load(filename.format(meth,su,meth,od,freq),allow_pickle=True)
                
                #load all elec info,rename and select electrodes id
                labels, channels = mat['label'], mat['channel']
                x, y, z = mat['xyz'][:,0], mat['xyz'][:,1], mat['xyz'][:,2]
                labels_new = rename_elecs(labels,x,y,z)
                idx_sel = [i for i,lab in enumerate(labels_new) if lab in rois_sel]
            
                #selected data
                labels, channels = labels_new[idx_sel], channels[idx_sel]
                x, y, z = x[idx_sel], y[idx_sel], z[idx_sel]
                tps = 1 - mat['tps_1'][idx_sel]
                nelecs, ntrials = tps.shape
                #score_ = np.array([odors_su[su][od]]*ntrials)
                score_ = np.array([dict_r[od]]*ntrials)
                
                #fill tps and score vectors
                all_tps.append(tps)
                all_scores.append(score_)
                
            #compute stats Ttests-unpaired
            all_tps = np.concatenate(all_tps,axis=1)
            all_scores = np.concatenate(all_scores,axis=0)
            T, unc_p = [], []
            for elec in range(nelecs):
                Tval,pval = stats.kendalltau(all_tps[elec],all_scores)
                #Y, X = np.array(all_tps[elec]), sm.add_constant(np.array(all_scores))
                #model_ols = sm.OLS(Y,X).fit()
                #Tval, pval = np.round(model_ols.tvalues[1],3),model_ols.pvalues[1]
                T.append(Tval), unc_p.append(pval)
            _, p_fdr = fdr_correction(unc_p)
            _, p_bf = bonferroni_correction(unc_p)
            
            #save data for stats
            dict_ = {}
            for fi in mat.files:
                dict_[fi] = mat[fi]
            dict_[scoring] = all_scores
            dict_['tps'] = all_tps
            np.savez(join(df_path,'TPS_{}_corr_{}_{}_{}_{}.npz'.format(scoring,su,
                                meth,freq,scoring)),**dict_)
            #fill all df data
            subjects_c = np.hstack((subjects_c,np.array([su]*nelecs))) if np.size(subjects_c) else np.array([su]*nelecs)
            elecs_c = np.hstack((elecs_c,np.arange(nelecs))) if np.size(elecs_c) else np.arange(nelecs)
            labels_c = np.hstack((labels_c,labels)) if np.size(labels_c) else labels
            channels_c = np.hstack((channels_c,channels)) if np.size(channels_c) else channels
            x_c = np.hstack((x_c,x)) if np.size(x_c) else x
            y_c = np.hstack((y_c,y)) if np.size(y_c) else y
            z_c = np.hstack((z_c,z)) if np.size(z_c) else z
            T_vals_c = np.hstack((T_vals_c,T)) if np.size(T_vals_c) else T
            p_vals_c = np.hstack((p_vals_c,unc_p)) if np.size(p_vals_c) else unc_p
            p_fdr_c = np.hstack((p_fdr_c,p_fdr)) if np.size(p_fdr_c) else p_fdr
            p_bf_c = np.hstack((p_bf_c,p_bf)) if np.size(p_bf_c) else p_bf
        
        #save df with statistical results
        data = np.concatenate((subjects_c[:,np.newaxis],labels_c[:,np.newaxis],
                    channels_c[:,np.newaxis],x_c[:,np.newaxis],y_c[:,np.newaxis],
                    z_c[:,np.newaxis],elecs_c[:,np.newaxis],T_vals_c[:,np.newaxis],
                    p_vals_c[:,np.newaxis],p_fdr_c[:,np.newaxis],p_bf_c[:,np.newaxis]),
                    axis=1)
        df = pd.DataFrame(data, columns=['subjects','labels','channels','x','y','z',
                                'elecs_num', 'Tvals', 'unc_p','fdr_p', 'bonf_p'])
        print(df.shape)
        df.to_csv(df_name.format('All_subjects',freq,scoring),index=False)   

-> Olfacto loaded
(230, 11)
(230, 11)


In [8]:
from collections import Counter

st = study('Olfacto')
exp = 'Enc'#'Enc'
meth = 'btw'
scorings = ['Familiarity','Pleasantness']#['Epi','Rec']
olf_regions = ['Amg','pPirT','OFC_olf','Ins_olf']
freq = 'theta'

path_pow = join(st.path, 'feature/TPSim_3groups_Enc/correlations_perf_ratings/')
df_name = join(path_pow, '{}_ols_'+meth+'_{}_{}_late.csv') #su, conds0, conds1, freq
df_stat_save = join(path_pow, 'Bilan_{}_OLS_'+meth+'_{}_{}_{}_{}_late.csv')

for scoring in scorings:
    df = pd.read_csv(df_name.format('All_subjects','theta',scoring))
    print('Initial df shape', df.shape)

    thrs = [0.05]
    corrections = ['fdr_p']

    for th, corr in product(thrs,corrections):
        df_sel = df.loc[df[corr]<th]
        df_sel['sign'] = ['separation' if t < 0 else 'completion' for t in df_sel['Tvals']]
        print('\n',scoring,meth,'stats at p < ',th, 'correction : ',corr, df_sel.shape)
        print(Counter(df['labels'].loc[df[corr]<th]))
        
        rois = np.unique(df_sel['labels'])
        for roi in rois:
            df_roi = df_sel.loc[df_sel['labels']==roi]
            print(roi,df_roi)
            df_inc = df_roi.loc[df_roi['sign']=='completion'].groupby(['subjects']).count()
            df_dec = df_roi.loc[df_roi['sign']=='separation'].groupby(['subjects']).count()

            if (df_inc.shape[0] >= 3) or (df_inc.shape[0] >=2 and roi in olf_regions):
                print(roi, 'NB of subjects with completion',df_inc.shape[0],' subjects')
                df_plot = df_roi.loc[df_roi['sign']=='completion']
                print('#electrodes in total >>> ',df_plot.shape[0])
                df_plot.to_csv(df_stat_save.format('All_subjects','theta',roi,
                                                   scoring,corr+str(th)))
                
            if (df_dec.shape[0] >= 3) or (df_dec.shape[0] >=2 and roi in olf_regions):
                print(roi, 'NB of subjects with separation',df_dec.shape[0],' subjects')
                df_plot = df_roi.loc[df_roi['sign']=='separation']
                print('#electrodes in total >>> ',df_plot.shape[0])
                df_plot.to_csv(df_stat_save.format('All_subjects','theta',roi,
                                                   scoring,corr+str(th)))
                #print(df_plot)


-> Olfacto loaded
Initial df shape (230, 11)

 Familiarity btw stats at p <  0.05 correction :  fdr_p (28, 12)
Counter({'aHC': 8, 'OFC_olf': 5, 'SFG': 4, 'MFG': 4, 'IFG': 3, 'Ins_olf': 2, 'PHG': 1, 'pPirT': 1})
IFG     subjects labels channels     x      y      z  elecs_num     Tvals  \
101     PIRJ    IFG  o12-o11  48.1  39.20 -13.15         13 -0.129354   
224     FERJ    IFG   o10-o9  38.3  43.20 -14.55         26  0.133817   
225     FERJ    IFG  o11-o10  42.3  43.45 -14.50         27  0.110379   

        unc_p     fdr_p    bonf_p        sign  
101  0.002081  0.012041  0.037464  separation  
224  0.001967  0.015732  0.062930  completion  
225  0.010677  0.048808  0.341657  completion  
Ins_olf     subjects   labels channels     x      y     z  elecs_num     Tvals  \
103     PIRJ  Ins_olf    t3-t2  43.6 -17.65 -1.25         15  0.126176   
217     FERJ  Ins_olf    h5-h4  49.6 -12.00  4.65         19 -0.137800   

        unc_p     fdr_p    bonf_p        sign  
103  0.002676  0.0120

In [6]:
"""Compute correlations only for OFC and aHC"""
from collections import Counter

st = study('Olfacto')
exp = 'Enc'#'Enc'
meth = 'btw'
scorings = ['Familiarity','Pleasantness']#['Epi','Rec']
rois_sel = ['aHC','OFC_olf']
freq = 'theta'

path_pow = join(st.path, 'feature/TPSim_3groups_Enc/correlations_perf_ratings/')
df_name = join(path_pow, '{}_ols_'+meth+'_{}_{}_late.csv') #su, conds0, conds1, freq
df_stat_save = join(path_pow, 'Bilan_{}_OLS_'+meth+'_{}_{}_{}_{}_late.csv')

for scoring in scorings:
    df = pd.read_csv(df_name.format('All_subjects','theta',scoring))
    print('Initial df shape', df.shape)

    thrs = [0.05]
    corrections = ['fdr_p']

    for th, corr in product(thrs,corrections):
        df['sign'] = ['separation' if t < 0 else 'completion' for t in df['Tvals']]
        print('\n',scoring,meth,'stats at p < ',th, 'correction : ',corr, df_sel.shape)
        print(Counter(df['labels'].loc[df[corr]<th]))

        for roi in rois_sel:
            df_roi = df.loc[df['labels']==roi]
            df_roi['p_corr'] = fdr_correction(df_roi['unc_p'].values)[1]
            df_sel = df_roi.loc[df_roi['p_corr']<th]
            df_inc = df_sel.loc[df_sel['sign']=='completion'].groupby(['subjects']).count()
            df_dec = df_sel.loc[df_sel['sign']=='separation'].groupby(['subjects']).count()

            if (df_inc.shape[0] >= 3) or (df_inc.shape[0] >=2 and roi in olf_regions):
                print(roi, 'NB of subjects with completion',df_inc.shape[0],' subjects')
                df_plot = df_sel.loc[df_sel['sign']=='completion']
                print('#electrodes in total >>> ',df_plot.shape[0])
                df_plot.to_csv(df_stat_save.format('All_subjects','theta_by_roi',roi,
                                                   scoring,corr+str(th)))
                
            if (df_dec.shape[0] >= 3) or (df_dec.shape[0] >=2 and roi in olf_regions):
                print(roi, 'NB of subjects with separation',df_dec.shape[0],' subjects')
                df_plot = df_sel.loc[df_sel['sign']=='separation']
                print('#electrodes in total >>> ',df_plot.shape[0])
                df_plot.to_csv(df_stat_save.format('All_subjects','theta_by_roi',roi,
                                                   scoring,corr+str(th)))
                #print(df_plot)


-> Olfacto loaded
Initial df shape (230, 11)

 Familiarity btw stats at p <  0.05 correction :  fdr_p (3, 13)
Counter({'aHC': 8, 'OFC_olf': 5, 'SFG': 4, 'MFG': 4, 'IFG': 3, 'Ins_olf': 2, 'PHG': 1, 'pPirT': 1})
aHC     subjects labels channels      x      y      z  elecs_num     Tvals  \
62      LEFC    aHC    b2-b1  30.65 -20.50 -10.75          1  0.104676   
67      LEFC    aHC    d4-d3  32.95 -13.05 -26.45          6  0.090015   
68      LEFC    aHC    d5-d4  36.80 -12.75 -26.35          7  0.105964   
69      LEFC    aHC    d6-d5  40.75 -12.45 -26.30          8  0.079508   
88      PIRJ    aHC    b4-b3  35.10 -16.20 -15.30          0 -0.142714   
108     VACJ    aHC    b4-b3  34.85 -19.25 -15.10          2  0.179165   
200     FERJ    aHC    b2-b1  22.45 -13.80 -20.30          2 -0.151540   
205     FERJ    aHC  b'2-b'1 -22.70 -17.25 -13.05          7 -0.115632   
207     FERJ    aHC  b'4-b'3 -30.45 -17.20 -13.40          9 -0.126947   

        unc_p     fdr_p    bonf_p        sign

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
exp = 'Enc'
freqs = ['theta'] #'1_alpha', '2_beta','3_gamma', high_gamma
meth = 'btw'
scorings = ['Familiarity','Pleasantness']#['Epi','Rec']
###############################################################################
st = study('Olfacto')
path_tps = join(st.path, 'feature/TPSim_3groups_Enc/')
df_path = join(path_tps, 'correlations_perf_ratings/')
tps_name = join(df_path, 'TPS_{}_corr_{}_{}_{}_{}.npz')
name_search = 'Bilan_All_subjects_OLS_'+meth+'_{}_by_roi' #su, conds0, conds1, freq
plt_save = join(df_path, '{}_{}_{}_{}_p=.05.pdf')
###############################################################################

for freq,score in product(freqs,scorings):
    files = st.search(name_search.format(freq),folder=df_path)
    files_sel = [fi for fi in files if fi.endswith(score+'_fdr_p0.05.csv')]
    for fi in files_sel:
        roi = fi.split('_')[-4]
        roi = roi if roi != 'olf' else fi.split('_')[-5]
        print('>>> processing',fi,score,roi)
        df = pd.read_csv(join(df_path,fi))
        subjects = np.unique(df['subjects'])
        score_all, tps_all, subj_list = [], [], []
        
        for su in subjects:
            mat = np.load(tps_name.format(score,su,meth,freq,score),allow_pickle=True)
            chans = df['channels'].loc[df['subjects']==su].values
            idx = [i for i,chan in enumerate(mat['channel']) if chan in chans]
            score_ = (mat[score]-np.mean(mat[score]))/np.std(mat[score]) #RT 'all_score
            #score_ = mat['all_score']
            tps_data = np.mean(mat['tps'][idx],axis=0)
            subj_l = [su]*score_.shape[0]
            print(su,'stats',stats.kendalltau(score_,tps_data))
            score_all.extend(score_), tps_all.extend(tps_data), subj_list.extend(subj_l)

        score_all = np.array(score_all)[:,np.newaxis]
        tps_all = np.array(tps_all)[:,np.newaxis]
        subj_list = np.array(subj_list)[:,np.newaxis]
        df_plot = pd.DataFrame(data=np.concatenate((score_all,tps_all,subj_list),axis=1),
                                   columns=['RT_'+score,'tps','subjects'])
        df_plot['tps'] = df_plot['tps'].astype('float')
        df_plot['_'+score] = df_plot['RT_'+score].astype('float')
        sns.lmplot(x="_"+score, y="tps", data=df_plot,hue="subjects",
                       x_estimator=np.mean,aspect=0.7,fit_reg=True,ci=90)
        plt.savefig(plt_save.format(meth,freq,score,roi))        
        plt.savefig(plt_save.format(meth,freq,score,roi).replace('pdf','png'))

In [None]:
from utils import odor_groups_3wgth, odors_su_score
dict_perf = {'low':1, 'mid':2, 'high':3}

for su in odor_groups_3wgth:
    print('>>> processing ', su)
    RT_su, score_su = [], []
    for cond in odor_groups_3wgth[su]:
        for od in odor_groups_3wgth[su][cond]:
            if (su == 'LEFC') and (od=='2'):
                pass
            else:
                RT_su.append(odors_su_score('Epi','btw')[su][int(od)])
                score_su.append(dict_perf[cond])
    RT_su = (RT_su - np.mean(RT_su))/np.std(RT_su)
    sns.regplot(score_su,RT_su)
    plt.show()

