In [209]:
from os.path import join, exists
from os import makedirs
import pandas as pd
import numpy as np
from itertools import product
import matplotlib.pyplot as plt
from numpy.polynomial.polynomial import polyfit

from brainpipe.system import study
from utils import subjects, score_odor_su, x0,y0,z0, x1,y1,z1
from scipy.stats import kendalltau, spearmanr, pearsonr, ttest_ind, zscore
import statsmodels.api as sm
import seaborn as sns

In [210]:
"""
1) T-tests (low vs high) and Kendall R btw mean power and mem scores by trial
2) Check for gradient of pow modulations across ROIs (dist olf OFC and y in HC)
"""
st = study('Ripples')
PATH = join(st.path, 'feature/gamma_power_R_odors/')
filename = join(PATH, '{}_odors=all_elecs=all_freq={}_bsl_pow.npz')
df_name = join(PATH, 'df_pow_correl_f={}_rois=all.csv')

PATH_b = '/media/1_Analyses_Intra_EM_Odor/1bis_OE_BaseSam/JPlailly201306_seeg_ALS/behavior/'
df_b_name = join(PATH_b, 'episodic_all_su_all_trials_all_rep_old_odors.csv')

freqs = ['l_gamma','h_gamma']#['l_gamma','h_gamma','all_gamma','all_gamma2'] #['l_theta','h_theta']
t0, t1 = 0, 1

for freq in freqs:
    print(freq)
    subjs_sel = subjects #np.setdiff1d(subjects,np.array(['CHAF']))
    sub, chans, xyz, labs = np.array([]),np.array([]),np.array([]),np.array([])
    S_ks, S_ps, S_ss, T_s = np.array([]),np.array([]),np.array([]), np.array([])
    for su in subjs_sel:
        if exists(filename.format(su,freq)):
            mat = np.load(filename.format(su,freq),allow_pickle=True)
            chs, coords, labels = mat['channels'], mat['xyz'], mat['labels']
            sub = np.concatenate((sub,np.array([su]*len(chs)))) if np.size(sub) else np.array([su]*len(chs))
            chans = np.concatenate((chans,chs)) if np.size(chans) else chs
            xyz = np.concatenate((xyz,coords),axis=0) if np.size(xyz) else coords
            labs = np.concatenate((labs,labels)) if np.size(labs) else labels

            if PATH.split('_')[-2] == 'E':
                score_su = [score_odor_su[su][od] for od in mat['odor']]
            else:
                df_b = pd.read_csv(df_b_name)
                odors_order = np.array([int(x) for x in mat['odor']])
                df_b_su = df_b.loc[(df_b['subject_name']==su)]
                df_b_su = df_b_su[['odor_num','repetition_of_target','epi_chance_wght']]
                #order df according to mat file
                df_b_su['order'] = [np.where(odors_order==od)[0][0] for od in df_b_su['odor_num']]
                df_b_su = df_b_su.sort_values(by=['order','repetition_of_target'])
                #once sorted remove rejected trials at preproc from df
                df_sel = df_b_su[mat['trials_sel']]
                score_su = df_sel['epi_chance_wght']

            score_EM = mat['EM_3gr']
            xpow = mat['xpow'][0] #take first and only freq
            time = mat['time']-3
            t_sel = [t for t,ti in enumerate(time) if t0<=ti<t1]
            xpow_n = np.mean(xpow[:,t_sel,:],axis=1) #over the whole array

            for elec in range(xpow.shape[0]):
                pow_elec = xpow_n[elec,:]
                S_k = np.array(kendalltau(pow_elec,score_su))[np.newaxis]
                S_p = np.array(pearsonr(pow_elec,score_su))[np.newaxis]
                S_s = np.array(spearmanr(pow_elec,score_su))[np.newaxis]
                pow_l = pow_elec[np.where(score_EM=='low')[0]]
                pow_h = pow_elec[np.where(score_EM=='high')[0]]
                #T > 0 if high > low
                T_ = np.array(ttest_ind(pow_h,pow_l))[np.newaxis]

                S_ks = np.concatenate((S_ks,S_k),axis=0) if np.size(S_ks) else S_k
                S_ps = np.concatenate((S_ps,S_p),axis=0) if np.size(S_ps) else S_p
                S_ss = np.concatenate((S_ss,S_s),axis=0) if np.size(S_ss) else S_s
                T_s = np.concatenate((T_s,T_),axis=0) if np.size(T_s) else T_

    data = np.concatenate((sub[:,np.newaxis], chans[:,np.newaxis], labs[:,np.newaxis],
                           xyz, S_ks, S_ps, S_ss,T_s), axis=1)
    df = pd.DataFrame(data, columns=['subjects','channels','labels','x','y','z',
                                    'R_k','p_k','R_p','p_p','R_s','p_s','T_s','p_t'])
    dist_OFC = []
    for e in range(xyz.shape[0]):
        OFC_olf = np.array([x0,y0,z0] if xyz[e][0] > 0 else [x1,y1,z1])
        xyz_e = np.array([c for c in xyz[e]])
        dist_OFC.append(np.round(np.linalg.norm(xyz_e-OFC_olf),2))
    df['dist_OFC'] = np.array(dist_OFC)
    df['x_abs'] = np.abs(df['x'])
    df[['x','y','z','x_abs']] = df[['x','y','z','x_abs']].astype(float)
    df[['R_k','T_s']] = df[['R_k','T_s']].astype(float)
    df.to_csv(df_name.format(freq))
    
    #select df by roi and checks gradients
    df_olf = df.loc[(df['labels']=='olf')]
    df_HC = df.loc[(df['labels'].isin(['HC']))]
    df_OFC = df.loc[df['labels'].isin(['orbital','orbital2'])]
    print('nb of elecs in HC=',df_HC.shape[0],'sig T',df_HC.loc[df_HC['p_t']<0.05].shape[0])
    print('nb of elecs in HC=',df_HC.shape[0],'sig K',df_HC.loc[df_HC['p_k']<0.05].shape[0])
    print('nb of elecs in olf=',df_olf.shape[0],'sig T',df_olf.loc[df_olf['p_t']<0.05].shape[0])
    print('nb of elecs in olf=',df_olf.shape[0],'sig K',df_olf.loc[df_olf['p_k']<0.05].shape[0])
    print('nb of elecs in orbital=',df_OFC.shape[0],'sig T',df_OFC.loc[df_OFC['p_t']<0.05].shape[0])
    print('nb of elecs in orbital=',df_OFC.shape[0],'sig K',df_OFC.loc[df_OFC['p_k']<0.05].shape[0])
    
    #X_HC = sm.add_constant(df_HC[['x_abs','y','z']].values) ## X usually means our input variables (or independent variables))
    #y_HC = df_HC['R_k']
    #res_HC = sm.OLS(y_HC, X_HC).fit()
    
    Correl_HCy_k = pearsonr(df_HC['y'],df_HC['R_k'])
    Correl_HCy_t = pearsonr(df_HC['y'],df_HC['T_s'])
    Correl_olf_k = pearsonr(df_olf['y'],df_olf['R_k'])
    Correl_olf_t = pearsonr(df_olf['y'],df_olf['T_s'])
    Correl_OFC_k = pearsonr(df_OFC['dist_OFC'],df_OFC['R_k'])
    Correl_OFC_t = pearsonr(df_OFC['dist_OFC'],df_OFC['T_s'])
    
    #fig, axs = plt.subplots(2,figsize=(5,8))
    #c ='y'
    #axs[0].plot(df_HC[c], df_HC['T_s'],'bo')
    #b, m = polyfit(df_HC[c], df_HC['T_s'], 1)
    #axs[0].plot(df_HC[c], b + m * df_HC[c], '-',color='black')
    #axs[1].plot(df_OFC['dist_OFC'],df_OFC['T_s'],'ro')
    #b2, m2 = polyfit(df_OFC['dist_OFC'], df_OFC['T_s'], 1)
    #axs[1].plot(df_OFC['dist_OFC'], b2 + m2 * df_OFC['dist_OFC'], '-',color='black')
    #plt.show()
    #print('coords in HC', res_HC.summary())   
    #print('coords in olf', res_olf.summary())
    #print('coords in orbital', res_OFC.summary())
    print('Tstudent & y HC', Correl_HCy_t)    
    print('Rkendall & y HC', Correl_HCy_k)    
    print('Tstudent & y olf', Correl_olf_t)    
    print('Rkendall & y olf', Correl_olf_k)    
    print('Tstudent & dist in orbital', Correl_OFC_t)    
    print('Rkendall & dist in orbital', Correl_OFC_k)    
    #print('Linear Reg orb', res_OFC.summary())

-> Ripples loaded
l_gamma
nb of elecs in HC= 53 sig T 0
nb of elecs in HC= 53 sig K 3
nb of elecs in olf= 13 sig T 0
nb of elecs in olf= 13 sig K 0
nb of elecs in orbital= 89 sig T 1
nb of elecs in orbital= 89 sig K 3
Tstudent & y HC (-0.08647465944396007, 0.5381001913303745)
Rkendall & y HC (-0.10577494192724932, 0.45096362488149)
Tstudent & y olf (0.024493002223708947, 0.9366960475412949)
Rkendall & y olf (0.01882272314051468, 0.9513333765708112)
Tstudent & dist in orbital (-0.1491755314934649, 0.16294198467701856)
Rkendall & dist in orbital (-0.10761481657755835, 0.31547360554248277)
h_gamma
nb of elecs in HC= 53 sig T 1
nb of elecs in HC= 53 sig K 3
nb of elecs in olf= 13 sig T 0
nb of elecs in olf= 13 sig K 2
nb of elecs in orbital= 89 sig T 2
nb of elecs in orbital= 89 sig K 5
Tstudent & y HC (-0.21262826995066894, 0.12636754380056958)
Rkendall & y HC (-0.03374555312860618, 0.810422111270751)
Tstudent & y olf (-0.22496400960468338, 0.4599412086603944)
Rkendall & y olf (0.22605526

### check for diff in HC subfields

In [211]:
from add_freesurfer_labels import add_fsf_labels
import pingouin as pg

st = study('Ripples')
PATH = join(st.path, 'feature/theta_power_E_odors/')
csv_name = join(PATH, 'df_pow_correl_f={}_rois=all.csv')
FSF_file = join(st.path, 'feature_new/All_subjects_Freesurfer_labels.csv')
freqs = ['h_theta'] #'l_theta','h_theta']

for freq in freqs:
    df = pd.read_csv(csv_name.format(freq))
    df_fsf = add_fsf_labels(df)
    df_HC = df_fsf.loc[df_fsf['labels'].isin(['HC'])]
    df_HC = df_HC.loc[df_HC['p_k']<0.05]
    df_HC = df_HC.loc[df_HC['hip_CA'].isin(['CA1','subiculum','CA3', 'CA4'])] #'CA3', 'CA4'
    df_HC['hip_CA_3'] = [lab if lab in ['subiculum','CA1'] else 'CA3' for lab in df_HC['hip_CA']]
    df2 =  df_HC[['hip_CA','hip_CA_3','R_k']]

    aov = pg.welch_anova(dv='R_k', between='hip_CA_3', data=df2)
    print(aov)
    Ttests = pg.pairwise_gameshowell(data=df2, dv='R_k',between='hip_CA_3').round(3)
    print(Ttests)


-> Ripples loaded
CHAF missing contacts [] []
LEFC missing contacts [] []
FERJ missing contacts [] []
SEMC missing contacts [] []
VACJ missing contacts [] []
PIRJ missing contacts [] []
     Source  ddof1  ddof2    F  p-unc
0  hip_CA_3      1    inf  0.0    1.0


  pval = f.sf(fval, ddof1, 1 / lamb)
  'ddof2': 1 / lamb,


ValueError: cannot convert float NaN to integer