In [1]:
from os.path import join, exists
from os import makedirs
import pandas as pd
import numpy as np
from itertools import product, combinations
import matplotlib.pyplot as plt

from brainpipe.system import study
from scipy.stats import kendalltau, spearmanr, pearsonr, ttest_ind, zscore
import statsmodels.api as sm
from utils import subjects, odor_groups_3wgth, su_list_od
from mne.stats import fdr_correction

  import pandas.util.testing as tm


### Characterize modulations of theta power at ENCODING
Impact of time, space, context, memory and odor properties

In [None]:
st = study('Ripples')
PATH = join(st.path, 'feature/{}_power_E_odors/')
filename = join(PATH, '{}_odors=all_elecs={}_freq={}_pow.npz')

freqs = ['theta','gamma']
freqs_to_elecs = { 'theta': ['psd','h_theta'],
                   'gamma': ['all','h_gamma']}

for freq in freqs:
    y_HC = []
    for su in subjects:
        pow_mat = np.load(filename.format(freq, su, freqs_to_elecs[freq][0],
                                         freqs_to_elecs[freq][1]), allow_pickle=True)
        idx = np.where(pow_mat['labels']=='HC')[0]
        y = pow_mat['xyz'][:,1][idx]
        if len(y) > 1:
            y_HC.extend(y)

In [27]:
%load_ext autoreload
%autoreload 2

In [31]:
from compute_odor_distances_all_pairs import compute_dist_trials_su_E, reorder_df
st = study('Ripples')
PATH = join(st.path, 'feature/theta_power_E_odors/')
filename = join(PATH, '{}_odors=all_elecs=psd_freq={}_pow.npz')
PATH_SAVE = join(PATH, 'distance_pow_features/')
savename = join(PATH_SAVE, '{}_odors=all_elecs=all_freq={}_dist=TPS.npz')
dfname = join(PATH_SAVE, 'df_dist=TPS_dims=all_odors=all_su={}_freq={}_norm={}.csv')
if not exists(PATH_SAVE):
    makedirs(PATH_SAVE)

PATH_b = '/media/1_Analyses_Intra_EM_Odor/1bis_OE_BaseSam/JPlailly201306_seeg_ALS/behavior/'
df_perf = join(PATH_b,'encoding_individual_results.xls')
df_od_dist = join(PATH_b,'distance_odors_all_pairs_meth=sum.csv')
cols_s = ['trial_time','odor_num','encoding_day','0_insp_V','0_exp_V']

freqs, norm = ['l_theta','h_theta'], True #['l_theta','h_theta']
t0, t1 = 0, 2

for freq in freqs:
    subjs = subjects if freq != 'l_theta' else np.setdiff1d(subjects,['CHAF'])
    for su in subjs:
        print('processing',freq,su)
        pow_mat = np.load(filename.format(su,freq), allow_pickle=True)
        labels, y_coord = pow_mat['labels'], pow_mat['xyz'][:,1]
        new_labs = []
        for i,lab in enumerate(labels):
            if (lab == 'HC') & (y_coord[i] > -20.5):
                new_labs.append('aHC')
            if (lab == 'HC') & (y_coord[i] <= -20.5):
                new_labs.append('pHC')
            if lab != 'HC':
                new_labs.append(lab)
        labels = np.array([lab if lab != 'orbital2' else 'orbital' for lab in labels])
        time = pow_mat['time']-3
        t_sel = [t for t,ti in enumerate(time) if t0<=ti<t1]
        pow_data = pow_mat['xpow'][0,:,t_sel,:].swapaxes(0,1)
        #pow_data = np.mean(pow_mat['xpow'][0,:,t_sel,:], axis=0)
        #pow_data = (pow_data - np.min(pow_data))/(np.max(pow_data)-np.min(pow_data))
        nelecs, _, ntrials = pow_data.shape
        trials_taken = pow_mat['trials_sel'] #select trials
        od_order = np.array([int(x) for x in pow_mat['odor']])

        df_b = pd.read_excel(df_perf, sheet_name=su)[trials_taken]
        df_order = reorder_df(df_b, od_order)
        df_all_dist = compute_dist_trials_su_E(df_order, df_od_dist,
                                                        su, cols_s, norm=norm)
        df_all_dist.to_csv(dfname.format(su,freq,norm))
        print(df_all_dist.shape)
        0/0

        #compute power distance
        pow_dist = np.zeros((nelecs, df_all_dist.shape[0]))
        for e in range(nelecs):
            i = 0
            for tr0, tr1 in combinations(range(ntrials),2):
                if df_order['odor_num'].iloc[tr0] != df_order['odor_num'].iloc[tr1]:
                    pow_dist[e,i] += 1 - pearsonr(pow_data[e,:,tr0],pow_data[e,:,tr1])[0]
                    #pow_dist[e,i] += np.round(np.linalg.norm(pow_data[e,tr0]-pow_data[e,tr1]),2)
                    i += 1
        #rescale btw 0 and 1 as other features
        pow_dist_z = (pow_dist - np.min(pow_dist))/(np.max(pow_dist)-np.min(pow_dist))
        dico_dist = {}
        for fi in np.setdiff1d(pow_mat.files,'xpow'):
            dico_dist[fi] = pow_mat[fi]
        dico_dist['dist_pow'] = pow_dist
        dico_dist['dist_pow_z'] = pow_dist_z
        dico_dist['new_lab'] = np.array(new_labs)
        np.savez(savename.format(su,freq), **dico_dist)

-> Ripples loaded
processing l_theta FERJ
(1060, 11)


ZeroDivisionError: division by zero

In [24]:
"""
Compute similarities of spaces 
"""

st = study('Ripples')
PATH = join(st.path, 'feature/theta_power_E_odors/distance_pow_features/')
PATH_SAVE = join(PATH, 'similarity_results/')
if not exists(PATH_SAVE):
    makedirs(PATH_SAVE)
dataname = join(PATH, '{}_odors=all_elecs=all_freq={}_dist=TPS.npz')
csvname = join(PATH, 'df_dist=TPS_dims=all_odors=all_su={}_freq={}_norm={}.csv')
csv_save = join(PATH_SAVE, 'df_dist=TPS_su={}_freq={}_roi=all_feats=all_R={}.csv')

freqs, norm = ['l_theta','h_theta'] , True #['l_theta','h_theta'] 'l_gamma','h_gamma'
Rs = ['kend','pears','spear']
feats = ['od_dist', 'temp_dist', 'resp_dist','spa_dist', 'rich_dist']

for freq, R_meth in product(freqs,Rs):
    subjs = subjects if freq != 'l_theta' else np.setdiff1d(subjects,['CHAF'])
    for su in subjs:
        print('processing',freq,su)
        d_mat = np.load(dataname.format(su,freq), allow_pickle=True)
        df_d = pd.read_csv(csvname.format(su,freq,norm))
        dist_x = d_mat['dist_pow_z']
        nelecs, ntrials = dist_x.shape

        sim_spaces = np.zeros((nelecs,len(feats)))
        sim_pvals = np.zeros((nelecs,len(feats)))
        for e in range(nelecs):
            for i,f in enumerate(feats):
                if R_meth == 'kend':
                    R,p = kendalltau(dist_x[e,:],df_d[f].values)
                if R_meth == 'pears':
                    R,p = pearsonr(dist_x[e,:],df_d[f].values)
                if R_meth == 'spear':
                    R,p = spearmanr(dist_x[e,:],df_d[f].values)
                sim_spaces[e,i] += R
                sim_pvals[e,i] += p

        ch_data = np.concatenate((np.array([su]*nelecs)[:,np.newaxis],
                                  d_mat['labels'][:,np.newaxis],
                                  d_mat['new_lab'][:,np.newaxis],
                                  d_mat['channels'][:,np.newaxis],
                                  d_mat['xyz']), axis=1)
        data_df = np.concatenate((ch_data,sim_spaces,sim_pvals), axis=1)
        cols = ['subj','labels','new_lab','channels','x','y','z']+['sim_'+f for f in feats]
        cols = cols + ['p_'+f for f in feats]
        df_save = pd.DataFrame(data_df, columns=cols)
        df_save.to_csv(csv_save.format(su,freq,R_meth),index=False)


-> Ripples loaded
processing l_theta FERJ
processing l_theta LEFC
processing l_theta PIRJ
processing l_theta SEMC
processing l_theta VACJ
processing l_theta FERJ
processing l_theta LEFC
processing l_theta PIRJ
processing l_theta SEMC
processing l_theta VACJ
processing l_theta FERJ
processing l_theta LEFC
processing l_theta PIRJ
processing l_theta SEMC
processing l_theta VACJ
processing h_theta CHAF
processing h_theta LEFC
processing h_theta FERJ
processing h_theta SEMC
processing h_theta VACJ
processing h_theta PIRJ
processing h_theta CHAF
processing h_theta LEFC
processing h_theta FERJ
processing h_theta SEMC
processing h_theta VACJ
processing h_theta PIRJ
processing h_theta CHAF
processing h_theta LEFC
processing h_theta FERJ
processing h_theta SEMC
processing h_theta VACJ
processing h_theta PIRJ


In [25]:
"""
sum-up results of space similarity
"""

st = study('Ripples')
PATH = join(st.path, 'feature/theta_power_E_odors/distance_pow_features/similarity_results/')
csvname = join(PATH, 'df_dist=TPS_su={}_freq={}_roi=all_feats=all_R={}.csv')
savename = join(PATH, 'dist=TPS_all_su_freq={}_roi={}_feat={}_R={}.csv')

freqs = ['l_theta','h_theta'] #['l_gamma','h_gamma'] 'l_theta','h_theta'
Rs = ['pears']
feats = ['od_dist','temp_dist','resp_dist','spa_dist','rich_dist']
rois = ['olf','orbital','HC']

for freq, feat, R_meth in product(freqs,feats,Rs):
    subjs = subjects if freq != 'l_theta' else np.setdiff1d(subjects,['CHAF'])
    for roi in rois:
        df_sig = pd.DataFrame()
        for su in subjs:
            df_d = pd.read_csv(csvname.format(su,freq,R_meth))
            df_s = df_d.loc[df_d['labels']==roi]
            df_s['p_corr'] = fdr_correction(df_s['p_'+feat])[1]
            df_s = df_s.loc[df_s['p_'+feat]<0.05]
            
            if df_s.shape[0] > 1:
                df_s = df_s[['subj','labels','new_lab','channels',
                         'x','y','z','sim_'+feat,'p_'+feat]]
                df_sig = df_sig.append(df_s)
        
        if (df_sig.shape[0]>1):
            df_sig_neg = df_sig.loc[df_sig['sim_'+feat]<0]
            df_sig_pos = df_sig.loc[df_sig['sim_'+feat]>0]
            if (df_sig.groupby(['subj']).count().shape[0]>=2):
                print(freq,feat,roi,df_sig)
                df_sig.to_csv(savename.format(freq,roi,feat,R_meth))
                if df_sig_pos.groupby(['subj']).count().shape[0] >= 3:
                    print('processing',freq,feat,R_meth,roi)
                    print('nb elecs sig tot ', df_sig.shape[0])
                    print('nb elecs sig pos ', df_sig_pos.shape[0])
                    print('nb of sub sig pos', df_sig_pos.groupby(['subj']).count().shape[0])
                    print(df_sig_pos)
                if df_sig_neg.groupby(['subj']).count().shape[0] >= 3:
                    print('nb elecs sig neg ', df_sig_neg.shape[0])
                    print(freq,feat,roi,'nb of sub sig neg', df_sig_neg.groupby(['subj']).count().shape[0])
                    print(df_sig_neg)
                    print((''))


-> Ripples loaded
