In [None]:
from os.path import join, exists
from os import makedirs
from itertools import product, combinations
import numpy as np
from numpy.polynomial.polynomial import polyfit
import pandas as pd
from collections import Counter

from brainpipe.system import study
from utils import subjects, score_odor_su
from similarity_funcs import compute_tps_btw
from scipy.stats import ttest_ind, ttest_1samp, pearsonr, spearmanr, kendalltau
from mne.stats import fdr_correction, bonferroni_correction

import matplotlib.pyplot as plt

In [None]:
"""
Compute similarity btw all odors by subject PLEASANTNESS FAMILIARITY or BOTH
RDM MATRICES (Perceptual) // RDMs TPS already computed
"""
###############################################################################
# Inputs: per-subject power files (used here only for the list of odors each
# subject was presented with) and the spreadsheet of odor ratings.
# Output: one npz per subject holding the pairwise perceptual distances.
st = study('Olfacto')
path_pow = join(st.path, 'feature_new/TPSim_power_data/')
pow_file = join(path_pow, '{}_odors=all_elecs=psd_freq=l_theta_pow_EL.npz')
path_save = join(st.path, 'feature_new/RDM_Perceptual/')
save_file = join(path_save, '{}_odors=all_dims=all_df={}_RDM_{}.npz')
PATH = '/media/1_Analyses_Intra_EM_Odor/1bis_OE_BaseSam/JPlailly201306_seeg_ALS/behavior/'
df_od_avg = join(PATH, 'Recap_Odeurs_Evaluations.xlsx')
###############################################################################
if not exists(path_save):
    makedirs(path_save)
###############################################################################
sheet = 'Final_Lucile' #,'Final_Lucile','Final_avg' 
#Final (just 2 odors added from Lucile) Final Lucile (all data available from Lucile taken)
#Final avg (Lucile's and my data are averaged when possible)
col_sel = ['od_num','odors','Pleasantness','Familiarity']
# The last entry selects both columns at once: the distance is then the
# Euclidean norm over the two ratings and is stored under the key 'both'.
dims = ['Pleasantness','Familiarity', ['Pleasantness','Familiarity']]
steps = ['E','L']

df = pd.read_excel(df_od_avg, sheet_name=sheet)

for su in subjects:
    mat = np.load(pow_file.format(su), allow_pickle=True)
    odor_su = np.unique(mat['od_' + steps[0]])
    odor_pairs = list(combinations(odor_su, 2))
    combs = [first + '_' + second for first, second in odor_pairs]

    # Distance between the ratings of every pair of odors, for each dimension.
    dico_dist = {}
    for dim in dims:
        # Ratings are identified in the sheet by 'O' + odor code.
        ratings = {od: df[dim].loc[df['od_num'] == 'O' + od].values
                   for od in odor_su}
        key = 'both' if dim == ['Pleasantness','Familiarity'] else dim
        dico_dist[key] = np.array(
            [np.round(np.linalg.norm(ratings[first] - ratings[second]), 2)
             for first, second in odor_pairs])
    dico_dist['combs'] = combs
    np.savez(save_file.format(su, sheet, steps[0]), **dico_dist)

In [None]:
"""
Correlation between TPS and Perceptual distances
"""
###############################################################################
# Inputs: per-subject/frequency TPS files and per-subject/sheet perceptual RDMs.
# Output: one npz per (subject, frequency, sheet) holding Kendall's tau between
# TPD (= 1 - tps) and each perceptual distance, for every electrode.
# NOTE(review): the RDM files are read with an '_RDM_L' suffix; confirm that
# the cell producing them was run with the matching step.
st = study('Olfacto')
path_tps = join(st.path, 'feature_new/TPSim_by_odor_btw/')
tps_file = join(path_tps, '{}_odors=all_tps=btw_elecs=psd_freq={}_xpow_L.npz')
path_rdm = join(st.path, 'feature_new/RDM_Perceptual/')
rdm_file = join(path_rdm, '{}_odors=all_dims=all_df={}_RDM_L.npz')
path_save = join(st.path, 'feature_new/Corr_RDMs_TPS=btw/')
save_file = join(path_save, '{}_corr_RDMs_freq={}_dims=all_df={}_L.npz')
###############################################################################
if not exists(path_save):
    makedirs(path_save)
###############################################################################
sheets = ['Final_avg','Final_Lucile','Final'] 
dims = ['Pleasantness','Familiarity','both']
freqs = ['l_theta','h_theta']
# Fields of the TPS file that are not carried over to the output file.
# 'pvals' is replaced below by the p-values of the correlations.
skip_fields = ('xpow', 'tps', 'pvals')

for su, freq, sheet in product(subjects, freqs, sheets):
    mat = np.load(tps_file.format(su, freq), allow_pickle=True)
    combs_l, TPD = mat['combs'], 1 - mat['tps']
    nelecs, ncombs = TPD.shape
    rdms = np.load(rdm_file.format(su, sheet), allow_pickle=True)

    all_corr, all_p = np.zeros((len(dims), nelecs)), np.zeros((len(dims), nelecs))
    for d, dim in enumerate(dims):
        # Map every odor pair to its perceptual distance, in both 'a_b' and
        # 'b_a' spellings, so the TPS pair order can be looked up directly.
        dico_dist = {comb: dist for comb, dist in zip(rdms['combs'], rdms[dim])}
        dico_dist.update({comb.split('_')[1] + '_' + comb.split('_')[0]: dist
                          for comb, dist in zip(rdms['combs'], rdms[dim])})
        # Perceptual distances re-ordered like the TPS pairs.
        dim_comb = [dico_dist[comb] for comb in combs_l]

        for elec in range(nelecs):
            tau, p = kendalltau(TPD[elec], dim_comb)
            all_corr[d, elec] = tau
            all_p[d, elec] = p

    # Copy the remaining TPS fields next to the statistics. The original test
    # `fi != [...]` compared a string with a list, was always True and
    # therefore never excluded anything.
    dico_rdm = {fi: mat[fi] for fi in mat.files if fi not in skip_fields}
    dico_rdm['corr'] = all_corr
    dico_rdm['pvals'] = all_p
    dico_rdm['dims'] = dims
    np.savez(save_file.format(su, freq, sheet), **dico_rdm)

In [None]:
"""
Sum up correlations into DF with all subjects and electrodes
"""
###############################################################################
# Inputs: the per-subject correlation files. Output: one CSV per
# (frequency, dimension, sheet) listing every electrode located in `rois`.
st = study('Olfacto')
path_ken = join(st.path, 'feature_new/Corr_RDMs_TPS=btw/')
corr_file = join(path_ken, '{}_corr_RDMs_freq={}_dims=all_df={}_L.npz')
df_path = join(path_ken, 'dfs_results/')
dfsave = join(df_path, 'df_elecs=ALL_OFC_freq={}_dim={}_df={}_L.csv')
###############################################################################
if not exists(df_path):
    makedirs(df_path)
###############################################################################
sheets = ['Final'] 
dims = ['Pleasantness','Familiarity','both']
freqs = ['l_theta','h_theta']
rois = ['OFC','OFC_olf']

for sheet, freq in product(sheets, freqs):
    for d, dim in enumerate(dims):
        columns = ['subjects','labels','channels','x','y','z',
                   'elecs_num','R_'+dim,'unc_'+dim,'fdr_'+dim]
        # One typed frame per subject, concatenated once at the end. Stacking
        # everything into a single numpy array would cast the coordinates and
        # statistics to strings.
        frames = []

        for su in subjects:
            mat0 = np.load(corr_file.format(su, freq, sheet), allow_pickle=True)
            labels, channels, xyz = mat0['labels'], mat0['channels'], mat0['xyz']

            #select electrodes in ROIS
            idx_sel = [i for i, lab in enumerate(labels) if lab in rois]
            nelecs = len(idx_sel)
            if nelecs == 0:
                # This subject contributes no row for these ROIs.
                continue
            ps = mat0['pvals'][d, idx_sel]

            frames.append(pd.DataFrame({
                'subjects': [su] * nelecs,
                'labels': labels[idx_sel],
                'channels': channels[idx_sel],
                'x': xyz[idx_sel, 0],
                'y': xyz[idx_sel, 1],
                'z': xyz[idx_sel, 2],
                # index of the electrode within this subject's selection
                'elecs_num': np.arange(nelecs),
                'R_'+dim: mat0['corr'][d, idx_sel],
                'unc_'+dim: ps,
                # FDR correction is applied within subject, over the selected
                # electrodes only.
                'fdr_'+dim: fdr_correction(ps)[1],
            }))

        if frames:
            df = pd.concat(frames, ignore_index=True)[columns]
        else:
            df = pd.DataFrame(columns=columns)
        print(df.shape)
        df.to_csv(dfsave.format(freq, dim, sheet), index=False)

### summarize stats 

In [None]:
###############################################################################
# Print, for each frequency / dimension / sheet, the electrodes whose corrected
# p-value is below threshold and whose correlation is positive.
# `path_ken` is not defined in this cell: it has to exist in the kernel already
# (it is assigned in the cell that builds the summary CSV files).
# NOTE(review): the files read here end in '_E.csv' whereas that cell writes
# '_L.csv' - confirm which step is intended.
st = study('Olfacto')
df_path = join(path_ken, 'dfs_results/')
dfname = join(df_path, 'df_elecs=ALL_OFC_freq={}_dim={}_df={}_E.csv')
###############################################################################
sheets = ['Final']#,'Final_avg','Final'] #'Final_avg','Final_Lucile',
dims = ['Pleasantness','Familiarity','both']
freqs = ['l_theta','h_theta']

thrs = [0.05]
corrections = ['fdr_']

for freq in freqs:
    for dim in dims:
        for sheet in sheets:
            print('\n Processing', freq, dim, sheet)
            df = pd.read_csv(dfname.format(freq, dim, sheet))

            for th in thrs:
                for corr in corrections:
                    is_significant = df[corr + dim] < th
                    is_positive = df['R_' + dim] > 0
                    df_sel = df.loc[is_significant & is_positive]
                    print('stats at p < ', th, 'correction : ', corr,
                          df_sel.shape, 'for dimension', dim)
                    # Details are only shown when at least 3 electrodes remain.
                    if len(df_sel) >= 3:
                        print(Counter(df_sel['labels']))
                        print(df_sel)


In [None]:
"""Only consider specific ROIs and not ALL brain regions"""
from brainpipe.system import study
from mne.stats import bonferroni_correction, fdr_correction
from collections import Counter
from os import path
import pandas as pd
from itertools import product
import numpy as np
import math

# For each ROI and rating dimension: FDR-correct the uncorrected p-values
# within the ROI, keep the significant electrodes, and export those with a
# positive correlation when enough subjects show the effect.
st = study('Olfacto')
fold, freq, sheet = 'Enc', 'theta', 'Final'
# cond = 'high'
path_file = path.join(st.path, 
          'feature/TPSim_3groups_'+fold+'/similarity_matrix_btw_v=1_elecs=all_early_late/')
df_name = path.join(path_file, 
                'All_subjects_correl_rdm_pleas_fam_score_'+freq+'_mean=False_'+sheet+'_E.csv') #su, conds0, conds1, freq
df_save = path.join(path_file, 
                'All_subjects_correl_{}_{}_{}_'+freq+'_'+sheet+'_mean=False_{}_E.csv')

thrs = [0.05]
# NOTE(review): `corr` is only used in the printed messages and in the output
# file name; the selection below always uses the FDR-corrected 'new_pvalues'.
corrections = ['unc_']#['fdr_','bonf_']
dims = ['pl','fam']
rois = ['OFC_olf','pPirT']
olf_regions = ['OFC_olf','pPirT']

df_init = pd.read_csv(df_name)
#print('Initial df shape', df_init.shape,df_init.columns)
# combine Amg/pPirT together
df_init['labels'] = [x if x != 'Amg' else 'pPirT' for x in df_init['labels']]

for th, dim, corr in product(thrs, dims, corrections):
    # Columns exported and printed for the retained electrodes.
    out_cols = ['subjects','labels','channels','x','y','z','R_'+dim,
                'unc_'+dim, 'fdr_'+dim,'new_pvalues']
    for roi in rois:
        print('>>> processing', roi, dim, corr, th)
        # Work on explicit copies: columns are added below, and assigning to a
        # `.loc` slice of `df_init` raises pandas' SettingWithCopyWarning.
        df_roi = df_init.loc[df_init['labels']==roi].copy()
        # Missing p-values are treated as non-significant (p = 1) before FDR.
        pvals = [p if not math.isnan(p) else 1 for p in df_roi['unc_'+dim].values]
        df_roi['new_pvalues'] = fdr_correction(pvals)[1]
        df_sel = df_roi.loc[df_roi['new_pvalues']<th].copy()
        print('sig results',df_sel)
        df_sel['sign'] = ['similar' if t > 0 else 'different' for t in df_sel['R_'+dim]]
        print('\n stats at p < ',th, 'correction : ',corr, df_sel.shape, 'for dimension',dim)
        print(Counter(df_sel['labels']))

        df_plot = df_sel.loc[df_sel['sign']=='similar']
        # One row per subject having at least one 'similar' electrode.
        df_dec = df_plot.groupby(['subjects']).count()
        n_subjects = df_dec.shape[0]
        # Keep the ROI when 3+ subjects show the effect, or 2+ for the regions
        # listed in `olf_regions`.
        if (n_subjects >= 3) or (n_subjects >= 2 and roi in olf_regions):
            print(roi, 'NB of subjects with SIMILAR',n_subjects,' subjects')
            print('#electrodes in total >>> ',df_plot.shape[0])
            df_plot[out_cols].to_csv(df_save.format(roi,corr+str(th),'late',dim))
            print(df_plot[out_cols])
            print(df_plot['R_'+dim].mean(), df_plot['R_'+dim].std())
            


In [None]:
"""
//// PLEASANTNESS + FAMILIARITY ////
Final OFC (2 patients 3 elecs)
Final avg OFC (3 patients 5 elecs) + MFG (3 patients 4 elecs)

//// FAMILIARITY ////
Final OFC (2P,3elecs), MFG (3P, 5elecs)
Final avg MFG (3P,6elecs), OFC (2P 2elecs 2 sens), aHC (3P,4elecs)

//// PLEASANTNESS ////
Final OFC (3P, 4elecs)
Final avg OFC (3P, 4elecs)
"""

In [None]:
from os.path import join
from brainpipe.system import study
import pandas as pd
import numpy as np
from itertools import combinations

# Pairwise perceptual distances between all odors (Familiarity, Pleasantness
# and both together), saved as three vectors in one npz file.
st = study('Olfacto')
PATH = '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/1bis_OE_BaseSam/JPlailly201306_seeg_ALS/behavior/'
filename = 'Recap_Odeurs_Evaluations.xlsx'
PATH_SAVE = join(st.path, 'feature/TPSim_3groups_Enc/similarity_matrix_btw/')
savename = 'dist_pl_fam_all_odors_pleas.npz'

df = pd.read_excel(PATH+filename, sheet_name='Final')
# Odor order used to enumerate the pairs. It is set by hand; the previous
# assignment from the 'od_num' column was overwritten immediately and has been
# removed. Alternative orders are kept for reference.
#odors = [14,10,3,8,13,6,12,9,1,18,4,15,5,17,11,2,16,7] #ordered by Fam
odors = [10,1,12,3,9,13,6,5,8,14,16,4,18,11,15,7,2,17] #ordered by Pleas
#odors = [10,3,9,1,13,12,6,14,8,5,4,18,16,15,7,2,11,17] #fam * pleas
#odors = [10,3,9,13,1,6,12,14,8,5,4,18,16,15,7,2,11,17] #fam + pleas

print(df.columns)

# Look every odor's ratings up once instead of once per pair.
fam_by_odor, pleas_by_odor = {}, {}
for od in odors:
    is_odor = df['od_num'] == 'O' + str(od)
    fam_by_odor[od] = df.loc[is_odor, 'Familiarity'].values
    pleas_by_odor[od] = df.loc[is_odor, 'Pleasantness'].values

dist_f, dist_p, dist_fp = [], [], []
for o1, o2 in combinations(odors, 2):
    f1, f2 = fam_by_odor[o1], fam_by_odor[o2]
    p1, p2 = pleas_by_odor[o1], pleas_by_odor[o2]
    # Stack both ratings so the norm is taken over the two dimensions.
    fp1, fp2 = np.array([f1, p1]), np.array([f2, p2])
    dist_f.append(np.round(np.linalg.norm(f1 - f2), 2))
    dist_p.append(np.round(np.linalg.norm(p1 - p2), 2))
    dist_fp.append(np.round(np.linalg.norm(fp1 - fp2), 2))
print(len(dist_f))
np.savez(PATH_SAVE+savename, d_f=np.array(dist_f), d_p=np.array(dist_p),
        d_fp=np.array(dist_fp))

### Plot odors RDM matrices 

In [None]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import cm

from matplotlib.patches import Patch
from sklearn.manifold import MDS
"""
Plot TPSim matrices (RDM) for all odors (Familiarity Pleasantness, both)
"""
###############################################################################
# For every distance vector stored in the npz file: rebuild the symmetric
# odor x odor matrix, embed it in 2D with MDS (left panel) and show its upper
# triangle as a heatmap (right panel). Figures are saved as png and pdf.
st = study('Olfacto')
path_npz = join(st.path,'feature/TPSim_3groups_Enc/')
path_pow = join(path_npz, 'similarity_matrix_btw/dist_pl_fam_all_odors_mult.npz')
savename = join(path_npz, 'distance_graphs/Plot_distance_{}_all_odors_mult.png')
###############################################################################
exp = 'Enc' #Ret, Enc
###############################################################################
# Odor shown at each row/column of the matrix.
# NOTE(review): this must be the order used when the distance file was
# computed - confirm against the cell that wrote it.
#new_order = [10,1,12,3,9,13,6,5,8,14,16,4,18,11,15,7,2,17] #Pleas order
new_order = [10,3,9,1,13,12,6,14,8,5,4,18,16,15,7,2,11,17] #fam * pleas
#new_order = [10,3,9,13,1,6,12,14,8,5,4,18,16,15,7,2,11,17] #fam + pleas
#new_order = [14,10,3,8,13,6,12,9,1,18,4,15,5,17,11,2,16,7] #Fam order
# Fixed seed so the MDS layout is the same on every run.
mds_seed = 0
mat = np.load(path_pow)
features = mat.files
n_od = len(new_order)

for feat in features:
    combs = mat[feat]
    # Fill the strict upper triangle row by row, then mirror it. The pair
    # distances are presumably stored in itertools.combinations order, which
    # is the order np.triu_indices enumerates.
    tri = np.zeros((n_od, n_od))
    tri[np.triu_indices(n_od, 1)] = combs
    tri = tri + tri.T

    model = MDS(n_components=2, dissimilarity='precomputed', random_state=mds_seed)
    out = model.fit_transform(tri)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 3))

    # Panel 1: 2D embedding. Row i of the matrix is odor new_order[i], so the
    # points are labelled with it (the same labels as the heatmap ticks)
    # rather than with the position i+1.
    ax1.scatter(out[:, 0], out[:, 1], c='black', marker='o')
    for i, od in enumerate(new_order):
        ax1.annotate('O'+str(od), (out[i, 0], out[i, 1]))
    ax1.set_xlabel('component 1')
    ax1.set_ylabel('component 2')
    #ax1.axis('equal')

    # Panel 2: heatmap. np.tri(..., k=0) is 1 on and below the diagonal, so
    # those cells are masked and only the strict upper triangle is drawn.
    # plt.get_cmap replaces cm.get_cmap, which Matplotlib 3.9 removed.
    cmap = plt.get_cmap('viridis', 30)
    mask = np.tri(tri.shape[0], k=0)
    A = np.ma.array(tri, mask=mask)
    cax = ax2.imshow(A, vmin=0, vmax=3, interpolation="nearest", cmap=cmap)
    ax2.set_xticks(np.arange(n_od))
    ax2.set_yticks(np.arange(n_od))
    ax2.set_xticklabels(new_order, fontsize=11)
    ax2.set_yticklabels(new_order, fontsize=11)
    fig.colorbar(cax, ax=ax2)
    fig.subplots_adjust(top=0.92)

    title = 'Distance btw odors in {} domaine )'.format(feat)
    fig.suptitle(title, fontsize=12)

    fig.savefig(savename.format(feat))
    fig.savefig(savename.format(feat).replace('.png','.pdf'))
    # Release the figure so repeated iterations do not accumulate memory.
    plt.close(fig)


In [None]:
# Pearson correlation between the Pleasantness and Familiarity ratings of the
# 'Final' sheet, across odors.
PATH = '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/1bis_OE_BaseSam/JPlailly201306_seeg_ALS/behavior/'
filename = 'Recap_Odeurs_Evaluations.xlsx'
# `st` is not defined in this cell; it must already exist in the kernel.
PATH_SAVE = join(st.path, 'feature/TPSim_3groups_Enc/distance_graphs/')
savename = 'Correl_Fam_Pleas.png'

df = pd.read_excel(PATH + filename, sheet_name='Final')
pleas = df[['Pleasantness']]
fam = df[['Familiarity']]
# Pass the single column of each frame as a 1-D vector.
pleas_vec = pleas['Pleasantness'].values
fam_vec = fam['Familiarity'].values
R, p = pearsonr(pleas_vec, fam_vec)
print(R, p)

In [1]:
# Exploratory cell: load the Sigma Fragrance & Flavor data through pyrfume.
# NOTE(review): the output recorded for this cell ends with a
# FileNotFoundError on '.../pyrfume-data/sigma/sigma_ff_catalog.txt', so the
# cell did not run to completion; presumably the pyrfume data files are not
# available next to the installed package - confirm before relying on it.
import pyrfume
# Show where the package is installed.
print(pyrfume.__path__)
# Get raw data from the Sigma Fragrance & Flavor Catalog
from pyrfume import sigma_ff
descriptors, data = sigma_ff.get_data()

# Get a PubChem CID-indexed dataframe of the odorant and descriptor data from that catalog:
sigma = pyrfume.load_data('sigma/sigma.csv')


['/home/karim/.local/lib/python3.9/site-packages/pyrfume']


FileNotFoundError: [Errno 2] No such file or directory: '/home/karim/.local/lib/python3.9/site-packages/pyrfume-data/sigma/sigma_ff_catalog.txt'