## Import Libraries

In [2]:
from os import path
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, MaxNLocator
import scipy.io as sio

from brainpipe.classification import *
from brainpipe.system import study
from brainpipe.feature import power, amplitude, sigfilt
from brainpipe.visual import *
from brainpipe.statistics import *
from scipy.stats import *

## Power Decoding - Partial/Detailed Encoding
### For ALL time points

In [6]:
# Quick sanity check: open one archive from the Retrieval_Rest_LowHigh
# database and report the dimensions of the array stored under key 'x'.
PATH = '/media/karim/Datas4To/1_Analyses_Intra_EM_Odor/Olfacto/database/Retrieval_Rest_LowHigh/'
archive = np.load(PATH + 'VACJ_odor_high_bipo_sel_physFT.npz')
mat = archive['x']
print(mat.shape)

(74, 1536, 12)


In [None]:
from os import makedirs

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold as SKFold
from sklearn.metrics import roc_auc_score
from numpy.random import permutation

# Per-electrode / per-frequency decoding of the two conditions in `conds`.
# For every subject, electrode and frequency index in range(2, freqs):
#   1. stack the trials of both conditions into a (ntrials, nwin) matrix,
#   2. score an LDA with n_rep repetitions of a shuffled n_folds-fold CV,
#   3. build a null distribution from `nperm` label permutations,
#   4. save the scores and the permutation scores as .npy files.
# Results already present on disk are skipped.

conds,phases, subjects = ['low','high'],['odor'],['SEMC','VACJ','FERJ','LEFC','PIRJ','CHAF']#,'MICP','VACJ','SEMC','LEFC','PIRJ']
color_codes = ['darkorange','blue']
st = study('Olfacto')
freqs = 7
nperm = 1000
n_rep = 10    # repetitions of the shuffled cross-validation
n_folds = 5   # folds per repetition

# NOTE(review): `bsl` (suffix appended to the power file names below) is not
# defined anywhere in the visible notebook. It must exist in the session
# before this cell runs, otherwise building the file name raises NameError.
# Presumably a baseline tag -- confirm the intended value.
# NOTE(review): path_pow points at the TPSim feature folder although the files
# loaded here are '*_bipo_sel_physFT_pow*.npz' -- confirm the folder.


def _cv_auc(X, y, cv):
    """Return the fold-averaged ROC-AUC of an LDA cross-validated with `cv`.

    Parameters
    ----------
    X : array of shape (n_samples, 1)
        Single-feature design matrix.
    y : array of shape (n_samples,)
        Class labels, used both for stratification and for fitting/scoring.
    cv : cross-validator exposing ``split(X, y)``.

    Returns
    -------
    float
        Mean of the per-fold ``roc_auc_score`` values.

    NOTE(review): the score is computed from hard class predictions
    (``clf.predict``), not from decision values, so for two classes it
    equals the balanced accuracy. Kept as in the original -- confirm intent.
    """
    fold_scores = []
    for train_index, test_index in cv.split(X, y):
        clf = LDA()  # fresh classifier per fold
        clf.fit(X=X[train_index], y=y[train_index])
        y_pred = clf.predict(X[test_index])
        fold_scores.append(roc_auc_score(y[test_index], y_pred, average='weighted'))
    return np.mean(fold_scores)


for su,phase in product(subjects,phases):
    path_pow = path.join(st.path, 'feature/TPSim_Enc_Ret_By_Odor/TPS_R_p_by_cond/')
    save_path = path.join(st.path, 'classified/Classif_TPSim_Encoding_Retrieval_Odor_1000perm/')

    #=========================== Load Power files (nfreq, nelec, nwin, ntrial) =================================
    mat0 = np.load(path.join(path_pow, su+'_'+phase+'_'+conds[0]+'_bipo_sel_physFT_pow'+bsl+'.npz'))
    names, channels, freq_names, time = mat0['Mai_RL'], mat0['channels'],mat0['fname'], mat0['time']
    mat1 = np.load(path.join(path_pow, su+'_'+phase+'_'+conds[1]+'_bipo_sel_physFT_pow'+bsl+'.npz'))
    nelecs = mat0['xpow'].shape[1]
    # Keep only index 18 of the third axis, re-inserted as a singleton axis so
    # the arrays stay 4-D.
    pow_list = [m['xpow'][:,:,18,:][:,:,np.newaxis,:] for m in (mat0, mat1)]
    print (su, 'power shape: ', [cond_pow.shape for cond_pow in pow_list])

    # =========================== Select Power for 1 elec 1 freq =================================
    for elec_num in range(nelecs):
        for freq in range(2,freqs):
            elec, elec_label, freq_name = channels[elec_num], names[elec_num], freq_names[freq]
            print ('elec ', elec, 'elec_label ', elec_label)
            #Filenames to save (both files live in the same folder)
            out_dir = save_path+str(freq)+'_'+freq_name+'/auc/'
            name_auc = (out_dir+su +'_auc_'+conds[0]+'_'+conds[1]+'_'+str(elec_label)+'_('+str(elec_num)+').npy')
            name_perm = (out_dir+su +'_perm_'+str(elec_label)+'_('+str(elec_num)+').npy')

            if path.exists(name_auc):
                print(su,bsl,phase,elec_num,freq,'already computed')
                continue

            print('--» processing',su, 'elec', elec_num,'/',nelecs, 'freq',freq)
            # Create the output folder up front so the long permutation run
            # cannot be lost to a missing directory at save time.
            makedirs(out_dir, exist_ok=True)

            # One (ntrials, nwin) matrix per condition. The loop variable is
            # deliberately not called `power`, which would overwrite the
            # function imported from brainpipe.feature.
            pow_data_elec = [cond_pow[freq,elec_num].swapaxes(0,1) for cond_pow in pow_list]

            # =============================  Classification Computation ====================================
            # create a data matrix, concatenate along the trial dimension
            x = np.concatenate(pow_data_elec, axis=0)
            print ('Size of the concatenated data: ', x.shape, 'Number time windows : ', x.shape[1])
            # label i for every trial of condition i
            y = np.hstack([np.full(len(trials), i) for i, trials in enumerate(pow_data_elec)])
            print ('Size of label for classif: ', len(y))

            # Observed scores: one row per window, one column per repetition,
            # transposed to (n_rep, nwin) at the end.
            auc_rows = []
            for t in range(x.shape[1]):
                X = x[:,t].reshape(-1, 1)
                auc_rows.append([
                    _cv_auc(X, y, SKFold(n_splits=n_folds, random_state=None, shuffle=True))
                    for _ in range(n_rep)
                ])
            auc = np.asarray(auc_rows).T

            # Null distribution: same pipeline on shuffled labels, with an
            # explicitly created splitter instead of one leaked from the loop
            # above. Final shape is (nperm, nwin).
            perm_cv = SKFold(n_splits=n_folds, random_state=None, shuffle=True)
            perm_rows = []
            for t in range(x.shape[1]):
                X = x[:,t].reshape(-1, 1)
                perm_row = []
                for _ in range(nperm):
                    y_perm = y[permutation(len(y))]
                    perm_row.append(_cv_auc(X, y_perm, perm_cv))
                perm_rows.append(perm_row)
            perm_scores = np.asarray(perm_rows).T

            # Variable names now match the p-value actually requested.
            th_0_05_perm = perm_pvalue2level(perm_scores, p=0.05, maxst=True)
            th_0_01_perm = perm_pvalue2level(perm_scores, p=0.01, maxst=True)
            th_0_001_perm = perm_pvalue2level(perm_scores, p=0.001, maxst=True)
            print('th_perm 05: ', th_0_05_perm[0], '01',th_0_01_perm[0],
                  '001',th_0_001_perm[0], 'auc_max', np.max(auc))

        # ============================== PLOT POWER ANALYSIS + STATS & DECODING ACCURACY ===================================================
#                 # plot and figure parameters
#                 xfmt = ScalarFormatter(useMathText=True)
#                 xfmt.set_powerlimits((0,3))
#                 fig = plt.figure(1,figsize=(7,7))
#                 title = 'Power-Stats-DA for '+su+' '+conds[0]+' vs '+conds[1]+' '+str(elec)+' '+str(elec_label)+' ('+str(elec_num)+')'
#                 fig.suptitle(title, fontsize=12)

#                 # Plot the POW + STATS
#                 plt.subplot(211)
#                 BorderPlot(time, x, y=y, kind='sem', alpha=0.2, color=color_codes,linewidth=2,
#                            ncol=1, xlabel='Time (s)',ylabel = r'Power', legend=conds)
#                 rmaxis(plt.gca(), ['right', 'top'])
#                 addLines(plt.gca(), vLines=[0], vColor=['darkgray'], vWidth=[2])
#                 plt.legend(loc=0, handletextpad=0.1, frameon=False)
#                 plt.gca().yaxis.set_major_locator(MaxNLocator(3,integer=True))

#                 # Plot DA for the POW
#                 plt.subplot(212)
#                 BorderPlot(time, auc, color='b', kind='sd',xlabel='Time (s)', ylim=[0.4,1.], ylabel='Decoding accuracy (%)',linewidth=2, alpha=0.3)
#                 rmaxis(plt.gca(), ['right', 'top'])
#                 addLines(plt.gca(), vLines=[0], vColor=['darkgray'], vWidth=[2])
#                 plt.gca().yaxis.set_major_locator(MaxNLocator(3,integer=True))
#                 plt.plot(time, th_0_05_perm*np.ones(len(time)), '--', color='r', linewidth=2)
#                 #plt.plot(times_plot, th_0_01_perm*np.ones(len(times_plot)), '--', color='orange', linewidth=2)

            # Save the observed and permutation scores
            np.save(name_auc, auc)
            np.save(name_perm, perm_scores)
#                 plt.savefig(plot_name, dpi=300, bbox_inches='tight')
#                 plt.clf()
#                 plt.close()
            # release per-electrode arrays before the next iteration
            del X, auc, pow_data_elec
    del pow_list

#### ML for the TPSim - ST analysis

In [16]:
from os import makedirs

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold as SKFold
from sklearn.metrics import roc_auc_score
from numpy.random import permutation

# Per-electrode / per-frequency decoding of the two conditions in `conds`
# from the 'TPS' arrays. For every subject, electrode and frequency band:
#   1. stack the trials of both conditions into a single-column matrix,
#   2. score an LDA with n_rep repetitions of a shuffled n_folds-fold CV,
#   3. build a null distribution from `nperm` label permutations,
#   4. save the scores and the permutation scores as .npy files.
# Results already present on disk are skipped silently.

conds,phases, subjects = ['low','high'],['odor'],['SEMC','VACJ','FERJ','LEFC','PIRJ','CHAF']#,'MICP','VACJ','SEMC','LEFC','PIRJ']
freq_names = ['2_theta','3_alpha','4_beta','5_gamma1','6_gamma2']
color_codes = ['darkorange','blue']
st = study('Olfacto')
nperm = 1000
n_rep = 10    # repetitions of the shuffled cross-validation
n_folds = 5   # folds per repetition


def _cv_auc(X, y, cv):
    """Return the fold-averaged ROC-AUC of an LDA cross-validated with `cv`.

    Parameters
    ----------
    X : array of shape (n_samples, 1)
        Single-feature design matrix.
    y : array of shape (n_samples,)
        Class labels, used both for stratification and for fitting/scoring.
    cv : cross-validator exposing ``split(X, y)``.

    Returns
    -------
    float
        Mean of the per-fold ``roc_auc_score`` values.

    NOTE(review): the score is computed from hard class predictions
    (``clf.predict``), not from decision values, so for two classes it
    equals the balanced accuracy. Kept as in the original -- confirm intent.
    """
    fold_scores = []
    for train_index, test_index in cv.split(X, y):
        clf = LDA()  # fresh classifier per fold
        clf.fit(X=X[train_index], y=y[train_index])
        y_pred = clf.predict(X[test_index])
        fold_scores.append(roc_auc_score(y[test_index], y_pred, average='weighted'))
    return np.mean(fold_scores)


for su,phase in product(subjects,phases):
    path_pow = path.join(st.path, 'feature/TPSim_Enc_Ret_By_Odor/TPS_R_p_by_cond/')
    save_path = path.join(st.path, 'classified/Classif_TPSim_Encoding_Retrieval_Odor_1000perm/')

    #=========================== Load TPS files (nfreq, nelecs, ntrials) =================================
    mat0 = np.load(path.join(path_pow, 'TPS_spear_'+su+'_'+phase+'_'+conds[0]+'.npz'))
    names, channels = mat0['label'], mat0['channel']
    mat1 = np.load(path.join(path_pow, 'TPS_spear_'+su+'_'+phase+'_'+conds[1]+'.npz'))
    nelecs = mat0['TPS'].shape[1]
    pow_list = [mat0['TPS'], mat1['TPS']]
    print (su, 'power shape: ', [cond_tps.shape for cond_tps in pow_list])

    # =========================== Select data for 1 elec 1 freq =================================
    for elec_num in range(nelecs):
        for freq in range(len(freq_names)):
            elec, elec_label, freq_name = channels[elec_num], names[elec_num], freq_names[freq]
            print ('elec ', elec, 'elec_label ', elec_label)
            #Filenames to save (both files live in the same folder)
            out_dir = save_path+freq_name+'/'
            name_auc = (out_dir+su +'_auc_'+conds[0]+'_'+conds[1]+'_'+str(elec_label)+'_('+str(elec_num)+').npy')
            name_perm = (out_dir+su +'_perm_'+str(elec_label)+'_('+str(elec_num)+').npy')

            if path.exists(name_auc):
                # already computed on a previous run
                continue

            print('--» processing',su, 'elec', elec_num,'/',nelecs, 'freq',freq)
            # Create the output folder up front so the long permutation run
            # cannot be lost to a missing directory at save time.
            makedirs(out_dir, exist_ok=True)

            # One (ntrials, 1) column per condition. The loop variable is
            # deliberately not called `power`, which would overwrite the
            # function imported from brainpipe.feature.
            pow_data_elec = [cond_tps[freq,elec_num][np.newaxis].swapaxes(0,1) for cond_tps in pow_list]
            print('mean TPSim for low & high', [np.mean(trials) for trials in pow_data_elec])

            # =============================  Classification Computation ====================================
            # create a data matrix, concatenate along the trial dimension
            x = np.concatenate(pow_data_elec, axis=0)
            print ('Size of the concatenated data: ', x.shape, 'Number time windows : ', x.shape[1])
            # label i for every trial of condition i
            y = np.hstack([np.full(len(trials), i) for i, trials in enumerate(pow_data_elec)])
            print ('Size of label for classif: ', len(y))

            # Observed scores: one row per column of x, one column per
            # repetition, transposed to (n_rep, x.shape[1]) at the end.
            auc_rows = []
            for t in range(x.shape[1]):
                X = x[:,t].reshape(-1, 1)
                auc_rows.append([
                    _cv_auc(X, y, SKFold(n_splits=n_folds, random_state=None, shuffle=True))
                    for _ in range(n_rep)
                ])
            auc = np.asarray(auc_rows).T

            # Null distribution: same pipeline on shuffled labels, with an
            # explicitly created splitter instead of one leaked from the loop
            # above. Final shape is (nperm, x.shape[1]).
            perm_cv = SKFold(n_splits=n_folds, random_state=None, shuffle=True)
            perm_rows = []
            for t in range(x.shape[1]):
                X = x[:,t].reshape(-1, 1)
                perm_row = []
                for _ in range(nperm):
                    y_perm = y[permutation(len(y))]
                    perm_row.append(_cv_auc(X, y_perm, perm_cv))
                perm_rows.append(perm_row)
            perm_scores = np.asarray(perm_rows).T

            th_0_05_perm = perm_pvalue2level(perm_scores, p=0.05, maxst=True)
            th_0_01_perm = perm_pvalue2level(perm_scores, p=0.01, maxst=True)
            th_0_001_perm = perm_pvalue2level(perm_scores, p=0.001, maxst=True)
            print('th_perm 05: ', th_0_05_perm[0], '01',th_0_01_perm[0],
                  '001',th_0_001_perm[0], 'auc_mean', np.mean(auc))
            np.save(name_auc, auc)
            np.save(name_perm, perm_scores)
            # release per-electrode arrays before the next iteration
            del X, auc, pow_data_elec
    del pow_list

-> Olfacto loaded
SEMC power shape:  [(5, 65, 93), (5, 65, 27)]
elec  b12-b11 elec_label  MTG
--» processing SEMC elec 0 / 65 freq 0
mean TPSim for low & high [0.01409976554288948, 0.10994152046783627]
Size of the concatenated data:  (120, 1) Number time windows :  1
Size of label for classif:  120
th_perm 05:  0.5 01 0.5 001 0.514444444444 auc_mean 0.5
elec  b12-b11 elec_label  MTG
--» processing SEMC elec 0 / 65 freq 1
mean TPSim for low & high [-0.016848573045517016, 0.080980228348649361]
Size of the concatenated data:  (120, 1) Number time windows :  1
Size of label for classif:  120
th_perm 05:  0.5 01 0.5 001 0.52 auc_mean 0.5
elec  b12-b11 elec_label  MTG
--» processing SEMC elec 0 / 65 freq 2
mean TPSim for low & high [0.11378446115288218, 0.16819827346143135]
Size of the concatenated data:  (120, 1) Number time windows :  1
Size of label for classif:  120
th_perm 05:  0.5 01 0.5 001 0.516666666667 auc_mean 0.5
elec  b12-b11 elec_label  MTG
--» processing SEMC elec 0 / 65 freq 

  S**2))[:self._max_components]


th_perm 05:  0.5 01 0.5 001 0.573918128655 auc_mean 0.549415204678
elec  o10-o9 elec_label  OFC
--» processing SEMC elec 28 / 65 freq 4
mean TPSim for low & high [0.19152720510954799, 0.031913116123642435]
Size of the concatenated data:  (120, 1) Number time windows :  1
Size of label for classif:  120
th_perm 05:  0.5 01 0.5 001 0.514736842105 auc_mean 0.499473684211
elec  o11-o10 elec_label  IFG
--» processing SEMC elec 29 / 65 freq 0
mean TPSim for low & high [-0.010105909936130647, -0.44706209969367866]
Size of the concatenated data:  (120, 1) Number time windows :  1
Size of label for classif:  120
th_perm 05:  0.5 01 0.5 001 0.529473684211 auc_mean 0.498216374269
elec  o11-o10 elec_label  IFG
--» processing SEMC elec 29 / 65 freq 1
mean TPSim for low & high [0.09801924165251838, -0.12843219159008634]
Size of the concatenated data:  (120, 1) Number time windows :  1
Size of label for classif:  120
th_perm 05:  0.5 01 0.5 001 0.52 auc_mean 0.499473684211
elec  o11-o10 elec_label  I

In [16]:
from sklearn.model_selection import StratifiedKFold

# Toy check of how a shuffled StratifiedKFold behaves when split() is called
# repeatedly: 8 two-feature samples, 2 of class 0 and 6 of class 1.
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [2,6], [8,3],[4,2],[4,3]])
y = np.array([0,0,1,1,1,1,1,1])
skf = StratifiedKFold(n_splits=2,shuffle=True)

# Ten passes over the same splitter; each pass asks for a new pair of folds.
passes = ((i, fold) for i in range(10) for fold in skf.split(X, y))
for i, (train_index, test_index) in passes:
    print(i, "TRAIN:", train_index, "TEST:", test_index)
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

0 TRAIN: [1 2 4 5] TEST: [0 3 6 7]
0 TRAIN: [0 3 6 7] TEST: [1 2 4 5]
1 TRAIN: [0 5 6 7] TEST: [1 2 3 4]
1 TRAIN: [1 2 3 4] TEST: [0 5 6 7]
2 TRAIN: [1 2 3 6] TEST: [0 4 5 7]
2 TRAIN: [0 4 5 7] TEST: [1 2 3 6]
3 TRAIN: [1 4 6 7] TEST: [0 2 3 5]
3 TRAIN: [0 2 3 5] TEST: [1 4 6 7]
4 TRAIN: [1 3 5 6] TEST: [0 2 4 7]
4 TRAIN: [0 2 4 7] TEST: [1 3 5 6]
5 TRAIN: [1 3 5 7] TEST: [0 2 4 6]
5 TRAIN: [0 2 4 6] TEST: [1 3 5 7]
6 TRAIN: [1 4 5 7] TEST: [0 2 3 6]
6 TRAIN: [0 2 3 6] TEST: [1 4 5 7]
7 TRAIN: [1 2 4 7] TEST: [0 3 5 6]
7 TRAIN: [0 3 5 6] TEST: [1 2 4 7]
8 TRAIN: [0 3 4 6] TEST: [1 2 5 7]
8 TRAIN: [1 2 5 7] TEST: [0 3 4 6]
9 TRAIN: [1 3 6 7] TEST: [0 2 4 5]
9 TRAIN: [0 2 4 5] TEST: [1 3 6 7]


In [31]:
# score_rep = np.asarray([])
# for i in range(10):
#     score_cv = []
#     for train_index, test_index in cv.split(X, y):
#         X_train, X_test = X[train_index], X[test_index]
#         y_train, y_test = y[train_index], y[test_index]
#         clf.fit(X=X_train, y=y_train)
#         y_pred = clf.predict(X_test)
#         score_cv.append(roc_auc_score(y_test,y_pred,average='weighted'))
#     score_rep = np.vstack(append(np.mean(score_cv))
# score_rep = np.asarray(score_rep)

# Scratch check of the "vstack-or-initialise" accumulation idiom: start from
# an empty array, then append one averaged value per repetition.
sc_rep = np.array([])
for i in range(10):
    # five lists of 15 random values stand in for per-fold scores
    score_cv = [list(np.random.rand(15)) for j in range(5)]
    rep_mean = np.mean(score_cv)  # grand mean over all 5 x 15 values
    if np.size(sc_rep):
        sc_rep = np.vstack((sc_rep, rep_mean))
    else:
        # first pass: nothing accumulated yet, so just take the value
        sc_rep = rep_mean
print(sc_rep.shape)
#             score_rep = np.vstack(append(np.mean(score_cv))
#         score_rep = np.asarray(score_rep)

(10, 1)
