## Import Libraries

In [1]:
from os import path
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, MaxNLocator
import scipy.io as sio

from brainpipe.classification import *
from brainpipe.system import study
from brainpipe.feature import power, amplitude, sigfilt
from brainpipe.visual import *
from brainpipe.statistics import *
from scipy.stats import *

## User variables

In [2]:
# Placeholder only: the decoding cells below rebind `bsl` to a list of
# baseline names before iterating over it.
bsl = None
# PATH TO DATA
# Study handle from brainpipe; only its `.path` attribute is used in this notebook.
st = study('Olfacto')
# Folder holding the power feature files (.npz) read by the decoding loop.
path_pow = path.join(st.path, 'feature/7_Power_E1E2_Odor_Good_Bad_EpiScore_Expi_AAL/')
# Root folder under which AUC arrays, permutation thresholds and figures are saved.
save_path = path.join(st.path, 'classified/2_Classif_Power_time_5_by_AAL_EpiScore_win700_step100_expi/')
# POWER & STATS PARAMETERS
nfreq = 7  # number of frequency bands looped over (`range(nfreq)`)
nperm = 100  # number of label permutations drawn per time window

-> Olfacto loaded


## Power Decoding - Good Bad Odors Encoding
### For ALL time points

In [4]:
from os import makedirs

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold as SKFold
from sklearn.metrics import roc_auc_score
from numpy.random import permutation


def _mean_cv_auc(X, labels, splitter):
    """Return the ROC AUC averaged over the folds of one cross-validation pass.

    Parameters
    ----------
    X : array of shape (n_trials, 1), the single feature to decode from.
    labels : 1-D integer array of class labels, one per trial.
    splitter : object exposing ``split(X, labels)`` (here a StratifiedKFold).

    A fresh LDA is fitted on every training fold and scored on its test fold.
    """
    fold_scores = []
    for train_index, test_index in splitter.split(X, labels):
        clf = LDA()
        clf.fit(X=X[train_index], y=labels[train_index])
        # NOTE(review): the AUC is computed from hard class predictions, as in
        # the original analysis; clf.decision_function would give a rank-based
        # AUC -- confirm which one is intended before changing it.
        y_pred = clf.predict(X[test_index])
        fold_scores.append(roc_auc_score(labels[test_index], y_pred, average='weighted'))
    return np.mean(fold_scores)


phases, bsl, subjects = ['odor'], ['None'], ['SEMC']
for b, su, phase in product(bsl, subjects, phases):
    # ======================= Load Power files (nfreq, nelec, nwin, ntrial) =======================
    bad_file = path.join(path_pow, su+'_'+phase+'_bad_bipo_sel_aal_5_'+b+'_pow.npz')
    good_file = path.join(path_pow, su+'_'+phase+'_good_bipo_sel_aal_5_'+b+'_pow.npz')
    # The context managers close the .npz archives once the arrays are in memory.
    with np.load(bad_file) as bad_mat:
        bad_data, names, channels = bad_mat['xpow'], bad_mat['labels'], bad_mat['channels']
        # The last 6 time entries are dropped, matching the data slicing below.
        freq_names, time = bad_mat['fname'], bad_mat['time'][:-6]
    with np.load(good_file) as good_mat:
        good_data = good_mat['xpow']
    print(su, 'bad shape: ', bad_data.shape, 'good shape: ', good_data.shape)

    # ======================= Select Power for 1 elec 1 freq =======================
    for elec_num in range(bad_data.shape[1]):
        for freq in range(nfreq):
            # Power for 1 elec // 1 freq // Bad-Good conditions: drop the last
            # 6 windows, then put trials on the first axis.
            bad_data_elec = bad_data[freq, elec_num][:-6].swapaxes(0, 1)
            good_data_elec = good_data[freq, elec_num][:-6].swapaxes(0, 1)
            print('data elec ', bad_data_elec.shape, good_data_elec.shape)
            elec, elec_label, freq_name = channels[elec_num], names[elec_num], freq_names[freq]
            print('elec ', elec, 'elec_label ', elec_label)

            # ======================= Classification Computation =======================
            # Data matrix: bad and good trials concatenated along the trial dimension
            x = np.concatenate((bad_data_elec, good_data_elec), axis=0)
            print('Size of the concatenated data: ', x.shape, 'Number time windows : ', x.shape[1])
            # Label vector (0 for bad odors and 1 for good odors)
            y = np.asarray([0]*bad_data_elec.shape[0] + [1]*good_data_elec.shape[0])
            print('Size of label for classif: ', len(y))

            # One shuffling splitter serves both the observed and the permuted
            # scores; with random_state=None every split() call reshuffles.
            skf = SKFold(n_splits=10, random_state=None, shuffle=True)

            # Observed AUC: 10 repetitions of a 10-fold CV for every time window
            auc = []
            for t in range(x.shape[1]):
                X = x[:, t].reshape(-1, 1)
                auc.append([_mean_cv_auc(X, y, skf) for _ in range(10)])
            auc = np.asarray(auc).T  # (n_repetitions, n_windows)

            # Null distribution: same CV on randomly permuted labels
            perm_scores = []
            for t in range(x.shape[1]):
                X = x[:, t].reshape(-1, 1)
                perm_rep = []
                for perm in range(nperm):
                    y_perm = y[permutation(len(y))]
                    perm_rep.append(_mean_cv_auc(X, y_perm, skf))
                perm_scores.append(perm_rep)
            perm_scores = np.asarray(perm_scores).T  # (nperm, n_windows)

            # brainpipe helper turning the permutation scores into score
            # thresholds -- presumably max-statistic corrected (maxst=True);
            # confirm against the brainpipe documentation.
            th_0_05_perm = perm_pvalue2level(perm_scores, p=0.05, maxst=True)
            th_0_01_perm = perm_pvalue2level(perm_scores, p=0.01, maxst=True)
            print('th_perm : ', th_0_05_perm[0], th_0_01_perm[0])

            # ======================= Create a pvalue vector for AUC measure =======================
            # Each window gets a nominal p-value depending on which permutation
            # threshold its mean AUC (across repetitions) exceeds.
            auc_pvals = []
            for win_auc in auc.mean(axis=0):
                if win_auc > th_0_01_perm[0]:
                    auc_pvals.append(0.009)
                elif win_auc > th_0_05_perm[0]:
                    auc_pvals.append(0.04)
                else:
                    auc_pvals.append(1)
            print(auc_pvals)

            # ======================= PLOT POWER ANALYSIS + STATS & DECODING ACCURACY =======================
            fig = plt.figure(1, figsize=(7, 7))
            title = 'Power-Stats-DA for '+su+' Bad/Good '+str(elec)+' '+str(elec_label)+' ('+str(elec_num)+')'
            fig.suptitle(title, fontsize=12)

            # Plot the POW + STATS
            plt.subplot(211)
            BorderPlot(time, x, y=y, kind='sem', alpha=0.2, color=['b', 'm'], linewidth=2,
                       ncol=1, xlabel='Time (s)', ylabel=r'Power', legend=['bad', 'good'])
            rmaxis(plt.gca(), ['right', 'top'])
            plt.legend(loc=0, handletextpad=0.1, frameon=False)
            plt.gca().yaxis.set_major_locator(MaxNLocator(3, integer=True))

            # Plot DA for the POW
            plt.subplot(212)
            BorderPlot(time, auc, color='b', kind='sd', xlabel='Time (s)', ylim=[0.4, 1.],
                       ylabel='Decoding accuracy (%)', linewidth=2, alpha=0.3)
            rmaxis(plt.gca(), ['right', 'top'])
            plt.legend(loc=0, handletextpad=0.1, frameon=False)
            plt.gca().yaxis.set_major_locator(MaxNLocator(3, integer=True))
            plt.plot(time, th_0_05_perm*np.ones(len(time)), '--', color='r', linewidth=2)

            # Filenames to save
            band_dir = path.join(save_path, str(freq)+'_'+freq_name)
            elec_tag = str(elec_label)+'_('+str(elec_num)+')'
            plot_file = su+'_Power_Good_Bad_'+str(elec)+'_'+elec_tag+'.png'
            name_auc = path.join(band_dir, 'auc', su+'_auc_Good_Bad_'+elec_tag+'.npy')
            name_th_0_05_perm = path.join(band_dir, 'auc', su+'_th_0_05_perm_'+elec_tag+'.npy')
            name_th_0_01_perm = path.join(band_dir, 'auc', su+'_th_0_01_perm_'+elec_tag+'.npy')
            plot_name = path.join(band_dir, 'fig', plot_file)
            # Create the output folders up front so the long computation is not
            # lost to a missing directory at save time.
            for sub_dir in ('auc', 'fig'):
                makedirs(path.join(band_dir, sub_dir), exist_ok=True)

            # Criteria to be significant: at least 3 consecutive windows under p = 0.05
            auc_pvals = np.ravel(auc_pvals)
            underp = np.where(auc_pvals < 0.05)[0]
            pvsplit = np.split(underp, np.where(np.diff(underp) != 1)[0]+1)
            if any(len(k) >= 3 for k in pvsplit):
                makedirs(path.join(band_dir, 'signif'), exist_ok=True)
                plot_sig = path.join(band_dir, 'signif', plot_file)
                plt.savefig(plot_sig, dpi=300, bbox_inches='tight')

            # Save scores, thresholds and figure
            np.save(name_auc, auc)
            np.save(name_th_0_05_perm, th_0_05_perm[0])
            np.save(name_th_0_01_perm, th_0_01_perm[0])
            plt.savefig(plot_name, dpi=300, bbox_inches='tight')
            plt.clf()
            plt.close()
            del bad_data_elec, good_data_elec, X, auc
    del bad_data, good_data

TypeError: Can't convert 'list' object to str implicitly

### Classification AUC over 3 time windows

In [None]:
from os import makedirs

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold as SKFold
from sklearn.metrics import roc_auc_score
from numpy.random import permutation


def _mean_cv_auc(X, labels, splitter):
    """Return the ROC AUC averaged over the folds of one cross-validation pass.

    Parameters
    ----------
    X : array of shape (n_trials, 1), the single feature to decode from.
    labels : 1-D integer array of class labels, one per trial.
    splitter : object exposing ``split(X, labels)`` (here a StratifiedKFold).

    A fresh LDA is fitted on every training fold and scored on its test fold.
    """
    fold_scores = []
    for train_index, test_index in splitter.split(X, labels):
        clf = LDA()
        clf.fit(X=X[train_index], y=labels[train_index])
        # NOTE(review): the AUC is computed from hard class predictions, as in
        # the original analysis; clf.decision_function would give a rank-based
        # AUC -- confirm which one is intended before changing it.
        y_pred = clf.predict(X[test_index])
        fold_scores.append(roc_auc_score(labels[test_index], y_pred, average='weighted'))
    return np.mean(fold_scores)


# PATH TO DATA
st = study('Olfacto')
path_pow = path.join(st.path, 'feature/7_Power_E1E2_Odor_Good_Bad_EpiScore_Expi/')
# POWER & STATS PARAMETERS
nfreq = 7
nperm = 100

phases, bsl, subjects = ['odor'], ['None', 'mean_trial'], ['FERJ', 'MICP', 'VACJ', 'SEMC', 'LEFC', 'PIRJ', 'CHAF']
for b, su, phase in product(bsl, subjects, phases):
    save_path = path.join(st.path, 'classified/1_Classif_Mean_Power_'+str(b)+'_EpiScore_allrois_3wins_expi/')
    # ======================= Load Power files (nfreq, nelec, nwin, ntrial) =======================
    bad_file = path.join(path_pow, su+'_'+phase+'_bad_bipo_sel_phys_'+b+'_500_pow.npz')
    good_file = path.join(path_pow, su+'_'+phase+'_good_bipo_sel_phys_'+b+'_500_pow.npz')
    # Each archive is opened once and closed as soon as its arrays are read.
    with np.load(bad_file) as bad_mat:
        bad_data, names = bad_mat['xpow'], bad_mat['labels']
        channels, freq_names = bad_mat['channels'], bad_mat['fname']
    with np.load(good_file) as good_mat:
        good_data = good_mat['xpow']
    print(su, 'bad shape: ', bad_data.shape, 'good shape: ', good_data.shape)

    # ======================= Select Power for 1 elec 1 freq =======================
    for elec_num in range(bad_data.shape[1]):
        for freq in range(nfreq):
            # Power for 1 elec // 1 freq // Bad-Good conditions, trials on the first axis
            bad_data_elec = bad_data[freq, elec_num].swapaxes(0, 1)
            good_data_elec = good_data[freq, elec_num].swapaxes(0, 1)
            print('data elec ', bad_data_elec.shape, good_data_elec.shape)
            elec, elec_label, freq_name = channels[elec_num], names[elec_num], freq_names[freq]
            print('elec ', elec, 'elec_label ', elec_label)

            # ======================= Classification Computation =======================
            # Data matrix: bad and good trials concatenated along the trial dimension
            x = np.concatenate((bad_data_elec, good_data_elec), axis=0)
            print('Size of the concatenated data: ', x.shape, 'Number time windows : ', x.shape[1])
            # Label vector (0 for bad odors and 1 for good odors)
            y = np.asarray([0]*bad_data_elec.shape[0] + [1]*good_data_elec.shape[0])
            print('Size of label for classif: ', len(y))

            # Observed AUC: 10 repetitions of a 10-fold CV for every time window.
            # shuffle=True is required for random_state to take effect: without
            # it every repetition reuses the same folds (and recent scikit-learn
            # versions raise a ValueError).
            auc = []
            for t in range(x.shape[1]):
                X = x[:, t].reshape(-1, 1)
                score_rep = []
                for i in range(10):
                    skf = SKFold(n_splits=10, shuffle=True, random_state=i)
                    score_rep.append(_mean_cv_auc(X, y, skf))
                auc.append(score_rep)
            auc = np.asarray(auc).T  # (n_repetitions, n_windows)

            # Null distribution: same CV on randomly permuted labels. The label
            # permutation already randomises fold membership, so an unshuffled
            # splitter is used, created explicitly instead of relying on the
            # `skf` left over from the loop above.
            perm_skf = SKFold(n_splits=10)
            perm_scores = []
            for t in range(x.shape[1]):
                X = x[:, t].reshape(-1, 1)
                perm_rep = []
                for perm in range(nperm):
                    y_perm = y[permutation(len(y))]
                    perm_rep.append(_mean_cv_auc(X, y_perm, perm_skf))
                perm_scores.append(perm_rep)
            perm_scores = np.asarray(perm_scores).T  # (nperm, n_windows)

            # brainpipe helper turning the permutation scores into score
            # thresholds -- presumably max-statistic corrected (maxst=True);
            # confirm against the brainpipe documentation.
            th_0_05_perm = perm_pvalue2level(perm_scores, p=0.05, maxst=True)
            th_0_01_perm = perm_pvalue2level(perm_scores, p=0.01, maxst=True)
            print('th_perm : ', th_0_05_perm[0], th_0_01_perm[0])

            # ======================= Create a pvalue vector for AUC measure =======================
            # Each window gets a nominal p-value depending on which permutation
            # threshold its mean AUC (across repetitions) exceeds.
            auc_pvals = []
            for win_auc in auc.mean(axis=0):
                if win_auc > th_0_01_perm[0]:
                    auc_pvals.append(0.009)
                elif win_auc > th_0_05_perm[0]:
                    auc_pvals.append(0.04)
                else:
                    auc_pvals.append(1)
            print(auc_pvals)

            # ======================= PLOT POWER ANALYSIS + STATS & DECODING ACCURACY =======================
            fig = plt.figure(1, figsize=(5, 7))
            # The title reports the baseline of the current iteration (`b`),
            # not the first entry of the baseline list.
            title = 'Power ('+str(b)+') & AUC Score\n'+su+' Bad/Good '+str(elec)+' '+str(elec_label)+' ('+str(elec_num)+')'
            fig.suptitle(title, fontsize=12)

            # Mean power per window, bad vs good, with SD error bars
            plt.subplot(211)
            times_pow, w = np.arange(1, 6, 2), 0.8
            mean_b, sd_b = bad_data_elec.mean(axis=0), bad_data_elec.std(axis=0)
            mean_g, sd_g = good_data_elec.mean(axis=0), good_data_elec.std(axis=0)
            plt.bar(times_pow-w/2, mean_b, color='b', yerr=sd_b, label='bad', width=w, align='center')
            plt.bar(times_pow+w/2, mean_g, color='m', yerr=sd_g, label='good', width=w, align='center')
            plt.ylabel('Power')
            plt.xlabel('Time (s)')
            plt.xticks(times_pow, ['0 - 1.5', '1.5 - 3', '3 - 4.5'])
            rmaxis(plt.gca(), ['right', 'top'])
            addPval(plt.gca(), auc_pvals, p=0.05, x=times_pow, y=mean_b/2, color='r', lw=2)
            addPval(plt.gca(), auc_pvals, p=0.01, x=times_pow, y=mean_b/2, color='orange', lw=2, minsucc=2)
            plt.legend(loc=0, handletextpad=0.1, frameon=False)

            # Plot DA for the POW
            plt.subplot(212)
            mean_auc, sd_auc = auc.mean(axis=0), auc.std(axis=0)
            plt.bar(times_pow, mean_auc, color='grey', yerr=sd_auc, width=w, align='center')
            rmaxis(plt.gca(), ['right', 'top'])
            plt.ylabel('AUC score')
            plt.xlabel('Time (s)')
            plt.xticks(times_pow, ['0 - 1.5', '1.5 - 3', '3 - 4.5'])
            plt.ylim(0, 1)
            # Permutation thresholds drawn as horizontal dashed lines
            plt.plot(np.arange(0, 9, 3), th_0_05_perm*np.ones(len(times_pow)), '--', color='r',
                     linewidth=2, label='p < 0.05')
            plt.plot(np.arange(0, 9, 3), th_0_01_perm*np.ones(len(times_pow)), '--', color='orange',
                     linewidth=2, label='p < 0.01')
            plt.legend(loc=0, handletextpad=0.1, frameon=False)
            # Print the mean AUC value inside each bar
            for i, v in enumerate(mean_auc):
                plt.text(times_pow[i]-w/3, v-0.1, str(round(v, 2)), color='black')

            # Filenames to save
            band_dir = path.join(save_path, su, str(freq)+'_'+freq_name)
            elec_tag = str(elec_label)+'_('+str(elec_num)+')'
            name_auc = path.join(band_dir, 'auc', su+'_auc_Good_Bad_'+elec_tag+'.npy')
            name_th_0_05_perm = path.join(band_dir, 'auc', su+'_th_0_05_perm_'+elec_tag+'.npy')
            name_th_0_01_perm = path.join(band_dir, 'auc', su+'_th_0_01_perm_'+elec_tag+'.npy')
            plot_name = path.join(band_dir, 'fig', su+'_Power_Good_Bad_'+str(elec)+'_'+elec_tag+'.png')
            # Create the output folders up front so the long computation is not
            # lost to a missing directory at save time.
            for sub_dir in ('auc', 'fig'):
                makedirs(path.join(band_dir, sub_dir), exist_ok=True)

            # Save scores, thresholds and figure
            np.save(name_auc, auc)
            np.save(name_th_0_05_perm, th_0_05_perm[0])
            np.save(name_th_0_01_perm, th_0_01_perm[0])
            plt.savefig(plot_name, dpi=300, bbox_inches='tight')
            plt.clf()
            plt.close()
            del bad_data_elec, good_data_elec, X, auc
    del bad_data, good_data