## Import Libraries

In [1]:
from os import path
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, MaxNLocator
import scipy.io as sio
from scipy.signal import savgol_filter

from brainpipe.classification import *
from brainpipe.system import study
from brainpipe.feature import power, amplitude, sigfilt
from brainpipe.visual import *
from brainpipe.statistics import *
from scipy.stats import *

### Power Decoding - Cycle power

In [2]:
from os import listdir
st = study('respi')
path_pow = path.join(st.path, 'Power_reshape/')

files = listdir(path_pow)
for fi in files:
    mat = np.load(path_pow+fi)
    print(mat.files)
    print(fi,mat['pow'].shape,mat['f'],len(mat['labels']))
    /0

-> respi loaded
['BA', 'f', 'aal', 'xyz', 'labels', 'channels', 'fname', 'pow']
CHAF_E_no_odor_pow_reshape_40.npz (96, 4, 150, 40) [[  4   8]
 [  8  14]
 [ 14  30]
 [ 30 120]] 96


TypeError: 'int' object is not callable

In [24]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold as SKFold
from sklearn.metrics import roc_auc_score
from numpy.random import permutation
from joblib import delayed, Parallel
import random

conds,cycles = ['odorall','no_odor'],['_reshape', ''] #no_odor
subjects = ['SEMC','MICP','VACJ','LEFC','PIRJ','FERJ','CHAF']
color_codes = ['darkorange','blue']
nb_points_cycle = 40
st = study('respi')
freqs = 4
nperm = 1000

path_pow = path.join(st.path, 'Power_reshape/')
save_path = path.join(st.path, 'classified/LDA_Odor_No_Odor_new_balanced/')

def classif_by_elec(elec_num,pow_list):
    for freq in range(freqs):
        elec, elec_label, freq_name = channels[elec_num], names[elec_num], freq_names[freq]
        print ('elec ', elec, 'elec_label ', elec_label)
        #Filenames to save
        name_auc = (save_path+str(freq)+'_'+freq_name+'/auc/'+su +'_auc_'+cond+cycles[0]+'_'+str(elec_label)+'_('+str(elec_num)+').npy')
        name_perm = (save_path+str(freq)+'_'+freq_name+'/auc/'+su +'_perm_'+cond+cycles[0]+'_'+str(elec_label)+'_('+str(elec_num)+').npy')
        plot_name = (save_path+str(freq)+'_'+freq_name+'/fig/'+su +'_Power_'+cond+cycles[0]+'_'+str(elec_label)+'_('+str(elec_num)+').png')    
        if path.exists(name_auc):
            print(su,cycles[0],cycles[1],cond,elec_num,freq,'already computed')
        else:
            print('--» processing',su, 'elec', elec_num,'/',nelecs, 'freq',freq)
            pow_data_elec = []
            for i,power in enumerate(pow_list):
                pow_data_elec.append(power[freq,elec_num].swapaxes(0,1))
            nwin = power.shape[1]

            # create a data matrix, concatenate along the trial dimension
            x = np.concatenate(pow_data_elec, axis=0)
            x = savgol_filter(x, 11, 3, axis=1)
            print ('Size of the concatenated data: ', x.shape, 'Number time windows : ', x.shape[1])
            y = np.hstack([np.array([i]*len(power)) for i, power in enumerate(pow_data_elec)])
            print ('Size of label for classif: ', len(y))

            auc = np.array([])
            for t in range(x.shape[1]):
                X = x[:,t]
                X = X.reshape(-1, 1)
                score_rep = []
                for i in range(10):
                    k = 10
                    skf = SKFold(n_splits=k, random_state=None, shuffle=True)
                    skf.get_n_splits(X, y)
                    score_cv = []
                    for train_index, test_index in skf.split(X, y):
                        clf = LDA()
                        X_train, X_test = X[train_index], X[test_index]
                        y_train, y_test = y[train_index], y[test_index]
                        clf.fit(X=X_train, y=y_train)
                        y_pred = clf.predict(X_test)
                        score_cv.append(roc_auc_score(y_test,y_pred,average='weighted'))
                    score_rep.append(np.mean(score_cv))
                score_rep = np.asarray(score_rep).reshape(1,len(score_rep))
                auc = np.vstack((auc, score_rep)) if np.size(auc) else score_rep
            auc = np.swapaxes(auc,0,1)

            perm_scores = np.array([])
            for t in range(x.shape[1]):
                X = x[:,t]
                X = X.reshape(-1, 1)
                perm_rep = []
                for perm in range(nperm):
                    y_perm = y[permutation(len(y))]
                    score_cv = []
                    for train_index, test_index in skf.split(X, y_perm):
                        clf = LDA()
                        X_train, X_test = X[train_index], X[test_index]
                        y_train, y_test = y_perm[train_index], y_perm[test_index]
                        clf.fit(X=X_train, y=y_train)
                        y_pred = clf.predict(X_test)
                        score_cv.append(roc_auc_score(y_test,y_pred,average='weighted'))
                    perm_rep.append(np.mean(score_cv))
                perm_rep = np.asarray(perm_rep).reshape(1,len(perm_rep))
                perm_scores = np.vstack((perm_scores, perm_rep)) if np.size(perm_scores) else perm_rep
            perm_scores = np.swapaxes(perm_scores,0,1)           
            th_0_01_perm = perm_pvalue2level(perm_scores, p=0.01, maxst=True)
            th_0_001_perm = perm_pvalue2level(perm_scores, p=0.001, maxst=True)
            print('th_perm 01: ', th_0_01_perm[0], '001',th_0_001_perm[0], 'auc_max', np.max(auc))

    # ============================== PLOT POWER ANALYSIS + STATS & DECODING ACCURACY ===================================================
            # plot and figure parameters
            xfmt = ScalarFormatter(useMathText=True)
            xfmt.set_powerlimits((0,3))
            fig = plt.figure(1,figsize=(7,7))
            title = 'Power-Stats-DA for '+su+' '+cond+'_'+cycles[0]+' vs '+cond+'_'+cycles[1]+' '+str(elec)+' '+str(elec_label)+' ('+str(elec_num)+')'
            fig.suptitle(title, fontsize=12)

            # Plot the POW + STATS
            plt.subplot(211)        
            BorderPlot(time, x, y=y, kind='sem', alpha=0.2, color=color_codes,linewidth=2, 
                       ncol=1, xlabel='Time (s)',ylabel = r'Power', legend=['No Odor','Odor'])
            rmaxis(plt.gca(), ['right', 'top'])
            addLines(plt.gca(), vLines=[0], vColor=['darkgray'], vWidth=[2])
            plt.legend(loc=0, handletextpad=0.1, frameon=False)
            plt.gca().yaxis.set_major_locator(MaxNLocator(3,integer=True))

            # Plot DA for the POW
            plt.subplot(212)
            BorderPlot(time, auc, color='b', kind='sd',xlabel='Time (s)', ylim=[0.4,1.], ylabel='Decoding accuracy (%)',linewidth=2, alpha=0.3)
            rmaxis(plt.gca(), ['right', 'top'])
            addLines(plt.gca(), vLines=[0], vColor=['darkgray'], vWidth=[2])
            plt.gca().yaxis.set_major_locator(MaxNLocator(3,integer=True))
            plt.plot(time, th_0_01_perm*np.ones(len(time)), '--', color='r', linewidth=2)

            #Save plots
            np.save(name_auc, auc)
            np.save(name_perm, perm_scores)
            plt.savefig(plot_name, dpi=300, bbox_inches='tight')
            plt.clf()
            plt.close()
            del X, auc, pow_data_elec
            
for su in subjects:
    pow_list = []
    #=========================== Load Power files (nfreq, nelec, nwin, ntrial) =================================    
    mat0 = np.load(path.join(path_pow, su+'_E_'+conds[0]+'_pow'+cycles[0]+'_'+str(nb_points_cycle)+'.npz'))
    mat1 = np.load(path.join(path_pow, su+'_E_'+conds[1]+'_pow'+cycles[0]+'_'+str(nb_points_cycle)+'.npz'))
    print('files in pow mat', mat0.files)
    names, channels, freq_names = mat0['labels'], mat0['channels'],mat0['fname']
    freq_names = [freq.decode("utf-8") for freq in freq_names]
    time = np.arange(0.,1.,1/nb_points_cycle)
    
    #power dimensions // nelecs, nfreqs, ncycles, ntimepoints
    pow0, pow1 = mat0['pow'], mat1['pow']
    print('before',pow0.shape,pow1.shape)
    
    #filter all data by rois
    rois_keep = ['ACC','HC','IFG','Ins','MFG','OFC','PHG','SFG','pPirT','Amg','Amg-PirT']
    idx_roi = np.where([ roi in rois_keep for roi in names])[0]
    pow0, pow1 = pow0[idx_roi,...], pow1[idx_roi,...]
    names, channels= names[idx_roi], channels[idx_roi]
    
    #transform power dimensions // nfreq, nelecs, ntimepoints, ncycles
    pow0 = pow0.swapaxes(0,1).swapaxes(2,3)
    pow_list.append(pow0)
    pow1 = pow1.swapaxes(0,1).swapaxes(2,3)
    pow_list.append(pow1)
    print('after',pow0.shape,pow1.shape)
    nelecs = pow0.shape[1]
    print (su, 'power shape: ', [pow.shape for pow in pow_list])
    
    #randomly select same number of trials for both conds
    n_trials0, n_trials1 = pow0.shape[-1], pow1.shape[-1]
    if n_trials0 < n_trials1:
        rand_id = np.random.choice(np.arange(n_trials1),size=n_trials0)
        pow1 = pow1[:,:,:,rand_id]
        print('pow0',pow0.shape,'pow1',pow1.shape)
    elif n_trials0 > n_trials1:
        rand_id = np.random.choice(np.arange(n_trials0),size=n_trials1)
        pow0 = pow0[:,:,:,rand_id]
        print('pow0',pow0.shape,'pow1',pow1.shape)
        
    #compute LDA classif by elec in parallel
    iterator = range(nelecs)
    Parallel(n_jobs=-1)(delayed(classif_by_elec)(elec_num, pow_list) for elec_num in iterator)
    del pow_list

-> respi loaded
files in pow mat ['BA', 'f', 'aal', 'xyz', 'labels', 'channels', 'fname', 'pow']
before (91, 4, 60, 40) (91, 4, 181, 40)
after (4, 80, 40, 60) (4, 80, 40, 181)
SEMC power shape:  [(4, 80, 40, 60), (4, 80, 40, 181)]
pow0 (4, 80, 40, 60) pow1 (4, 80, 40, 60)


KeyboardInterrupt: 