# Classification Power Encoding

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, MaxNLocator
import scipy.io as sio
#%matplotlib notebook
#%matplotlib inline
#%load_ext autoreload
#%autoreload 2
from brainpipe.classification import *
from brainpipe.system import study
from brainpipe.feature import power, amplitude, sigfilt
from brainpipe.visual import *

from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from scipy.stats import expon

from brainpipe.statistics import *
from os import path
from mne.stats import *
from scipy.stats import binom

## Classification across time for all freq bands and subjects
### Encoding Good vs Bad odors

In [5]:
# Importing files 
st = study('Olfacto')
pathfiles = path.join(st.path, 'feature/6_Power_E1E2_Odor_Good_Bad_700_100/')
elecfiles = path.join(st.path, 'database/TS_E_all_by_odor_th40_art400_30_250_5s_concatOK/')
path2save = path.join(st.path, 'classified/8_Classif_Power_Good_Bad_across_time_700ms_step100ms_subset/svm_optimized/')

test = 'subset' 
classifs = ['svm']

if test == True:
    n_elec = {'VACJ' :1}
    subjects = ['VACJ']
    nfreq = 1
    
elif test == False :
    subjects = ['SEMC','PIRJ','LEFC','MICP','CHAF','VACJ'] 
    n_elec = {
    'CHAF' : 107,
    'VACJ' : 139, 
    'SEMC' : 107,
    'PIRJ' : 106,
    'LEFC' : 193,
    'MICP' : 105,
        }
    nfreq = 6
    
elif test == 'subset': #HC, PHC, Amg, Pir
    subjects = ['SEMC','PIRJ','LEFC','MICP','VACJ'] 
    n_elec = {
    'VACJ' : [1,2,3,11,12,13,14,15,16,17,22,23,24,60,61,62],
    'SEMC' : [0,1,2,3,4],
    'PIRJ' : [0,1,2,3,4,11,12,13,14,15,16,22,23,24,25,26,33,34,35,36,37,38],
    'LEFC' : [0,1,11,12,13,14,22,23,24,25,26,27],
    'MICP' : [0,1,2,3,9,18,10,11,12,19,20,21,22,23,29,30,31,32,33,40,41,42,43,44],
        }
    nfreq = 6
    
for classif in classifs:
    for su in subjects:
        #for elec_num in range(n_elec[su]):
        for elec_num in n_elec[su]:
            for freq in range(nfreq):
                #files & data to load
                bad_data = np.load(path.join(pathfiles, su+'_concat_odor_bad_bipo_power.npz'))['xpow'][freq,elec_num] #take power for one freq band, one elec
                bad_data = bad_data.swapaxes(0,1)
                good_data = np.load(path.join(pathfiles, su+'_concat_odor_good_bipo_power.npz'))['xpow'][freq,elec_num] #take power for one freq band, one elec
                good_data = good_data.swapaxes(0,1)
                nwin = good_data.shape[1]
                print ('bad shape: ', bad_data.shape, 'good shape: ', good_data.shape)
                elec = np.load(path.join(elecfiles, su+'_concat_odor_bad_bipo.npz'))['channel'][elec_num]
                elec_label = np.load(path.join(elecfiles, su+'_concat_odor_bad_bipo.npz'))['label'][elec_num]
                freq_name = np.load(path.join(pathfiles, su+'_concat_odor_bad_bipo_power.npz'))['fname'][freq]
                print ('elec ', elec, 'elec_label ', elec_label)

# ================================  STATISTICS FOR POWER  =====================================
                n_rep = 10
                T_rep, p_val_rep = np.array([]), np.array([])
                da_rep, daperm_rep = np.array([]), np.array([])
                alpha = 0.05
                
                first = True
                for i in range(n_rep):
                    #reshape data to have the exact same nb of trials (mandatory for t-tests)
                    if bad_data.shape[0] > good_data.shape[0]:
                        bad_data = bad_data[np.random.randint(bad_data.shape[0], size=good_data.shape[0]), :] #reshape bad_data to fit good_data shape
                    if bad_data.shape[0] < good_data.shape[0]:
                        good_data = good_data[np.random.randint(good_data.shape[0], size=bad_data.shape[0]), :]
                    ntrials = bad_data.shape[0]
                    X = bad_data - good_data #the last dimension needs to be time
                    T0, p_values, H0 = permutation_t_test(X, n_permutations=1000, tail=0, n_jobs=1, verbose=None)
                    T_rep = np.vstack((T_rep,T0)) if np.size(T_rep) else T0
                    p_val_rep = np.vstack((p_val_rep,p_values)) if np.size(p_val_rep) else p_values

# =============================  CLASSIFICATION COMPUTATION ============================================================           
                    #create a data matrix, concatenate along the trial dimension
                    bad_good = np.concatenate((bad_data, good_data), axis=0)
                    print ('Size of the concatenated data: ', bad_good.shape, 'Number of features : ', bad_good.shape[1])

                    #create label vector (0 for rest and 1 for odor)
                    y = [0]*bad_data.shape[0] + [1]*good_data.shape[0]
                    print ('Size of label for classif: ', len(y))
                    
                    da_final = []
                    for i in range(bad_good.shape[1]):
                        if first:
                            cv = StratifiedKFold(n_splits=10)
                            clf = SVC(class_weight='balanced', kernel='rbf')
                            params = {'C': expon(scale=100), 'gamma': expon(scale=.1)}
                            RS = RandomizedSearchCV(estimator=clf,
                                        param_distributions=params,
                                        n_iter=100,
                                        n_jobs=-1,
                                        cv=cv)
                            RS.fit(X=bad_good[:,i].reshape(-1,1), y=y)
                            best_params = RS.best_params_
                            best_params['class_weight'] = 'balanced'
                            best_params['kernel'] = 'rbf'

                        # Define a cross validation:
                        cv = defCv(y, n_folds=10, cvtype='skfold', rep=10)
                        # Define classifier technique
                        clf = defClf(y=y, clf=classif, kern='rbf',
                                     C=best_params['C'], gamma=best_params['gamma'],
                                     class_weight=best_params['class_weight'])#,n_tree=200, random_state=100)
                        #Classify rest and odor
                        cl = classify(y, clf=clf, cvtype=cv)

                        # Evaluate the classifier on data:
                        da,pvalue,daperm = cl.fit(bad_good[:,i].reshape(-1,1), n_perm=100,method='bino',mf=False)
                        da_final.append(da.tolist())
                    da = np.asarray(da_final).squeeze().swapaxis(0,1)
                    print(da.shape)
                    print ('decoding accuracy',da.shape, 'pvalues ', pvalue.shape,)
                    da_rep = np.vstack((da_rep,da)) if np.size(da_rep) else da
                    first = False
                    
# =============================== TAKE MAX STATS TO PLOT =====================================
                # Take the max pvalues for each time window
                idx_pval_max = []
                for s in range(nwin):
                    pval_max = p_val_rep[:,s].max()
                    idx_pval_max.append(pval_max)
                #print (p_val_rep.shape, idx_pval_max)

                #Save da accuracy
                np.save(path2save+su+'_da_Bad_vs_Good__'+str(freq_name)+'_'+classif+'_'+str(elec_label)+'_('+str(elec_num)+')',da_rep)

# ============================== PLOT POWER ANALYSIS + STATS & DECODING ACCURACY ===================================================
                # data to plot
                bad_to_plot = np.load(path.join(pathfiles, su+'_concat_odor_bad_bipo_power.npz'))['xpow'][freq,elec_num]
                bad_to_plot = bad_to_plot.swapaxes(0,1)
                good_to_plot = np.load(path.join(pathfiles, su+'_concat_odor_good_bipo_power.npz'))['xpow'][freq,elec_num] #take power for one freq band, one elec
                good_to_plot = good_to_plot.swapaxes(0,1)
                bad_good_plot = np.concatenate((bad_to_plot, good_to_plot), axis=0)
                y_plot = [0]*bad_to_plot.shape[0] + [1]*good_to_plot.shape[0]
    
                # plot and figure parameters
                xfmt = ScalarFormatter(useMathText=True)
                xfmt.set_powerlimits((0,3))
                fig = plt.figure(1,figsize=(7,7))
                step = 3700/ bad_to_plot.shape[1]
                time = np.arange(-700, 3000, step)
                print (len(time))
                title = 'Power and DA for '+str(freq_name)+' '+su+' '+classif+' '+str(elec_label)+' ('+str(elec_num)+')'
                fig.suptitle(title, fontsize=12)

                # Plot the ERPs and the stats
                plt.subplot(211)
                BorderPlot(time, bad_good_plot, y=y_plot, kind='sem', alpha=0.2, color=['b', 'm'], 
                           linewidth=2, ncol=1, xlabel='Time (ms)', ylabel = r' $\mu$V', 
                           legend = ['bad', 'good'])
                addPval(plt.gca(), idx_pval_max, p=0.05, x=time, y=5, color='0.5', lw=2)
                addPval(plt.gca(), idx_pval_max, p=0.01, x=time, y=0.2, color='0.7', lw=2)
                addPval(plt.gca(), idx_pval_max, p=0.001, x=time, y=0.3, color='0.9', lw=2)
                addLines(plt.gca(), vLines=[0], vColor=['black'], vWidth=[2], hLines=[0], 
                         hColor=['#000000'], hWidth=[2])
                rmaxis(plt.gca(), ['right', 'top'])
                plt.legend(loc=0, handletextpad=0.1, frameon=False)
                plt.gca().yaxis.set_major_locator(MaxNLocator(3,integer=True))

                #Plot the da
                plt.subplot(212)
                BorderPlot(time, da_rep, color='darkslateblue', kind='std',xlabel='Time (ms)', ylim=[da.min()-10,da.max()+10],
                           ylabel='Decoding accuracy (%)',linewidth=2,alpha=0.3)
                rmaxis(plt.gca(), ['right', 'top'])
                addLines(plt.gca(), vLines=[0], vColor=['black'], vWidth=[2], hLines=[50], 
                         hColor=['#000000'], hWidth=[2])
                plt.gca().yaxis.set_major_locator(MaxNLocator(3,integer=True))
                th_0_05 = 100*np.around(binom.isf(0.05, good_data.shape[0], 0.5)/good_data.shape[0],2)
                th_0_01 = 100*np.around(binom.isf(0.01, good_data.shape[0], 0.5)/good_data.shape[0],2)
                th_0_001 = 100*np.around(binom.isf(0.001, good_data.shape[0], 0.5)/good_data.shape[0],2)
                plt.plot(time, th_0_05*np.ones(len(time)), '--', color='orange', 
                          linewidth=2, label= str(th_0_05)+' - p < .05')
                plt.plot(time, th_0_01*np.ones(len(time)), '--', color='orangered', 
                          linewidth=2, label= str(th_0_01)+' - p < .01')
                plt.plot(time, th_0_001*np.ones(len(time)), '--', color='r', 
                          linewidth=2, label= str(th_0_001)+' - p < .001')
                plt.legend(loc=0, handletextpad=0.1, frameon=False)

# =========================== SAVE FIGURES & CLEAN MEMORY ==========================================================================
                #Save the plot
                fname = path.join(path2save, su + '_'+freq_name+'_'+str(elec_label)+'_('+str(elec_num)+')_'+'0.01.png')
                fig.savefig(fname, dpi=300, bbox_inches='tight')
                print ('saving --»' ,fname)
                plt.clf()
                plt.close()
                del bad_good, good_data, bad_data, elec, elec_label, freq_name, da, daperm, pvalue, y



-> Olfacto loaded
bad shape:  (21, 29) good shape:  (19, 29)
elec  b2-b1 elec_label  aHC&aHC-Ent
Size of the concatenated data:  (38, 29) Number of features :  29
Size of label for classif:  38


ImportError: [joblib] Attempting to do parallel computing without protecting your import on a system that does not support forking. To use parallel-computing in a script, you must protect your main loop using "if __name__ == '__main__'". Please see the joblib documentation on Parallel for more information

In [None]:
from scipy.stats import binom

binom.isf(0.01, good_data.shape[0], 0.5)/good_data.shape[0]


In [None]:
da