## Calculate ERPs

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from os import path
from matplotlib.ticker import ScalarFormatter, MaxNLocator
#%matplotlib notebook

from brainpipe.system import study
from brainpipe.visual import *
from brainpipe.statistics import *
from mne.baseline import rescale
from mne.filter import filter_data
from mne.stats import *

## User variables

In [35]:
# where to find data
st = study('Olfacto')
path_data = path.join (st.path, 'database/TS_E_all_by_odor_th40_art400_30_250_5s_Good_Bad_EpiScore/')
save_path = path.join(st.path, 'feature/ERP_Groups_Odors_250ms_rescale_filtered_stats_bootstrap/')
# analysis parameters
score = 'Epi' #'Rec'
low_pass_filter = 10.
sf = 512.
norm_mode = 'mean' #'ratio' 'mean' 'percent' 
baseline = [973 , 1024] #100ms before odor perception
data_to_use = [973, 1536] #1000ms after odor
time_points = data_to_use[1]-data_to_use[0]

-> Olfacto loaded


## Plot ERPs for Odor groups

In [38]:
test = True

if test == True:
    n_elec = {'VACJ' :1}
    subjects = ['VACJ']
else :
    subjects = ['SEMC','PIRJ','LEFC','MICP','CHAF','VACJ'] 
    n_elec = {
    'CHAF' : 107,
    'VACJ' : 139, 
    'SEMC' : 107,
    'PIRJ' : 106,
    'LEFC' : 193,
    'MICP' : 105,
        }

for su in subjects:
    for elec in range(0, n_elec[su],1):
        #Load files
        badname = su+'_concat_odor_bad_bipo.npz'
        goodname = su+'_concat_odor_good_bipo.npz'
        data_bad = np.load(path.join(path_data, badname))
        data_good = np.load(path.join(path_data, goodname))
        data_bad, channel, label, data_good = data_bad['x'], data_bad['channel'], data_bad['label'], data_good['x']

        # Select data for one elec + name :
        data_elec_bad = data_bad[elec,:,:]
        data_elec_good = data_good[elec,:,:]
        ntrials = str(data_elec_bad.shape[1])+'/'+ str(data_elec_good.shape[1]) #to be displayed on figures
        print ('Channel : ', channel[elec], 'Label : ', label[elec], 'N_trials :', ntrials, 
               'Bad shape : ', data_elec_bad.shape, 'Good shape : ', data_elec_good.shape)

        #Filter data for one elec (all trials):
        data_elec_bad = np.array(data_elec_bad, dtype='float64')
        data_elec_good = np.array(data_elec_good, dtype='float64')
        data_bad_to_filter = np.swapaxes(data_elec_bad, 0, 1)
        data_good_to_filter = np.swapaxes(data_elec_good, 0, 1)
        filtered_data_bad = filter_data(data_bad_to_filter, sfreq=512, l_freq=None, h_freq=low_pass_filter, method='fir', phase='zero-double')
        filtered_data_good = filter_data(data_good_to_filter, sfreq=512, l_freq=None, h_freq=low_pass_filter, method='fir', phase='zero-double')
        print ('Size of filtered data bad :', filtered_data_bad.shape, 'filtered data good : ', filtered_data_good.shape,)

        #Normalize the non-averaged data (all trials)
        times = np.arange(filtered_data_bad.shape[1])
        print ('time points : ', times.shape)
        norm_filtered_data_bad = rescale(filtered_data_bad, times=times, baseline=baseline, mode=norm_mode)
        norm_filtered_data_good = rescale(filtered_data_good, times=times, baseline=baseline, mode=norm_mode)
        print ('Size norm & filtered data 0 : ', norm_filtered_data_bad.shape, norm_filtered_data_good.shape,)
        
        # Range of the data to compute
        data_range = range(data_to_use[0], data_to_use[1])
        # Select a time window in the data
        data_bad = norm_filtered_data_bad[:, data_range]
        data_good = norm_filtered_data_good[:, data_range,]
        #print ('-> Shape of the selected data for learn 0', data_bad.shape, 'learn 1', data_good.shape,)

        # =======================================  STATISTICS  =====================================
        n_rep = 10 #bootstrap
        T_rep = np.array([])
        p_val_rep = np.array([])
        alpha = 0.05
        
        for i in range(n_rep):
            #reshape data to have the exact same nb of trials (mandatory for t-tests)
            if data_bad.shape[0] > data_good.shape[0]:
                data_bad = data_bad[np.random.randint(data_bad.shape[0], size=data_good.shape[0]), :] #reshape bad_data to fit good_data shape
                #print ('rand bad matrix', data_bad.shape)
            if data_bad.shape[0] < data_good.shape[0]:
                data_good = data_good[np.random.randint(data_good.shape[0], size=data_bad.shape[0]), :]
                #print ('rand good matrix', data_good.shape)
            X = data_bad - data_good #the last dimension needs to be time
            T0, p_values, H0 = permutation_t_test(X, n_permutations=100, tail=0, n_jobs=1, verbose=None)
            T_rep = np.vstack((T_rep,T0)) if np.size(T_rep) else T0
            p_val_rep = np.vstack((p_val_rep,p_values)) if np.size(p_val_rep) else p_values
        
        p_val_max = []
        for t in range(time_points):
            pmax = p_val_rep[:,t].max()
            p_val_max.append(pmax)
        
        # ========================== PREPARE PLOTS AND SAVE STATS =========================================
            
        # plot and figure parameters
        xfmt = ScalarFormatter(useMathText=True)
        xfmt.set_powerlimits((0,3))
        fig = plt.figure(1,figsize=(7,7))
        title = 'ERP and Stats for '+su+''+score+' Good/Bad '+ channel [elec] +' '+label[elec]+' ('+str(elec)+') ntrials:'+str(ntrials)
        fig.suptitle(title, fontsize=12)
        times_plot = 1000 * np.arange((baseline[0] - baseline[1]), data_to_use[1]-baseline[1]) / sf
        #print (len(times_plot))
        lines = [0] #time vector is in ms

        # Plot the stats for the ERPs
        plt.subplot(211)
        BorderPlot(times_plot, p_val_max, color=['b'], linewidth=2, ncol=1, xlabel='Time (ms)',
                   ylabel = 'pvalues')
        addPval(plt.gca(), p_val_max, p=0.05, x=times_plot, y=0.5, color='r', lw=3)
        addLines(plt.gca(), vLines=lines, vColor=['r'], vWidth=[2], hLines=[0], 
                 hColor=['#000000'], hWidth=[2])
        rmaxis(plt.gca(), ['right', 'top'])
        plt.gca()

        # Plot the ERPs and the stats
        plt.subplot(212)
        data_all = np.concatenate((data_bad, data_good), axis=0)
        label_bad = np.zeros(data_bad.shape[0], dtype='int64')
        label_good = np.ones(data_good.shape[0], dtype='int64')
        labels = np.concatenate((label_bad, label_good), axis=0)
        BorderPlot(times_plot, data_all, y=labels, kind='sem', alpha=0.2, color=['b', 'm'], 
                   linewidth=2, ncol=1, xlabel='Time (ms)',ylabel = r' $\mu$V', 
                   legend = ['bad', 'good'])
        addPval(plt.gca(), p_val_rep.mean(0), p=0.05, x=times_plot, y=5, color='r', lw=3)
        addLines(plt.gca(), vLines=lines, vColor=['r'], vWidth=[2], hLines=[0], 
                 hColor=['#000000'], hWidth=[2])
        rmaxis(plt.gca(), ['right', 'top'])
        plt.legend(loc=0, handletextpad=0.1, frameon=False)
        plt.gca().yaxis.set_major_locator(MaxNLocator(3,integer=True))

        # Save the plots
        if sum(idx_signif_nb) >= n_rep - n_rep*alpha:
            name_pval_rep = (save_path +'Significant/'+su +'_pvalues_all_' + score +'_'+label[elec]+'_('+str(elec)+').npy')
            name_p_val_max = (save_path +'Significant/'+su +'_pvalues_max_' + score +'_'+label[elec]+'_('+str(elec)+').npy')
            plot_name = (save_path +'Significant/'+su +'_pvalues_'  + score +'_'+label[elec]+'_('+str(elec)+').png')
        else:
            fname = (save_path +su +'_pvalues_good_bad_' + channel [elec] +'_'+label[elec]+'_('+str(elec)+').npy')
            plot_name = (save_path +su +'_pvalues_good_bad_'  + channel [elec] +'_'+label[elec]+'_('+str(elec)+').png')
        
        np.save(name_pval_rep, p_val_rep)
        np.save(name_p_val_max, p_val_max)
        plt.savefig(plot_name, dpi=300, bbox_inches='tight')
        plt.clf()
        plt.close()
        del idx_signif_nb, T_rep, p_val_rep


Channel :  b2-b1 Label :  mHC-Ent N_trials : 13/13 Bad shape :  (2560, 13) Good shape :  (2560, 13)
Setting up low-pass filter at 10 Hz
h_trans_bandwidth chosen to be 2.5 Hz
Filter length of 1352 samples (2.641 sec) selected
Setting up low-pass filter at 10 Hz
h_trans_bandwidth chosen to be 2.5 Hz
Filter length of 1352 samples (2.641 sec) selected
Size of filtered data bad : (13, 2560) filtered data good :  (13, 2560)
time points :  (2560,)
Applying baseline correction (mode: mean)
Applying baseline correction (mode: mean)
Size norm & filtered data 0 :  (13, 2560) (13, 2560)
563
563


NameError: name 'idx_signif_nb' is not defined

In [29]:
x = np.load(path.join(save_path,'SEMC_pvalues_good_bad_b2-b1_aHC&aHC-Ent_(0).npy'))
print(x.shape)
data_range = range(data_to_use[0], data_to_use[1])
print(data_to_use[1]-data_to_use[0]-512)

(100, 563)
51
