# Imports and configuration

In [10]:
import warnings
warnings.filterwarnings('ignore')

In [11]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import seaborn as sns
import pandas as pd
import scipy as sp
from scipy import stats, ndimage
from scipy.stats import pearsonr 
from itertools import chain, zip_longest
from copy import deepcopy
import glmtools as glm

from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.model_selection import KFold, cross_val_predict, train_test_split 
from sklearn.metrics import mean_squared_error, r2_score

import mne
from mne.stats import permutation_t_test
mne.set_log_level('warning') 

#%matplotlib qt
%matplotlib inline

input_dir = 'TaskstimulusEpochsMastoids'


In [12]:
def load_subj_eeg(path, file, downsample=None):
    """Read one epochs file and optionally resample it.

    Parameters
    ----------
    path : str
        Directory that contains the epochs file.
    file : str
        File stem; ``'-epo.fif'`` is appended to build the file name.
    downsample : number or None
        If given, passed to ``epochs.resample`` (presumably the new
        sampling rate in Hz -- confirm against the MNE version in use).
        If None, the epochs are returned as loaded.

    Returns
    -------
    The object returned by ``mne.read_epochs`` (or by ``resample`` when
    ``downsample`` is given), loaded with ``preload=True``.
    """
    filename = '%s-epo.fif' % file
    fp = os.path.join(path, filename)
    print('>>> Loading %s' % fp)

    loaded = mne.read_epochs(fp, preload=True)
    if downsample is None:
        return loaded
    return loaded.resample(downsample)

def load_all_eeg(path, files, downsample=None):
    """Load several epochs files and concatenate them into one object.

    Each entry of ``files`` is loaded with ``load_subj_eeg`` (same ``path``
    and ``downsample``), in the order given, and the results are joined with
    ``mne.epochs.concatenate_epochs``.
    """
    loaded = []
    for file in files:
        loaded.append(load_subj_eeg(path, file, downsample=downsample))
    return mne.epochs.concatenate_epochs(loaded)

In [18]:
def runclustertest_epochs(data, contrast_name, channels, tmin = None, tmax = None, gauss_smoothing = None, out_type = 'indices', n_permutations = 'Default', n_jobs = 1):
    '''
    Run a one-sample cluster permutation test on channel-averaged voltage time courses.

    data            = dictionary where each key is a contrast name and each value is a
                      list (of length nsubs) of Evoked objects
    contrast_name   = key of the contrast in `data` to run the test on
    channels        = list of channel names to average over. A single channel must still
                      be passed as a one-element list (then no averaging happens)
    tmin, tmax      = optional limits of the time window the test is restricted to
                      (passed straight to `crop`); None keeps the full range
    gauss_smoothing = width (sigma) of a gaussian smoothing applied to each subject's
                      channel-averaged time course before the test. None (default) = no
                      smoothing.
                      NOTE: sigma is in samples, so its width in time depends on the
                      sampling frequency of the data
    out_type        = output type forwarded to the MNE test ('indices' by default,
                      'mask' is the alternative)
    n_permutations  = number of permutations forwarded to the MNE test; the sentinel
                      'Default' leaves MNE's own default in place
    n_jobs          = number of jobs forwarded to the MNE test

    Returns (t, clusters, cluster_pv, H0) exactly as returned by
    mne.stats.permutation_cluster_1samp_test.
    '''
    subject_evokeds = data[contrast_name]
    nsubs = len(subject_evokeds)

    # Work on copies: the original code copied before crop/pick as well, so the
    # caller's objects must not be altered by the cropping and channel selection.
    times = deepcopy(subject_evokeds[0]).crop(tmin=tmin, tmax=tmax).times

    # Middle axis has length 1 because channels are averaged away below.
    cludat = np.empty(shape = (nsubs, 1, times.size))

    for i in range(nsubs):
        tmp = deepcopy(subject_evokeds[i])
        tmp.crop(tmin=tmin, tmax=tmax).pick_channels(channels) # select time window and channels we want
        timecourse = np.nanmean(tmp.data, axis=0) # average across channels
        if gauss_smoothing is not None:
            timecourse = ndimage.gaussian_filter1d(timecourse, sigma = gauss_smoothing)
        cludat[i,:,:] = timecourse

    # Only forward n_permutations when the caller overrode the sentinel, so that
    # MNE's own default is used otherwise.
    test_kwargs = dict(out_type=out_type, n_jobs=n_jobs)
    if n_permutations != 'Default':
        test_kwargs['n_permutations'] = n_permutations

    t, clusters, cluster_pv, H0 = mne.stats.permutation_cluster_1samp_test(cludat, **test_kwargs)
    return t, clusters, cluster_pv, H0

In [13]:
def nanzscore(vector, zero_out_nans = True):
    '''
    Standardise a vector (subtract mean, divide by standard deviation) using
    NaN-ignoring statistics.

    vector        = values to z-score; NaN entries are skipped when computing
                    the mean and standard deviation
    zero_out_nans = if True (default), NaN entries of the z-scored result are
                    replaced by 0 (useful for regressors); if False they stay NaN
    '''
    centre = np.nanmean(vector)
    centred = np.subtract(vector, centre)
    spread = np.nanstd(vector)
    zscored = np.divide(centred, spread)

    if not zero_out_nans:
        return zscored
    return np.where(np.isnan(zscored), 0, zscored)

In [14]:
# Participants 1-21, with 14 left out (14 was excluded due to noise)
participant_numbers = [number for number in range(1, 22) if number != 14]

# Each participant has files for two sessions
sessions = [1, 2]

# Partner labels (the double quotes are part of each label)
partners = ['"overconfident"', '"underconfident"']


# Regress voltage on time

In [None]:
## For every trial, regress voltage on time point (sample index within the analysis window).
## This yields one slope (beta) per trial and channel.

In [62]:
%matplotlib inline

# Channels (region of interest) on which the per-trial regressions are run
roi = ["Pz", "CPz", "POz", "P1", "P2"]

#for sub in participant_numbers:
for sub in [3]:
    # one epochs file per session, named '<participant>_<session>'
    participant_files = ['%i_%i' % (sub, session) for session in sessions]
    epochs = load_all_eeg(path='%s/' % input_dir, files=participant_files)

    # participant confidence on each trial
    confidence = epochs.metadata['participant_confidence'].to_numpy()

    # indices of roi channels to perform the regressions on.
    # Channel names are read from the epochs that were just loaded; the previous
    # code used a bare `info` that is not defined anywhere in this notebook.
    channel_indices = []
    for channel in roi:
        channel_index = mne.pick_channels(epochs.info['ch_names'], [channel])
        if len(channel_index) == 0:
            raise ValueError('ROI channel %s not found in the loaded epochs' % channel)
        channel_indices.append(channel_index[0])

    # crop epochs to time window of interest
    epoch_start = 0.2
    epoch_end = 0.6
    epochs = epochs.crop(tmin=epoch_start, tmax=epoch_end, include_tmax=True)

    # dependent variable is voltage
    data = epochs.get_data()
    ntrials, nchannels, ntimes = data.shape

    # regressor is time, expressed as sample index within the cropped window
    timepoint = np.arange(ntimes)
    x = timepoint.reshape(-1, 1)

    # one row per channel of the recording; only the roi rows get filled,
    # all other rows stay NaN
    timepoint_betas = np.full((nchannels, ntrials), np.nan)
    timepoint_correlations = np.full((nchannels, ntrials), np.nan)

    # the estimator is re-fitted from scratch on every call to fit, so one
    # instance can be shared by all trials
    LR = LinearRegression()

    for a, channel in enumerate(channel_indices):
        data_channel = data[:, channel, :]

        for trial in range(ntrials):
            y = data_channel[trial, :]

            # fitting the model --> for every channel, for every trial, we regress voltage against time
            LR.fit(x, y)
            betas = LR.coef_

            timepoint_betas[channel, trial] = betas[0]
            timepoint_correlations[channel, trial] = sp.stats.pearsonr(timepoint, y)[0]

        #print(np.mean(timepoint_betas[channel, :]))
        #print(np.mean(timepoint_correlations[channel, :]))

        # correlation of the per-trial slope (and of the per-trial
        # voltage-time correlation) with confidence
        print(roi[a])
        print(sp.stats.pearsonr(timepoint_betas[channel, :], confidence))
        print(sp.stats.pearsonr(timepoint_correlations[channel, :], confidence))

    #np.save(file='/Users/majaf/Desktop/Google Drive/PhD/Study 5 Confidence Matching with EEG (A)/Data Analysis/TimepointBetas/slope_%i.npy' % sub, arr=timepoint_betas)

>>> Loading TaskstimulusEpochsMastoids/3_1-epo.fif
>>> Loading TaskstimulusEpochsMastoids/3_2-epo.fif
Pz
(0.05471444248316028, 0.18482146594878396)
(0.04556943614028139, 0.2695221715203272)
CPz
(0.0555462690895719, 0.178223168690332)
(0.035983665687658, 0.38335768587762203)
POz
(0.05547685864250326, 0.17876697460367807)
(0.06011844897403891, 0.14504659759319932)
P1
(0.05800306032059598, 0.15975727315115815)
(0.049186219485774806, 0.23329972230097284)
P2
(0.05289978394821936, 0.19983830807451453)
(0.046525280210824874, 0.2595949911973805)


In [58]:
# Leftover inspection cell: shows `y`, the single-trial voltage vector that the
# regression loop assigns on each iteration (so it holds whichever trial was
# processed last). NOTE(review): this cell's execution count (58) is lower than
# the regression cell's (62), so the stored output may come from an earlier run.
y

array([ 9.04268836e-06,  8.92784010e-06,  8.73439785e-06,  8.18530680e-06,
        7.15981878e-06,  5.77574747e-06,  4.37843664e-06,  3.42652993e-06,
        3.30112243e-06,  4.10927032e-06,  5.58237934e-06,  7.15020724e-06,
        8.18938406e-06,  8.33788727e-06,  7.70489896e-06,  6.84256611e-06,
        6.47272327e-06,  7.10524364e-06,  8.75702881e-06,  1.09338907e-05,
        1.28938682e-05,  1.40490829e-05,  1.42771187e-05,  1.39573867e-05,
        1.37056659e-05,  1.39636268e-05,  1.46970524e-05,  1.53986130e-05,
        1.54056015e-05,  1.43432444e-05,  1.24180660e-05,  1.03702541e-05,
        9.09785467e-06,  9.16439421e-06,  1.04767141e-05,  1.23267509e-05,
        1.37822844e-05,  1.42068311e-05,  1.36090939e-05,  1.26184905e-05,
        1.21013554e-05,  1.26421091e-05,  1.41894034e-05,  1.60664549e-05,
        1.73318658e-05,  1.72797500e-05,  1.57982249e-05,  1.33975700e-05,
        1.09141311e-05,  9.07746107e-06,  8.19173965e-06,  8.09608991e-06,
        8.39014137e-06,  