In [1]:
import os.path as op
import numpy as np
import numpy.matlib
from pandas import read_csv
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.manifold import MDS

import mne
from mne.io import read_raw_fif, concatenate_raws
from mne.datasets import visual_92_categories

In [2]:
print(__doc__)

data_path = visual_92_categories.data_path()

# Define stimulus - trigger mapping
fname = op.join(data_path, 'visual_stimuli.csv')
conds = read_csv(fname)
print(conds.head(5))

Automatically created module for IPython interactive environment
   trigger       condition  human  face  animal  natural
0        0  human bodypart      1     0       1        1
1        1  human bodypart      1     0       1        1
2        2  human bodypart      1     0       1        1
3        3  human bodypart      1     0       1        1
4        4  human bodypart      1     0       1        1


In [3]:
max_trigger = 92
conds = conds[:max_trigger]  # take only the first 24 rows

In [4]:
conditions = []
for c in conds.values:
    cond_tags = list(c[:2])
    cond_tags += [('not-' if i == 0 else '') + conds.columns[k]
                  for k, i in enumerate(c[2:], 2)]
    conditions.append('/'.join(map(str, cond_tags)))
print(conditions[:5])

['0/human bodypart/human/not-face/animal/natural', '1/human bodypart/human/not-face/animal/natural', '2/human bodypart/human/not-face/animal/natural', '3/human bodypart/human/not-face/animal/natural', '4/human bodypart/human/not-face/animal/natural']


In [5]:
event_id = dict(zip(conditions, conds.trigger + 1))
event_id['0/human bodypart/human/not-face/animal/natural']

1

In [6]:
n_runs = 1  # 4 for full data (use less to speed up computations)
fname = op.join(data_path, 'sample_subject_%i_tsss_mc.fif')
raws = [read_raw_fif(fname % block, verbose='error')
        for block in range(n_runs)]  # ignore filename warnings
raw = concatenate_raws(raws)

events = mne.find_events(raw, min_duration=.002)
events = events[events[:, 2] <= max_trigger]

1374 events found
Event IDs: [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93 200 222 244]


In [7]:
picks = mne.pick_types(raw.info, meg=True)
epochs = mne.Epochs(raw, events=events, event_id=event_id, baseline=None,
                    picks=picks, tmin=-.1, tmax=.4, preload=True)

Not setting metadata
Not setting metadata
920 matching events found
No baseline correction applied
0 projection items activated
Loading data for 920 events and 501 original time points ...
0 bad epochs dropped


In [8]:
X = epochs.copy().crop(0.05, 0.3).get_data()
Q = epochs.copy().crop(-0.1, 0).get_data()

In [9]:
print(Q.shape)
print(X.shape)

(920, 306, 101)
(920, 306, 251)


In [10]:
#Morlet Wavelet
myfreqrange = np.array([2,120])
Fs = 1000; # Hz, sampling rate
#timedur = Q.shape[2]/Fs; % seconds, temporal duration
timevec = np.linspace(0.05, 0.3, 251) # vector of time between 0 and timedur seconds
timevec_gauss = np.linspace(-2, 2, 4001)
lofreq = myfreqrange[0]
hifreq = myfreqrange[1]

In [11]:
NWavelets = 236
NCycles = 7
MyFreqs = np.linspace(lofreq,hifreq,NWavelets)
MyMorletFamily = np.empty((0,4001), float) #equivalent to "clear" # What is the equivalent of clear?

In [12]:
def normpdf(x, mu=0, sigma=1):
    u = float((x-mu) / abs(sigma))
    y = np.exp(-u*u/2) / (np.sqrt(2*np.pi) * abs(sigma))
    return y

for wnum in range(0, NWavelets):
        myomega = 2 * np.pi * MyFreqs[wnum];
        mysigma = NCycles/myomega
        mygauss = np.array([normpdf(i,0,mysigma)
                    for i in timevec_gauss])
        mySig = np.exp(1j*myomega*timevec_gauss)
        MyMorletFamily = np.append(MyMorletFamily, [mySig * mygauss], axis = 0)

In [13]:
MyMorletFamily.shape
X.shape[1]

306

In [14]:
import gc

In [15]:
def fre_decomp(fftMEG, MyMorletFamily, NEvents, nconvolution, nshift):
    fftGW = np.fft.fft(MyMorletFamily[:],nconvolution) # one dimensional transform?
    fftconv = fftMEG * np.matlib.repmat(fftGW,NEvents,1) # * for multiplying numpy arrays
    conv_result = np.fft.ifft(fftconv,nconvolution,1)
    conv_result = conv_result[:,(nshift):(conv_result.shape[1] - nshift)] # index?  
    power = np.power(np.absolute(conv_result), 2)
    del fftGW 
    del fftconv
    del conv_result
    gc.collect()
    return power

In [18]:
#Transfer time series data into frequency domain
MEG_Power =  np.empty((0, 236,251,920))

# Baseline; divide by -.1 to 0



# Time Bins
TimeBins = np.linspace(0.05, 0.3, 25)
# Frequency Bins
FreqBins = np.array([2, 4, 8, 13, 20, 35, 55, 80, 120])

Nchan = X.shape[1]
NTBins = len(TimeBins)
NFBins = len(FreqBins)

In [22]:
#for chan in range(0, 5): #looping thru channels
for chan in range(0, 1):
#chan = 1
    #Signal Data
    #print(chan)
    data = np.squeeze(X[:, chan, :])
    NEvents = data.shape[0]
    nconvolution = timevec_gauss.size + data.shape[1] - 1
    nshift = int((timevec_gauss.size-1)/2)
    
    #Baseline Data
    pretrial_data = np.squeeze(Q[:, chan, :])
    pretrial_nconvolution = timevec_gauss.size + pretrial_data.shape[1] - 1
    nshift = int((timevec_gauss.size-1)/2)

        #for enum in range(0, NEvents): #looping thru events

    fftMEG = np.fft.fft(data[:,:],nconvolution) # signal into frequency domain
    pretrial_fftMEG = np.fft.fft(pretrial_data[:,:], pretrial_nconvolution) # pretrial into frequency domain
    
    MEG_chan =  np.empty((0,251,920))  
    
    for wnum in range(0, NWavelets):
        print(wnum)
        #fftGW = np.fft.fft(MyMorletFamily[wnum,:],nconvolution) # one dimensional transform?
        #pretrial_fftGW = np.fft.fft(MyMorletFamily[wnum,:], pretrial_nconvolution)

        #pretrial_fftconv = pretrial_fftMEG * np.matlib.repmat(pretrial_fftGW,NEvents,1)
        #pretrial_conv_result = np.fft.ifft(pretrial_fftconv,pretrial_nconvolution,1)
        #pretrial_conv_result = pretrial_conv_result[:,(nshift):(pretrial_conv_result.shape[1] - nshift)]
        #pretrial_power = np.power(np.absolute(pretrial_conv_result), 2)

        #fftconv = fftMEG * np.matlib.repmat(fftGW,NEvents,1) # * for multiplying numpy arrays
        #conv_result = np.fft.ifft(fftconv,nconvolution,1)
        #conv_result = conv_result[:,(nshift):(conv_result.shape[1] - nshift)] # index? 
        
        pretrial_power = fre_decomp(pretrial_fftMEG, MyMorletFamily[wnum,:], NEvents, pretrial_nconvolution, nshift)
        signal_power = fre_decomp(fftMEG, MyMorletFamily[wnum,:], NEvents, nconvolution, nshift)

        normalized_MEG = np.transpose(signal_power)/np.matlib.repmat(np.median(np.transpose(pretrial_power), 0) , 251, 1)

        MEG_chan = np.append(MEG_chan, [normalized_MEG], axis = 0) #numpy equivalent of transpose

    MEG_Power = np.append(MEG_Power, [MEG_chan], axis = 0)
    del MEG_chan
    gc.collect()
    print(chan)
            #MEG_dB(wnum,:,:) = 10*log10(MEG_Power(wnum,:,:))
            
            #baseline???
            #for loops??
            #np.fft.fft
            # For numpy array, it seems that * is an element wise multiplication operator. But are we operating on np.array?
        
        
        

#fast fourier transform (in numpy) (cohen's book) np.fft





#one participant, for now  
#wavelet number, then channel number


#fft for each channel
# then for each wavelet, going to do the fft of the gabor wavelet family, organize it by channel
#element by element multiplication in python: numpy dot multiply
#conv result using ifft , second dimension (,1) in python
#nshift gives center
#then baseline correct the power; conv result from 50-300 and divide from -.1 to 0

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
0


In [31]:
MEG_Power.shape

(1, 236, 251, 920)

In [32]:
def dsearchn(x,y):
    IDX = np.zeros((y.shape[0]))
    for line in range(0, y.shape[0]):
        distances = np.abs(x - y[line])
        distances.argmin()
        IDX[line] = distances.argmin()
    return IDX

In [43]:
TimeInds = dsearchn(timevec, TimeBins)
print(TimeBins)
timevec[TimeInds.astype(int)]

[0.05       0.06041667 0.07083333 0.08125    0.09166667 0.10208333
 0.1125     0.12291667 0.13333333 0.14375    0.15416667 0.16458333
 0.175      0.18541667 0.19583333 0.20625    0.21666667 0.22708333
 0.2375     0.24791667 0.25833333 0.26875    0.27916667 0.28958333
 0.3       ]


array([0.05 , 0.06 , 0.071, 0.081, 0.092, 0.102, 0.112, 0.123, 0.133,
       0.144, 0.154, 0.165, 0.175, 0.185, 0.196, 0.206, 0.217, 0.227,
       0.237, 0.248, 0.258, 0.269, 0.279, 0.29 , 0.3  ])

In [21]:
#Time bin + Freq bin
AllData(pnum).EncMainEEG_dB = EncMainEEG_dB;



# Create features
TimeInds = dsearchn(timevec, TimeBins);
FreqInds = dsearchn(MyFreqs, FreqBins);
    
NTBins = length(TimeInds);
NFBins = length(FreqInds);
AllData(pnum).MyFreqs = MyFreqs;
AllData(pnum).TBins = TimeBins;
AllData(pnum).FBins = FreqBins;
    
clear temp this_featureset
temp = []
this_featureset = np.zeros((NTBins, NFBins, 306, 920))
for tnum in range(0, (NTBins-1)):
    for fnum in range(0, (NFBins-1)):
        temp = MEG_Power[:, FreqInds[fnum]:FreqInds[fnum + 1]]
        temp = AllData(pnum).EncMainEEG_dB(FreqInds(fnum):FreqInds(fnum+1),...
            TimeInds(tnum):TimeInds(tnum+1),:,:);
        this_featureset(fnum,tnum,:,:) = zscore(median(squeeze(median(temp,1)),1));
            
        end
    end
    
AllData(pnum).EncMainEEGFeatures = this_featureset;
    
sprintf('Finished participant %d',pnum)

SyntaxError: invalid syntax (<ipython-input-21-06e724093f4b>, line 16)

In [None]:


# Classify using the average signal in the window 50ms to 300ms
# to focus the classifier on the time interval with best SNR.
clf = make_pipeline(StandardScaler(),
                    LogisticRegression(C=1, solver='liblinear',
                                       multi_class='auto'))

#y = epochs.events[:, 2]
y_sup = (epochs.events[:, 2] > 48).astype(int) #set up superordinate classification label. 
classes = set(y_sup) 

#cv = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)

# Compute confusion matrix for each cross-validation fold
#y_pred = np.zeros((len(y_sup), len(classes)))
#for train, test in cv.split(X, y_sup):
    # Fit
 #   clf.fit(X[train], y_sup[train])
    # Probabilistic prediction (necessary for ROC-AUC scoring metric)
  #  y_pred[test] = clf.predict_proba(X[test])

In [14]:
y_sup = (epochs.events[:, 2] > 48).astype(int)
y_sup.shape
classes

NameError: name 'classes' is not defined

In [None]:
# Data trianing testing split

In [None]:
#Regularized Regression Lasso / Rigid / Logistics 

In [None]:
#SVM

In [None]:
#RVM

In [None]:
#RF

In [90]:
confusion = np.zeros((len(classes), len(classes)))
for ii, train_class in enumerate(classes):
    for jj in range(ii, len(classes)):
        confusion[ii, jj] = roc_auc_score(y_sup == train_class, y_pred[:, jj])
        confusion[jj, ii] = confusion[ii, jj]
confusion  

array([[0.67340695, 0.32659305],
       [0.32659305, 0.67340695]])