In [1]:
## Import packages needed for this script
import numpy as np
import pandas as pd 
import mne

In [19]:
## Importing data and preprocessing data
data = pd.read_csv('Data/test_set.csv')
data = data.drop(columns = ['Unnamed: 0', 'err', 'file_loc', 'trial_no']) #, 'trial_no', 'patient_no'
data['alcoholic'] = data['alcoholic'].map({True:1, False:0})
data['match'] = data['match'].map({'obj':0, 'nomatch':1, 'match':2})

In [21]:
new_cols = [col for col in data.columns if col != 'patient_no'] + ['patient_no']
data = data[new_cols]
data.head()

Unnamed: 0,sample,AF1,AF2,AF7,AF8,AFZ,C1,C2,C3,C4,...,T7,T8,TP7,TP8,X,Y,nd,alcoholic,match,patient_no
0,0,-6.083,-7.64,-0.682,6.46,-4.71,7.762,-0.773,24.017,-2.319,...,-7.741,-8.921,-1.862,1.76,-2.441,-5.341,-3.56,1,0,co2a0000364
1,1,-4.618,-9.593,0.295,2.065,-3.733,-7.375,-0.285,-21.881,0.61,...,-6.276,-7.456,-3.326,1.272,-4.395,-5.341,-4.537,1,0,co2a0000364
2,2,-2.177,-7.151,-1.17,-5.259,-1.292,-2.981,1.18,-3.815,2.075,...,-2.37,-4.527,-2.838,-0.682,-4.883,-4.364,-5.025,1,0,co2a0000364
3,3,1.241,0.173,-3.611,-8.189,1.638,2.391,1.668,-5.28,-1.343,...,1.048,-2.574,-1.862,-2.635,-2.441,-3.876,-3.56,1,0,co2a0000364
4,4,2.706,8.962,-5.564,-4.283,4.079,2.391,1.18,0.58,-1.343,...,0.071,-3.062,-0.885,-3.611,0.488,-3.876,-0.631,1,0,co2a0000364


In [22]:
## Importing data and preprocessing data
# data = pd.read_csv('Data/test_set.csv')
# data = data.drop(columns = ['Unnamed: 0', 'err', 'file_loc', 'trial_no']) #, 'trial_no', 'patient_no'
# data['alcoholic'] = data['alcoholic'].map({True:1, False:0})
# data['match'] = data['match'].map({'obj':0, 'nomatch':1, 'match':2})

## Separate data between alcoholic and control
alcoholic = data[data['alcoholic']==True]
control = data[data['alcoholic']==False ]

alcoholic = alcoholic.drop(columns=['alcoholic'])
control = control.drop(columns=['alcoholic'])

In [23]:
## Setting channels and frequency for MNE 
ch_names = ['AF1', 'AF2', 'AF7', 'AF8', 'AFZ', 'C1', 'C2', 'C3', 'C4',
       'C5', 'C6', 'CP1', 'CP2', 'CP3', 'CP4', 'CP5', 'CP6', 'CPZ', 'CZ', 'F1',
       'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'FC1', 'FC2', 'FC3', 'FC4',
       'FC5', 'FC6', 'FCZ', 'FP1', 'FP2', 'FPZ', 'FT7', 'FT8', 'FZ', 'O1',
       'O2', 'OZ', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'PO1',
       'PO2', 'PO7', 'PO8', 'POZ', 'PZ', 'T7', 'T8', 'TP7', 'TP8', 'X', 'Y',
       'nd']

sfreq = 256 #Hz

In [24]:
def read_data(data):
    ## Setting data
    info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types='eeg')
    raw = mne.io.RawArray(data.transpose().iloc[1:-1], info) #Transpose data
    
    ## Adding Stimulus to data
    info = mne.create_info(['STI'], raw.info['sfreq'], ['stim']) 
    stim_raw = mne.io.RawArray(data.transpose().iloc[-1:], info) #Transpose data
    raw.add_channels([stim_raw], force_update_info=True) #

    ## Filtering data
    raw.filter(l_freq=0, h_freq=40, picks=ch_names)
    
    ## Epochs
    epochs = mne.make_fixed_length_epochs(raw, duration=1)

    array = epochs.get_data()
    
    
    return array

In [None]:
%%capture
a = read_data(control.iloc[:,:-1])

In [None]:
a.info

In [None]:
a.shape #no of events, no of channels (including stimulus), no_times
# no of events = 30 trials  x 10 patients in the control group 

In [25]:
control_patients = [patient for patient in control.patient_no.unique()]
alcoholic_patients = [patient for patient in alcoholic.patient_no.unique()]

In [26]:
%%capture
control_epochs_array = []
for patient in control_patients:
    control_epochs_array.append(read_data(control[control.patient_no==patient].iloc[:,:-1]))
    
alcoholic_epochs_array = []
for patient in alcoholic_patients:
    alcoholic_epochs_array.append(read_data(alcoholic[alcoholic.patient_no==patient].iloc[:,:-1]))

In [27]:
alcoholic_epochs_array[0].shape
#no of events (trails), no of channels (including stimulus), no_times

(30, 65, 256)

In [28]:
## Assign labels (y)
control_epoch_labels = [len(i)*[0] for i in control_epochs_array]
alcoholic_epoch_labels = [len(i)*[1] for i in alcoholic_epochs_array]

len(control_epoch_labels), len(alcoholic_epoch_labels)

(10, 10)

In [29]:
data_list = control_epochs_array + alcoholic_epochs_array
label_list = control_epoch_labels + alcoholic_epoch_labels

In [30]:
group_list = [[i]*len(j) for i,j in enumerate(data_list)]
len(group_list) #Total patients (control+alcoholic)

20

In [31]:
data_array = np.vstack(data_list) #Processed signal 
label_array = np.hstack(label_list) #label Al, C 
group_array = np.hstack(group_list) 

print(data_array.shape, label_array.shape, group_array.shape)

(600, 65, 256) (600,) (600,)


In [32]:
## Save arrays
np.save('Data/data_array_t.npy', data_array) 
np.save('Data/label_array_t.npy', label_array)
np.save('Data/group_array_t.npy', group_array)

In [33]:
epochs_times = epochs.times
np.save('Data/epochs_times_t', epochs_times)

NameError: name 'epochs' is not defined