#**Installing MNE** 

In [None]:
! pip install mne

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mne
  Downloading mne-1.0.3-py3-none-any.whl (7.5 MB)
[K     |████████████████████████████████| 7.5 MB 5.0 MB/s 
Installing collected packages: mne
Successfully installed mne-1.0.3


# **Starting Time**


In [None]:
import time
start = time.time()

# **Connecting Colab To Drive**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Importing the Necessary Libraries**

In [None]:
from glob import glob # it help us to read all files in the Folder 
import os
import mne # It is a Package which is Used to Analysis the EEG Dataset
from numpy import array
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# **Importing the Dataset**

In [None]:
all_file_path = glob('/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/*.fif')
print(len(all_file_path))

58


In [None]:
all_file_path

['/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd1_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd10_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd11_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd13_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd14_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd15_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd17_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd18_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/mdd19_ec_cropped_raw.fif',
 '/content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fi

# **Separating the Healthy Subjects and MDD Patients**

In [None]:
healthy_file_path = [i for i in all_file_path if 'h' in i.split('/')[6]]
patient_file_path = [i for i in all_file_path if 'm' in i.split('/')[6]]
print(len(healthy_file_path), len(patient_file_path))

28 30


# **Reading the File Path and Getting the Data from One File**

In [None]:
def read_data(file_path):
    data = mne.io.read_raw_fif(file_path, preload = True)
    data.set_eeg_reference()
    data.filter(l_freq = 0.5, h_freq = 60)
    epochs = mne.make_fixed_length_epochs(data, duration = 5, overlap = 2) # it splits the data into Segments
    array = epochs.get_data()
    return array

In [None]:
sample_data = read_data(healthy_file_path[0])

Opening raw data file /content/drive/MyDrive/2022-04-26_project_emmanuel/EC_Healthy_and_MDD_fif/h10_ec_cropped_raw.fif...
Isotrak not found
    Range : 0 ... 96255 =      0.000 ...   375.996 secs
Ready.
Reading 0 ... 96255  =      0.000 ...   375.996 secs...
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 60 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 60.00 Hz
- Upper transition bandwidth: 15.00 Hz (-6 dB cutoff frequency: 67.50 Hz)
- Filter length: 1691 samples (6.605 sec)

Not setting metadata
124 matching events found
No baseline corre

In [None]:
sample_data.shape # no. of epochs, channels, length of signals 

(124, 20, 1280)

# **Reading the File Path and Getting the Data from All File**

In [None]:
%%capture
control_epochs_array = [read_data(i) for i in healthy_file_path]
patient_epochs_array = [read_data(i) for i in patient_file_path]

In [None]:
control_epochs_array[0].shape, control_epochs_array[1].shape, control_epochs_array[2].shape, control_epochs_array[3].shape, control_epochs_array[4].shape#, control_epochs_array[5].shape, control_epochs_array[6].shape, control_epochs_array[7].shape, control_epochs_array[8].shape, control_epochs_array[9].shape, control_epochs_array[10].shape, control_epochs_array[11].shape, control_epochs_array[12].shape, control_epochs_array[38].shape, control_epochs_array[14].shape

((124, 20, 1280),
 (99, 20, 1280),
 (99, 20, 1280),
 (99, 20, 1280),
 (100, 20, 1280))

In [None]:
control_epochs_array[0].shape

(124, 20, 1280)

In [None]:
patient_epochs_array[0].shape, patient_epochs_array[1].shape, patient_epochs_array[2].shape, patient_epochs_array[3].shape#, control_epochs_array[4].shape#, control_epochs_array[5].shape#, control_epochs_array[6].shape#, control_epochs_array[7].shape,  

((100, 20, 1280), (99, 20, 1280), (99, 20, 1280), (99, 20, 1280))

# **Creating the Labels**

In [None]:
control_epochs_labels = [len(i)*[0] for i in control_epochs_array]
patient_epochs_labels = [len(i)*[1] for i in patient_epochs_array]
len(control_epochs_labels), len(patient_epochs_labels)

(28, 30)

# **Combining Both the Healthy and MDD Files**

In [None]:
data_list = control_epochs_array + patient_epochs_array
label_list = control_epochs_labels + patient_epochs_labels

# **Spletting the Dataset in the Basis of Epochs**

In [None]:
epochs_array=control_epochs_array+patient_epochs_array
epochs_labels=control_epochs_labels+patient_epochs_labels
print(len(epochs_array),len(epochs_labels))

58 58


# **Creating the Groups**

In [None]:
groups = [[i]*len(j) for i,j in enumerate(data_list)]
len(groups)

58

# **Converting the Data from List to array**

In [None]:
data_array = np.vstack(data_list)
label_array = np.hstack(label_list)
groups_array = np.hstack(groups)
print(data_array.shape, label_array.shape, groups_array.shape)

(5735, 20, 1280) (5735,) (5735,)


# **Feature Extraction**

In [None]:
from scipy import stats

def mean(x):
    return np.mean(x,axis=-1)
def std(x):  
    return np.std(x,axis=-1)
def ptp(x):
    return np.ptp(x,axis=-1)
def var(x):
    return np.var(x,axis=-1)
def minim(x):
    return np.min(x,axis=-1)
def maxim(x):
    return np.max(x,axis=-1)
def argminim(x):
    return np.argmin(x,axis=-1) 
def argmaxim(x):
    return np.argmax(x,axis=-1)   
                   
def abs_diff_signals(x):
    return np.sum(np.abs(np.diff(x,axis=-1)),axis=-1)
def skewness(x):
    return stats.skew(x,axis=-1)
def kurtosis(x):
    return stats.kurtosis(x,axis=-1)
#def rms(x):
#    return np.sqrt(np.mean(x**2,axis=-1)
def concatenate_features(x):
    return np.concatenate((mean(x),std(x),ptp(x),var(x),minim(x),maxim(x),argminim(x),argmaxim(x),abs_diff_signals(x),\
                           skewness(x),kurtosis(x)),axis=-1)  

# **Loop to Extract the Features**

In [None]:
features=[]
for d in data_array:
    features.append(concatenate_features(d))

# **Converting the Features list into Features Arrays**

In [None]:
features_array=np.array(features)
features_array.shape

(5735, 220)

# **Logistic Regression for Classification**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline # it use a PipeLine for Scaling with classification purpose
from sklearn.preprocessing import StandardScaler # It use for Scaling
from sklearn.model_selection import GroupKFold,GridSearchCV # 'GroupKFold' for Splitting the Dataset, based on the Groups not on the bases of trails.
      # 'GridSearchCV' for Hyper Perameter Tuning

In [None]:
clf=LogisticRegression() # Defining Classifier
gkf=GroupKFold(5) # 5 groups
pipe=Pipeline([('scalar',StandardScaler()),('clf',clf)])
param_grid={'clf__C':[0.1,0.3,0.5,0.7,1,3,5,7]} # Random Parameters
gscv=GridSearchCV(pipe,param_grid,cv=gkf,n_jobs=12)
gscv.fit(features_array,label_array,groups=groups_array)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


GridSearchCV(cv=GroupKFold(n_splits=5),
             estimator=Pipeline(steps=[('scalar', StandardScaler()),
                                       ('clf', LogisticRegression())]),
             n_jobs=12,
             param_grid={'clf__C': [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7]})

# **Accuracy**

In [None]:
gscv.best_score_

0.7365733527631197

# **End of the Time**

In [None]:
end = time.time()
print("Total time in sec:",end - start)

Total time in sec: 101.24415326118469
