<a href="https://colab.research.google.com/github/NeuroTechBSB/BR41N.IO-2025/blob/main/ecog_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing some suggested pipelines

# Standard library
import os
import random

# Library used to return all file path names that match a specific pattern (helps to open files in a chosen directory)
from glob import glob # help using glob: https://builtin.com/software-engineering-perspectives/glob-in-python

# MNE library to handle ECoG data (mne-bids provides BIDSPath / read_raw_bids)
!pip install mne
!pip install mne-bids
import mne
from mne_bids import BIDSPath, read_raw_bids
from mne.viz import plot_alignment, snapshot_brain_montage

# Useful python libraries to do data analysis/visualizations
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Keras is a deep learning API capable of running on top of TensorFlow
import keras
from keras import layers

# Library for machine learning and artificial intelligence
import tensorflow as tf

# Library for machine learning
from sklearn import preprocessing, model_selection

In [None]:
# Upload the data files into the Colab session storage (the current working directory).
# The uploaded files are temporary: they disappear when the session ends.
from google.colab import files

# Keep the returned {filename: content} dict in a variable; as a bare last
# expression it would be echoed (raw file bytes included) as the cell output.
uploaded = files.upload() # this will prompt you to pick one or more local files
print("Uploaded:", list(uploaded))

## Alternatively (if you want to use a file that is already stored in Google Drive):
# from google.colab import drive
# drive.mount('/content/drive')
# !cp /content/drive/MyDrive/FILE.zip /content
# !unzip /content/FILE.zip -d data
# (to silence the unzip log, put %%capture on the FIRST line of the cell; cell magics do not work mid-cell)


In [None]:
# Usefull functions (that I know and I think can be useful for this analysis, though not in the pipeline rn)

!unzip FILE.zip # Unzipping a FILE.zip
!pwd # checking currently folder
!ls # to know which files are located in a directory
%cd # perminantly and explictly change the current path
!cd # command ONLY temporarily changes the current path during the one line shell script is running. After that, the IPython intepreter will inplictly change back to the ‘original’ directory
!mkdir # make a new directory (ex: !mkdir /content/name_directory)
files = os.listdir('FILE') # returns a list containing the names of the files within the given directory
print(FILE) # visualize a file
print(type(FILE)) # visualize a file type/extension
FILE.head()

# set search path and glob for files
# here we want to look for csv files in the input directory
path = 'input'
files = glob.glob(path + '/*.csv')

df = pd.read_csv(r'FILE.csv') # to read a .csv file
print(df)

df = (df - df.mean()) / df.std() # to normalize data


In [None]:
## Loading and pre-processing the data
# Using as references:
#   (1) https://mne.tools/stable/auto_tutorials/clinical/30_ecog.html  (for the MNE-related documentation for processing ECoG)
#   (2) https://github.com/talhaanwarch/youtube-tutorials/blob/main/BCI_Competition_IV.ipynb  (for some of the pre-processing steps)

########## TODO: verify the stimulus information of our project (this example uses an epilepsy dataset, so we need to adapt it to what we have)

# Name of the annotation/event the epochs are built around ("onset" = seizure onset in the example dataset).
EVENT_NAME = "onset"
# Power line frequency in Hz: 60 or 50, depending on the country where the data was recorded.
LINE_FREQ = 60

# Paths used by the example in reference (1); both datasets are downloaded on first use.
# bids_root: the epilepsy ECoG recording; subjects_dir: FreeSurfer subjects folder used for the MNI fiducials.
bids_root = mne.datasets.epilepsy_ecog.data_path()
subjects_dir = mne.datasets.sample.data_path() / "subjects"

# first define the bids path. This will create the structure to organize the dataset info
bids_path = BIDSPath(
    root=bids_root,
    subject="pt1",
    session="presurgery",
    task="ictal",
    datatype="ieeg",
    extension=".vhdr",
)

# Then we'll use it to load in the sample dataset. This function changes the units of some channels, so we suppress a related warning here by using verbose='error'.
raw = read_raw_bids(bids_path=bids_path, verbose="error")

# Pick only the ECoG channels, removing the EKG channels
raw.pick(picks="ecog")

# Load the data
raw.load_data()

# Check for events info. events_from_annotations returns a tuple:
# the (n_events, 3) events array and the {description: code} mapping.
events, event_id = mne.events_from_annotations(raw)
print(event_id)

# Remove line frequency interference
raw.notch_filter([LINE_FREQ], trans_bandwidth=3)

# drop bad channels
raw.drop_channels(raw.info["bads"])

# the coordinate frame of the montage
montage = raw.get_montage()
print(montage.get_positions()["coord_frame"])

# add fiducials to montage
montage.add_mni_fiducials(subjects_dir)

# now with fiducials assigned, the montage will be properly converted to "head" which is what MNE requires internally (this is the coordinate
# system with the origin between LPA and RPA whereas MNI has the origin at the posterior commissure)
raw.set_montage(montage)

# Make a 25 second epoch that spans before and after the stimulus onset ########## TODO: verify the time of the stimulus in our project
epoch_length = 25  # seconds
epochs = mne.Epochs(
    raw,
    events,  # passed explicitly so this also works on MNE versions where `events` is required
    event_id={EVENT_NAME: event_id[EVENT_NAME]},  # keeps the event name usable as epochs[EVENT_NAME]
    tmin=13,
    tmax=13 + epoch_length,
    baseline=None,
)

# Check epochs shape (printed explicitly: a bare expression in the middle of a cell shows nothing)
print(epochs.get_data().shape)

# Make evoked from the one epoch and resample
evoked = epochs.average().resample(200)

# Creating evoked potentials for the selected event (can be useful)
evoked_stimulus = epochs[EVENT_NAME].average()

# Free the memory only after the last use of `epochs`
del epochs


In [None]:
# Suggestion for an SVM classification model (YET TO ADAPT TO THE DATASET OF THE PROJECT).
# Using as references:
#   (1) https://keras.io/examples/timeseries/eeg_signal_classification/   (using only some useful functions from this pipeline)
#   (2) https://www.kaggle.com/code/oliverright/eeg-brain-signals-emotion-classification   (using only some useful functions from this pipeline)
#   (3) https://www.kaggle.com/code/parhammostame/eeg-eye-state-classification-using-kernel-svm  (using the SVM model proposed)

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Column of `df` holding the target labels ##### ADAPT to the project's dataset
LABEL_COLUMN = 'eyeDetection'
# Seed of the train/test split, fixed for reproducibility
SEED = 48

unique_labels = df[LABEL_COLUMN].unique()
print(unique_labels, "\n") # Show the unique labels of the dataset
print(len(unique_labels), "\n") # Show how many unique labels there are in the dataset

# separate targets so you can preprocess the EEG data easily
Y = df[LABEL_COLUMN]
print( Y.shape )
X = df.drop(columns=LABEL_COLUMN)
print( X.shape )
display(X.head()) # display() is needed: a bare expression in the middle of a cell shows nothing

###### train and test a Kernel SVM ################

# split train test data (stratified so both sets keep the class proportions)
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=SEED, test_size=0.2, stratify=Y, shuffle=True)

# Showing how many samples are in the test and in the training group
print(
    f"Length of x_train : {len(X_train)}\nLength of x_test : {len(X_test)}\nLength of y_train : {len(y_train)}\nLength of y_test : {len(y_test)}"
)

# normalize the features (the scaler is fitted on the training set only, to avoid leaking test statistics)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# train with grid search
svc = SVC()
parameters = {'gamma': [0.1, 1, 10], 'C': [0.1, 1, 10]}
clf = GridSearchCV(svc, parameters)
clf.fit(X_train, y_train)

# predict labels (used for the confusion matrix)
y_pred = clf.predict(X_test)

# ROC AUC score (not accuracy / r2). It is computed from the continuous decision
# values of the best estimator rather than from the thresholded labels.
# NOTE: this form assumes a binary target; adapt for multi-class labels.
y_score = clf.decision_function(X_test)
results = roc_auc_score(y_test, y_score)

# print score
print( 'Score is: ' + str( results ) )
print( 'Best params for the kernel SVM is: ' + str(clf.best_params_) )

##### Confusion matrix ##############

from sklearn.metrics import confusion_matrix

# confusion matrix estimation; normalize='true' divides each row by the number
# of samples of that true class, so every value lies in [0, 1]
conf = confusion_matrix(y_test, y_pred, normalize='true')

# Explicit figure/axes instead of the pyplot state machine
fig, ax = plt.subplots(figsize=(8, 6))
# vmin/vmax pin the colour scale to the full [0, 1] range of the normalized matrix
sns.heatmap(conf, annot=True, cmap='seismic', annot_kws={'fontsize':18}, vmin=0, vmax=1, ax=ax)
ax.set_title('AUC score: ' + str(round(results, 2)), fontsize=15)
# the heatmap draws cell i between i and i+1, so the cell centres are at 0.5 and 1.5
# (two tick labels: this assumes a binary target ##### ADAPT)
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(['Eyes-closed', 'Eyes-open'])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(['Eyes-closed', 'Eyes-open'])
ax.set_ylabel('True label', fontsize=15)
ax.set_xlabel('Predicted label', fontsize=15)
plt.show()