In [1108]:
# import all the relevant libraries
import wfdb
import mne
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier,AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, hamming_loss
from sklearn.pipeline import make_pipeline,Pipeline
from sklearn.preprocessing import FunctionTransformer,StandardScaler
from mne.datasets import sample
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from mne_features.feature_extraction import extract_features
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest,f_classif
import neurokit2 as nk
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import SMOTE

In [1059]:
# This function takes in the record path of a PSG file and outputs the raw annotated data. It renames, changes types and removes channels so that they
# are consistent between different patients. Some patients had more/less data collected through different manners which had to be fixed.
def epoch_extraction(record_path):
    record = wfdb.rdrecord(record_path)
    annotation = wfdb.rdann(record_path, 'st')
    
    annotation_mne = mne.Annotations(
        onset=annotation.sample / record.fs,
        duration=30,
        description=annotation.aux_note
    )
    
    signals = record.p_signal  # Signal data as NumPy array
    sampling_rate = record.fs  # Sampling frequency (e.g., 250 Hz)
    channel_names = record.sig_name  # Channel names
    seen_categories = set()
    
    # Assign types while ensuring only the first occurrence is assigned correctly
    channel_types = []
    channel_names_updated = []
    
    for ch in channel_names:
        if "EEG" in ch and "EEG" not in seen_categories:
            channel_types.append("eeg")
            channel_names_updated.append("EEG")
            seen_categories.add("EEG")
        elif "Resp" in ch and "Resp" not in seen_categories:
            channel_types.append("resp")
            channel_names_updated.append("Respiratory")
            seen_categories.add("Resp")
        elif "ECG" in ch and "ECG" not in seen_categories:
            channel_types.append("ecg")
            channel_names_updated.append("ECG")
            seen_categories.add("ECG")
        else:
            channel_types.append("misc")
            channel_names_updated.append(ch)
    # Create MNE Info object
    info = mne.create_info(
        ch_names=channel_names_updated,
        sfreq=sampling_rate,
        ch_types=channel_types
    )
    # Convert signals to MNE RawArray
    raw = mne.io.RawArray(signals.T, info)
    
    # Add annotations to the raw object
    raw.set_annotations(annotation_mne)
    misc_channels = [ch for ch, ch_type in zip(channel_names_updated, channel_types) if "misc" in ch_type]
    raw.drop_channels(misc_channels)
    # Reclassify events. The original annotations had descriptions that were not relevant to this classifier. Everything is relabled either
    event_labels = {
        1: "Hypopnea",
        2: "Obstructive Apnea",
        3: "Central Apnea",
        4: "No Apnea Event"
    }
    new_id = {}
    for desc in raw.annotations.description:
        if "H" in desc or "HA" in desc:  # Hypopnea
            new_id[desc] = 1
        elif " A" in desc or "X" in desc:  # Obstructive Apnea
            new_id[desc] = 2
        elif "CA" in desc or "CAA" in desc:  # Central Apnea
            new_id[desc] = 3
        else:  # Other
            new_id[desc] = 4
    
    # Generate events with the new mapping
    events, event_id = mne.events_from_annotations(raw, event_id=new_id)
    
    new_annotations = mne.Annotations(
        onset=events[:, 0] / raw.info["sfreq"],  # Convert sample index to seconds
        duration=[30] * len(events),  # Assume each event lasts 30 seconds
        description=[event_labels[e] for e in events[:, 2]]  # Convert IDs back to labels
    )
    tmax = 30.0 - 1.0 / raw.info["sfreq"]  # tmax in included
    epochs = mne.Epochs(
            raw,
            events=events,
            tmin=0.0,
            tmax=tmax,
            baseline=None,
            # picks=["ECG",'Respiratory',"EEG"]
            picks="ECG"
        )
    return epochs

In [778]:
# compile a list of raw data for each subject and combine it into a single large raw file.
List_of_subjects=open(r"\Users\piotr\Desktop\PSG data\List of subjects.txt","r").read().split("\n")
list_of_epochs=[]
for i in List_of_subjects:
    record_path = fr"C:\Users\piotr\Desktop\PSG data\MIT Data\{i}"
    list_of_epochs.append(epoch_extraction(record_path))
concat_epochs=mne.concatenate_epochs(list_of_epochs)

In [917]:
def feature_extractor(epochs):
    df = nk.mne_to_df(epochs)
    
    # Initialize list for storing extracted features
    features_list = []
    
    for epoch_id in epochs.selection:  
        # Extract ECG signal for current epoch
        ecg_signal = df[df["Epoch"] == epoch_id]["ECG"].values  
    
        peaks, _ = nk.ecg_peaks(ecg_signal, sampling_rate=250)
    
        # Compute HRV features (ONLY Time & Frequency, WITHOUT Nonlinear features)
        hrv_time = nk.hrv_time(peaks, sampling_rate=250)
        hrv_freq = nk.hrv_frequency(peaks, sampling_rate=250)
        hrv_nonlinear = nk.hrv_nonlinear(peaks, sampling_rate=250)
        # Combine results
        hrv_features = pd.concat([hrv_time, hrv_freq,hrv_nonlinear], axis=1)
        features_list.append(hrv_features.iloc[0])
    features_df = pd.DataFrame(features_list)
    return features_df

In [979]:
list_of_df=[]
z=0
for i in list_of_epochs:
    list_of_df.append(feature_extractor(i))
    print(z)
    z+=1
merged=pd.concat(list_of_df)
merged.replace([np.inf, -np.inf], np.nan)
merged.dropna(axis=1,inplace=True)

  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse

0


  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(m

1


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(


2


  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(


3


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(

4


  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  ms

5


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


6


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(

7


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


8


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  w

9


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(m

10


  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse

11


  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  

12


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  

13


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(ms

14


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  mse = np.trapz(mse) / len(mse)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(

15


  warn(


In [965]:
#I deleted 4th indexed list of epochs
#Then deleted 13

In [1021]:
merged=pd.concat(list_of_df)
merged.replace([np.inf, -np.inf], np.nan.inplace=True)
merged.dropna(axis=1,inplace=True)

In [1068]:
pipe = Pipeline([('scaling', StandardScaler()),("selection", SelectKBest(f_classif, k=10)),
                 ('learning', GradientBoostingClassifier(n_estimators=100, random_state=41))])
X=merged.to_numpy()
y=concat_epochs.events[:, 2]
cross_val_score(pipe,X,y)

array([0.81356899, 0.7622577 , 0.71363377, 0.75356532, 0.70450656])

In [1092]:
X=merged.to_numpy()
y=concat_epochs.events[:, 2]
names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025, random_state=42),
    SVC(gamma=2, C=1, random_state=42),
    DecisionTreeClassifier(max_depth=5, random_state=42),
    RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=42
    ),
    MLPClassifier(alpha=1, max_iter=1000, random_state=42),
    AdaBoostClassifier(random_state=42),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]
for name, clf in zip(names, classifiers):
    pipe = make_pipeline(StandardScaler(), SelectKBest(f_classif, k=10), clf)
    cross=cross_val_score(pipe,X,y)
    print(f"average cross validation of {name}")
    print(np.mean(cross))

average cross validation of Nearest Neighbors
0.6499213922898748
average cross validation of Linear SVM
0.7484886309899758
average cross validation of RBF SVM
0.7309146529064688
average cross validation of Decision Tree
0.7440355383603674
average cross validation of Random Forest
0.7531605373033751
average cross validation of Neural Net
0.7350223529495942
average cross validation of AdaBoost
0.7375364337142191
average cross validation of Naive Bayes
0.611936598670076
average cross validation of QDA
0.592440520599643


In [1100]:
X=merged.to_numpy()
y_binary=[]
for i in y:
    if i==4:
        y_binary.append(0)
    else:
        y_binary.append(1)
names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025, random_state=42),
    SVC(gamma=2, C=1, random_state=42),
    DecisionTreeClassifier(max_depth=5, random_state=42),
    RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=42
    ),
    MLPClassifier(alpha=1, max_iter=1000, random_state=42),
    AdaBoostClassifier(random_state=42),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]
for name, clf in zip(names, classifiers):
    pipe = make_pipeline(StandardScaler(), SelectKBest(f_classif, k=10), clf)
    cross=cross_val_score(pipe,X,y_binary)
    print(f"average cross validation of {name}")
    print(np.mean(cross))

average cross validation of Nearest Neighbors
0.7011416818602545
average cross validation of Linear SVM
0.747119549415532
average cross validation of RBF SVM
0.7357081296048279
average cross validation of Decision Tree
0.739811536632754
average cross validation of Random Forest
0.7422110069006967
average cross validation of Neural Net
0.7442621575263386
average cross validation of AdaBoost
0.7277232514256389
average cross validation of Naive Bayes
0.6911055880097386
average cross validation of QDA
0.715629567426682


In [1104]:
pipe = Pipeline([('scaling', StandardScaler()),("selection", SelectKBest(f_classif, k=10)),
                 ('learning',RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=42))])
X=merged.to_numpy()
y=concat_epochs.events[:, 2]
np.mean(cross_val_score(pipe,X,y))

np.float64(0.7531605373033751)

In [1106]:
rf = RandomForestClassifier(random_state=42)
pipe = Pipeline([('scaling', StandardScaler()), ('learning', rf)])

scores = cross_val_score(pipe, X, y, cv=5, scoring="accuracy")  # 5-fold cross-validation
print(f"Baseline Random Forest Accuracy: {scores.mean():.4f} ± {scores.std():.4f}")

Baseline Random Forest Accuracy: 0.6967 ± 0.0850


In [1110]:
best_k = []
scores = []

for k in range(5, X.shape[1], 5):  # Test k from 5 to the total number of features
    selector = SelectKBest(f_classif, k=k)
    X_new = selector.fit_transform(X, y)

    pipe = Pipeline([
        ('scaling', StandardScaler()), 
        ('selection', SelectKBest(f_classif, k=k)), 
        ('learning', RandomForestClassifier(random_state=42))
    ])
    
    score = cross_val_score(pipe, X_new, y, cv=5, scoring="accuracy").mean()
    best_k.append(k)
    scores.append(score)

# Find k with the highest accuracy
optimal_k = best_k[np.argmax(scores)]
print(f"Optimal k: {optimal_k} with accuracy: {max(scores):.4f}")

Optimal k: 10 with accuracy: 0.7235


In [1111]:
pipe = Pipeline([
    ('scaling', StandardScaler()), 
    ('selection', SelectKBest(f_classif, k=optimal_k)), 
    ('learning', RandomForestClassifier(n_estimators=200, random_state=42))
])

cross_val_score(pipe, X, y, cv=5, scoring="accuracy").mean()

np.float64(0.7200770010817097)

In [1114]:
param_grid = {
    "n_estimators": [100, 200, 300],   # Number of trees
    "max_depth": [10, 15, 20],         # Tree depth
    "min_samples_split": [2, 5, 10],   # Min samples to split a node
    "min_samples_leaf": [1, 2, 5],     # Min samples per leaf
    "max_features": ["sqrt", "log2"]   # Number of features per tree
}

# Create the model
rf = RandomForestClassifier(random_state=42)

# Grid Search with Cross Validation
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring="accuracy", n_jobs=-1, verbose=2)
grid_search.fit(X, y)

# Print the best parameters
print("Best parameters:", grid_search.best_params_)
print("Best accuracy:", grid_search.best_score_)

Fitting 5 folds for each of 162 candidates, totalling 810 fits
Best parameters: {'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 5, 'min_samples_split': 2, 'n_estimators': 200}
Best accuracy: 0.7149396278476188


In [1150]:
pipe = Pipeline([
    ('scaling', StandardScaler()), 
    ('selection', SelectKBest(f_classif, k=optimal_k)), 
    ('learning', RandomForestClassifier(max_depth=10,max_features= 'log2',min_samples_leaf=5,min_samples_split=2,n_estimators=200, random_state=42))
])

cross_val_score(pipe, X, y, cv=5, scoring="accuracy").mean()

np.float64(0.7393556314277332)

In [1152]:
X_train,X_test,y_train,y_test=train_test_split(X,y)

In [1174]:
pipe.fit(X_train,y_train)
y_pred=pipe.predict(X_test)
report=classification_report(y_test,y_pred)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [1175]:
print(report)

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       183
           2       0.70      0.18      0.28       328
           3       0.00      0.00      0.00        32
           4       0.77      0.99      0.87      1649

    accuracy                           0.77      2192
   macro avg       0.37      0.29      0.29      2192
weighted avg       0.68      0.77      0.69      2192



In [1184]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)



In [1188]:
pipe.fit(X_train_resampled,y_train_resampled)
y_pred=pipe.predict(X_test)
report=classification_report(y_test,y_pred)

In [1192]:
print(report)

              precision    recall  f1-score   support

           1       0.18      0.38      0.25       183
           2       0.38      0.59      0.46       328
           3       0.05      0.34      0.09        32
           4       0.89      0.59      0.71      1649

    accuracy                           0.57      2192
   macro avg       0.38      0.48      0.38      2192
weighted avg       0.74      0.57      0.62      2192



In [1200]:
accuracy_score(y_pred,y_test)

0.5670620437956204