## Package Initialization

In [2]:
import numpy as np
import os
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler, label_binarize
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.patches import Ellipse
import matplotlib.cm as cm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, roc_auc_score
from sklearn.svm import SVC

os.environ["OMP_NUM_THREADS"] = '1'
import warnings
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from scipy.ndimage import gaussian_filter1d
from matplotlib.lines import Line2D
from sklearn.metrics import silhouette_score
from sklearn.metrics import davies_bouldin_score
warnings.filterwarnings('ignore')

# Multi-class Supervised Machine-learning for Audiotory Stimuli on Comatose Patients

This is a sandbox for intializing supervised learning on this project.

The main idea is to use machine learning to classify EEG responses to different audiotory stimuli.

The labels will be resting, medical and familiar.
These three labels will be used, in order for the model to predict stimuli category from unseen epochs. 
The main goal will be to evaluate the model with accuracy, precision and recall from a confusion matrix and ROC-curve

## Data importing

In [3]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler

path = '1_csv'
folder = 'C:\\Users\\RJEN0307\\Desktop\\Bachelorprojekt\\Data\\fif files\\epochs_csv\\'

full_path = os.path.join(folder, path)

patient_numbers = []
patient_data = {}
X_all = []
y_all = []

for file in os.listdir(full_path):
    filename = os.fsdecode(file)
    patient_number = filename.split('_')[0] 
    patient_numbers.append(patient_number)
    patient_file_dir = os.path.join(full_path, filename)
    data = pd.read_csv(patient_file_dir)
    data.rename(columns={'Unnamed: 0': 'Index'}, inplace=True)
    data['Event'] = data['Event'].map({'R': 0, 'M': 1, 'F': 2})

    resting_data = data[data['Event'] == 0]
    moving_data = data[data['Event'] == 1]
    familiar_data = data[data['Event'] == 2]

    target_count = min(moving_data.shape[0], familiar_data.shape[0])

    resting_data = resting_data.iloc[-target_count:]

    resting_data = resting_data.reset_index(drop=True)
    moving_data = moving_data.reset_index(drop=True)
    familiar_data = familiar_data.reset_index(drop=True)

    balanced_data = pd.concat([resting_data, moving_data, familiar_data]).reset_index(drop=True)

    standarize_list = ['PSD Delta', 'PSD Delta_N', 'PSD Theta', 'PSD Theta_N', 'PSD Alpha', 'PSD Alpha_N', 
                       'PSD Beta', 'PSD Beta_N', 'PSD Gamma', 'PSD Gamma_N', 'PSD SE', 'PSD MSF', 'PSD Sef90', 
                       'PSD Sef95', 'PE', 'wSMI', 'Kolmogorov', 'Freq_Slope mean', 'Freq_Slope std']

    scaler = StandardScaler()
    balanced_data[standarize_list] = scaler.fit_transform(balanced_data[standarize_list])

    patient_data[patient_number] = balanced_data
    
    X = balanced_data[standarize_list].values
    y = balanced_data['Event'].values 

    X_all.append(X)
    y_all.append(y)


    print(f'Loaded and balanced data for patient {patient_number}')

print(patient_data['p10'])

X_all = np.vstack(X_all)
y_all = np.hstack(y_all)

print("Shape of X (features):", X_all.shape)
print("Shape of y (labels):", y_all.shape)

Loaded and balanced data for patient p10
Loaded and balanced data for patient p11
Loaded and balanced data for patient p12
Loaded and balanced data for patient p13
Loaded and balanced data for patient p14
Loaded and balanced data for patient p15
Loaded and balanced data for patient p16
Loaded and balanced data for patient p17
Loaded and balanced data for patient p18
Loaded and balanced data for patient p19
Loaded and balanced data for patient p20
Loaded and balanced data for patient p21
Loaded and balanced data for patient p22
Loaded and balanced data for patient p23
Loaded and balanced data for patient p24
Loaded and balanced data for patient p25
Loaded and balanced data for patient p27
Loaded and balanced data for patient p28
Loaded and balanced data for patient p29
Loaded and balanced data for patient p2
Loaded and balanced data for patient p30
Loaded and balanced data for patient p31
Loaded and balanced data for patient p32
Loaded and balanced data for patient p33
Loaded and balanc

## Logistic Regression Model

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.2, random_state=42)

In [5]:
clf = LogisticRegression()
model = clf.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_score = clf.predict_proba(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

print(y_score)



Accuracy: 0.367327919052057
[[1064  799  475]
 [ 848 1041  663]
 [ 852 1115  654]]
[[0.32324677 0.32893934 0.34781389]
 [0.26559532 0.35340028 0.3810044 ]
 [0.40479642 0.27667693 0.31852665]
 ...
 [0.40955023 0.30413689 0.28631288]
 [0.23321571 0.40145278 0.36533151]
 [0.37199551 0.31638043 0.31162406]]


## Support Vector Machine

In [6]:
X_train = X_test = y_train = y_test = logistic_model = y_pred = y_score = accuracy = fpr = tpr = roc_auc = clf = None

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.2, random_state=42)

In [8]:
svm = SVC(kernel='linear', probability=True, random_state=42, decision_function_shape='ovr')  # 'ovr' is One-vs-Rest
clf = svm.fit(X_train, y_train)
y_pred = clf.predict(X_test)
decision_scores = svm.decision_function(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nDecision Function Output (First 5 Samples):")
print(decision_scores[:5])
