# ***Group 7 Capstone Notebook***

## Data Loading, Pre-Processing, and Feature Extraction

In [3]:
# pip install mne==0.23.0

In [4]:
# pip install antropy

In [5]:
# Import Required Libraries
import mne
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from scipy.signal import welch
from antropy import spectral_entropy, app_entropy, sample_entropy
import warnings
warnings. filterwarnings('ignore')

# Function for Data Visualization
def plot_eeg(data_normal, data_fatigue, title, time, channels, n_channels=5):
    fig, axes = plt.subplots(n_channels, 1, figsize=(10, 7))
    fig.canvas.manager.set_window_title(title) # Set the window title

    #Plot the Corresponding Data for Normal and Fatigue
    for i in range(n_channels):
        axes[i].plot(time, data_normal[i], label=f'{channels[i]} - Normal', color='blue', alpha=0.7)
        axes[i].plot(time, data_fatigue[i], label=f'{channels[i]} - Sleepy', color='orange', alpha=0.7)
        axes[i].set_ylabel('Amplitude (µV)')
        axes[i].legend(loc='upper right', fontsize=8)
        if i == n_channels - 1:
            axes[i].set_xlabel('Time (s)')
    
    plt.tight_layout()  # Adjust layout  
    plt.show()

# Initialize lists to store features and labels
X_features = []  # To store Power Spectral Density (PSD) and Spectral Entropy features
y_features = []  # To store labels
X_psd = []       # To store Power Spectral Density features only
X_sp_ent = []    # To store Spectral Entropy features only

# Loop over all subjects
for i in range(1, 13):
    try:
        # DATA LOADING
        # Define file paths
        normal_file = f"C:\\Users\\ASUS\\Desktop\\Capstone\\{i}\\{i}\\Normal state.cnt"
        fatigue_file = f"C:\\Users\\ASUS\\Desktop\\Capstone\\{i}\\{i}\\Fatigue state.cnt"

        # Read EEG data for both normal and fatigue states
        raw_normal = mne.io.read_raw_cnt(normal_file, preload=True, verbose=False)
        raw_fatigue = mne.io.read_raw_cnt(fatigue_file, preload=True, verbose=False)
        channels = raw_normal.ch_names
        sfreq = raw_normal.info['sfreq']
        min_samples = min(raw_normal.get_data().shape[1], raw_fatigue.get_data().shape[1])
        time = np.linspace(0, min_samples / sfreq, min_samples)

        # Plot Raw EEG
        # Creates a Separate Window
        %matplotlib qt 
        plot_eeg(raw_normal.get_data()[:, :min_samples], raw_fatigue.get_data()[:, :min_samples], title=f'Raw EEG Subject {i}', time=time, channels=channels)
        
        # DATA PRE-PROCESSING
        # Apply Notch filter to remove 50 Hz line noise
        raw_normal.notch_filter(50, fir_design='firwin')
        raw_fatigue.notch_filter(50, fir_design='firwin')
        
        # Apply bandpass filter (0.15 Hz to 45 Hz)
        raw_normal.filter(0.15, 45, fir_design='firwin')
        raw_fatigue.filter(0.15, 45, fir_design='firwin')
        min_samples = min(raw_normal.get_data().shape[1], raw_fatigue.get_data().shape[1])
        time = np.linspace(0, min_samples / sfreq, min_samples)

        # Plot Pre-Processed EEG
        # Creates a Separate Window
        %matplotlib qt 
        plot_eeg(raw_normal.get_data()[:, :min_samples], raw_fatigue.get_data()[:, :min_samples], title=f'Pre-Processed EEG Subject {i}', time=time, channels=channels)

        # Generate 1-second epochs
        epochs_normal = mne.make_fixed_length_epochs(raw_normal, duration=1, preload=True)
        epochs_fatigue = mne.make_fixed_length_epochs(raw_fatigue, duration=1, preload=True)

        # FEATURE EXTRACTION
        # Extract Power Spectral Density (PSD) features
        psd_normal, freqs = mne.time_frequency.psd_multitaper(epochs_normal, fmin=0.15, fmax=45)
        psd_fatigue, _ = mne.time_frequency.psd_multitaper(epochs_fatigue, fmin=0.15, fmax=45)

        # Extract Spectral Entropy features
        sfreq = raw_normal.info['sfreq']  # Sampling frequency
        sp_entropy_normal = spectral_entropy(epochs_normal, sfreq, method="welch", normalize=True)
        sp_entropy_fatigue = spectral_entropy(epochs_fatigue, sfreq, method="welch", normalize=True)
        
        # Create labels
        y_normal = np.zeros(psd_normal.shape[0])  # Normal state: 0
        y_fatigue = np.ones(psd_fatigue.shape[0])  # Fatigue state: 1

        # Append PSD features and labels
        X_features.append(np.hstack((psd_normal[:,:,0], sp_entropy_normal)))
        X_features.append(np.hstack((psd_fatigue[:,:,0], sp_entropy_fatigue)))
        X_psd.append(psd_normal[:,:,0])
        X_psd.append(psd_fatigue[:,:,0])
        X_sp_ent.append(sp_entropy_normal)
        X_sp_ent.append(sp_entropy_fatigue)
        y_features.append(y_normal)
        y_features.append(y_fatigue)

        # Print processing status
        print(f"Processed Subject {i}: Normal {len(psd_normal)} epochs, Fatigue {len(psd_fatigue)} epochs")

    except Exception as e:
        print(f"Error processing subject {i}: {e}")

# Concatenate all subjects' PSD and Spectral Entropy features and labels
X_features = np.concatenate(X_features, axis=0)
X_psd = np.concatenate(X_psd, axis=0)
X_sp_ent = np.concatenate(X_sp_ent, axis=0)
y_features = np.concatenate(y_features, axis=0)
y_psd = y_features
y_sp_ent = y_features
X_psd_1 = X_psd

# Perform Feature Scaling
standard_scaler = StandardScaler()
X_features = standard_scaler.fit_transform(X_features)
X_psd = standard_scaler.fit_transform(X_psd)
X_sp_ent = standard_scaler.fit_transform(X_sp_ent)

# Power Spectral Density Features
print(f"PSD Features shape: {X_psd.shape}, Labels shape: {y_psd.shape}")

# Split PSD features into training and testing datasets
X_psd_train, X_psd_test, y_psd_train, y_psd_test = train_test_split(
    X_psd, y_psd, train_size=0.8, random_state=42, stratify=y_psd)

print("Power Spectral Density Features")
print(f"Training data shape: {X_psd_train.shape}, Testing data shape: {X_psd_test.shape}")

# Spectral Entropy Features
print(f"\nSpectral Entropy Features shape: {X_sp_ent.shape}, Labels shape: {y_sp_ent.shape}")

# Split Spectral Entropy features into training and testing datasets
X_sp_ent_train, X_sp_ent_test, y_sp_ent_train, y_sp_ent_test = train_test_split(
    X_sp_ent, y_sp_ent, train_size=0.8, random_state=42, stratify=y_sp_ent)

print("Spectral Entropy Features")
print(f"Training data shape: {X_sp_ent_train.shape}, Testing data shape: {X_sp_ent_test.shape}")

# Power Spectral Density and Spectral Entropy Features
print(f"\nPSD and Spectral Entropy Features shape: {X_features.shape}, Labels shape: {y_features.shape}")

# Split PSD and Spectral Entropy features into training and testing datasets
X_features_train, X_features_test, y_features_train, y_features_test = train_test_split(
    X_features, y_features, train_size=0.8, random_state=42, stratify=y_features)

print("Power Spectral Density and Spectral Entropy Features")
print(f"Training data shape: {X_features_train.shape}, Testing data shape: {X_features_test.shape}")


Not setting metadata
Not setting metadata
600 matching events found
No baseline correction applied
0 projection items activated
Loading data for 600 events and 1000 original time points ...
0 bad epochs dropped
Not setting metadata
Not setting metadata
601 matching events found
No baseline correction applied
0 projection items activated
Loading data for 601 events and 1000 original time points ...
0 bad epochs dropped
    Using multitaper spectrum estimation with 7 DPSS windows
    Using multitaper spectrum estimation with 7 DPSS windows
Processed Subject 1: Normal 600 epochs, Fatigue 601 epochs
Not setting metadata
Not setting metadata
600 matching events found
No baseline correction applied
0 projection items activated
Loading data for 600 events and 1000 original time points ...
0 bad epochs dropped
Not setting metadata
Not setting metadata
601 matching events found
No baseline correction applied
0 projection items activated
Loading data for 601 events and 1000 original time points 

A portion of the normal state and fatigue state raw EEG of each subject and it pre-processed EEG was visualized in individual separate windows. The pre-processing done includes the notch filtering to remove the line noise interference and it was further pre-processed by applying bandpass filter to highlight the delta, theta, alpha, and beta band of the EEG where difference between normal state and fatigue state can be observed. Specifically, at delta and theta band represents the deep meditation and sleep, while alpha and beta shows alertness and consciousness. Next, the feature extraction consist of extracting power spectral density (PSD) and spectral entropy features from the 40 channels of EEG. Datasets were created where one dataset contains PSD features only, one dataset alloted for the spectral entropy only, and a final dataset that includes both features. They were normalized using Standard Scaler. Each set of dataset were then splitted for training and testing data.

## Machine Learning Classification

### Power Spectral Density Features Only

#### Random Forest Classifier

In [10]:
# Essential Libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import time

accuracy_psd = []             #Store Validation Accuracy of PSD feature only
kfold_accuracy_psd = []       #Store K-Fold Accuracy of PSD feature only
execution_psd = []            #Store Execution Time of PSD feature only

# Random Forest Classifier Implementation
start = time.time()
clf = RandomForestClassifier(n_estimators=250)
clf.fit(X_psd_train.reshape(X_psd_train.shape[0], -1), y_psd_train)  # Flatten features

# Predict on test data
y_pred = clf.predict(X_psd_test.reshape(X_psd_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=clf, X=X_psd, y=y_psd, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_psd_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_psd.append(accuracy)
kfold_accuracy_psd.append(accuracies_average)
execution_psd.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_psd_test, y_pred)

# Classification report
report = classification_report(y_psd_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='Random Forest - PSD Only')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 88.6001386001386
Average K-Fold Accuracy: 89.34571026804036
Execution Time: 11.451846599578857 s
              precision    recall  f1-score   support

      Normal       0.87      0.91      0.89      1442
     Fatigue       0.90      0.87      0.88      1444

    accuracy                           0.89      2886
   macro avg       0.89      0.89      0.89      2886
weighted avg       0.89      0.89      0.89      2886



#### Support Vector Machine

In [12]:
# Support Vector Machine Implementation
start = time.time()
support_vector_machine = SVC(kernel = 'rbf')
support_vector_machine.fit(X_psd_train.reshape(X_psd_train.shape[0], -1), y_psd_train)  # Flatten features

# Predict on test data
y_pred = support_vector_machine.predict(X_psd_test.reshape(X_psd_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=support_vector_machine, X=X_psd, y=y_psd, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_psd_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_psd.append(accuracy)
kfold_accuracy_psd.append(accuracies_average)
execution_psd.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_psd_test, y_pred)

# Classification report
report = classification_report(y_psd_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='Support Vector Machine - PSD Only')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 63.340263340263334
Average K-Fold Accuracy: 63.03178191527705
Execution Time: 19.651829957962036 s
              precision    recall  f1-score   support

      Normal       0.59      0.86      0.70      1442
     Fatigue       0.74      0.41      0.53      1444

    accuracy                           0.63      2886
   macro avg       0.67      0.63      0.61      2886
weighted avg       0.67      0.63      0.61      2886



#### K-Nearest Neighbor

In [14]:
# K-Nearest Neighbor Implementation
start = time.time()
k_nearest_neighbors = KNeighborsClassifier(n_neighbors=9)
k_nearest_neighbors.fit(X_psd_train.reshape(X_psd_train.shape[0], -1), y_psd_train)  # Flatten features

# Predict on test data
y_pred = k_nearest_neighbors.predict(X_psd_test.reshape(X_psd_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=k_nearest_neighbors, X=X_psd, y=y_psd, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_psd_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_psd.append(accuracy)
kfold_accuracy_psd.append(accuracies_average)
execution_psd.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_psd_test, y_pred)

# Classification report
report = classification_report(y_psd_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='K-Nearest Neighbor - PSD Only')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 69.36936936936937
Average K-Fold Accuracy: 70.03324673227586
Execution Time: 0.2677648067474365 s
              precision    recall  f1-score   support

      Normal       0.69      0.71      0.70      1442
     Fatigue       0.70      0.68      0.69      1444

    accuracy                           0.69      2886
   macro avg       0.69      0.69      0.69      2886
weighted avg       0.69      0.69      0.69      2886



### Spectral Entropy Features Only

### Random Forest Classifier

In [17]:
accuracy_sp_ent = []             #Store Validation Accuracy of Spectral Entropy feature only
kfold_accuracy_sp_ent = []       #Store K-Fold Accuracy of Spectral Entropy feature only
execution_sp_ent = []            #Store Execution Time of Spectral Entropy feature only

# Random Forest Classifier Implementation
start = time.time()
clf = RandomForestClassifier(n_estimators=250)
clf.fit(X_sp_ent_train.reshape(X_sp_ent_train.shape[0], -1), y_sp_ent_train)  # Flatten features

# Predict on test data
y_pred = clf.predict(X_sp_ent_test.reshape(X_sp_ent_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=clf, X=X_sp_ent, y=y_sp_ent, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_sp_ent_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_sp_ent.append(accuracy)
kfold_accuracy_sp_ent.append(accuracies_average)
execution_sp_ent.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_sp_ent_test, y_pred)

# Classification report
report = classification_report(y_sp_ent_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='Random Forest - Spectral Entropy Only')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 99.13374913374913
Average K-Fold Accuracy: 99.25135740669721
Execution Time: 9.057426691055298 s
              precision    recall  f1-score   support

      Normal       0.99      0.99      0.99      1442
     Fatigue       0.99      0.99      0.99      1444

    accuracy                           0.99      2886
   macro avg       0.99      0.99      0.99      2886
weighted avg       0.99      0.99      0.99      2886



#### Support Vector Machine

In [19]:
# Support Vector Machine Implementation
start = time.time()
support_vector_machine = SVC(kernel = 'rbf')
support_vector_machine.fit(X_sp_ent_train.reshape(X_sp_ent_train.shape[0], -1), y_sp_ent_train)  # Flatten features

# Predict on test data
y_pred = support_vector_machine.predict(X_sp_ent_test.reshape(X_sp_ent_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=support_vector_machine, X=X_sp_ent, y=y_sp_ent, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_sp_ent_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_sp_ent.append(accuracy)
kfold_accuracy_sp_ent.append(accuracies_average)
execution_sp_ent.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_sp_ent_test, y_pred)

# Classification report
report = classification_report(y_sp_ent_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='Support Vector Machine - Spectral Entropy Only')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 99.02979902979902
Average K-Fold Accuracy: 98.98794986173627
Execution Time: 3.6254916191101074 s
              precision    recall  f1-score   support

      Normal       0.99      0.99      0.99      1442
     Fatigue       0.99      0.99      0.99      1444

    accuracy                           0.99      2886
   macro avg       0.99      0.99      0.99      2886
weighted avg       0.99      0.99      0.99      2886



#### K-Nearest Neighbor

In [21]:
# K-Nearest Neighbor Implementation
start = time.time()
k_nearest_neighbors = KNeighborsClassifier(n_neighbors=9)
k_nearest_neighbors.fit(X_sp_ent_train.reshape(X_sp_ent_train.shape[0], -1), y_sp_ent_train)  # Flatten features

# Predict on test data
y_pred = k_nearest_neighbors.predict(X_sp_ent_test.reshape(X_sp_ent_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=k_nearest_neighbors, X=X_sp_ent, y=y_sp_ent, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_sp_ent_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_sp_ent.append(accuracy)
kfold_accuracy_sp_ent.append(accuracies_average)
execution_sp_ent.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_sp_ent_test, y_pred)

# Classification report
report = classification_report(y_sp_ent_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='K-Nearest Neighbor - Spectral Entropy Only')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 97.85169785169785
Average K-Fold Accuracy: 98.03822172754211
Execution Time: 0.07373595237731934 s
              precision    recall  f1-score   support

      Normal       0.97      0.99      0.98      1442
     Fatigue       0.99      0.97      0.98      1444

    accuracy                           0.98      2886
   macro avg       0.98      0.98      0.98      2886
weighted avg       0.98      0.98      0.98      2886



### Power Spectral Density and Spectral Entropy Features

#### Random Forest Classifier

In [24]:
accuracy_features = []             #Store Validation Accuracy of PSD and Spectral Entropy features
kfold_accuracy_features = []       #Store K-Fold Accuracy of PSD and Spectral Entropy features
execution_features = []            #Store Execution Time of PSD and Spectral Entropy features

# Random Forest Classifier Implementation
start = time.time()
clf = RandomForestClassifier(n_estimators=250)
clf.fit(X_features_train.reshape(X_features_train.shape[0], -1), y_features_train)  # Flatten features

# Predict on test data
y_pred = clf.predict(X_features_test.reshape(X_features_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=clf, X=X_features, y=y_features, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_features_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_features.append(accuracy)
kfold_accuracy_features.append(accuracies_average)
execution_features.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_features_test, y_pred)

# Classification report
report = classification_report(y_features_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='Random Forest - PSD and Spectral Entropy')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 99.68814968814968
Average K-Fold Accuracy: 99.48013414032832
Execution Time: 11.363505363464355 s
              precision    recall  f1-score   support

      Normal       1.00      1.00      1.00      1442
     Fatigue       1.00      1.00      1.00      1444

    accuracy                           1.00      2886
   macro avg       1.00      1.00      1.00      2886
weighted avg       1.00      1.00      1.00      2886



#### Support Vector Machine

In [26]:
# Support Vector Machine Implementation
start = time.time()
support_vector_machine = SVC(kernel = 'rbf')
support_vector_machine.fit(X_features_train.reshape(X_features_train.shape[0], -1), y_features_train)  # Flatten features

# Predict on test data
y_pred = support_vector_machine.predict(X_features_test.reshape(X_features_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=support_vector_machine, X=X_features, y=y_features, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_features_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_features.append(accuracy)
kfold_accuracy_features.append(accuracies_average)
execution_features.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_features_test, y_pred)

# Classification report
report = classification_report(y_features_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='Support Vector Machine - PSD and Spectral Entropy')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 98.30214830214831
Average K-Fold Accuracy: 98.12838871091299
Execution Time: 6.865126609802246 s
              precision    recall  f1-score   support

      Normal       0.99      0.98      0.98      1442
     Fatigue       0.98      0.99      0.98      1444

    accuracy                           0.98      2886
   macro avg       0.98      0.98      0.98      2886
weighted avg       0.98      0.98      0.98      2886



#### K-Nearest Neighbor

In [28]:
# K-Nearest Neighbor Implementation
start = time.time()
k_nearest_neighbors = KNeighborsClassifier(n_neighbors=9)
k_nearest_neighbors.fit(X_features_train.reshape(X_features_train.shape[0], -1), y_features_train)  # Flatten features

# Predict on test data
y_pred = k_nearest_neighbors.predict(X_features_test.reshape(X_features_test.shape[0], -1))  # Flatten features
end = time.time()

# Apply K-Fold Cross Validation
K_Fold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = cross_val_score(estimator=k_nearest_neighbors, X=X_features, y=y_features, cv=K_Fold, scoring='accuracy')
accuracies_average = accuracies.mean()

# Calculate accuracy and runtime
accuracy = accuracy_score(y_features_test, y_pred)
print(f"Classification Accuracy: {accuracy*100}")
print(f"Average K-Fold Accuracy: {accuracies_average*100}")
runtime = (end-start)   # Execution Runtime
print(f"Execution Time: {runtime} s")
accuracy_features.append(accuracy)
kfold_accuracy_features.append(accuracies_average)
execution_features.append(runtime)

# Confusion matrix
cm = confusion_matrix(y_features_test, y_pred)

# Classification report
report = classification_report(y_features_test, y_pred, target_names=["Normal", "Fatigue"])
print(report)

# Confusion matrix heatmap
plt.figure(figsize=(10,7), num='K-Nearest Neighbor - PSD and Spectral Entropy')
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Normal", "Fatigue"], yticklabels=["Normal", "Fatigue"])
plt.title("Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)

plt.show()

Classification Accuracy: 97.22799722799724
Average K-Fold Accuracy: 97.08857529245878
Execution Time: 0.06991767883300781 s
              precision    recall  f1-score   support

      Normal       0.96      0.98      0.97      1442
     Fatigue       0.98      0.96      0.97      1444

    accuracy                           0.97      2886
   macro avg       0.97      0.97      0.97      2886
weighted avg       0.97      0.97      0.97      2886



#### Comparison of Validation Accuracy, K-Fold Accuracy, and Execution Time

In [30]:
models = ["Random Forest", "SVM", "KNN"]                            # Set as X values
X_axis = np.arange(len(models)) 

# Plot the Comparison of Testing Accuracy
plt.figure(figsize=(10,7), num='Comparison of Testing Accuracy')
plt.bar(X_axis-0.3, accuracy_psd, 0.3, label="PSD only")
plt.bar(X_axis, accuracy_sp_ent, 0.3, label="Spectral Entropy only")
plt.bar(X_axis+0.3, accuracy_features, 0.3, label="Both Features")
plt.xticks(X_axis, models) 
plt.xlabel("Machine Learning Model")
plt.ylabel("Testing Accuracy (%)")
plt.title("Comparison of Testing Accuracy")
plt.legend()
plt.show()

# Plot the Comparison of K-Fold Accuracy
plt.figure(figsize=(10,7), num='Comparison of K-Fold Accuracy')
plt.bar(X_axis-0.3, kfold_accuracy_psd, 0.3, label="PSD only")
plt.bar(X_axis, kfold_accuracy_sp_ent, 0.3, label="Spectral Entropy only")
plt.bar(X_axis+0.3, kfold_accuracy_features, 0.3, label="Both Features")
plt.xticks(X_axis, models) 
plt.xlabel("Machine Learning Model")
plt.ylabel("K-Fold Accuracy (%)")
plt.title("Comparison of K-Fold Accuracy")
plt.legend()
plt.show()

# Plot the Comparison of Execution Time
plt.figure(figsize=(10,7), num='Comparison of Execution Time')
plt.bar(X_axis-0.3, execution_psd, 0.3, label="PSD only")
plt.bar(X_axis, execution_sp_ent, 0.3, label="Spectral Entropy only")
plt.bar(X_axis+0.3, execution_features, 0.3, label="Both Features")
plt.xticks(X_axis, models) 
plt.xlabel("Machine Learning Model")
plt.ylabel("Execution Time (s)")
plt.title("Comparison of Execution Time")
plt.legend()
plt.show()

The output of the model predictions shows that when using PSD features alone, relative low accuracy is achieved and showed the longest average runtime. When using the Spectral entropy features alone, the accuracy and runtime are greatly improved, in fact in garnered the highest average accuracy and shortest runtime among the three datasets used. Finally when using both features, the accuracy and runtime were close to when using spectral entropy alone, however, the accuracy slightly decreased and the runtime increased by a small factor. It must be noted that the highest accuracy among all the model implementation was achieved when using both features paired with random forest classifier. Analyzing the trend of model predictions, it showed that the spectral entropy of driver's EEG alone shows a promising potential in detecting fatigue. 

#### ***Average PSD per Frequency Range***

In [33]:
# Frequency bands
bands = {'Delta (0.5-4 Hz)': (0.5, 4),
         'Theta (4-8 Hz)': (4, 8),
         'Alpha (8-13 Hz)': (8, 13),
         'Beta (13-30 Hz)': (13, 30)}

# Storage
avg_psd_normal = []
avg_psd_fatigue = []

# Ensure band_idx matches the frequency dimension of X_psd
if len(freqs) != X_psd_1.shape[1]:
    print(f"Mismatch detected: freqs length ({len(freqs)}) does not match X_psd frequency axis ({X_psd_1.shape[1]}).")
    freqs = freqs[:X_psd_1.shape[1]]  # Truncate freqs to match X_psd

# Loop through frequency bands and compute averages
for band, (low, high) in bands.items():
    band_idx = (freqs >= low) & (freqs <= high)  # Indices for the current band
    
    # Average PSD for the current band
    avg_psd_normal.append(X_psd_1[y_psd == 0][:, band_idx].mean(axis=1).mean())
    avg_psd_fatigue.append(X_psd_1[y_psd == 1][:, band_idx].mean(axis=1).mean())

# Bar plot
x = range(len(bands))  # Indices for bars
plt.figure(figsize=(10,7), num='Average PSD Across Frequency Bands')
plt.bar(x, avg_psd_normal, width=0.4, label='Normal (Non-Sleepy)', color='blue', alpha=0.7)
plt.bar([i + 0.4 for i in x], avg_psd_fatigue, width=0.4, label='Fatigue (Sleepy)', color='red', alpha=0.7)
plt.xticks([i + 0.2 for i in x], bands.keys(), rotation=45)
plt.title('Average PSD Across Frequency Bands')
plt.xlabel('Frequency Band')
plt.ylabel('Average Power (PSD)')
plt.legend()
plt.tight_layout()
plt.show()

Mismatch detected: freqs length (45) does not match X_psd frequency axis (40).


As part of the results, the average power spectral density of each state is compared per frequency range to discern if there is distinction between them. It shows that in the delta band, there is not that much difference between the PSD of the states. In the other bands, difference between the states were observed specifically, at theta band, the normal state had a higher PSD, while on the alpha and beta band, it was noted that fatigue state have a higher PSD. High PSD means that the power is distibuted across the specified range of frequencies. It can be interpreted that in the theta band which signifies the behavior of deep meditation, the normal state showed varied power levels however in the fatigue state the power levels of EEG are not varied meaning they are clustered. On the other hand, in the alpha and beta band which signifies consciousness, the normal state is the clustered one which depicts calm and tranquil response while the high PSD of fatigue state may mean that they are battling the sleepiness they are experiencing.