In [2]:
import mne
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
import matplotlib.pyplot as plt
from Preprocessing.feature_extraction import load_eeg_data, compute_band_power, extract_features
import warnings
mne.set_log_level('error')
warnings.filterwarnings('ignore')

In [3]:
def pipeline(base_path):
    train_dir = base_path + 'train4ml.csv'
    test_dir = base_path + 'test4ml.csv'
    val_dir = base_path + 'val4ml.csv'
    data_train, label_train = load_eeg_data(train_dir)
    data_val, label_val = load_eeg_data(val_dir)   
    data_test, label_test = load_eeg_data(test_dir)
    
    train_X = data_train
    train_y = label_train
    val_X = data_val
    val_y = label_val
    test_X = data_test
    test_y = label_test
    
    # Scaling
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)
    
    # Train through GridSearchCV
    mlp = MLPClassifier()
    param_grid = {
    'hidden_layer_sizes': [(100,), (50, 50), (30, 30, 30)],  # 은닉층 구조
    'activation': ['relu', 'tanh'],  # 활성화 함수
    'solver': ['adam', 'sgd'],  # 최적화 알고리즘
    'alpha': [0.0001, 0.001, 0.01],  # L2 정규화 파라미터
    'learning_rate': ['constant', 'adaptive'],  # 학습률 조정 방식
    'max_iter': [200, 300, 500]  # 최대 반복 횟수
}

    grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
    grid_search.fit(train_X, train_y)  # Fit the model on the training data

    # Print the best parameters and the best score from the validation process
    print("Best parameters found: ", grid_search.best_params_)
    print("Best cross-validation accuracy: {:.2f}%".format(grid_search.best_score_ * 100))
    
    # (Validation) Use the best model to make predictions on the validation set
    best_model = grid_search.best_estimator_
    val_predictions = best_model.predict(val_X)

    # Evaluate the model on the validation set
    print("\nValidation Set Performance:")
    print("Validation Accuracy: {:.2f}%".format(accuracy_score(val_y, val_predictions) * 100))
    print("Validation ROC-AUC Score: {:.2f}".format(roc_auc_score(val_y, val_predictions)))
    print("\nValidation Classification Report:")
    print(classification_report(val_y, val_predictions))
    
    # (Test) After validation, use the best model to predict on the test set
    test_predictions = best_model.predict(test_X)

    # Evaluate the model on the test set
    print("\nTest Set Performance:")
    print("Test Accuracy: {:.2f}%".format(accuracy_score(test_y, test_predictions) * 100))
    print("Test ROC-AUC Score: {:.2f}".format(roc_auc_score(test_y, test_predictions)))
    print("\nTest Classification Report:")
    print(classification_report(test_y, test_predictions))

In [4]:
# Load data
base_path_1 = '/Users/imdohyeon/Library/CloudStorage/GoogleDrive-dhlim1598@gmail.com/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label15/4ml/'
base_path_2 = '/Users/imdohyeon/Library/CloudStorage/GoogleDrive-dhlim1598@gmail.com/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label25/4ml/'
base_path_3 = '/Users/imdohyeon/Library/CloudStorage/GoogleDrive-dhlim1598@gmail.com/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label35/4ml/'
base_path_4 = '/Users/imdohyeon/Library/CloudStorage/GoogleDrive-dhlim1598@gmail.com/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label45/4ml/'

In [5]:
train_mlp_1 = pipeline(base_path_1)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits




Best parameters found:  {'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'max_iter': 200, 'solver': 'sgd'}
Best cross-validation accuracy: 90.82%

Validation Set Performance:
Validation Accuracy: 69.44%
Validation ROC-AUC Score: 0.69

Validation Classification Report:
              precision    recall  f1-score   support

           1       0.71      0.59      0.65        17
           5       0.68      0.79      0.73        19

    accuracy                           0.69        36
   macro avg       0.70      0.69      0.69        36
weighted avg       0.70      0.69      0.69        36


Test Set Performance:
Test Accuracy: 83.33%
Test ROC-AUC Score: 0.83

Test Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.82      0.82        17
           5       0.84      0.84      0.84        19

    accuracy                           0.83        36
   macro avg       0.83      0.83      

In [6]:
train_mlp_2 = pipeline(base_path_2)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time=   0.2s
[CV] END activation=relu, alpha=0.0001, hidd



[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidd



[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=200, solver=adam; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidde



Best parameters found:  {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (50, 50), 'learning_rate': 'adaptive', 'max_iter': 200, 'solver': 'adam'}
Best cross-validation accuracy: 84.29%

Validation Set Performance:
Validation Accuracy: 83.33%
Validation ROC-AUC Score: 0.83

Validation Classification Report:
              precision    recall  f1-score   support

           2       0.78      0.95      0.86        19
           5       0.92      0.71      0.80        17

    accuracy                           0.83        36
   macro avg       0.85      0.83      0.83        36
weighted avg       0.85      0.83      0.83        36


Test Set Performance:
Test Accuracy: 75.00%
Test ROC-AUC Score: 0.75

Test Classification Report:
              precision    recall  f1-score   support

           2       0.74      0.78      0.76        18
           5       0.76      0.72      0.74        18

    accuracy                           0.75        36
   macro avg       0.75      0.75   

In [7]:
train_mlp_3 = pipeline(base_path_3)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=500, solver=adam; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=500, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=



[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=adam; total time=   0.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=tan



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=adam; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.3s
[CV] END activation=tanh, alpha=0.001, hidde



Best parameters found:  {'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'max_iter': 200, 'solver': 'sgd'}
Best cross-validation accuracy: 82.38%

Validation Set Performance:
Validation Accuracy: 75.00%
Validation ROC-AUC Score: 0.74

Validation Classification Report:
              precision    recall  f1-score   support

           3       0.67      0.71      0.69        14
           5       0.81      0.77      0.79        22

    accuracy                           0.75        36
   macro avg       0.74      0.74      0.74        36
weighted avg       0.75      0.75      0.75        36


Test Set Performance:
Test Accuracy: 69.44%
Test ROC-AUC Score: 0.69

Test Classification Report:
              precision    recall  f1-score   support

           3       0.65      0.83      0.73        18
           5       0.77      0.56      0.65        18

    accuracy                           0.69        36
   macro avg       0.71      0.69      

In [8]:
train_mlp_4 = pipeline(base_path_4)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_s



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=constant, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=constant, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(30, 30, 30), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.2s
[CV] END activation=tanh



[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.0



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, max_iter=300, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=300, solver=adam; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time=   0.1s
[CV] END activation=tanh, alpha=0.001, hidden

