In [2]:
import sys
import os

# 프로젝트 최상위 디렉터리를 추가
project_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_path)

In [3]:
import mne
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
import matplotlib.pyplot as plt
from Preprocessing.feature_extraction import load_eeg_data, compute_band_power, extract_features
mne.set_log_level('error')

In [4]:
def pipeline(base_path):
    train_dir = base_path + 'train4ml.csv'
    test_dir = base_path + 'test4ml.csv'
    val_dir = base_path + 'val4ml.csv'
    data_train, label_train = load_eeg_data(train_dir)
    data_val, label_val = load_eeg_data(val_dir)   
    data_test, label_test = load_eeg_data(test_dir)
    
    train_X = data_train
    train_y = label_train
    val_X = data_val
    val_y = label_val
    test_X = data_test
    test_y = label_test
    
    # Scaling
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)
    
    # Train through GridSearchCV
    svm = SVC()
    param_grid = {
        'C': [0.1, 1, 10, 50, 80, 100, 150],  # Reduced range of the regularization parameter
        'gamma': ['scale', 0.01, 0.1, 1],  # Key gamma values with a focus on potential sweet spots
        'kernel': ['linear', 'rbf'],  # Focus on the most commonly effective kernels
        'class_weight': [None, 'balanced'],  # Option to handle imbalanced classes
    }

    grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
    grid_search.fit(train_X, train_y)  # Fit the model on the training data

    # Print the best parameters and the best score from the validation process
    print("Best parameters found: ", grid_search.best_params_)
    print("Best cross-validation accuracy: {:.2f}%".format(grid_search.best_score_ * 100))
    
    # (Validation) Use the best model to make predictions on the validation set
    best_model = grid_search.best_estimator_
    val_predictions = best_model.predict(val_X)

    # Evaluate the model on the validation set
    print("\nValidation Set Performance:")
    print("Validation Accuracy: {:.2f}%".format(accuracy_score(val_y, val_predictions) * 100))
    print("Validation ROC-AUC Score: {:.2f}".format(roc_auc_score(val_y, val_predictions)))
    print("\nValidation Classification Report:")
    print(classification_report(val_y, val_predictions))
    
    # (Test) After validation, use the best model to predict on the test set
    test_predictions = best_model.predict(test_X)

    # Evaluate the model on the test set
    print("\nTest Set Performance:")
    print("Test Accuracy: {:.2f}%".format(accuracy_score(test_y, test_predictions) * 100))
    print("Test ROC-AUC Score: {:.2f}".format(roc_auc_score(test_y, test_predictions)))
    print("\nTest Classification Report:")
    print(classification_report(test_y, test_predictions))

In [5]:
# Load data
base_path_1 = "G:/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label15/4ml/"
base_path_2 = "G:/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label25/4ml/"
base_path_3 = "G:/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label35/4ml/"
base_path_4 = "G:/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subject 1 data (k3b)/down sampling X ver/label45/4ml/"

In [6]:
train_svm_1 = pipeline(base_path_1)

Fitting 5 folds for each of 112 candidates, totalling 560 fits
Best parameters found:  {'C': 1, 'class_weight': None, 'gamma': 0.01, 'kernel': 'rbf'}
Best cross-validation accuracy: 89.00%

Validation Set Performance:
Validation Accuracy: 66.67%
Validation ROC-AUC Score: 0.66

Validation Classification Report:
              precision    recall  f1-score   support

           1       0.69      0.53      0.60        17
           5       0.65      0.79      0.71        19

    accuracy                           0.67        36
   macro avg       0.67      0.66      0.66        36
weighted avg       0.67      0.67      0.66        36


Test Set Performance:
Test Accuracy: 83.33%
Test ROC-AUC Score: 0.83

Test Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.82      0.82        17
           5       0.84      0.84      0.84        19

    accuracy                           0.83        36
   macro avg       0.83      0.83      0.83 

In [7]:
train_svm_2 = pipeline(base_path_2)

Fitting 5 folds for each of 112 candidates, totalling 560 fits
Best parameters found:  {'C': 1, 'class_weight': None, 'gamma': 0.01, 'kernel': 'rbf'}
Best cross-validation accuracy: 83.46%

Validation Set Performance:
Validation Accuracy: 83.33%
Validation ROC-AUC Score: 0.83

Validation Classification Report:
              precision    recall  f1-score   support

           2       0.81      0.89      0.85        19
           5       0.87      0.76      0.81        17

    accuracy                           0.83        36
   macro avg       0.84      0.83      0.83        36
weighted avg       0.84      0.83      0.83        36


Test Set Performance:
Test Accuracy: 86.11%
Test ROC-AUC Score: 0.86

Test Classification Report:
              precision    recall  f1-score   support

           2       0.84      0.89      0.86        18
           5       0.88      0.83      0.86        18

    accuracy                           0.86        36
   macro avg       0.86      0.86      0.86 

In [8]:
train_svm_3 = pipeline(base_path_3)

Fitting 5 folds for each of 112 candidates, totalling 560 fits
Best parameters found:  {'C': 0.1, 'class_weight': None, 'gamma': 'scale', 'kernel': 'linear'}
Best cross-validation accuracy: 80.52%

Validation Set Performance:
Validation Accuracy: 72.22%
Validation ROC-AUC Score: 0.72

Validation Classification Report:
              precision    recall  f1-score   support

           3       0.62      0.71      0.67        14
           5       0.80      0.73      0.76        22

    accuracy                           0.72        36
   macro avg       0.71      0.72      0.71        36
weighted avg       0.73      0.72      0.72        36


Test Set Performance:
Test Accuracy: 58.33%
Test ROC-AUC Score: 0.58

Test Classification Report:
              precision    recall  f1-score   support

           3       0.56      0.78      0.65        18
           5       0.64      0.39      0.48        18

    accuracy                           0.58        36
   macro avg       0.60      0.58   

In [9]:
train_svm_4 = pipeline(base_path_4)

Fitting 5 folds for each of 112 candidates, totalling 560 fits
Best parameters found:  {'C': 10, 'class_weight': None, 'gamma': 0.1, 'kernel': 'rbf'}
Best cross-validation accuracy: 76.75%

Validation Set Performance:
Validation Accuracy: 72.22%
Validation ROC-AUC Score: 0.70

Validation Classification Report:
              precision    recall  f1-score   support

           4       0.69      0.60      0.64        15
           5       0.74      0.81      0.77        21

    accuracy                           0.72        36
   macro avg       0.72      0.70      0.71        36
weighted avg       0.72      0.72      0.72        36


Test Set Performance:
Test Accuracy: 75.00%
Test ROC-AUC Score: 0.77

Test Classification Report:
              precision    recall  f1-score   support

           4       0.88      0.68      0.77        22
           5       0.63      0.86      0.73        14

    accuracy                           0.75        36
   macro avg       0.76      0.77      0.75 