In [1]:
import mne
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, cohen_kappa_score
import matplotlib.pyplot as plt
from Preprocessing.feature_extraction import load_eeg_data, compute_band_power, extract_features

In [2]:
def pipeline(base_path, window_size, selected_columns):
    train_dir = base_path + 'train.csv'
    test_dir = base_path + 'test.csv'
    val_dir = base_path + 'val.csv'
    data_train, label_train = load_eeg_data(train_dir)
    data_val, label_val = load_eeg_data(val_dir)   
    data_test, label_test = load_eeg_data(test_dir)
    
    features_train = preprocessing(data_train, label_train, window_size, selected_columns)
    features_val = preprocessing(data_val, label_val, window_size, selected_columns)
    features_test = preprocessing(data_test, label_test, window_size, selected_columns)
    
    train_X = features_train.iloc[:, :-1].values
    train_y = features_train.iloc[:, -1].values
    val_X = features_val.iloc[:, :-1].values
    val_y = features_val.iloc[:, -1].values
    test_X = features_test.iloc[:, :-1].values
    test_y = features_test.iloc[:, -1].values
    
    # Scaling
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)
    
    # Train through GridSearchCV
    svm = SVC()
    param_grid = {
        'C': [0.1, 1, 10, 50, 80, 100, 150],  # Reduced range of the regularization parameter
        'gamma': ['scale', 0.01, 0.1, 1],  # Key gamma values with a focus on potential sweet spots
        'kernel': ['linear', 'rbf'],  # Focus on the most commonly effective kernels
        'class_weight': [None, 'balanced'],  # Option to handle imbalanced classes
    }

    grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
    grid_search.fit(train_X, train_y)  # Fit the model on the training data

    # Print the best parameters and the best score from the validation process
    print("Best parameters found: ", grid_search.best_params_)
    print("Best cross-validation accuracy: {:.2f}%".format(grid_search.best_score_ * 100))
    
    # (Validation) Use the best model to make predictions on the validation set
    best_model = grid_search.best_estimator_
    val_predictions = best_model.predict(val_X)

    # Evaluate the model on the validation set
    print("\nValidation Set Performance:")
    print("Validation Accuracy: {:.2f}%".format(accuracy_score(val_y, val_predictions) * 100))
    print("Validation Kappa Coefficient: {:.2f}".format(cohen_kappa_score(val_y, val_predictions)))
    print("\nValidation Classification Report:")
    print(classification_report(val_y, val_predictions))
    
    # (Test) After validation, use the best model to predict on the test set
    test_predictions = best_model.predict(test_X)

    # Evaluate the model on the test set
    print("\nTest Set Performance:")
    print("Test Accuracy: {:.2f}%".format(accuracy_score(test_y, test_predictions) * 100))
    print("Test Kappa Coefficient: {:.2f}".format(cohen_kappa_score(test_y, test_predictions)))
    print("\nTest Classification Report:")
    print(classification_report(test_y, test_predictions))

In [3]:
def extract_features_ml(data, selected_columns, sfreq=100, precision=6):
    feature_dict = {}  # 결과를 저장할 딕셔너리

    for item in selected_columns:
        channel_idx = item[0]  # 채널 인덱스
        bands = item[1]  # 해당 채널에서 추출할 주파수 대역 리스트

        # 주파수 대역이 하나만 주어졌을 때도 리스트로 처리
        if isinstance(bands, tuple):
            bands = [bands]

        # 채널의 데이터 추출
        eeg_data = data.iloc[:, channel_idx].values  # 특정 채널의 데이터를 가져옴
        ch_name = data.columns[channel_idx]  # 채널 이름

        # mne RawArray 객체 생성
        info = mne.create_info(ch_names=[ch_name], sfreq=sfreq, ch_types='eeg')
        raw = mne.io.RawArray(eeg_data[np.newaxis, :], info)  # 2D array 필요

        # 주파수 대역별로 PSD 계산
        for band in bands:
            band_power = compute_band_power(raw, band)

            # band_power가 numpy 배열인 경우 float로 변환
            if isinstance(band_power, np.ndarray):
                band_power = band_power.item()

            # 소수점 이하 precision 자리까지 반올림
            band_power = float(f"{band_power:.{precision}f}")

            # 열 이름 생성 (예: Channel_1_10-12Hz)
            column_name = f'{ch_name}_{band[0]}-{band[1]}Hz'
            feature_dict[column_name] = band_power

    # 최종 데이터프레임 생성
    features = pd.DataFrame([feature_dict])

    return features


In [4]:
def preprocessing(df, labels, window_size, selected_columns):
    """
    Convert a DataFrame of EEG data into a JSON format suitable for GPT-3 davinci.
    =================================
    1. You should pick selected_columns before running this function.
    2. It contains the process of feature extraction.
    
    =================================
    :param df: Data converted to pandas DataFrame from the original csv file
    :param labels: Label for each window (provide a list, left, right, top, bottom)
    :param window_size: Window size to divide EEG data
    :param selected_columns: EEG channel to use, along with their respective frequency bands (provide a list of lists)
    :return: List of data in JSON format
    """
    features_array = []

    for start in range(0, len(df) - window_size + 1, window_size):
        # Extract the window data
        window_data = df.iloc[start:start + window_size, :]
        
        # Extract features using the provided function
        features = extract_features_ml(window_data, selected_columns)
        
        # Convert features DataFrame to a list
        features_flat = features.values.flatten().tolist()

        # Add the label for the current window
        label = int(labels[start])
        features_flat.append(label)

        features_array.append(features_flat)

    # Extract column names directly from the features dataframe and append 'label' for the label column
    feature_columns = list(features.columns)
    feature_columns.append('label')

    # Convert the list of features to a DataFrame
    features_df = pd.DataFrame(features_array, columns=feature_columns)

    return features_df


In [5]:
# Load data
base_path = '/Users/imdohyeon/Library/CloudStorage/GoogleDrive-dhlim1598@gmail.com/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subj(high)/'
window_size = 1000

In [6]:
# Selected Columns (250Hz) 

# 0: FCz / 1: FC4 / 2: C3 / 3: Cz / 4: C4 / 5: CP3 
# label 1: C4(20, 22), C4(22, 24), FC4(10, 12)
# label 2: C3(20, 22), C3(22, 24)
# label 3: Cz(18, 20), Cz(8, 10),  CP3(28, 30)
# label 4: FCz(10, 12), FCz(12, 14), FCz(28, 30)
"""
selected_columns = [
    [0, [(10, 12), (12, 14), (28, 30)]],  # FCz
    # [1, [(10, 12)]],  # FC4
    [2, [(20, 22), (22, 24)]],  # C3
    [3, [(8, 10), (18, 20)]],  # Cz
    [4, [(20, 22), (22, 24)]],  # C4
    # [5, [(28, 30)]]  # CP3
]
"""

'\nselected_columns = [\n    [0, [(10, 12), (12, 14), (28, 30)]],  # FCz\n    # [1, [(10, 12)]],  # FC4\n    [2, [(20, 22), (22, 24)]],  # C3\n    [3, [(8, 10), (18, 20)]],  # Cz\n    [4, [(20, 22), (22, 24)]],  # C4\n    # [5, [(28, 30)]]  # CP3\n]\n'

In [7]:
# Columns selected from the downsampled data into 100Hz

# 0: FCz / 1: FC4 / 2: C3 / 3: Cz / 4: C4 / 5: CP3 
# label 1: C4(20, 22), C4(22, 24), FC4(10, 12)
# label 2: C3(20, 22), C3(22, 24)
# label 3: Cz(18, 20), Cz(8, 10),  CP3(28, 30)
# label 4: FCz(10, 12), FCz(12, 14), FCz(28, 30)
downsampled_selected_columns = [
    [0, [(10, 12), (12, 14), (28, 30)]],  # FCz
    # [1, [(10, 12)]],  # FC4
    [2, [(20, 22), (22, 24)]],  # C3
    [3, [(8, 10), (18, 20)]],  # Cz
    [4, [(20, 22), (22, 24)]],  # C4
    # [5, [(28, 30)]]  # CP3
]

In [8]:
ds_train = pipeline(base_path, window_size, downsampled_selected_columns)
ds_train

Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
E

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
C

In [11]:
int_base_path = '/Users/imdohyeon/Library/CloudStorage/GoogleDrive-dhlim1598@gmail.com/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subj(high+inter)/'

int_train = pipeline(int_base_path, window_size, downsampled_selected_columns)

Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
E

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
C

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
E

In [12]:
low_base_path = '/Users/imdohyeon/Library/CloudStorage/GoogleDrive-dhlim1598@gmail.com/공유 드라이브/4N_PKNU/Project/EEG-LLM/Dataset/subj(high+inter+low)/'

low_train = pipeline(low_base_path, window_size, downsampled_selected_columns)

Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
E

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
R

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
C

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
Creating RawArray with float64 data, n_channels=1, n_times=1000
    Range : 0 ... 999 =      0.000 ...     9.990 secs
Ready.
Effective window size : 2.560 (s)
Effective window size : 2.560 (s)
C