In [1]:
import numpy as np
import pickle
import glob
import optuna

from scipy.io import loadmat
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, make_scorer, accuracy_score


# Параметры
fs = 128  # Частота дискретизации
n_subjects = 5  # Количество испытуемых
channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 
            'T8', 'FC6', 'F4', 'F8', 'AF4']
useful_channels = ['F7', 'F3', 'P7', 'O1', 'O2', 'P8', 'AF4']

# Индексы полезных каналов
use_channel_inds = [channels.index(c) for c in useful_channels if c in channels]

# Пороговые значения для выделения состояний
mkpt1 = int(fs * 10 * 60)  # 10 минут в отсчетах
mkpt2 = int(fs * 20 * 60)  # 20 минут в отсчетах
mkpt3 = 214540  # Конец записи
interval = mkpt3 - mkpt2

# Карта испытуемых и используемых дней
subject_map = {
    s: [i for i in range(int(7 * (s - 1)) + 3, int(7 * (s - 1)) + (5 if s != 5 else 4) + 3)]
    for s in range(1, n_subjects + 1)
}

inp_dir = 'data/EEG Data/'  # Директория данных

# Сохранение данных в pickle для каждого испытуемого
for s in range(1, n_subjects + 1):
    data = {'channels': useful_channels, 'fs': fs}
    
    for i, t in enumerate(subject_map[s]):
        trial = {}
        trial_data = loadmat(f'{inp_dir}eeg_record{t}.mat')
        eeg = trial_data['o']['data'][0][0][:, 3:17]
        eeg = eeg[:, use_channel_inds]
        
        trial['focussed'] = eeg[:interval]
        trial['unfocussed'] = eeg[mkpt1:mkpt1 + interval]
        trial['drowsed'] = eeg[mkpt2:mkpt2 + interval]
        data[f'trial_{i + 1}'] = trial
    
    with open(f'data/subject_{s}.pkl', 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

print("Данные успешно сохранены в pickle-файлы.")


  from .autonotebook import tqdm as notebook_tqdm


Данные успешно сохранены в pickle-файлы.


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Загрузка данных из pickle-файлов
X, y = [], []
state_num = {'focussed': 0, 'unfocussed': 1, 'drowsed': 2}

for file in glob.glob('data/subject_*.pkl'):
    with open(file, 'rb') as f:
        data = pickle.load(f)
        for trial_key, trial_data in data.items():
            if 'trial' in trial_key:
                for state, label in state_num.items():
                    X.append(trial_data[state])
                    y.append(np.full(trial_data[state].shape[0], label))

# Преобразование в numpy массивы
X = np.concatenate(X)
y = np.concatenate(y)

# Стандартизация данных
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Разделение данных на обучающую, валидационную и тестовую выборки
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Train shape: {X_train.shape}, Validation shape: {X_val.shape}, Test shape: {X_test.shape}")


Train shape: (2632608, 7), Validation shape: (877536, 7), Test shape: (877536, 7)


In [3]:
import optuna
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Определение целевой функции для Optuna
def objective(trial):
    # Подбор гиперпараметров
    C = trial.suggest_float('C', 1e-4, 1e2, log=True)  # От 0.0001 до 100
    gamma = trial.suggest_float('gamma', 1e-5, 1e1, log=True)  # От 0.00001 до 10
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
    
    # Создание и обучение модели SVM с текущими гиперпараметрами
    model = SVC(C=C, gamma=gamma, kernel=kernel)
    
    # Оценка через кросс-валидацию на обучающей выборке
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy', n_jobs=-1)
    
    return np.mean(scores)  # Возврат среднего значения точности

# Создание и запуск Optuna-исследования
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Параметры лучшей модели
print(f"Best trial: {study.best_trial.params}")


[I 2024-10-22 07:32:31,065] A new study created in memory with name: no-name-131ea51a-e1ec-45ee-a142-576377ce7030
[W 2024-10-22 13:36:46,376] Trial 0 failed with parameters: {'C': 0.6838642707606686, 'gamma': 0.003559704752062895, 'kernel': 'linear'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Study_projects_part_2\brain_focus_signals\impulse_egg_signals_project\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\User\AppData\Local\Temp\ipykernel_6784\3791494043.py", line 16, in objective
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy', n_jobs=-1)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Study_projects_part_2\brain_focus_signals\impulse_egg_signals_project\Lib\site-packages\sklearn\utils\_param_validation.py", line 213, in wrapper
    retur

KeyboardInterrupt: 

In [None]:
# Создание модели с оптимальными параметрами
best_svm = SVC(**study.best_params)

# Обучение на тренировочной выборке
best_svm.fit(X_train, y_train)

# Оценка на валидационной выборке
val_accuracy = best_svm.score(X_val, y_val)
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Оценка на тестовой выборке
test_accuracy = best_svm.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Прогнозы на тестовой выборке
y_pred = best_svm.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Focussed', 'Unfocussed', 'Drowsed']))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
