In [2]:
import numpy as np
import pickle
from scipy.io import loadmat

# Параметры
fs = 128  # Частота дискретизации
n_subjects = 5  # Количество испытуемых
channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 
            'T8', 'FC6', 'F4', 'F8', 'AF4']
useful_channels = ['F7', 'F3', 'P7', 'O1', 'O2', 'P8', 'AF4']

# Индексы полезных каналов
use_channel_inds = [channels.index(c) for c in useful_channels if c in channels]

# Пороговые значения для выделения состояний
mkpt1 = int(fs * 10 * 60)  # 10 минут в отсчетах
mkpt2 = int(fs * 20 * 60)  # 20 минут в отсчетах
mkpt3 = 214540  # Конец записи (примерное значение)
interval = mkpt3 - mkpt2

# Карта соответствия испытуемого и используемых дней (тестовые дни начинаются с 3-го)
subject_map = {
    s: [i for i in range(int(7 * (s - 1)) + 3, 
                         int(7 * (s - 1)) + (5 if s != 5 else 4) + 3)
        ]
    for s in range(1, n_subjects + 1)
}

# Директория с данными
inp_dir = 'data/EEG Data/'

# Сохранение данных в pickle для каждого испытуемого
for s in range(1, n_subjects + 1):
    data = {'channels': useful_channels, 'fs': fs}
    
    for i, t in enumerate(subject_map[s]):
        trial = {}
        trial_data = loadmat(f'{inp_dir}eeg_record{t}.mat')
        eeg = trial_data['o']['data'][0][0][:, 3:17]  # Извлекаем каналы
        eeg = eeg[:, use_channel_inds]  # Отбор полезных каналов
        
        trial['focussed'] = eeg[:interval]
        trial['unfocussed'] = eeg[mkpt1:mkpt1 + interval]
        trial['drowsed'] = eeg[mkpt2:mkpt2 + interval]
        data[f'trial_{i + 1}'] = trial
    
    with open(f'data/temp_files/subject_{s}.pkl', 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

print("Данные успешно сохранены в pickle-файлы.")


Данные успешно сохранены в pickle-файлы.


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import glob

# Загрузка данных из всех pickle-файлов
X, y = [], []
state_num = {'focussed': 0, 'unfocussed': 1, 'drowsed': 2}

for file in glob.glob('data/temp_files/subject_*.pkl'):
    with open(file, 'rb') as f:
        data = pickle.load(f)
        for trial_key, trial_data in data.items():
            if 'trial' in trial_key:
                for state, label in state_num.items():
                    X.append(trial_data[state])
                    y.append(np.full(trial_data[state].shape[0], label))

# Преобразование в numpy массивы
X = np.concatenate(X)
y = np.concatenate(y)

# Стандартизация данных
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Ресайз для подачи в LSTM (samples, timesteps, features)
X = X.reshape(-1, 1, X.shape[1])

# Разделение на тренировочную, валидационную и тестовую выборки
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Train shape: {X_train.shape}, Validation shape: {X_val.shape}, Test shape: {X_test.shape}")


Train shape: (2632608, 1, 7), Validation shape: (877536, 1, 7), Test shape: (877536, 1, 7)


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, BatchNormalization, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix

# Определение модели
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(LSTM(32, return_sequences=False))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(Dense(3, activation='softmax'))  # 3 класса: focussed, unfocussed, drowsed

# Компиляция модели
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callback для ранней остановки
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Обучение модели
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=64,
    callbacks=[early_stopping]
)

# Оценка модели на тестовой выборке
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

  super().__init__(**kwargs)


Epoch 1/20
[1m41135/41135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 2ms/step - accuracy: 0.4916 - loss: 0.9928 - val_accuracy: 0.5818 - val_loss: 0.8696
Epoch 2/20
[1m41135/41135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 2ms/step - accuracy: 0.5435 - loss: 0.9217 - val_accuracy: 0.5987 - val_loss: 0.8454
Epoch 3/20
[1m41135/41135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 3ms/step - accuracy: 0.5558 - loss: 0.9065 - val_accuracy: 0.6087 - val_loss: 0.8294
Epoch 4/20
[1m41135/41135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 3ms/step - accuracy: 0.5625 - loss: 0.8978 - val_accuracy: 0.6133 - val_loss: 0.8236
Epoch 5/20
[1m41135/41135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 3ms/step - accuracy: 0.5664 - loss: 0.8913 - val_accuracy: 0.6156 - val_loss: 0.8187
Epoch 6/20
[1m41135/41135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 3ms/step - accuracy: 0.5696 - loss: 0.8869 - val_accuracy: 0.6179 - val_loss: 0

In [5]:
# Прогноз и оценка
y_pred = np.argmax(model.predict(X_test), axis=1)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Focussed', 'Unfocussed', 'Drowsed']))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

[1m27423/27423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1ms/step
Classification Report:
              precision    recall  f1-score   support

    Focussed       0.72      0.73      0.73    292512
  Unfocussed       0.58      0.51      0.54    292512
     Drowsed       0.58      0.65      0.62    292512

    accuracy                           0.63    877536
   macro avg       0.63      0.63      0.63    877536
weighted avg       0.63      0.63      0.63    877536

Confusion Matrix:
[[213055  38825  40632]
 [ 47851 148674  95987]
 [ 33495  67914 191103]]


In [7]:
# Сохранение обученной модели
model.save('microservice_architecture\model\src\lstm_model.h5')

  model.save('microservice_architecture\model\src\lstm_model.h5')
