# <h3>Импорт библиотек</h3>
# <p>Загрузка необходимых библиотек для работы с данными, нейросетями и их подготовки.</p>
# <hr>

In [1]:
import pandas as pd
from keras.layers import BatchNormalization, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.utils import to_categorical
import tensorflow as tf
from keras import Sequential
from keras.layers import Dense, Input, Conv1D, GlobalMaxPooling1D
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import os
import random
import numpy as np

<h3>Фиксация random seed</h3>
<p>Для воспроизводимости экспериментов устанавливается фиксированный random seed.</p>
<hr>

In [2]:
os.environ['PYTHONHASHSEED'] = str(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

<h3>Определение устройства для вычислений</h3>
<p>Автоматический выбор GPU или CPU для совместимости кода с любой платформой.</p>
<hr>

In [3]:
has_gpu = len(tf.config.list_physical_devices('GPU')) > 0
device_name = '/GPU:0' if has_gpu else '/CPU:0'
print(f"Используется устройство: {device_name}")

Используется устройство: /GPU:0


<h3>Загрузка данных</h3>
<p>Загрузка набора данных с сердечными сокращениями из CSV-файла.</p>
<hr>

In [4]:
heartbeats_df = pd.read_csv("../files/mitbih_test.csv", sep=',', header=None)

<h3>Предварительный просмотр данных</h3>
<p>Отображение первых 10 строк датасета.</p>
<hr>

In [5]:
heartbeats_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,1.0,0.758264,0.11157,0.0,0.080579,0.078512,0.066116,0.049587,0.047521,0.035124,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.908425,0.783883,0.531136,0.362637,0.3663,0.344322,0.333333,0.307692,0.296703,0.300366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.730088,0.212389,0.0,0.119469,0.10177,0.10177,0.110619,0.123894,0.115044,0.132743,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.910417,0.68125,0.472917,0.229167,0.06875,0.0,0.004167,0.014583,0.054167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.57047,0.399329,0.238255,0.147651,0.0,0.003356,0.040268,0.080537,0.07047,0.090604,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1.0,0.923664,0.656489,0.195929,0.111959,0.175573,0.122137,0.050891,0.035623,0.05598,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,1.0,0.79726,0.320548,0.043836,0.049315,0.065753,0.030137,0.008219,0.005479,0.010959,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.909408,0.97561,0.533101,0.134146,0.066202,0.0,0.010453,0.012195,0.031359,0.146341,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.927803,0.866359,0.299539,0.0,0.231951,0.317972,0.274962,0.262673,0.270353,0.268817,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,1.0,0.91423,0.473684,0.0,0.064327,0.317739,0.405458,0.391813,0.382066,0.401559,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<h3>Анализ размерности данных</h3>
<p>Проверка количества строк и столбцов.</p>
<hr>

In [6]:
heartbeats_df.shape

(21892, 188)

<h3>Статистическое описание признаков</h3>
<p>Просмотр описательных статистик по каждому столбцу.</p>
<hr>

In [7]:
heartbeats_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
count,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,...,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0,21892.0
mean,0.89441,0.761902,0.426627,0.221596,0.201676,0.209891,0.204805,0.200992,0.197634,0.196022,...,0.004588,0.004327,0.00402,0.003789,0.003638,0.003459,0.003166,0.003,0.002946,0.473689
std,0.23456,0.218659,0.228572,0.208711,0.177727,0.172194,0.177946,0.176142,0.170228,0.166707,...,0.043128,0.042187,0.040255,0.039397,0.038535,0.037717,0.035903,0.035522,0.035266,1.143447
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.92426,0.683366,0.251197,0.050505,0.082873,0.087912,0.072663,0.065997,0.064516,0.068493,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.990431,0.828996,0.432777,0.16763,0.147642,0.158111,0.144068,0.144509,0.150422,0.149029,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,0.912319,0.583991,0.347092,0.259211,0.287356,0.298453,0.294563,0.289907,0.282956,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.991429,...,0.980392,1.0,0.966102,1.0,1.0,1.0,1.0,0.996053,1.0,4.0


<h3>Информация о типах данных</h3>
<p>Анализ типов и наличия пропусков в данных.</p>
<hr>

In [8]:
heartbeats_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21892 entries, 0 to 21891
Columns: 188 entries, 0 to 187
dtypes: float64(188)
memory usage: 31.4 MB


<h3>Формирование целевой переменной</h3>
<p>Выделение целевой переменной и удаление её из признаков.</p>
<hr>

In [9]:
y = heartbeats_df.iloc[:, -1]
col_name = heartbeats_df.columns[-1]
heartbeats_df = heartbeats_df.drop(col_name, axis=1)

<h3>Анализ классов</h3>
<p>Проверка уникальных меток классов в целевой переменной.</p>
<hr>

In [10]:
y.unique()

array([0., 1., 2., 3., 4.])

<h3>Масштабирование признаков</h3>
<p>Нормализация данных с помощью MinMaxScaler для корректной работы модели.</p>
<hr>

In [11]:
scalar = MinMaxScaler()
heartbeats_df = scalar.fit_transform(heartbeats_df)

<h3>One-hot кодирование целевой переменной и изменение формы признаков</h3>
<p>Преобразование целевой переменной в one-hot формат, изменение формы X для подачи в Conv1D.</p>
<hr>

In [12]:
y = to_categorical(y, 5)
X = heartbeats_df.reshape(heartbeats_df.shape[0], heartbeats_df.shape[1], 1)

<h3>Проверка размерностей данных</h3>
<p>Печать форм признаков и целевой переменной для контроля.</p>
<hr>

In [13]:
print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  (21892, 187, 1)
y shape:  (21892, 5)


<h3>Инициализация callbacks</h3>
<p>Создание callbacks для ранней остановки обучения и динамической коррекции learning rate.</p>
<hr>

In [14]:
early_stop = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

<h3>Вычисление весов классов</h3>
<p>Для балансировки классов рассчитываются веса классов.</p>
<hr>

In [15]:
y_labels = np.argmax(y, axis=1)
class_weights = compute_class_weight('balanced', classes=np.unique(y_labels), y=y_labels)
class_weight_dict = dict(enumerate(class_weights))

<h3>Разделение данных на обучающую и тестовую выборки</h3>
<p>Данные делятся на обучающую и тестовую части с сохранением пропорций классов.</p>
<hr>

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

<h3>Подбор гиперпараметров модели</h3>
<p>Grid Search по числу фильтров, kernel_size, dropout и размерам Dense-слоя.</p>
<hr>

In [17]:
with tf.device(device_name):
    param_grid = {
        'filters': [64, 128],
        'kernel_size': [5, 9],
        'dropout': [0.2, 0.3],
        'dense_units': [32, 64]
    }

    results = []

    for filters in param_grid['filters']:
        for kernel_size in param_grid['kernel_size']:
            for dropout in param_grid['dropout']:
                for dense_units in param_grid['dense_units']:
                    print(f"\n=== filters={filters}, kernel_size={kernel_size}, dropout={dropout}, dense_units={dense_units} ===")

                    model = Sequential([
                        Input(shape=(X_train.shape[1], 1)),
                        Conv1D(filters, kernel_size, activation='relu', padding='same'),
                        BatchNormalization(),
                        Dropout(dropout),
                        Conv1D(filters, kernel_size, activation='relu', padding='same'),
                        BatchNormalization(),
                        Dropout(dropout),
                        GlobalMaxPooling1D(),
                        Dense(dense_units, activation='relu'),
                        Dropout(dropout),
                        Dense(5, activation='softmax')
                    ])

                    model.compile(
                        optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=['accuracy']
                    )

                    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
                    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

                    history = model.fit(
                        X_train, y_train,
                        epochs=10,
                        batch_size=64,
                        validation_data=(X_test, y_test),
                        callbacks=[early_stop, reduce_lr],
                        class_weight=class_weight_dict,
                        verbose=0  # чтобы не засорять вывод
                    )

                    val_loss, val_acc = model.evaluate(X_test, y_test, verbose=0)
                    print(f"Validation accuracy: {val_acc:.4f}")
                    results.append({
                        'filters': filters,
                        'kernel_size': kernel_size,
                        'dropout': dropout,
                        'dense_units': dense_units,
                        'val_acc': val_acc
                    })


    results = sorted(results, key=lambda x: x['val_acc'], reverse=True)
    print("\nЛучшие комбинации параметров:")
    for r in results[:5]:
        print(r)


=== filters=64, kernel_size=5, dropout=0.2, dense_units=32 ===
Validation accuracy: 0.8401

=== filters=64, kernel_size=5, dropout=0.2, dense_units=64 ===
Validation accuracy: 0.7566

=== filters=64, kernel_size=5, dropout=0.3, dense_units=32 ===
Validation accuracy: 0.6495

=== filters=64, kernel_size=5, dropout=0.3, dense_units=64 ===
Validation accuracy: 0.7404

=== filters=64, kernel_size=9, dropout=0.2, dense_units=32 ===
Validation accuracy: 0.8733

=== filters=64, kernel_size=9, dropout=0.2, dense_units=64 ===
Validation accuracy: 0.9237

=== filters=64, kernel_size=9, dropout=0.3, dense_units=32 ===
Validation accuracy: 0.8888

=== filters=64, kernel_size=9, dropout=0.3, dense_units=64 ===
Validation accuracy: 0.8500

=== filters=128, kernel_size=5, dropout=0.2, dense_units=32 ===
Validation accuracy: 0.8838

=== filters=128, kernel_size=5, dropout=0.2, dense_units=64 ===
Validation accuracy: 0.8349

=== filters=128, kernel_size=5, dropout=0.3, dense_units=32 ===
Validation ac

<h3>Финальная модель с оптимальными гиперпараметрами</h3>
<p>Построение финальной архитектуры модели на лучших найденных параметрах (можно добавить третий Conv1D).</p>
<hr>

In [18]:
with tf.device(device_name):
    model = Sequential([
        Input(shape=(X.shape[1], 1)),
        Conv1D(128, 5, activation='relu', padding='same'),
        BatchNormalization(),
        Dropout(0.2),
        Conv1D(128, 5, activation='relu', padding='same'),
        BatchNormalization(),
        Dropout(0.2),
        Conv1D(256, 5, activation='relu', padding='same'),
        BatchNormalization(),
        Dropout(0.2),
        GlobalMaxPooling1D(),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(5, activation='softmax')
    ])

    model.compile(
        optimizer='Adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', 'categorical_accuracy']
    )

<h3>Обучение финальной модели</h3>
<p>Запуск процесса обучения на 20 эпох с callbacks и использованием вычисленных весов классов.</p>
<hr>

In [19]:
with tf.device(device_name):
    history = model.fit(
        X_train, y_train,
        epochs=20,
        batch_size=64,
        validation_data=(X_test, y_test),
        callbacks=[early_stop, reduce_lr],
        class_weight=class_weight_dict,
        verbose=1
    )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


<h3>Оценка финальной модели</h3>
<p>Вычисление итоговых метрик качества на всём наборе данных.</p>
<hr>

In [21]:
loss, acc, cat_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Loss: {loss:.4f}")
print(f"Accuracy: {acc:.4f}")
print(f"Categorical accuracy: {cat_acc:.4f}")

y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

print("\nClassification Report:")
print(classification_report(y_true, y_pred, digits=4))

print(f"Macro F1-score: {f1_score(y_true, y_pred, average='macro'):.4f}")

Loss: 0.4436
Accuracy: 0.9434
Categorical accuracy: 0.9434
Confusion Matrix:
[[3533   41   13    6   31]
 [  53   56    2    0    0]
 [  51    0  220    6   13]
 [  17    0    2   13    0]
 [   9    0    4    0  309]]

Classification Report:
              precision    recall  f1-score   support

           0     0.9645    0.9749    0.9697      3624
           1     0.5773    0.5045    0.5385       111
           2     0.9129    0.7586    0.8286       290
           3     0.5200    0.4062    0.4561        32
           4     0.8754    0.9596    0.9156       322

    accuracy                         0.9434      4379
   macro avg     0.7700    0.7208    0.7417      4379
weighted avg     0.9415    0.9434    0.9417      4379

Macro F1-score: 0.7417


In [22]:
import tensorflow as tf
print(tf.__version__)

2.10.1
