# Подключаем пакеты и определяем функции

In [2]:
# Для работы с данными
import pandas as pd
import numpy as np
import wfdb
import ast
from utils import utils
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt   # plotting
#import seaborn as sns   # plotting heatmap

# Для работы с моделями
import tensorflow as tf
from tensorflow import keras
from keras import layers

# Для метрик
from keras import backend as K
from keras.metrics import AUC, Recall, Precision, Accuracy, TruePositives, TrueNegatives, FalsePositives, FalseNegatives
from sklearn.metrics import fbeta_score, precision_score, recall_score, accuracy_score, roc_auc_score

# Функции
# Компиляция и обучение модели
def compile_fit(model, X_train, y_train, X_val, y_val, early_stopping, model_checkpoint, epochs=30):
    """Compile ``model`` and fit it, returning the result of ``model.fit``.

    The model is compiled with categorical cross-entropy loss, an Adam
    optimizer created with its default arguments, and the ``accuracy``
    metric. It is then fitted on the training arrays, with the validation
    arrays passed as ``validation_data``.

    Args:
        model: Keras model to compile and train (compiled in place).
        X_train: Training inputs.
        y_train: Training targets (presumably one-hot encoded, as the
            categorical cross-entropy loss expects -- confirm with caller).
        X_val: Validation inputs.
        y_val: Validation targets.
        early_stopping: Callback passed to ``fit``; ``None`` to omit it.
        model_checkpoint: Callback passed to ``fit``; ``None`` to omit it.
        epochs: Number of epochs requested from ``fit``. Defaults to 30,
            the value that used to be hard-coded here.

    Returns:
        The object returned by ``model.fit``.
    """
    model.compile(
        loss=keras.losses.CategoricalCrossentropy(),
        optimizer=tf.optimizers.Adam(),
        metrics=['accuracy'],
    )

    # Keep the original callback order (checkpoint first), but drop any
    # callback the caller disabled by passing None so fit() does not choke
    # on a None entry in the list.
    callbacks = [
        callback
        for callback in (model_checkpoint, early_stopping)
        if callback is not None
    ]

    history = model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
    )
    return history

# Fix TensorFlow's global random seed so TF random operations are repeatable between runs.
tf.random.set_seed(42)
# IPython magic (notebook only): show matplotlib figures inline in the cell output.
%matplotlib inline

# Скачиваем PTB-XL с использованием кода и обработки авторов исследуемой статьи (обработанные данные, т.е. проведена нормализация и категоризация)

In [5]:
# Settings handed to the dataset helpers below.
sampling_frequency = 100
datafolder = 'ptbxl/'
task = 'superdiagnostic'
outputfolder = 'output/'

# Read the PTB-XL signals together with their raw label table.
data, raw_labels = utils.load_dataset(datafolder, sampling_frequency)
# Aggregate the raw labels for the chosen task.
labels = utils.compute_label_aggregations(raw_labels, datafolder, task)
# Keep the relevant records and get the one-hot encoded targets in Y.
data, labels, Y, _ = utils.select_data(
    data, labels, task, min_samples=0, outputfolder=outputfolder
)

# Split on the stratified fold column: folds below 10 are used for training,
# fold 10 is held out for validation. Each mask is built once and reused.
fold_ids = labels.strat_fold
train_rows = fold_ids < 10
val_rows = fold_ids == 10

X_train, y_train = data[train_rows], Y[train_rows]
X_val, y_val = data[val_rows], Y[val_rows]

# Last expression of the cell: displays the shapes of the four arrays.
X_train.shape, y_train.shape, X_val.shape, y_val.shape

((19230, 1000, 12), (19230, 5), (2158, 1000, 12), (2158, 5))

In [7]:
X_train

array([[[-1.190e-01, -5.500e-02,  6.400e-02, ..., -2.600e-02,
         -3.900e-02, -7.900e-02],
        [-1.160e-01, -5.100e-02,  6.500e-02, ..., -3.100e-02,
         -3.400e-02, -7.400e-02],
        [-1.200e-01, -4.400e-02,  7.600e-02, ..., -2.800e-02,
         -2.900e-02, -6.900e-02],
        ...,
        [ 6.900e-02,  0.000e+00, -6.900e-02, ...,  2.400e-02,
         -4.100e-02, -5.800e-02],
        [ 8.600e-02,  4.000e-03, -8.100e-02, ...,  2.420e-01,
         -4.600e-02, -9.800e-02],
        [ 2.200e-02, -3.100e-02, -5.400e-02, ...,  1.430e-01,
         -3.500e-02, -1.200e-01]],

       [[ 4.000e-03,  1.380e-01,  1.340e-01, ...,  1.920e-01,
          8.300e-02,  8.800e-02],
        [-2.000e-02,  1.160e-01,  1.360e-01, ...,  1.560e-01,
          5.700e-02,  6.300e-02],
        [-5.300e-02,  9.200e-02,  1.450e-01, ...,  1.070e-01,
          1.300e-02,  2.200e-02],
        ...,
        [ 1.210e-01,  3.980e-01,  2.770e-01, ..., -1.065e+00,
         -4.920e-01, -1.560e-01],
        [-3.

In [6]:
y_train

array([[0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0],
       ...,
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0]])

In [3]:
# Save the train/validation arrays as .npy files for later use in Google Colab.
import os

# np.save does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs('ptbxl_helme/', exist_ok=True)

np.save('ptbxl_helme/X_train_ptbxl_superdiag.npy', X_train)
np.save('ptbxl_helme/X_val_ptbxl_superdiag.npy', X_val)
np.save('ptbxl_helme/y_train_ptbxl_superdiag.npy', y_train)
np.save('ptbxl_helme/y_val_ptbxl_superdiag.npy', y_val)