In [1]:
import locale

import numpy as np
import numpy.typing as npt

import torch
from torch import nn
import torch.nn.functional as F

from ml_validation import database
from ml_validation.experiment import three_bases

In [2]:
locale.setlocale(locale.LC_ALL, "")
path_dataset = "datasets"

In [5]:
database.download(database.Type.THREE_BASES, path_dataset)

Archive already exists: datasets/three_bases.zip
Archive already exists: datasets/three_bases_bad_records.zip


In [None]:
# Начинаем эксперимент
experiment = three_bases.start_experiment(
    name="DummyUniform",
    description="Тестовая загрузка данных базы 'Three bases'",
    authors="Moskalenko Viktor",
    path_dir=path_dataset,
)

In [None]:
X, Y = experiment.get_data()
X.shape, Y.shape

((54953, 12, 5000), (54953, 5))

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
# как найти количество обучаемых параметров на каждом слое
class ECGCNN(nn.Module):
    def __init__(self, input_channels=12, num_classes=5):
        super(ECGCNN, self).__init__()
        # Первый свёрточный блок
        #padding-добавление нулей по бокам, stride-пропуск между kernels
        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=9, padding=3, stride=1) #out:1249
        self.bn1 = nn.BatchNorm1d(32)
        self.pool1 = nn.MaxPool1d(kernel_size=4) #out:312

        # Второй свёрточный блок
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=7, padding=2, stride=1) #out:155
        self.bn2 = nn.BatchNorm1d(64)
        self.pool2 = nn.MaxPool1d(kernel_size=4) #out:77

        # Третий свёрточный блок
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5, padding=1, stride=1) #out:18
        self.bn3 = nn.BatchNorm1d(128)
        self.pool3 = nn.MaxPool1d(kernel_size=4) #out: 4

        # Второй свёрточный блок
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=7, padding=2, stride=1) #out:155
        self.bn2 = nn.BatchNorm1d(64)
        self.pool2 = nn.MaxPool1d(kernel_size=4) #out:77

        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=9, padding=3, stride=1) #out:1249
        self.bn1 = nn.BatchNorm1d(32)
        self.pool1 = nn.MaxPool1d(kernel_size=4) #out:312

        # Полносвязные слои с Dropout
        self.fc1 = nn.Linear(128 * 4, 256)
        self.dropout = nn.Dropout(p=0.2)  # Dropout с вероятностью 20%
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        # Первый свёрточный блок
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)

        # Второй свёрточный блок
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)

        # Третий свёрточный блок
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)

        # Преобразование для полносвязного слоя
        x = x.view(x.size(0), -1)

        # Полносвязные слои с Dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Применение Dropout
        x = self.fc2(x)
        return x


In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Преобразование меток
Y = np.argmax(Y, axis=1)  # Преобразование one-hot в категории

# Нормализация данных по каждому каналу
X = (X - np.mean(X, axis=2, keepdims=True)) / np.std(X, axis=2, keepdims=True)

# Разделение данных
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# Создание Dataset
class ECGDataset(Dataset):
    def __init__(self, signals, labels):
        self.signals = signals
        self.labels = labels

    def __len__(self):
        return len(self.signals)

    def __getitem__(self, idx):
        signal = self.signals[idx]
        label = self.labels[idx]
        return torch.tensor(signal, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

train_dataset = ECGDataset(X_train, Y_train)
val_dataset = ECGDataset(X_val, Y_val)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


In [10]:
# Инициализация модели, функции потерь и оптимизатора с L2-регуляризацией
'''
model = ECGCNN(input_channels=12, num_classes=5)
#criterion = nn.CrossEntropyLoss()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Добавлен weight_decay

# Тренировочный цикл
for epoch in range(10):  # 20 эпох
    model.train()
    train_loss = 0.0

    for signals, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(signals)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Оценка на валидационной выборке
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for signals, labels in val_loader:
            outputs = model(signals)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Epoch {epoch+1}/{10}, Train Loss: {train_loss/len(train_loader):.4f}, "
          f"Val Loss: {val_loss/len(val_loader):.4f}, "
          f"Val Accuracy: {100 * correct / total:.2f}%")

# Сохранение модели
torch.save(model.state_dict(), "ecg_cnn_weights_with_regularization_less_rank.pth")
'''

'\nmodel = ECGCNN(input_channels=12, num_classes=5)\n#criterion = nn.CrossEntropyLoss()\ncriterion = nn.BCELoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Добавлен weight_decay\n\n# Тренировочный цикл\nfor epoch in range(10):  # 20 эпох\n    model.train()\n    train_loss = 0.0\n\n    for signals, labels in train_loader:\n        optimizer.zero_grad()\n        outputs = model(signals)\n        loss = criterion(outputs, labels)\n        loss.backward()\n        optimizer.step()\n        train_loss += loss.item()\n\n    # Оценка на валидационной выборке\n    model.eval()\n    val_loss = 0.0\n    correct = 0\n    total = 0\n\n    with torch.no_grad():\n        for signals, labels in val_loader:\n            outputs = model(signals)\n            loss = criterion(outputs, labels)\n            val_loss += loss.item()\n\n            _, predicted = torch.max(outputs, 1)\n            total += labels.size(0)\n            correct += (predicted == labels).su

In [1]:
import torch
import torch.nn.functional as F
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

# Инициализация модели, функции потерь и оптимизатора с L2-регуляризацией
model = ECGCNN(input_channels=12, num_classes=5)  # num_classes=5 для многоклассовой задачи
#criterion = torch.nn.BCELoss()  # Бинарная кросс-энтропия
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Добавлен weight_decay


NameError: name 'ECGCNN' is not defined

In [None]:

# Списки для хранения значений на каждой эпохе
train_losses = []
val_losses = []
f1_scores = []

# Тренировочный цикл
for epoch in range(15):
    model.train()
    train_loss = 0.0

    for signals, labels in train_loader:
        optimizer.zero_grad()
        
        # Прямой проход
        outputs = torch.sigmoid(model(signals))  # Sigmoid для приведения к диапазону [0, 1]

        # Преобразование меток в one-hot
        labels_one_hot = F.one_hot(labels, num_classes=5).float()  # [batch_size, num_classes]
        
        # Вычисление функции потерь
        loss = criterion(outputs, labels_one_hot)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_losses.append(train_loss / len(train_loader))  # Сохраняем средний loss для трейна

    # Оценка на валидационной выборке
    model.eval()
    val_loss = 0.0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for signals, labels in val_loader:
            #outputs = torch.sigmoid(model(signals))  # Sigmoid для диапазона [0, 1]
            labels_one_hot = F.one_hot(labels, num_classes=5).float()
            
            # Вычисление функции потерь
            loss = criterion(outputs, labels_one_hot)
            val_loss += loss.item()

            # Для F1-score используем порог 0.5
            predicted = (outputs > 0.5).int()  # Преобразуем вероятности в бинарные значения
            all_labels.extend(labels_one_hot.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    val_losses.append(val_loss / len(val_loader))  # Сохраняем средний loss для валидации

    # Вычисление F1-score
    f1 = f1_score(all_labels, all_preds, average="macro")
    f1_scores.append(f1)

    print(f"Epoch {epoch+1}/{15}, Train Loss: {train_loss/len(train_loader):.4f}, "
          f"Val Loss: {val_loss/len(val_loader):.4f}, "
          f"F1 Score: {f1:.4f}")

# Построение графиков
epochs = range(1, 16)

# График зависимости Loss
plt.figure(figsize=(12, 6))
plt.plot(epochs, train_losses, label="Train Loss")
plt.plot(epochs, val_losses, label="Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Loss vs Epochs")
plt.legend()
plt.grid(True)
plt.show()

# График зависимости F1-score
plt.figure(figsize=(12, 6))
plt.plot(epochs, f1_scores, label="F1 Score")
plt.xlabel("Epochs")
plt.ylabel("F1 Score")
plt.title("F1 Score vs Epochs")
plt.legend()
plt.grid(True)
plt.show()


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x9856 and 512x256)

In [12]:
from torchinfo import summary
# model – здесь указывается ваша PyTorch модель
# input_size – размерность входного тензора для вашей модели
summary(model, input_size=(3, 224, 224))

NameError: name 'model' is not defined

In [29]:
# Сохранение модели
torch.save(model.state_dict(), "ecg_cnn_weights_with_regularization_bin_cross_entropy.pth")

5 эпох после роста лосса на валидации, чтобы чекать еще один минимум
focal loss (модификация кросс-энтропии)

In [30]:
meta = experiment.get_meta()
meta.head()

Unnamed: 0_level_0,Age,Gender,Patient ID in source database,Source database name,Index in source database
Record index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,56.0,0,15709.0,ptb_xl,1
1,19.0,1,13243.0,ptb_xl,2
3,24.0,1,17014.0,ptb_xl,4
4,19.0,0,17448.0,ptb_xl,5
5,18.0,0,19005.0,ptb_xl,6


In [10]:
%pip install torchsummary

Note: you may need to restart the kernel to use updated packages.


In [31]:
import torchsummary

In [32]:
class MyAlgorithm:
    # Реализуем алгоритм диагностики

    def __init__(self) -> None:
        """
        Инициализация модели с загрузкой сохранённых весов.
        """
        model_weights_path = '/Users/petrzaznobin/Desktop/High School/ROBO-HEART/ml_validation/ecg_cnn_weights_with_regularization_bin_cross_entropy.pth'
        self.model = ECGCNN(input_channels=12, num_classes=5)
        self.model.load_state_dict(torch.load(model_weights_path))
        self.model.eval()  # Устанавливаем режим инференса

    def __call__(self, X_test: npt.NDArray[np.float32]) -> npt.NDArray[np.bool_]:
        """
        Применение модели для предсказания меток на тестовом наборе данных.

        Parameters:
        - X_test: Массив входных сигналов размерностью [N, 12, 5000].

        Returns:
        - Бинарный массив предсказаний размерностью [N, num_classes].
        """
        # Преобразуем входные данные в тензор
        inputs = torch.tensor(X_test, dtype=torch.float32)

        # Прогон данных через модель
        with torch.no_grad():
            logits = self.model(inputs)

        # Преобразуем логиты в вероятности
        probabilities = F.softmax(logits, dim=1).numpy()

        # Применяем пороговое значение 0.5 для бинарного вывода
        predictions = probabilities > 0.5
        
        return predictions

    def print_weights(self):
        print(torchsummary(model, input_size=(12, 5000), device="cpu"))

In [33]:
model = MyAlgorithm()
model.print_weights

<bound method MyAlgorithm.print_weights of <__main__.MyAlgorithm object at 0x17ef79b50>>

In [34]:
from torchsummary import summary  # Correct way to import the summary function

# Create an instance of MyAlgorithm and extract the model
my_algorithm = MyAlgorithm()
summary(my_algorithm.model, input_size=(12, 5000), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 32, 1250]           3,488
       BatchNorm1d-2             [-1, 32, 1250]              64
         MaxPool1d-3              [-1, 32, 312]               0
            Conv1d-4              [-1, 64, 155]          14,400
       BatchNorm1d-5              [-1, 64, 155]             128
         MaxPool1d-6               [-1, 64, 38]               0
            Conv1d-7              [-1, 128, 18]          41,088
       BatchNorm1d-8              [-1, 128, 18]             256
         MaxPool1d-9               [-1, 128, 4]               0
           Linear-10                  [-1, 256]         131,328
          Dropout-11                  [-1, 256]               0
           Linear-12                    [-1, 5]           1,285
Total params: 192,037
Trainable params: 192,037
Non-trainable params: 0
-------------------------------

: 

In [26]:
report = experiment.validate(MyAlgorithm(), batch_size=512)

1.

In [27]:
print(report)

[1mВерсия[0m: 0.2.0
[1mНачало[0m: Wed Dec 18 22:48:21 2024 MSK
[1mКонец[0m: Wed Dec 18 23:42:59 2024 MSK
[1mНазвание[0m: DummyUniform
[1mОписание[0m: Тестовая загрузка данных базы 'Three bases'
[1mСсылка[0m: 
[1mАвтор[0m: Moskalenko Viktor

[1mМетрики[0m:
                 1        2        3        4     7  micro avg  macro avg  \
precision 0.557996 0.957895 0.588845      0.5   0.0   0.575609   0.520947   
recall    0.633478 0.061362 0.960061 0.001499   0.0   0.575256    0.33128   
f1-score  0.593346 0.115336  0.72997  0.00299   0.0   0.575432   0.288328   
support     5293.0   1483.0   3255.0    667.0 723.0    11421.0    11421.0   

           weighted avg  samples avg  
precision      0.580004     0.478549  
recall         0.575256     0.468388  
f1-score       0.498176     0.471775  
support         11421.0      11421.0  

[1mМатрицы рассогласования[0m:

+-----------+----------+-----------+---------+---------+
| 1         | 2        | 3         | 4       | 7       