In [1]:
# Импорт библиотек
import pandas as pd
import os
import random
from math import pi
import torch
from torch.utils.data import random_split, Dataset, DataLoader, TensorDataset
from torch import nn
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import tkinter as tk
from tkinter import filedialog

In [None]:
# Ask the user for a parquet file through a native file dialog.
root = tk.Tk()
root.withdraw()  # hide the empty root window; only the dialog is shown

filePath = filedialog.askopenfilename(filetypes=[("Parquet Files", "*.parquet")])
root.destroy()  # release the hidden Tk window once the dialog has been closed

# Raise instead of calling exit(): raising stops this cell (and "Run All") right here,
# so the code below never runs with `df` undefined.
if not filePath:
    raise SystemExit("Выбор файла отменен.")

print("Выбранный файл:", filePath)
fileName, fileExtension = os.path.splitext(filePath)
if fileExtension.lower() != '.parquet':
    raise SystemExit("Выбран неверный файл")

# Keep the untouched frame around; all further processing happens on the copy.
dfOriginal = pd.read_parquet(filePath)
df = dfOriginal.copy()

# Min-max normalisation of the signal to [0, 1].
valueMin, valueMax = df['value'].min(), df['value'].max()
if not valueMax > valueMin:
    # Also true for an empty column (min/max are NaN); dividing would fill 'value' with NaN/inf.
    raise ValueError("Столбец 'value' пуст или постоянен: нормализация невозможна")
df['value'] = (df['value'] - valueMin) / (valueMax - valueMin)

# reset_index returns a new frame, so the result has to be assigned to take effect.
df = df.reset_index(drop=True)
df.head()

In [3]:
class SecondEcgDataset(Dataset):
    """Dataset of fixed-length signal segments cut from a DataFrame.

    Rows with ``edge > 0`` are boundaries. A boundary whose ``edge`` is below 2
    starts a segment that runs up to (not including) the next boundary, or to
    the end of the frame when it is the last boundary. Boundaries with
    ``edge >= 2`` only terminate the preceding segment.

    Every segment's ``value`` column is stored as a float tensor of shape
    ``(segmentLength, 1)``: shorter segments are zero-padded at the end, longer
    ones are reported and truncated so that all items can be stacked in a batch.

    Args:
        df: DataFrame with ``'value'`` and ``'edge'`` columns. Rows are
            addressed by position, so any index (not only 0..n-1) is accepted.
        segmentLength: number of time steps in every returned tensor.
    """

    def __init__(self, df, segmentLength=200):
        self.df = df
        self.segmentLength = segmentLength

        isBoundary = (df['edge'] > 0).to_numpy()
        # Index labels of the boundaries (kept for callers that inspect them).
        self.segmentStarts = df.index[isBoundary].tolist()
        # Row positions of the same boundaries; slicing is done with these so that
        # labels and positions are never mixed up.
        self._startPositions = isBoundary.nonzero()[0].tolist()
        self.startsCount = len(self.segmentStarts)

        self.cachedTensors = []
        self._get_segment_tensor()
        self.segmentsCount = len(self.cachedTensors)

    def _get_segment_tensor(self):
        """Cut, pad/truncate and cache one tensor per segment-starting boundary."""
        values = self.df['value'].to_numpy()
        edges = self.df['edge'].to_numpy()
        positions = self._startPositions

        for order, start in enumerate(positions):
            if not edges[start] < 2:
                # This boundary does not open a segment.
                continue

            # The segment ends at the next boundary, or at the end of the data.
            stop = positions[order + 1] if order + 1 < len(positions) else len(values)
            segmentTensor = torch.tensor(values[start:stop], dtype=torch.float).unsqueeze(1)

            padding = self.segmentLength - len(segmentTensor)
            if padding > 0:
                segmentTensor = torch.cat((segmentTensor, torch.zeros(padding, 1)), dim=0)
            elif padding < 0:
                print('В данные зашёл сегмент неверной длины')
                # Keep the sample but cut it to the common length; tensors of
                # different lengths cannot be stacked by the default collate.
                segmentTensor = segmentTensor[:self.segmentLength]
            self.cachedTensors.append(segmentTensor)

    # Called for every sample during training; both are plain lookups into the cache.

    def __len__(self):
        """Number of cached segments."""
        return self.segmentsCount

    def __getitem__(self, index):
        """Return the precomputed ``(segmentLength, 1)`` tensor for ``index``."""
        return self.cachedTensors[index]

In [None]:
# Build the segment dataset and split it 80/20 into training and validation parts.
dataset = SecondEcgDataset(df)
print(len(dataset))

trainSize = int(0.8 * len(dataset))
valSize = len(dataset) - trainSize
# A seeded generator makes the partition identical on every run, so validation
# results of different runs (and of a saved model) refer to the same samples.
splitGenerator = torch.Generator().manual_seed(42)
trainDataset, valDataset = random_split(dataset, [trainSize, valSize], generator=splitGenerator)
print(len(trainDataset))
print(len(valDataset))

# Only the training data is reshuffled every epoch.
trainDataloader = DataLoader(trainDataset, batch_size=10, shuffle=True)
valDataloader = DataLoader(valDataset, batch_size=10, shuffle=False)

In [28]:
class LSTMAutoencoder(nn.Module):
    """Sequence autoencoder: bidirectional LSTM encoder, LSTM decoder, sigmoid output.

    The encoder maps ``(batch, seq, input_size)`` to ``(batch, seq, 2 * hidden_size)``;
    the decoder maps that back to ``(batch, seq, input_size)`` and a sigmoid is
    applied element-wise.

    Args:
        input_size: number of features per time step (also the output width).
        hidden_size: hidden units per direction of the encoder.
        num_layers: number of stacked layers in both the encoder and the decoder.

    With the default arguments the layers (and therefore the ``state_dict``) are
    the same as a fixed 1 -> 2x64 -> 1 configuration.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=1):
        super().__init__()

        # Encoder: reads the sequence in both directions.
        self.encoder = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True
        )

        # Decoder: consumes the concatenated forward/backward encoder states.
        self.decoder = nn.LSTM(
            input_size=2 * hidden_size,
            hidden_size=input_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False
        )

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Reconstruct ``x``; the result has the same shape as the input."""
        encoded, _ = self.encoder(x)
        decoded, _ = self.decoder(encoded)

        # NOTE(review): an LSTM output is o * tanh(c), i.e. inside (-1, 1), so the
        # sigmoid below can only produce values in roughly (0.27, 0.73). Targets
        # normalised to [0, 1] cannot be matched outside that band. Left unchanged
        # here to stay compatible with already saved weights - confirm intent.
        return self.sigmoid(decoded)

In [None]:
# Model hyperparameters
input_size = 1    # features per time step of the signal
hidden_size = 64  # hidden units per LSTM layer
num_layers = 1    # number of LSTM layers
# NOTE: the three values above are not passed to the constructor below; the model
# is built with the sizes defined inside LSTMAutoencoder.

# Use the GPU when one is available, otherwise fall back to the CPU instead of
# failing on `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = LSTMAutoencoder().to(device)
print(model)

# Loss and optimizer. reduction='none' keeps one loss value per element so that
# the training code can average per sample.
criterion = nn.MSELoss(reduction='none')
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

def train(trainDataloader, valDataloader, model, criterion, optimizer):
    """Run one training epoch followed by one validation pass.

    Both loaders must yield input tensors whose first dimension is the batch;
    the model is trained to reproduce its input (``criterion(model(x), x)``).

    Args:
        trainDataloader: iterable of training batches.
        valDataloader: iterable of validation batches.
        model: the network to train; batches are moved to the device of its parameters.
        criterion: loss function. With ``reduction='none'`` the loss is averaged
            per sample; a scalar loss is treated as one value per batch.
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        ``(trainLoss, valLoss)`` as floats: the mean per-sample loss over the
        whole training epoch and over the whole validation pass. A value is
        ``nan`` when the corresponding loader yields no batches.
    """
    # Follow the model's own device rather than depending on a global variable.
    firstParam = next(model.parameters(), None)
    runDevice = firstParam.device if firstParam is not None else torch.device("cpu")

    def perSampleLosses(outputs, targets):
        """Reduce the criterion output to a 1-D tensor with one loss per sample."""
        losses = criterion(outputs, targets)
        if losses.dim() == 0:
            return losses.reshape(1)
        return losses.reshape(losses.shape[0], -1).mean(dim=1)

    # Training
    model.train()
    trainLossSum, trainCount = 0.0, 0
    trainPbar = tqdm(trainDataloader, desc="Training")
    for inputs in trainPbar:
        inputs = inputs.to(runDevice)
        optimizer.zero_grad()
        sampleLosses = perSampleLosses(model(inputs), inputs)
        # One backward over the sum of the per-sample means accumulates exactly the
        # same gradient as a separate backward per sample, without retaining the graph.
        sampleLosses.sum().backward()
        optimizer.step()
        trainLossSum += sampleLosses.detach().sum().item()
        trainCount += sampleLosses.numel()
    trainPbar.close()

    # Validation: evaluation mode, no gradient tracking
    model.eval()
    valLossSum, valCount = 0.0, 0
    valPbar = tqdm(valDataloader, desc="Validating")
    with torch.no_grad():
        for inputs in valPbar:
            inputs = inputs.to(runDevice)
            sampleLosses = perSampleLosses(model(inputs), inputs)
            valLossSum += sampleLosses.sum().item()
            valCount += sampleLosses.numel()
    valPbar.close()

    trainLoss = trainLossSum / trainCount if trainCount else float('nan')
    valLoss = valLossSum / valCount if valCount else float('nan')
    return trainLoss, valLoss

In [None]:
# Latest (training, validation) losses; both stay 0 if no epoch is executed.
ll = vl = 0
for epoch in range(3000):
    print(f'Epoch: {epoch} ')
    epochLosses = train(trainDataloader, valDataloader, model, criterion, optimizer)
    ll, vl = epochLosses

print('---------------------------------------------------------------------------------------------')
print('Done!')

In [None]:
# Show the training and validation losses returned by the last call to train().
print('Loss: ', ll)
print('ValLoss: ', vl)

In [27]:
# Save the trained weights (state_dict only, not the whole module object).
modelPath = '../00. Resources/Models/SecondStageModel.pt'
# Create the target folder first: torch.save does not create missing directories,
# and failing here would lose the result of the whole training run.
os.makedirs(os.path.dirname(modelPath), exist_ok=True)
torch.save(model.state_dict(), modelPath)