### Load package

In [2]:
import glob
import json
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from joblib import Parallel, delayed
from sklearn.metrics import r2_score, mean_squared_error
from torch.cuda import device
from torch.utils.data import DataLoader, random_split

In [7]:
N_DATA = [2]
OFFSET = None
DEBUG = False
MODEL_TYPES = [1]  # , 2, 3, 4, 11, 12, 13, 14]#, 15, 16]
NUM_EPOCHS = 10
BATCH_SIZE = 32
LEARNING_RATE = 0.001
PLOT = False
SAVE_CAM = True  # Save CAM images
CAM_DIR = 'cam_images'  # Directory to save CAM images
os.makedirs(CAM_DIR, exist_ok=True)
dtype = 1

##### analysis.py load_and_preprocess_data

In [12]:
from data_source_utils import load_dataset, process_dataset
from preprocess_utils import preprocess_dataset
from analysis import load_json_file

In [9]:
data_list_path = "F:\homes\data_list.txt"

chunk_file_name_list = []
dataset_name = []

with open(data_list_path, 'r', encoding='utf-8') as file:
    for line in file:
        # 각 줄을 공백으로 분리
        parts = line.strip().split()
        if len(parts) == 2:  # 앞부분과 뒷부분이 존재하는 경우
            chunk_file_name_list.append(parts[0])  # 앞부분 저장
            dataset_name.append(int(parts[1]))  # 뒷부분 저장

# n_data에서 dataset_name과 중복된 항목 제거
n_data = [n for n in N_DATA if n not in dataset_name]

preprocessed_dataset = []
dataset = load_dataset(offset=OFFSET, n_data=n_data)
if len(dataset) > 0:
    for data in dataset:
        preprocessed_dataset.append(preprocess_dataset(data['data'], dtype, data['fs'], plot=PLOT, debug=DEBUG))

In [11]:
all_files = []
for chunk_file_name in chunk_file_name_list:
    file_pattern = "F:/homes/preprocessed_data/preprocessed_data_" + chunk_file_name + "_*.json"
    file_list = glob.glob(file_pattern)
    all_files.extend(file_list)  # 모든 파일을 리스트에 추가

In [13]:
# 병렬로 파일 로드
if all_files:
    loaded_data = Parallel(n_jobs=-1)(delayed(load_json_file)(file_name) for file_name in all_files)

    # 병렬 로드된 데이터를 하나의 리스트로 확장
    for data in loaded_data:
        preprocessed_dataset.extend(data)

In [15]:
len(preprocessed_dataset)

8528

In [24]:
def get_min_max_data_length(preprocessed_dataset):
    # 'data' 필드의 길이를 저장할 리스트
    data_lengths = [len(item['data']) for item in preprocessed_dataset if 'data' in item]

    if data_lengths:
        min_length = min(data_lengths)
        max_length = max(data_lengths)
        return min_length, max_length
    else:
        raise ValueError("The dataset does not contain 'data' field or is empty.")

In [None]:
preprocessed_dataset[0]

In [27]:
min, max = get_min_max_data_length(preprocessed_dataset)

TypeError: 'int' object is not callable

In [29]:
min,max

(1031, 6948)

In [30]:
def segment_ecg(data, sampling_rate=114, window_duration=3, overlap_duration=0.5):
    """
    ECG 데이터를 주어진 윈도우 크기와 겹침을 고려하여 분할하는 함수.
    
    :param data: ECG 데이터 (1D array 또는 list)
    :param sampling_rate: 샘플링 속도 (Hz)
    :param window_duration: 윈도우 크기 (초) - 최소 3개의 ECG 파형을 포함할 수 있는 크기
    :param overlap_duration: 겹침 크기 (초) - 0.5초씩 겹침
    :return: 분할된 ECG 데이터 세그먼트 리스트
    """
    # 윈도우 크기와 겹침 크기를 샘플 단위로 변환
    window_size = int(window_duration * sampling_rate)  # 3초에 해당하는 샘플 개수
    overlap_size = int(overlap_duration * sampling_rate)  # 0.5초에 해당하는 샘플 개수

    # 분할된 데이터를 저장할 리스트
    segmented_data = []

    # 슬라이딩 윈도우로 데이터 분할
    start = 0
    while start + window_size <= len(data):
        end = start + window_size
        segmented_data.append(data[start:end])
        start += window_size - overlap_size  # 겹침을 고려한 다음 윈도우의 시작 지점

    return segmented_data

In [31]:
for item in preprocessed_dataset:
    ecg_data = item['data']  # ECG 데이터 가져오기
    segmented_ecg = segment_ecg(ecg_data)  # 분할된 데이터
    item['segmented_data'] = segmented_ecg  # 원래 데이터에 'segmented_data' 추가

In [33]:
len(preprocessed_dataset[0]['segmented_data'])

11

In [44]:
preprocessed_dataset[0]['label']

'N'

In [74]:
import torch
from torch.utils.data import Dataset

class ECGDataset(Dataset):
    def __init__(self, preprocessed_dataset):
        """
        ECG 데이터를 PyTorch Dataset으로 변환하는 클래스.
        각 dict의 'data'와 'label' 필드를 쌍으로 반환.
        
        :param preprocessed_dataset: 분할된 ECG 데이터가 포함된 dict 리스트
        """
        self.data_label_pairs = []

        # 각 데이터에 대해 분할된 segment와 label을 쌍으로 저장
        for item in preprocessed_dataset:
            ecg_data = item['data']  # ECG 데이터
            label = item['label']    # 레이블 (예: 정상, 비정상 등)
            segmented_ecg = segment_ecg(ecg_data)  # ECG 데이터를 분할

            # 분할된 segment와 label을 쌍으로 저장
            for segment in segmented_ecg:
                self.data_label_pairs.append((segment, label))

    def __len__(self):
        """데이터셋의 전체 길이를 반환"""
        return len(self.data_label_pairs)

    def __getitem__(self, idx):
        """
        인덱스에 해당하는 (segment, label) 쌍을 반환. PyTorch Dataloader가 이 메서드를 호출함.
        데이터를 텐서로 변환하여 반환.
        """
        ecg_segment, label = self.data_label_pairs[idx]
        
        ecg_tensor = torch.permute(torch.tensor(ecg_segment, dtype=torch.float32),(1,0))
        if label == 'N':
            label = 0
        elif label ==  'A':
            label = 1
        elif label == 'O':
            label = 2
        elif label ==  '~' :
            label = 3
        
        label_tensor = torch.tensor(label, dtype=torch.long)  # 레이블은 보통 정수형
        return ecg_tensor, label_tensor


In [75]:
ecg_dataset = ECGDataset(preprocessed_dataset)

# DataLoader로 변환하여 배치 단위로 데이터를 가져오도록 설정
dataloader = DataLoader(ecg_dataset, batch_size=32, shuffle=True)


In [76]:
len(ecg_dataset.__getitem__(1)[0])

3

In [None]:
for batch in dataloader:
    ecg_segments, labels = batch
    print(ecg_segments.shape)  # 각 배치의 ECG 데이터 텐서 크기 출력
    print(labels.shape)  

In [81]:
import torch.nn.functional as F
input_length = 342
class ECGCNN(nn.Module):
    def __init__(self, num_classes=4):
        super(ECGCNN, self).__init__()
        # 1D Convolutional layers
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=16, kernel_size=7, stride=1, padding=3)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        
        # Pooling layers
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        
        # Fully connected layers
        self.fc1 = nn.Linear(64 * (input_length // 8), 128)  # input_length는 각 segment의 길이
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        # 1D Convolution + Activation + Pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        
        # Flatten
        x = x.view(x.size(0), -1)  # Flatten to (batch_size, num_features)
        
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [82]:
LEARNING_RATE = 0.001
ecgcnn = ECGCNN().to('cuda')
loss_fn = nn.CrossEntropyLoss()
optimzer = torch.optim.Adam(ecgcnn.parameters(), lr=LEARNING_RATE)


In [86]:
device='cuda'
num_epochs=100

for epoch in range(num_epochs):
    for ecg_segments, labels in dataloader:
        ecg_segments, labels = ecg_segments.to(device), labels.to(device)
        
        # Forward pass
        outputs = ecgcnn(ecg_segments)
        loss = loss_fn(outputs, labels)
        
        # Backward pass and optimization
        optimzer.zero_grad()
        loss.backward()
        optimzer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [1/100], Loss: 0.7754
Epoch [2/100], Loss: 1.0019
Epoch [3/100], Loss: 0.1630
Epoch [4/100], Loss: 0.7728
Epoch [5/100], Loss: 1.2270
Epoch [6/100], Loss: 0.1316
Epoch [7/100], Loss: 0.1066
Epoch [8/100], Loss: 0.9323
Epoch [9/100], Loss: 1.0292
Epoch [10/100], Loss: 0.5446
Epoch [11/100], Loss: 0.1132
Epoch [12/100], Loss: 0.0286
Epoch [13/100], Loss: 0.2972
Epoch [14/100], Loss: 0.2895
Epoch [15/100], Loss: 0.0863
Epoch [16/100], Loss: 0.1750
Epoch [17/100], Loss: 0.0076
Epoch [18/100], Loss: 0.1402
Epoch [19/100], Loss: 0.0736
Epoch [20/100], Loss: 0.3066
Epoch [21/100], Loss: 0.5503
Epoch [22/100], Loss: 0.1271
Epoch [23/100], Loss: 0.3150
Epoch [24/100], Loss: 1.2040
Epoch [25/100], Loss: 0.3678
Epoch [26/100], Loss: 0.1458
Epoch [27/100], Loss: 0.3943
Epoch [28/100], Loss: 0.1789
Epoch [29/100], Loss: 0.0101
Epoch [30/100], Loss: 0.0113
Epoch [31/100], Loss: 0.0018
Epoch [32/100], Loss: 0.0188
Epoch [33/100], Loss: 0.0064
Epoch [34/100], Loss: 0.0005
Epoch [35/100], Loss: 0