In [22]:
import os
import wfdb
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from MlpMixerClinical import MlpBlock, MixerBlock, MlpMixer

In [23]:
class ClinicDataset(Dataset):
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.data, self.labels = self.load_and_preprocess_data()

    def load_and_preprocess_data(self):
        file_paths = ['MI_F_clinical.csv', 'MI_M_clinical.csv', 'NOT_MI_F_clinical.csv', 'NOT_MI_M_clinical.csv']
        class_labels = [1, 1, 0, 0]

        sequences = []
        all_labels = []
        max_length = 0

        for file_path, class_label in zip(file_paths, class_labels):
            full_path = os.path.join(self.root_dir, file_path)
            df = pd.read_csv(full_path)

            # NaN 값 처리
            df['ctn_value'] = df['ctn_value'].fillna(df.groupby('subject_id')['ctn_value'].transform('mean'))

            # 그룹별 평균으로도 처리되지 않는 NaN 값은 0.00으로 대체
            df['ctn_value'].fillna(0.00, inplace=True)

            for _, group in df.groupby('subject_id'):
                values = group['ctn_value'].values.tolist()
                sequences.append(values)
                all_labels.append(class_label)
                if len(values) > max_length:
                    max_length = len(values)

        # 모든 시퀀스를 최대 길이에 맞춰 패딩
        padded_sequences = [seq + [0.00] * (max_length - len(seq)) for seq in sequences]

        return padded_sequences, all_labels

    def __len__(self):

        return len(self.labels)

    def __getitem__(self, idx):
        rounded_data = [round(val, 2) for val in self.data[idx]]
        sequence = torch.tensor(rounded_data, dtype=torch.float)
        label = torch.tensor(self.labels[idx], dtype=torch.long)

        return sequence, label


In [24]:
root_dir = 'D:/mimiciv_data/mimic-iv-clinical_note/1.0/preprocessed'
dataset = ClinicDataset(root_dir=root_dir)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ctn_value'].fillna(0.00, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ctn_value'].fillna(0.00, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always 

In [25]:
print(len(dataset))

1742


In [26]:
# 첫 번째 데이터 항목 로드
first_data, first_label = dataset[10]

# ECG 데이터와 레이블 출력
print("First ECG Data Shape:", first_data.shape)
print("First ECG Data Label:", first_label)

First ECG Data Shape: torch.Size([243])
First ECG Data Label: tensor(1)


In [27]:
for i in range(len(dataset)):
    data, label = dataset[i]
    print(f"Data {i+1}:")
    print(data)
    print(f"Label: {label}\n")

Data 1:
tensor([0.0400, 0.0500, 0.0500, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 

In [28]:
# 데이터셋 크기
total_size = len(dataset)

# 훈련 세트와 테스트 세트의 크기 계산
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# 데이터셋을 훈련 세트와 테스트 세트로 무작위 분할
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# DataLoader 인스턴스 생성
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# 분할된 데이터셋 크기 확인
print(f'Total dataset size: {total_size}')
print(f'Train dataset size: {len(train_dataset)}')
print(f'Test dataset size: {len(test_dataset)}')

Total dataset size: 1742
Train dataset size: 1393
Test dataset size: 349


In [29]:
# MLP-Mixer 모델 인스턴스 생성: clinical 데이터에 맞게
model = MlpMixer(
    num_classes = 2,  # 분류할 클래스 수
    num_patches = 243,  # 입력 시퀀스의 길이 (데이터셋의 최대 길이)
    num_features = 1,  # 각 시퀀스 요소의 특징 수
    num_blocks = 4, # MixerBlock의 수
    tokens_mlp_dim = 64,  # 토큰 믹싱 MLP의 은닉층 차원
    channels_mlp_dim = 64  # 채널 믹싱 MLP의 은닉층 차원
)

# GPU가 사용 가능한지 확인하고, 사용 가능하다면 GPU를 사용합니다.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # 모델을 GPU로 이동시킵니다.

print(device)

cuda


In [30]:
# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)
# # 학습률 스케줄러 설정
# scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.95, patience=10)

In [31]:
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device).float()  # [batch_size, seq_len, num_features] 형태를 가정
        inputs = inputs.unsqueeze(-1).transpose(1, 2)  # [batch_size, seq_len, 1]
        labels = labels.to(device).long()

        optimizer.zero_grad()

        outputs = model(inputs)  # inputs 차원 조정이 필요 없음
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100 * correct / total

    print(f'Training Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}%')
    return epoch_acc

In [32]:
def validate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    best_val_loss = float('inf')  # 최고 검증 손실값 초기화
    save_dir = 'C:/Users/yumi/projects/deepdaiv_ecg/24w_MI_Multimodal_Prediction/clinical'

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_loader):
            inputs = inputs.to(device).float()
            inputs = inputs.unsqueeze(-1).transpose(1, 2)  # [batch_size, seq_len, 1]
            labels = labels.to(device).long()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(test_loader)
    epoch_acc = 100 * correct / total

    if epoch_loss < best_val_loss:
        best_val_loss = epoch_loss
        model_path = os.path.join(save_dir, 'mlp_clinical_best.pth')
        torch.save(model.state_dict(), model_path)
        print(f"Model saved to {model_path}")

    print(f'Validation Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}%')
    return epoch_acc

In [33]:
num_epochs = 600

In [34]:
# 정확도 기록을 저장할 리스트 초기화
train_losses = []
validation_losses = []
    
# 훈련 및 검증 루프
for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss = validate(model, test_loader, criterion, device)
    
    # 손실 기록
    train_losses.append(train_loss)
    validation_losses.append(val_loss)

Epoch 1/600


Training Loss: 0.9764, Accuracy: 49.8205%
Model saved to C:/Users/yumi/projects/deepdaiv_ecg/24w_MI_Multimodal_Prediction/clinical\mlp_clinical_best.pth
Validation Loss: 0.9588, Accuracy: 50.7163%
Epoch 2/600
Training Loss: 0.9692, Accuracy: 49.8205%
Model saved to C:/Users/yumi/projects/deepdaiv_ecg/24w_MI_Multimodal_Prediction/clinical\mlp_clinical_best.pth
Validation Loss: 0.9519, Accuracy: 50.7163%
Epoch 3/600
Training Loss: 0.9621, Accuracy: 49.8205%
Model saved to C:/Users/yumi/projects/deepdaiv_ecg/24w_MI_Multimodal_Prediction/clinical\mlp_clinical_best.pth
Validation Loss: 0.9449, Accuracy: 50.7163%
Epoch 4/600
Training Loss: 0.9550, Accuracy: 49.8205%
Model saved to C:/Users/yumi/projects/deepdaiv_ecg/24w_MI_Multimodal_Prediction/clinical\mlp_clinical_best.pth
Validation Loss: 0.9381, Accuracy: 50.7163%
Epoch 5/600
Training Loss: 0.9479, Accuracy: 49.8205%
Model saved to C:/Users/yumi/projects/deepdaiv_ecg/24w_MI_Multimodal_Prediction/clinical\mlp_clinical_best.pth
Validation 

In [None]:
# 손실 그래프 시각화
plt.figure(figsize=(10, 6))
plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss')
plt.plot(range(1, num_epochs + 1), validation_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()