In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import torch.nn.functional as F
import pickle
from tqdm import tqdm
from sklearn.metrics import r2_score, accuracy_score

In [2]:
# Read the heart-rate export with the CP949 codec, then preview the first rows.
csv_path = '/home/youngwoo/한국건강증진개발원_보건소 모바일 헬스케어_심박수_20240912.csv'
data = pd.read_csv(csv_path, encoding='cp949')
data.head()

Unnamed: 0,심박측정일시,심박측정,최고심박,최저심박,심박수_배열
0,2024-09-12 20:42,80.0,105.0,53.0,"93,81,85,89,86,86,86,85,85,85,86,83,89,89,96,7..."
1,2024-09-12 20:42,105.0,138.0,67.0,"90,103,127,114,117,117,120,80,86,86,93,110,114..."
2,2024-09-12 20:42,111.0,135.0,86.0,"120,90,110,110,130,101,108,116,117,120,120,124..."
3,2024-09-12 20:42,106.0,131.0,62.0,"121,123,126,62,66,71,86,90,94,115,116,118,81,9..."
4,2024-09-12 20:42,87.0,125.0,54.0,"78,83,88,74,86,99,75,77,79,75,76,78,73,83,93,8..."


In [8]:
import numpy as np

def makeNumpyData(data):
    """Flatten the '심박수_배열' column of a DataFrame into one 1-D NumPy array.

    Every cell is treated as a comma-separated string of readings. Tokens
    that are not made up purely of digits, and readings equal to 0, are
    dropped. The retained readings of all rows are concatenated in row order.

    Args:
        data: pandas DataFrame that contains a '심박수_배열' column.

    Returns:
        np.ndarray of dtype int32 holding every retained reading.
    """
    ppg_data_cleaned = []
    for row in data['심박수_배열'].astype(str):
        if not isinstance(row, str):
            # Defensive: skip anything that is still not a string after
            # astype(str) (for example a missing value).
            continue

        for raw_token in row.split(","):
            token = raw_token.strip()
            if not token.isdigit():
                continue
            try:
                value = int(token)
            except ValueError as e:
                # str.isdigit() accepts some characters (e.g. superscript
                # digits) that int() rejects. Drop only this token instead
                # of discarding every reading in the row.
                print(f"변환 오류 발생: {token} - {e}")
                continue
            if value != 0:
                ppg_data_cleaned.append(value)

    return np.array(ppg_data_cleaned, dtype=np.int32)

def createSequence(data, seq_length):
    """Build sliding-window inputs and next-value labels from a series.

    Window ``i`` is ``data[i:i+seq_length]`` and its label is
    ``data[i+seq_length]``.

    Args:
        data: array-like that is windowed along its first axis.
        seq_length: number of consecutive values in each input window (>= 1).

    Returns:
        Tuple ``(sequences, labels)`` of float32 arrays. ``sequences`` has
        shape ``(len(data) - seq_length, seq_length, ...)`` and ``labels`` has
        shape ``(len(data) - seq_length, ...)``. When ``data`` has no more
        than ``seq_length`` entries both arrays have zero rows but keep the
        remaining dimensions, so downstream 2-D consumers still see a 2-D
        ``sequences`` array.

    Raises:
        ValueError: if ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    values = np.asarray(data)
    n_samples = len(values) - seq_length
    trailing_shape = values.shape[1:]

    if n_samples <= 0:
        # Not enough data for even one (window, label) pair.
        return (
            np.empty((0, seq_length) + trailing_shape, dtype=np.float32),
            np.empty((0,) + trailing_shape, dtype=np.float32),
        )

    # Strided view of all windows along axis 0; the window axis is appended
    # last, so move it next to the sample axis. The final window has no
    # following value to act as a label, hence the [:n_samples] cut.
    windows = np.lib.stride_tricks.sliding_window_view(values, seq_length, axis=0)
    windows = np.moveaxis(windows, -1, 1)[:n_samples]

    # np.array copies, so the results never alias the input buffer.
    sequences = np.array(windows, dtype=np.float32, order='C')
    labels = values[seq_length:].astype(np.float32)
    return sequences, labels

def makeTensorData(X, y):
    """Min-max scale inputs and labels, then convert them to PyTorch tensors.

    Args:
        X: 2-D array of input windows, shape (n_samples, seq_length).
        y: 1-D array of labels, shape (n_samples,).

    Returns:
        Tuple ``(X_tensor, y_tensor)`` of float32 tensors with shapes
        (n_samples, seq_length, 1) and (n_samples,).
    """
    # NOTE(review): both scalers are fitted on every sample that is passed in.
    # In this notebook the train/test split happens afterwards (in
    # setTrainTestData), so test-set statistics influence the scaling. The
    # scalers are also local to this function, so predictions cannot be
    # mapped back to the original units by the caller.
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X_scaled = scaler_X.fit_transform(X).astype(np.float32)  # (n_samples, seq_length)
    y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).astype(np.float32)  # (n_samples, 1)

    # Add a trailing feature axis for the LSTM input.
    X_tensor = torch.tensor(X_scaled).unsqueeze(-1)  # (n_samples, seq_length, 1)
    # Drop only the trailing axis: a bare squeeze() would also remove the
    # sample axis when n_samples == 1 and return a 0-dim tensor.
    y_tensor = torch.tensor(y_scaled).squeeze(-1)  # (n_samples,)

    return X_tensor, y_tensor

def setTrainTestData(X_tensor, y_tensor, batch_size):
    """Split the tensors 80/20 in their given order and wrap both parts in DataLoaders.

    The first 80% of the samples form the training set and the rest the test
    set. Only the training loader shuffles its samples.

    Args:
        X_tensor: input tensor indexed by sample along the first axis.
        y_tensor: label tensor with the same number of samples.
        batch_size: batch size used by both loaders.

    Returns:
        (train_loader, test_loader, X_train, y_train, X_test, y_test)
    """
    cut = int(len(X_tensor) * 0.8)

    X_train, X_test = X_tensor[:cut], X_tensor[cut:]
    y_train, y_test = y_tensor[:cut], y_tensor[cut:]

    # Wrap each split; evaluation keeps the original sample order.
    train_loader = DataLoader(
        TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True
    )
    test_loader = DataLoader(
        TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False
    )

    print("데이터 변환 완료! X_train.shape:", X_train.shape, "y_train.shape:", y_train.shape)
    print("X_test.shape:", X_test.shape, "y_test.shape:", y_test.shape)

    return train_loader, test_loader, X_train, y_train, X_test, y_test

In [18]:
## Tunable settings ##
seq_length = 40  # number of consecutive readings in each input window
batch_size = 64  # samples per DataLoader batch
num_epochs = 15  # number of full passes over the training set
update_interval = 20  # refresh the tqdm postfix every `update_interval` batches
practice_num = 3  # suffix of the saved model file; increase by 1 for each new run
seed = 42  # RNG seed for repeatable runs

# Seed before any DataLoader or model is created, so that batch shuffling and
# later weight initialisation draw from a fixed RNG state.
torch.manual_seed(seed)
np.random.seed(seed)

# Main pipeline: raw column -> flat series -> windows/labels -> scaled tensors -> loaders
ppg_data = makeNumpyData(data)
X, y = createSequence(ppg_data, seq_length)
X_tensor, y_tensor = makeTensorData(X, y)
train_loader, test_loader, X_train, y_train, X_test, y_test = setTrainTestData(X_tensor, y_tensor, batch_size)

데이터 변환 완료! X_train.shape: torch.Size([2685618, 40, 1]) y_train.shape: torch.Size([2685618])
X_test.shape: torch.Size([671405, 40, 1]) y_test.shape: torch.Size([671405])


In [10]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each LSTM layer's hidden state.
        num_layers: number of stacked LSTM layers.
        output_size: number of values predicted per sequence.
    """

    def __init__(self, input_size=1, hidden_size=50, num_layers=2, output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Recurrent encoder; batch_first=True means inputs are (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        # Maps the final hidden output to the prediction.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict from a batch of sequences.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # No explicit (h0, c0): nn.LSTM starts from zero hidden and cell
        # states when none are passed. This avoids allocating zeros on the
        # CPU and copying them to the device on every call, and keeps the
        # state dtype in line with the module (e.g. after model.double()).
        out, _ = self.lstm(x)  # out: (batch, seq_len, hidden_size)

        # Only the output at the last time step feeds the linear head.
        return self.fc(out[:, -1, :])  # (batch, output_size)

In [11]:
import torch
# Show the CUDA version reported by PyTorch, then whether CUDA can be used.
cuda_version = torch.version.cuda
cuda_usable = torch.cuda.is_available()
print(cuda_version)
print(cuda_usable)


11.8
True


In [14]:
# Model, loss function and optimizer setup.
# Use CUDA when an NVIDIA GPU is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model before it is moved or printed; referencing `model` ahead of
# this assignment raises NameError on a fresh kernel.
model = LSTMModel().to(device)
print(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move the full train/test tensors to the device as well.
# NOTE(review): this rebinds the names only; the DataLoaders were built
# earlier and the training loop moves each batch itself, so these copies
# appear to be unused there - confirm before relying on or removing them.
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

LSTMModel(
  (lstm): LSTM(1, 50, num_layers=2, batch_first=True)
  (fc): Linear(in_features=50, out_features=1, bias=True)
)


In [19]:
# Resolve the device and place the model here too, so this cell does not
# depend on the placement done in the setup cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# MSELoss is treated as regression and scored with R^2; any other criterion
# is treated as classification and scored with accuracy. The printed label
# follows the metric actually computed.
is_regression = isinstance(criterion, torch.nn.MSELoss)
metric_name = "R2" if is_regression else "Accuracy"

for epoch in range(num_epochs):
    ##### Training #####
    model.train()
    epoch_loss = 0.0

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True)

    for i, (X_batch, y_batch) in enumerate(progress_bar):
        # Batches come from CPU tensors; move them to the model's device.
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device).unsqueeze(-1)  # (batch,) -> (batch, 1), matching the model output

        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        epoch_loss += batch_loss

        # Refreshing the postfix on every batch is needlessly chatty.
        if i % update_interval == 0:
            progress_bar.set_postfix(loss=f"{batch_loss:.4f}")

    ##### Evaluation #####
    model.eval()
    true_batches, pred_batches = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device).unsqueeze(-1)

            y_pred = model(X_batch)
            if not is_regression:
                # softmax is monotonic, so argmax over the raw scores
                # selects the same class.
                y_pred = torch.argmax(y_pred, dim=1)

            # Collect one array per batch rather than one entry per sample.
            pred_batches.append(y_pred.cpu().numpy())
            true_batches.append(y_batch.cpu().numpy())

    y_true = np.concatenate(true_batches)
    y_pred_list = np.concatenate(pred_batches)

    # `accuracy` keeps its original name for any later cell that reads it;
    # for regression it holds the R^2 score.
    if is_regression:
        accuracy = r2_score(y_true, y_pred_list)
    else:
        accuracy = accuracy_score(y_true, y_pred_list)

    print(f"\nEpoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_loss / len(train_loader):.4f}, {metric_name}: {accuracy:.4f}")

##### Save the model #####
# NOTE(review): this pickles the whole module object, so loading it requires
# the LSTMModel class to be importable and must only be done with trusted
# files. The format is kept so existing loaders of these .pkl files still
# work; saving model.state_dict() with torch.save is the usual alternative.
model_path = f"LSTM_model{practice_num}.pkl"
with open(model_path, "wb") as f:
    pickle.dump(model, f)
print(f"모델이 저장되었습니다: {model_path}")


Epoch 1/15: 100%|███████████| 41963/41963 [00:53<00:00, 783.68it/s, loss=0.0009]



Epoch [1/15], Average Loss: 0.0007, Accuracy: 0.8727


Epoch 2/15: 100%|███████████| 41963/41963 [00:53<00:00, 788.45it/s, loss=0.0002]



Epoch [2/15], Average Loss: 0.0007, Accuracy: 0.8718


Epoch 3/15: 100%|███████████| 41963/41963 [00:52<00:00, 792.27it/s, loss=0.0017]



Epoch [3/15], Average Loss: 0.0007, Accuracy: 0.8719


Epoch 4/15: 100%|███████████| 41963/41963 [00:52<00:00, 796.57it/s, loss=0.0006]



Epoch [4/15], Average Loss: 0.0007, Accuracy: 0.8718


Epoch 5/15: 100%|███████████| 41963/41963 [00:52<00:00, 796.56it/s, loss=0.0007]



Epoch [5/15], Average Loss: 0.0007, Accuracy: 0.8723


Epoch 6/15: 100%|███████████| 41963/41963 [00:52<00:00, 792.56it/s, loss=0.0010]



Epoch [6/15], Average Loss: 0.0007, Accuracy: 0.8716


Epoch 7/15: 100%|███████████| 41963/41963 [00:52<00:00, 797.52it/s, loss=0.0006]



Epoch [7/15], Average Loss: 0.0007, Accuracy: 0.8726


Epoch 8/15: 100%|███████████| 41963/41963 [00:52<00:00, 797.66it/s, loss=0.0013]



Epoch [8/15], Average Loss: 0.0007, Accuracy: 0.8710


Epoch 9/15: 100%|███████████| 41963/41963 [00:53<00:00, 784.59it/s, loss=0.0009]



Epoch [9/15], Average Loss: 0.0007, Accuracy: 0.8721


Epoch 10/15: 100%|██████████| 41963/41963 [00:52<00:00, 791.90it/s, loss=0.0009]



Epoch [10/15], Average Loss: 0.0007, Accuracy: 0.8717


Epoch 11/15: 100%|██████████| 41963/41963 [00:52<00:00, 801.34it/s, loss=0.0004]



Epoch [11/15], Average Loss: 0.0007, Accuracy: 0.8703


Epoch 12/15: 100%|██████████| 41963/41963 [00:52<00:00, 802.09it/s, loss=0.0006]



Epoch [12/15], Average Loss: 0.0007, Accuracy: 0.8727


Epoch 13/15: 100%|██████████| 41963/41963 [00:52<00:00, 793.98it/s, loss=0.0004]



Epoch [13/15], Average Loss: 0.0007, Accuracy: 0.8717


Epoch 14/15: 100%|██████████| 41963/41963 [00:54<00:00, 775.94it/s, loss=0.0009]



Epoch [14/15], Average Loss: 0.0007, Accuracy: 0.8718


Epoch 15/15: 100%|██████████| 41963/41963 [00:53<00:00, 787.34it/s, loss=0.0008]



Epoch [15/15], Average Loss: 0.0007, Accuracy: 0.8721
모델이 저장되었습니다: LSTM_model3.pkl
