# 🧠 Step 0 — Environment Setup

In [None]:

!nvidia-smi

import torch
import random
import numpy as np

# Reproducibility: seed every RNG this notebook relies on (Python, NumPy, PyTorch)
# so weight initialisation, shuffling and dropout are repeatable across
# "Restart & Run All". torch.manual_seed also seeds the CUDA generators.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device config: prefer the GPU when one is visible, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("⚠️ Using CPU (expect slow training)")


# 🔹 Step 1 — Mount Google Drive

In [None]:

import os

from google.colab import drive

# Make the user's Google Drive available under /content/drive.
drive.mount('/content/drive')

# Project locations on Drive: processed dataset (read) and model checkpoints (write).
DATA_DIR = "/content/drive/MyDrive/ppg-bp-estimation/data/processed"
CKPT_DIR = "/content/drive/MyDrive/ppg-bp-estimation/checkpoints"

# Create the checkpoint directory (and parents) if missing. Pure Python instead of
# `!mkdir -p $CKPT_DIR`, so the path is never word-split by a shell and the cell
# does not depend on IPython shell magics.
os.makedirs(CKPT_DIR, exist_ok=True)


# 🔹 Step 2 — Load Processed Data

In [None]:

import numpy as np
from torch.utils.data import TensorDataset, DataLoader

# Batching configuration shared by all three loaders.
BATCH_SIZE = 128
NUM_WORKERS = 4
# Page-locked host memory only helps host->GPU copies; on a CPU-only runtime it
# just triggers a warning, so enable it only when CUDA is present.
PIN_MEMORY = torch.cuda.is_available()


def load_split(archive, split):
    """Return ``(X, y)`` float32 tensors for one split of the archive.

    Reads ``X_<split>`` and ``y_<split>`` from ``archive`` and inserts a channel
    axis at position 1 of ``X`` so it can be fed to the 1-D convolutional model.

    Raises:
        ValueError: if the arrays do not have the layout the rest of the
            notebook relies on (2-D signals, two target columns, equal lengths).
    """
    X = torch.tensor(archive[f"X_{split}"], dtype=torch.float32).unsqueeze(1)
    y = torch.tensor(archive[f"y_{split}"], dtype=torch.float32)

    # The model's first layer is Conv1d(1, ...) and the validation loop indexes
    # target columns 0 and 1, so fail early with a readable message otherwise.
    if X.dim() != 3:
        raise ValueError(f"X_{split} must be 2-D (samples, length); got tensor shape {tuple(X.shape)} after adding the channel axis")
    if y.dim() != 2 or y.size(1) != 2:
        raise ValueError(f"y_{split} must have shape (samples, 2); got {tuple(y.shape)}")
    if X.size(0) != y.size(0):
        raise ValueError(f"X_{split} and y_{split} disagree on sample count: {X.size(0)} vs {y.size(0)}")
    return X, y


def make_loader(X, y, shuffle):
    """Wrap a pair of tensors in a ``DataLoader`` using the shared batching config."""
    return DataLoader(
        TensorDataset(X, y),
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
    )


data = np.load(f"{DATA_DIR}/all_ppg_only.npz")
X_train, y_train = load_split(data, "train")
X_val, y_val = load_split(data, "val")
X_test, y_test = load_split(data, "test")

# Only the training set is shuffled; validation/test keep file order.
train_loader = make_loader(X_train, y_train, shuffle=True)
val_loader = make_loader(X_val, y_val, shuffle=False)
test_loader = make_loader(X_test, y_test, shuffle=False)


# 🔹 Step 3 — Define CNN Model, Loss, and Optimizer

In [None]:

import torch.nn as nn

class CNN_BP_Estimator(nn.Module):
    """1-D CNN that regresses two values per input signal.

    Four Conv1d -> BatchNorm1d -> ReLU stages (the first three followed by
    MaxPool1d(2), the last by a global AdaptiveMaxPool1d) feed a shared
    128 -> 64 dense layer with dropout, followed by two independent linear
    heads. ``forward`` returns a ``(batch, 2)`` tensor whose column 0 comes
    from ``sbp_head`` and column 1 from ``dbp_head``.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size) per stage; padding is
        # kernel_size // 2 so each convolution preserves the sequence length.
        stage_specs = [(1, 16, 7), (16, 32, 5), (32, 64, 3), (64, 128, 3)]
        final_stage = len(stage_specs) - 1

        layers = []
        for idx, (c_in, c_out, kernel) in enumerate(stage_specs):
            layers.append(nn.Conv1d(c_in, c_out, kernel, padding=kernel // 2))
            layers.append(nn.BatchNorm1d(c_out))
            layers.append(nn.ReLU())
            # Halve the length after the early stages; collapse it to 1 at the end.
            if idx < final_stage:
                layers.append(nn.MaxPool1d(2))
            else:
                layers.append(nn.AdaptiveMaxPool1d(1))
        # Kept as one flat Sequential so parameter names stay conv_block.<i>.*
        self.conv_block = nn.Sequential(*layers)

        shared_layers = [nn.Flatten(), nn.Linear(128, 64), nn.ReLU(), nn.Dropout(0.3)]
        self.fc = nn.Sequential(*shared_layers)

        # One scalar output per head, both reading the same 64-d feature vector.
        self.sbp_head = nn.Linear(64, 1)
        self.dbp_head = nn.Linear(64, 1)

    def forward(self, x):
        """Map a batch of single-channel signals to a ``(batch, 2)`` prediction."""
        features = self.fc(self.conv_block(x))
        head_outputs = [self.sbp_head(features), self.dbp_head(features)]
        return torch.cat(head_outputs, dim=1)

# Build the network and place its parameters on the selected device.
model = CNN_BP_Estimator()
model = model.to(device)

# Mean-squared error over both output columns.
criterion = nn.MSELoss(reduction="mean")

# Adam over every model parameter.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


# 🔹 Step 4 — Training Loop (with AMP + tqdm + Checkpointing)

In [None]:

from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler

# Mixed precision is only useful on CUDA. Gating it on the device keeps the CPU
# fallback path free of "CUDA is not available" warnings: with enabled=False,
# autocast is a no-op and the scaler passes the loss through unscaled.
use_amp = device.type == "cuda"
scaler = GradScaler(enabled=use_amp)
num_epochs = 30
best_val_loss = float('inf')

# Per-epoch history (one entry appended per epoch).
train_losses, val_losses, sbp_rmses, dbp_rmses = [], [], [], []

n_train = len(train_loader.dataset)
n_val = len(val_loader.dataset)

for epoch in range(num_epochs):
    # ---------------- training pass ----------------
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]")

    for X_batch, y_batch in train_bar:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()

        with autocast(enabled=use_amp):
            preds = model(X_batch)
            loss = criterion(preds, y_batch)

        # Scale the loss before backward; scaler.step skips the optimizer step
        # if the gradients contain inf/NaN, and update adjusts the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        # criterion returns a per-batch mean, so weight by batch size to get a
        # correct dataset-level mean even when the last batch is smaller.
        total_train_loss += batch_loss * X_batch.size(0)
        train_bar.set_postfix({"Loss": f"{batch_loss:.4f}"})

    avg_train_loss = total_train_loss / n_train

    # ---------------- validation pass ----------------
    model.eval()
    val_loss = 0.0
    # Sum of squared errors per output column. RMSE must be sqrt(mean of all
    # squared errors); averaging per-batch RMSEs (as before) is biased and
    # depends on the batch size.
    sbp_sq_err, dbp_sq_err = 0.0, 0.0

    with torch.no_grad():
        for X_batch, y_batch in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]", leave=False):
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item() * X_batch.size(0)

            sq_err = (preds - y_batch) ** 2
            sbp_sq_err += sq_err[:, 0].sum().item()
            dbp_sq_err += sq_err[:, 1].sum().item()

    avg_val_loss = val_loss / n_val
    avg_sbp_rmse = (sbp_sq_err / n_val) ** 0.5
    avg_dbp_rmse = (dbp_sq_err / n_val) ** 0.5

    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
    sbp_rmses.append(avg_sbp_rmse)
    dbp_rmses.append(avg_dbp_rmse)

    print(f"Epoch [{epoch+1}/{num_epochs}] | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | SBP RMSE: {avg_sbp_rmse:.2f} | DBP RMSE: {avg_dbp_rmse:.2f}")

    # Keep only the weights with the lowest validation loss seen so far.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), f"{CKPT_DIR}/best_cnn_model.pt")
        print(f"✅ Saved new best model at epoch {epoch+1}")
