In [14]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import recall_score, precision_score, f1_score, confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split, WeightedRandomSampler


In [2]:
# =========================
# 1. Load data & build tensors
# =========================
def _zscore_per_sample(signals, eps=1e-8):
    """Standardise every row of a 2-D array to zero mean and unit variance.

    Args:
        signals: 2-D NumPy array; each row is normalised independently.
        eps: Small constant added to the standard deviation so that a
            constant (zero-variance) row does not cause a division by zero.

    Returns:
        Array of the same shape as ``signals``.
    """
    mean = signals.mean(axis=1, keepdims=True)
    std = signals.std(axis=1, keepdims=True)
    return (signals - mean) / (std + eps)


# Build the directory with os.path.join rather than a hard-coded backslash:
# on Windows this is still ".\archive", on POSIX it becomes "./archive".
data_dir = os.path.join(".", "archive")
train_path = os.path.join(data_dir, "mitbih_train.csv")
test_path = os.path.join(data_dir, "mitbih_test.csv")

# The files are read without a header row.
train_df = pd.read_csv(train_path, header=None)
test_df = pd.read_csv(test_path, header=None)

# The last column is used as the integer class label; every column before it
# is treated as the signal.
X_train = train_df.iloc[:, :-1].values
y_train = train_df.iloc[:, -1].values.astype(int)

X_test = test_df.iloc[:, :-1].values
y_test = test_df.iloc[:, -1].values.astype(int)

# Z-score per sample (row-wise, so no statistics leak between train and test).
X_train = _zscore_per_sample(X_train)
X_test = _zscore_per_sample(X_test)

# Insert a channel axis: (samples, signal_length) -> (samples, 1, signal_length),
# the layout nn.Conv1d expects.
X_train = X_train[:, np.newaxis, :]
X_test = X_test[:, np.newaxis, :]

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


In [3]:
# Define the model: a CNN built from pairs ("couples") of convolution layers
class ECG_CNN(nn.Module):
    """1-D CNN classifier built from two pairs of convolution layers.

    Layout: two (Conv1d -> BatchNorm -> ReLU) stages, dropout and a max-pool,
    first with 64 channels and then again with 128 channels, followed by two
    fully connected BatchNorm/ReLU/dropout stages and a final linear layer.

    Args:
        num_classes: Number of output logits.
        input_length: Number of time steps per input signal. The default of
            187 reproduces the original fixed flatten size of 128 * 46.

    Raises:
        ValueError: If ``input_length`` is too short to survive both
            pooling stages (fewer than 4 time steps).

    Shapes:
        Input:  (batch, 1, input_length)
        Output: (batch, num_classes) raw logits; no softmax is applied.
    """

    def __init__(self, num_classes=5, input_length=187):
        super().__init__()

        # The padded convolutions (kernel 5, padding 2) keep the length
        # unchanged; each MaxPool1d(kernel_size=2, stride=2) floors it to
        # L // 2. Two pools therefore leave (input_length // 2) // 2 steps.
        pooled_length = (input_length // 2) // 2
        if pooled_length < 1:
            raise ValueError(
                f"input_length={input_length} is too short for two 2x poolings"
            )

        # First couple conv
        self.conv1 = nn.Conv1d(1, 64, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(64)

        self.conv2 = nn.Conv1d(64, 64, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(64)
        self.drop_c2 = nn.Dropout(0.2)   # Dropout after C2

        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)

        # Second couple conv
        self.conv3 = nn.Conv1d(64, 128, kernel_size=5, padding=2)
        self.bn3 = nn.BatchNorm1d(128)

        self.conv4 = nn.Conv1d(128, 128, kernel_size=5, padding=2)
        self.bn4 = nn.BatchNorm1d(128)
        self.drop_c4 = nn.Dropout(0.3)   # Dropout after C4

        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)

        # Fully connected layers (attribute names are unchanged so that
        # previously saved state_dicts still load).
        self.fc1 = nn.Linear(128 * pooled_length, 256)
        self.bn_fc1 = nn.BatchNorm1d(256)
        self.dropfc1 = nn.Dropout(0.5)      # Dropout after F1

        self.fc2 = nn.Linear(256, 128)
        self.bn_fc2 = nn.BatchNorm1d(128)
        self.dropfc2 = nn.Dropout(0.5)      # Dropout after F2

        self.fc3 = nn.Linear(128, num_classes)

        # Activation (stateless, shared by every stage)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return class logits for a batch shaped (batch, 1, input_length)."""
        # --- Conv Block 1 ---
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.drop_c2(x)
        x = self.pool1(x)      # [batch, 64, L // 2]  (93 for L = 187)

        # --- Conv Block 2 ---
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = self.drop_c4(x)
        x = self.pool2(x)      # [batch, 128, L // 4]  (46 for L = 187)

        # Flatten everything except the batch axis: [batch, 128 * (L // 4)]
        x = x.flatten(1)

        # --- FC layers ---
        x = self.relu(self.bn_fc1(self.fc1(x)))
        x = self.dropfc1(x)

        x = self.relu(self.bn_fc2(self.fc2(x)))
        x = self.dropfc2(x)

        return self.fc3(x)


In [29]:
# Run on the GPU when torch can see one; otherwise stay on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Checkpoints are written to a "models" folder inside the data directory;
# the folder is created on first use and reused afterwards.
save_dir = os.path.join(data_dir, "models")
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, "ECG_couple_CNN_sampler010.pt")


In [26]:
# =========================
# 3. Training: seeding and stratified train/validation split
# =========================
# Keep the same seed for every model. NumPy drives the split shuffle below;
# torch drives weight initialisation, dropout and the random sampler, so both
# generators are seeded (seeding NumPy alone leaves the torch side unseeded).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Stratified split: hold out the same fraction of every class for validation.
# Class membership is read from the labels instead of hard-coded row ranges,
# so the split stays stratified even if the CSV row order or the per-class
# counts differ from what fixed ranges would assume. When the rows are sorted
# by class, this visits the same index blocks in the same order as before.
VAL_FRACTION = 0.2

train_parts = []
val_parts = []
for cls in np.unique(y_train):
    idx = np.flatnonzero(y_train == cls)
    np.random.shuffle(idx)  # in-place shuffle of this class's row indices
    n_val = int(len(idx) * VAL_FRACTION)
    val_parts.append(idx[:n_val])
    train_parts.append(idx[n_val:])

train_indices = np.concatenate(train_parts)
val_indices = np.concatenate(val_parts)

# Every row must land in exactly one of the two subsets.
assert len(train_indices) + len(val_indices) == len(y_train)

# Materialise the two subsets as tensor datasets.
train_index_tensor = torch.as_tensor(train_indices, dtype=torch.long)
val_index_tensor = torch.as_tensor(val_indices, dtype=torch.long)
train_dataset = TensorDataset(X_train_tensor[train_index_tensor], y_train_tensor[train_index_tensor])
val_dataset = TensorDataset(X_train_tensor[val_index_tensor], y_train_tensor[val_index_tensor])


# Data loaders
# With the sampler enabled, training rows are drawn with replacement. Class 0
# rows keep weight 1; rows of every other class get weight
# target_count / class_size, so each of those classes carries a total weight
# of target_count = target_ratio * (number of class-0 rows).
use_sampler = True
target_ratio = 0.1

if not use_sampler:
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
else:
    y_train_split = y_train[train_indices]
    class_sample_counts = np.bincount(y_train_split)

    # Class 0 is the reference class.
    target_count = int(class_sample_counts[0] * target_ratio)
    weights_per_class = np.array([
        1.0 if cls == 0 else target_count / n_rows
        for cls, n_rows in enumerate(class_sample_counts)
    ])

    # Look up one weight per training row from its label.
    samples_weight = weights_per_class[y_train_split]
    sampler = WeightedRandomSampler(
        weights=samples_weight,
        num_samples=len(samples_weight),  # one epoch = as many draws as rows
        replacement=True,
    )
    train_loader = DataLoader(train_dataset, batch_size=64, sampler=sampler)

# Validation keeps the natural class distribution and a fixed order.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Inverse-frequency class weights. They are only consumed by the weighted-loss
# variant below, which is currently disabled.
class_counts = [72471, 2223, 5789, 640, 6431]
num_classes = len(class_counts)
total_samples = sum(class_counts)
base_weights = [total_samples / (count * num_classes) for count in class_counts]

# Optional blending of the class weights (disabled):
# alpha = 1 -> weight fully by class frequency, alpha = 0 -> no weighting
# alpha = 1
# adjusted_weights = [1 + alpha * (w - 1) for w in base_weights]
# weights_tensor = torch.tensor(adjusted_weights, dtype=torch.float32).to(device)

# Fresh model on the selected device, trained with plain cross-entropy.
model = ECG_CNN().to(device)
criterion = nn.CrossEntropyLoss()
# criterion = nn.CrossEntropyLoss(weight=weights_tensor)

# AdamW (decoupled weight decay) is used; the plain Adam variant is kept for reference.
# optimizer = optim.Adam(model.parameters(), lr=1e-3)
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)

# Scheduler (disabled): reduce LR when the validation loss plateaus.
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.8, patience=3, min_lr=1e-6)

# Loop limits and early-stopping bookkeeping.
num_epochs = 100
patience_es = 15            # epochs without improvement before stopping
best_val_loss = float("inf")
no_improve_count = 0
for epoch in range(num_epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    train_loss = 0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(xb), yb)
        batch_loss.backward()
        optimizer.step()
        # Scale by the batch size so the epoch figure is a per-sample mean
        # even when the final batch is smaller than the rest.
        train_loss += batch_loss.item() * xb.size(0)
    train_loss /= len(train_loader.dataset)

    # ---- Validation pass: no gradients, dropout/batch-norm in eval mode ----
    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            logits = model(xb)
            val_loss += criterion(logits, yb).item() * xb.size(0)
            correct += (logits.argmax(dim=1) == yb).sum().item()
    n_val_samples = len(val_loader.dataset)
    val_loss /= n_val_samples
    val_acc = correct / n_val_samples

    print(f"Epoch {epoch+1}/{num_epochs} - "
          f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
    # Scheduler step on the validation loss (disabled together with the scheduler)
    # scheduler.step(val_loss)

    # ---- Checkpoint on improvement, otherwise count towards early stopping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), model_path)
        print("Best model saved.")
        no_improve_count = 0
        continue

    no_improve_count += 1
    if no_improve_count >= patience_es:
        print(f"Early stopping at epoch {epoch+1}")
        break



Epoch 1/100 - Train Loss: 0.3067 | Val Loss: 0.2860 | Val Acc: 0.8984
Best model saved.
Epoch 2/100 - Train Loss: 0.1497 | Val Loss: 0.1661 | Val Acc: 0.9445
Best model saved.
Epoch 3/100 - Train Loss: 0.1052 | Val Loss: 0.1502 | Val Acc: 0.9416
Best model saved.
Epoch 4/100 - Train Loss: 0.0878 | Val Loss: 0.0982 | Val Acc: 0.9683
Best model saved.
Epoch 5/100 - Train Loss: 0.0735 | Val Loss: 0.1135 | Val Acc: 0.9624
Epoch 6/100 - Train Loss: 0.0620 | Val Loss: 0.1169 | Val Acc: 0.9584
Epoch 7/100 - Train Loss: 0.0576 | Val Loss: 0.0877 | Val Acc: 0.9693
Best model saved.
Epoch 8/100 - Train Loss: 0.0512 | Val Loss: 0.0801 | Val Acc: 0.9728
Best model saved.
Epoch 9/100 - Train Loss: 0.0464 | Val Loss: 0.0712 | Val Acc: 0.9772
Best model saved.
Epoch 10/100 - Train Loss: 0.0421 | Val Loss: 0.0806 | Val Acc: 0.9720
Epoch 11/100 - Train Loss: 0.0416 | Val Loss: 0.0608 | Val Acc: 0.9811
Best model saved.
Epoch 12/100 - Train Loss: 0.0383 | Val Loss: 0.0735 | Val Acc: 0.9763
Epoch 13/100 

In [30]:
# =========================
# Evaluation on the test set
# =========================
test_ds = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_ds, batch_size=128, shuffle=False)

# Rebuild the network and restore the checkpoint stored at model_path.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint that was saved from a GPU run still loads on a CPU-only machine.
model = ECG_CNN().to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

# Collect predicted and true labels batch by batch, in dataset order.
preds, labels = [], []
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        out = model(xb)
        pred = torch.argmax(out, dim=1)
        preds.extend(pred.cpu().numpy())
        labels.extend(yb.cpu().numpy())

# Persist the raw predictions next to the checkpoint.
csv_path = os.path.join(save_dir, "test_pred.csv")
pd.DataFrame({"y_true": labels, "y_pred": preds}).to_csv(csv_path, index=False)
print(f"Evaluation CSV saved at {csv_path}")


Evaluation CSV saved at .\archive\models\test_pred.csv


In [31]:
# =========================
# Confusion matrix & metrics
# =========================
cm = confusion_matrix(labels, preds, labels=list(range(5)))

print("===== Confusion Matrix =====")
print(cm)

# Row sums are the number of true samples per class; they also serve as the
# weights of the support-weighted averages below.
class_counts = cm.sum(axis=1)
total_samples = class_counts.sum()
weights = class_counts / total_samples

# One-vs-rest counts for all 5 classes at once (each array has one entry per class).
tp = np.diag(cm)
fp = cm.sum(axis=0) - tp          # predicted as the class, but belongs elsewhere
fn = class_counts - tp            # belongs to the class, but predicted elsewhere
tn = cm.sum() - (tp + fp + fn)

# The 1e-8 terms keep each ratio finite when a denominator would be zero.
recall = tp / (tp + fn + 1e-8)
specificity = tn / (tn + fp + 1e-8)
precision = tp / (tp + fp + 1e-8)
f1 = 2 * recall * precision / (recall + precision + 1e-8)

metrics_per_class = {
    "recall": list(recall),
    "specificity": list(specificity),
    "precision": list(precision),
    "f1": list(f1),
}

# Macro = unweighted mean over classes; weighted = mean weighted by class support.
macro_avg_metrics = {
    name: np.mean(values) for name, values in metrics_per_class.items()
}
weighted_avg_metrics = {
    name: np.sum(np.array(values) * weights)
    for name, values in metrics_per_class.items()
}

print("\n===== Per-Class Metrics =====")
for name, values in metrics_per_class.items():
    print(f"{name}: {np.round(values, 4)}")
print("\n===== Macro-Average Metrics =====")
for name, value in macro_avg_metrics.items():
    print(f"{name}: {value:.4f}")
print("\n===== Weighted-Average Metrics =====")
for name, value in weighted_avg_metrics.items():
    print(f"{name}: {value:.4f}")

===== Confusion Matrix =====
[[18009    60    34    12     3]
 [   91   456     9     0     0]
 [   23     3  1399    23     0]
 [   11     1    10   140     0]
 [   24     0     3     0  1581]]

===== Per-Class Metrics =====
recall: [0.994  0.8201 0.9662 0.8642 0.9832]
specificity: [0.9605 0.997  0.9973 0.9984 0.9999]
precision: [0.9918 0.8769 0.9615 0.8    0.9981]
f1: [0.9929 0.8476 0.9638 0.8309 0.9906]

===== Macro-Average Metrics =====
recall: 0.9255
specificity: 0.9906
precision: 0.9257
f1: 0.9252

===== Weighted-Average Metrics =====
recall: 0.9860
specificity: 0.9670
precision: 0.9859
f1: 0.9859
