Import Dependencies

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.preprocessing import LabelEncoder


Load the saved data

In [2]:
# Load the preprocessed beats and their labels.
# np.load on an .npz archive returns a lazy NpzFile that holds the file open,
# so it is used as a context manager to guarantee the handle is released.
# Indexing a key reads that array into memory, which means `beats` and
# `labels` stay valid after the archive is closed (`data` itself does not).
with np.load('Preprocessed_data/ecg_data.npz') as data:
    beats = data['beats']     # expected shape: (num_beats, 90, 1)
    labels = data['labels']   # expected shape: (num_beats,)

Prepare the ECG beats (loaded from the .npz file) as a PyTorch Dataset

In [3]:
# --- Inputs -------------------------------------------------------------------
# Convert the beats to float32 and move the channel axis forward:
# (num_beats, 90, 1) -> (num_beats, 1, 90), the (N, channels, length) layout
# that Conv1d consumes.
beats_tensor = torch.tensor(beats, dtype=torch.float32).permute(0, 2, 1)

# --- Labels -------------------------------------------------------------------
# Map the raw labels to integer class ids. `le` keeps the mapping so the ids
# can be decoded again. The labels are not used to train the autoencoder in
# this notebook; presumably they are kept for a later classification step.
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)  # numpy array of integer ids
labels_tensor = torch.tensor(labels_encoded, dtype=torch.long)

# --- Dataset / loader ---------------------------------------------------------
# An autoencoder is trained to reproduce its input, so every sample is paired
# with itself as the target.
dataset = TensorDataset(beats_tensor, beats_tensor)
dataloader = DataLoader(dataset, shuffle=True, batch_size=128)

Autoencoder Model

In [4]:
class ECGAutoencoder(nn.Module):
    """1-D convolutional autoencoder for single-channel ECG beats.

    The encoder applies three stride-2 convolutions, each halving the
    temporal length (rounding up) while widening the channels
    1 -> 16 -> 32 -> 64. The decoder mirrors the channel widths with three
    stride-2 transposed convolutions followed by a sigmoid.

    The decoder overshoots the input length (a 90-sample beat is expanded to
    96 samples), so ``forward`` trims the reconstruction back to the length
    of its input.
    """

    def __init__(self):
        super().__init__()

        # Hyper-parameters shared by the stride-2 layers.
        down = dict(kernel_size=3, stride=2, padding=1)
        up = dict(kernel_size=3, stride=2, padding=1, output_padding=1)

        # Encoder -- shapes shown for a 90-sample input.
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, **down),    # (N,16,45)
            nn.ReLU(),
            nn.Conv1d(16, 32, **down),   # (N,32,23)
            nn.ReLU(),
            nn.Conv1d(32, 64, **down),   # (N,64,12)
            nn.ReLU(),
        )

        # Decoder -- each layer doubles the length: 12 -> 24 -> 48 -> 96.
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(64, 32, **up),   # (N,32,24)
            nn.ReLU(),
            nn.ConvTranspose1d(32, 16, **up),   # (N,16,48)
            nn.ReLU(),
            nn.ConvTranspose1d(16, 1, kernel_size=4, stride=2, padding=1, output_padding=0),  # (N,1,96)
            # Sigmoid bounds the output to (0, 1); this assumes the beats were
            # scaled to [0, 1] during preprocessing -- TODO confirm.
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Reconstruct ``x`` of shape (N, 1, L); the result has the same shape."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        # The decoder output is at least as long as the input; drop the surplus
        # trailing samples so it lines up with ``x`` for the loss.
        target_len = x.size(2)
        return reconstruction[:, :, :target_len]


# NOTE: the model, loss function and optimizer are intentionally not created
# in this cell. The training cell builds all three after selecting the device;
# instantiating them here as well only produced CPU-side duplicates that were
# overwritten before ever being used.

Training the Autoencoder

In [5]:
# Seed PyTorch's global RNG so that weight initialisation and the DataLoader's
# shuffle order are repeatable across Restart & Run All. Some GPU kernels can
# still be non-deterministic, so GPU runs may differ in the last digits.
SEED = 42
torch.manual_seed(SEED)

# Device config (use GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the model and move to device
model = ECGAutoencoder().to(device)

# Loss function & optimizer
criterion = nn.MSELoss()  # Mean Squared Error for reconstruction
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    # The dataset yields (input, target) pairs where the target is the input
    # itself, so the second element is ignored.
    for inputs, _ in dataloader:
        inputs = inputs.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, inputs)  # Compare reconstruction to original
        loss.backward()
        optimizer.step()

        # MSELoss returns a mean over the batch; weighting it by the batch size
        # lets the (possibly smaller) final batch contribute proportionally.
        running_loss += loss.item() * inputs.size(0)

    # Mean reconstruction loss over all samples seen in this epoch.
    epoch_loss = running_loss / len(dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.6f}")


Epoch [1/20], Loss: 0.007184
Epoch [2/20], Loss: 0.000251
Epoch [3/20], Loss: 0.000156
Epoch [4/20], Loss: 0.000115
Epoch [5/20], Loss: 0.000095
Epoch [6/20], Loss: 0.000082
Epoch [7/20], Loss: 0.000074
Epoch [8/20], Loss: 0.000069
Epoch [9/20], Loss: 0.000063
Epoch [10/20], Loss: 0.000060
Epoch [11/20], Loss: 0.000056
Epoch [12/20], Loss: 0.000052
Epoch [13/20], Loss: 0.000049
Epoch [14/20], Loss: 0.000048
Epoch [15/20], Loss: 0.000045
Epoch [16/20], Loss: 0.000043
Epoch [17/20], Loss: 0.000040
Epoch [18/20], Loss: 0.000039
Epoch [19/20], Loss: 0.000039
Epoch [20/20], Loss: 0.000038


Extract Latent Features for Classification

In [6]:
# Put the model in evaluation mode before extracting features.
model.eval()

# A separate loader without shuffling keeps the latent rows in the same order
# as the samples in `dataset`.
feature_loader = DataLoader(dataset, batch_size=128)

# For every batch: run only the encoder (output shape (batch_size, 64, 12)),
# flatten channels x length into one vector per beat, and bring it back to the
# CPU. Gradients are not needed here.
with torch.no_grad():
    all_latent = [
        model.encoder(batch.to(device)).flatten(start_dim=1).cpu()
        for batch, _ in feature_loader
    ]

# Stack the per-batch results into a single (num_samples, 768) matrix.
latent_features = torch.cat(all_latent, dim=0)
print("Latent features shape:", latent_features.shape)  # Expected: (num_samples, 64*12=768)


Latent features shape: torch.Size([109487, 768])


Save latent features as .npz

In [7]:
# Persist the latent feature matrix (one row per beat, in dataset order) under
# the key 'features' of a compressed .npz archive. `np` comes from the import
# cell at the top of the notebook.
latent_np = latent_features.numpy()  # shares memory with the CPU tensor
np.savez_compressed('Preprocessed_data/ecg_latent_features.npz', features=latent_np)
print("Saved latent features to Preprocessed_data/ecg_latent_features.npz")


Saved latent features to Preprocessed_data/ecg_latent_features.npz
