## Setting up the dataset

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score, roc_curve

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm
import wandb

In [6]:
"""
Loading both abnormal and normal datasets.
"""

# Read both CSVs the same way and drop the last column of each
# (presumably the class label — confirm against the dataset description).
# NOTE(review): read_csv infers a header row by default; if these files have no
# header line, the first record is consumed as column names — confirm.
normal_df, abnormal_df = (
    pd.read_csv(csv_path).iloc[:, :-1]
    for csv_path in ("dataset/ptbdb_normal.csv", "dataset/ptbdb_abnormal.csv")
)

In [7]:
"""
To fix the class imbalance, trim the abnormal set to a fixed-size random sample.
"""

# Draw 2000 abnormal rows at random; the fixed random_state keeps the same
# subset on every run.
anomaly_df = abnormal_df.sample(
    random_state=42,
    n=2000,
)

In [8]:
"""
Dataset converted to numpy
"""
# Convert both frames to plain NumPy arrays for splitting and tensor creation.
normal, anomaly = (frame.to_numpy() for frame in (normal_df, anomaly_df))

In [9]:
"""
Dataset split
"""

# Only the normal recordings are split: 85% for training, 15% held out.
# The anomaly array is not part of this split.
X_train, X_test = train_test_split(
    normal,
    shuffle=True,
    random_state=42,
    test_size=0.15,
)

In [10]:
"""
Custom dataset class for ECG Data
"""

class ECGDataset(Dataset):
    """Dataset that serves each row as both model input and target.

    The array-like ``data`` is copied into a single float32 tensor up front;
    indexing returns the same row twice so it can be fed to an autoencoder,
    whose target is its own input.
    """

    def __init__(self, data):
        # torch.tensor always copies, so later changes to ``data`` do not
        # affect the dataset.
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        """Number of rows held by the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(input, target)`` where both are the row at ``idx``."""
        signal = self.data[idx]
        target = self.data[idx]
        return signal, target


In [11]:
"""
Setting up dataloaders
"""

# Only the training loader reshuffles its samples; the held-out normal set and
# the anomaly set are iterated in their stored order.
train_loader = DataLoader(
    dataset=ECGDataset(X_train),
    shuffle=True,
    batch_size=128,
)
test_loader, anomaly_loader = (
    DataLoader(dataset=ECGDataset(split), shuffle=False, batch_size=128)
    for split in (X_test, anomaly)
)

In [12]:
"""
Defining the AutoEncoder model,
including both the encoder and the decoder.
"""


class Conv1DAutoEncoder(nn.Module):
    """1-D convolutional autoencoder that reconstructs a fixed-length signal.

    Encoder: three ``Conv1d -> ReLU -> BatchNorm1d -> MaxPool1d(2)`` stages.
    The convolutions (kernel 3, padding 1) keep the length; each pooling step
    halves it (rounding down), so the encoded length is ``input_dim // 8`` and
    the final channel count is ``latent_dim``.

    Decoder: three length-preserving ``ConvTranspose1d -> ReLU -> BatchNorm1d``
    stages, then a flatten and a single linear layer that maps the
    ``(input_dim // 8) * 128`` features back to ``input_dim`` values.

    Args:
        input_dim: Length of each input signal.
        latent_dim: Number of channels produced by the encoder.
    """

    def __init__(self, input_dim, latent_dim=32):
        super().__init__()

        width = 128  # channel count of the hidden convolutional layers

        # Layers are created in the same order and at the same Sequential
        # indices as before, so state_dict keys stay stable.
        encoder_layers = [
            nn.Conv1d(1, width, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(width),
            nn.MaxPool1d(2),
            nn.Conv1d(width, width, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(width),
            nn.MaxPool1d(2),
            nn.Conv1d(width, latent_dim, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(latent_dim),
            nn.MaxPool1d(2),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Three floor-halvings of the length are equivalent to input_dim // 8.
        encoded_len = input_dim // 8
        decoder_layers = [
            nn.ConvTranspose1d(latent_dim, latent_dim, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(latent_dim),
            nn.ConvTranspose1d(latent_dim, width, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(width),
            nn.ConvTranspose1d(width, width, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(width),
            nn.Flatten(),
            nn.Linear(encoded_len * width, input_dim),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Reconstruct ``x``; a channel axis is inserted at dim 1 first."""
        # Conv1d is built with in_channels=1, so add that singleton channel.
        signal = x.unsqueeze(1)
        return self.decoder(self.encoder(signal))

## Device setup

In [14]:


# Pick the compute device in priority order: Apple MPS, then CUDA, then CPU.
# torch.backends.mps is missing on PyTorch builds that predate the MPS
# backend, so look it up defensively instead of failing with AttributeError.
if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


print(device)

cuda


In [15]:

# Seed PyTorch's global RNG so weight initialisation and the training loader's
# shuffling are repeatable, matching the random_state=42 already used for the
# data sampling and the train/test split.
# NOTE(review): some GPU kernels can still be non-deterministic — see the
# PyTorch reproducibility notes if bit-exact runs are required.
torch.manual_seed(42)

# Width of each training row; the model reconstructs vectors of this length.
input_dim = X_train.shape[1]
model = Conv1DAutoEncoder(input_dim).to(device)
# Mean absolute error between the reconstruction and the input.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [16]:
# Start a Weights & Biases run; the training loop logs its metrics to it.
wandb.init(
    name="drive-conv1d-autoencoder",
    project="ecg-anomaly-detection",
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmariumw784[0m ([33mmariumw784-city-university-of-london[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Training

In [17]:
epochs = 50
for epoch in range(epochs):
    # Put the model in training mode before each pass over the data.
    model.train()
    running_loss = 0.0
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    for batch_X, _ in loop:
        # The second element of each batch is ignored; the input itself is
        # used as the reconstruction target below.
        batch_X = batch_X.to(device)

        # Forward pass and reconstruction error against the input.
        outputs = model(batch_X)
        loss = criterion(outputs, batch_X)

        # Clear old gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Scale the batch loss by the batch size so the epoch figure is
        # normalised per sample (the last batch may be smaller).
        batch_loss = loss.item()
        running_loss += batch_loss * batch_X.shape[0]
        loop.set_postfix(loss=batch_loss)

    epoch_loss = running_loss / len(train_loader.dataset)
    wandb.log({"train_loss": epoch_loss})

Epoch 1/50: 100%|██████████| 27/27 [00:01<00:00, 16.93it/s, loss=0.966]
Epoch 2/50: 100%|██████████| 27/27 [00:00<00:00, 117.96it/s, loss=0.64]
Epoch 3/50: 100%|██████████| 27/27 [00:00<00:00, 123.23it/s, loss=0.451]
Epoch 4/50: 100%|██████████| 27/27 [00:00<00:00, 127.09it/s, loss=0.342]
Epoch 5/50: 100%|██████████| 27/27 [00:00<00:00, 126.89it/s, loss=0.29]
Epoch 6/50: 100%|██████████| 27/27 [00:00<00:00, 125.28it/s, loss=0.253]
Epoch 7/50: 100%|██████████| 27/27 [00:00<00:00, 120.41it/s, loss=0.212]
Epoch 8/50: 100%|██████████| 27/27 [00:00<00:00, 130.19it/s, loss=0.174]
Epoch 9/50: 100%|██████████| 27/27 [00:00<00:00, 125.56it/s, loss=0.156]
Epoch 10/50: 100%|██████████| 27/27 [00:00<00:00, 126.30it/s, loss=0.14]
Epoch 11/50: 100%|██████████| 27/27 [00:00<00:00, 124.17it/s, loss=0.107]
Epoch 12/50: 100%|██████████| 27/27 [00:00<00:00, 117.89it/s, loss=0.0877]
Epoch 13/50: 100%|██████████| 27/27 [00:00<00:00, 127.90it/s, loss=0.0783]
Epoch 14/50: 100%|██████████| 27/27 [00:00<00:00,