In [None]:
import pandas as pd
import numpy as np
import os
from pyedflib import highlevel
from scipy import signal as sgn
from tqdm import tqdm

# Root of the extracted CHB-MIT archive, relative to the notebook's working directory.
PATH = "chb-mit-scalp-eeg-database-1.0.0/chb-mit-scalp-eeg-database-1.0.0"

# Keep only sub-directories: the dataset root may also contain plain files
# (indexes, checksums, ...), and save_npys() calls os.listdir() on every entry,
# which raises NotADirectoryError for a file. Sorted so that subjects are
# processed in a stable order from run to run.
all_chb_folders = sorted(
    entry
    for entry in os.listdir(PATH)
    if os.path.isdir(os.path.join(PATH, entry))
)

def get_signal(file_path, n_samples=100000):
    """Collapse one EDF recording into a single fixed-length, peak-normalised trace.

    Every channel returned by ``highlevel.read_edf`` is resampled to
    ``n_samples`` points with ``scipy.signal.resample``. The channels are then
    merged by taking the element-wise maximum across channels, and the merged
    trace is divided by its own maximum.

    Args:
        file_path: Path of the ``.edf`` file to read.
        n_samples: Number of points each channel is resampled to, i.e. the
            length of the returned array. Defaults to 100000.

    Returns:
        1-D ``numpy.ndarray`` of length ``n_samples``. If the merged trace has a
        zero or non-finite peak it is returned without normalisation rather
        than being divided into NaN/inf values.

    Raises:
        ValueError: If the file contains no signal channels.
    """
    signals, _signal_headers, _header = highlevel.read_edf(file_path)
    if len(signals) == 0:
        raise ValueError(f"No signal channels found in {file_path}")

    # Resample channel by channel so channels of different lengths are handled.
    resampled = np.array([sgn.resample(channel, n_samples) for channel in signals])

    sample = np.max(resampled, axis=0)
    peak = sample.max()
    # A flat (all-zero) recording would otherwise yield 0/0 = NaN everywhere.
    if peak == 0 or not np.isfinite(peak):
        return sample
    return sample / peak


# Column labels 0..99999, one per resampled signal point.
signal_cols = list(range(100000))

def save_npys(folder_name, output_dir="./converted_artifacts_1lac"):
    """Convert every EDF recording of one subject folder into a single ``.npy`` table.

    Each file in ``PATH/folder_name`` whose name ends in ``edf`` is reduced to
    one row with ``get_signal``. A recording is labelled 1 when the folder also
    contains a matching ``<recording>.seizures`` file, otherwise 0. The saved
    array has one row per recording: the signal samples followed by the label
    in the last column.

    Args:
        folder_name: Name of the subject folder inside ``PATH``.
        output_dir: Directory the ``<folder_name>.npy`` file is written to. It
            is created when missing. Defaults to ``./converted_artifacts_1lac``.

    Returns:
        None. The result is written to ``output_dir/<folder_name>.npy``.
    """
    folder_path = os.path.join(PATH, folder_name)
    all_files = os.listdir(folder_path)
    # Sorted so the row order of the saved table is reproducible.
    edf_files = sorted(f for f in all_files if f.endswith('edf'))
    # "<recording>.edf.seizures" -> "<recording>.edf"; a set gives O(1) lookups.
    seizure_files = {
        '.'.join(f.split('.')[:-1]) for f in all_files if f.endswith('seizures')
    }

    # Collect rows in plain lists and build the array once. Growing a
    # 100000-column DataFrame row by row re-allocates on every insertion.
    rows = []
    labels = []
    for f in tqdm(edf_files, leave=False):
        rows.append(get_signal(os.path.join(folder_path, f)))
        labels.append(1 if f in seizure_files else 0)

    if rows:
        table = np.column_stack(
            [np.vstack(rows), np.asarray(labels, dtype=np.float64)]
        )
    else:
        # No recordings: keep the "signal columns + label" layout with zero rows.
        table = np.empty((0, len(signal_cols) + 1), dtype=np.float64)

    # np.save does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    np.save(os.path.join(output_dir, f"{folder_name}.npy"), table)

# Outer progress bar: one tick per entry of all_chb_folders; each entry is
# converted and written to disk by save_npys.
folder_progress = tqdm(all_chb_folders)
for folder in folder_progress:
    save_npys(folder)


In [None]:
# Colab-only helper module; this cell cannot run outside Google Colab.
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset cell further down reads
# its .npy files from a path below this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import torch
import os
from tqdm import tqdm
from torch.utils.data import DataLoader


class CustomDataset(torch.utils.data.Dataset):
    """In-memory dataset built from all ``.npy`` tables found in one directory.

    Every file must hold a 2-D array with one recording per row: the signal
    samples followed by the class label in the last column. The tables are
    concatenated row-wise in sorted file-name order.

    Attributes:
        data_path: Directory the tables were read from.
        all_files: Sorted names of the ``.npy`` files that were loaded.
        data: ``(n_recordings, n_samples)`` float64 array of signals.
        labels: ``(n_recordings,)`` float64 array of labels.
    """

    def __init__(self, data_path):
        """Load and concatenate every ``.npy`` file in ``data_path``.

        Args:
            data_path: Directory containing the ``.npy`` tables.

        Raises:
            ValueError: If a file is not a 2-D table with at least one signal
                column plus the label column, or if the files disagree on the
                number of signal columns.
        """
        super(CustomDataset, self).__init__()
        self.data_path = data_path
        # Skip anything that is not a .npy table (np.load would choke on it)
        # and sort so that indices map to the same rows on every run.
        self.all_files = sorted(
            name for name in os.listdir(data_path) if name.endswith('.npy')
        )

        # Collect the pieces and concatenate once; stacking inside the loop
        # copies the whole accumulated array for every file.
        signal_parts = []
        label_parts = []
        for file_name in tqdm(self.all_files, leave=False):
            table = np.load(os.path.join(data_path, file_name))
            if table.ndim != 2 or table.shape[1] < 2:
                raise ValueError(
                    f"{file_name}: expected a 2-D table of signal columns plus a "
                    f"label column, got shape {table.shape}"
                )
            if signal_parts and table.shape[1] - 1 != signal_parts[0].shape[1]:
                raise ValueError(
                    f"{file_name}: has {table.shape[1] - 1} signal columns but "
                    f"earlier files have {signal_parts[0].shape[1]}"
                )
            signal_parts.append(table[:, :-1])
            label_parts.append(table[:, -1])

        if signal_parts:
            self.data = np.concatenate(signal_parts, axis=0).astype(np.float64, copy=False)
            self.labels = np.concatenate(label_parts).astype(np.float64, copy=False)
        else:
            # Nothing to load: keep the historical empty shapes.
            self.data = np.zeros((0, 10000))
            self.labels = np.zeros((0))

    def __len__(self):
        """Return the number of recordings."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``{'signal': float32 array, 'label': int64}`` for row ``idx``."""
        return {'signal' :self.data[idx].astype(np.float32), 'label':self.labels[idx].astype(np.int64)}

In [None]:
## Model
import numpy as np
import torch
class LSTMModel(torch.nn.Module):
    """Stacked LSTM followed by a small MLP head that outputs class logits.

    The output of the last time step of the LSTM is passed through
    ReLU -> dropout -> fc1 -> ReLU -> fc2 -> ReLU -> out. No softmax is
    applied in ``forward``: the returned values are raw logits.

    Args:
        input_size: Number of features per time step (default 10000).
        hidden_size: LSTM hidden width (default 1000).
        num_layers: Number of stacked LSTM layers (default 2).
        num_classes: Number of output logits (default 2).
        dropout: Dropout probability applied after the LSTM (default 0.2).
    """

    def __init__(self, input_size=10000, hidden_size=1000, num_layers=2,
                 num_classes=2, dropout=0.2):
        super(LSTMModel, self).__init__()
        self.lstm1 = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                                   num_layers=num_layers, batch_first=True)
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(dropout)
        self.fc1 = torch.nn.Linear(hidden_size, 512)
        self.fc2 = torch.nn.Linear(512, 32)
        self.out = torch.nn.Linear(32, num_classes)
        # Not used in forward(); kept so callers can turn logits into
        # probabilities explicitly with model.softmax(logits).
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``.

        Args:
            x: Either ``(batch, seq_len, input_size)`` or ``(batch, input_size)``.
                A 2-D input is treated as a sequence of length 1, so batches
                of flat signals can be passed without reshaping.
        """
        if x.dim() == 2:
            # (batch, features) -> (batch, 1, features). Without this the LSTM
            # sees a 2-D input while the initial states below are 3-D.
            x = x.unsqueeze(1)

        # Zero initial states sized from the LSTM itself, created directly on
        # the input's device and dtype.
        state_shape = (self.lstm1.num_layers, x.size(0), self.lstm1.hidden_size)
        h_t = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c_t = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        x, _ = self.lstm1(x, (h_t, c_t))
        x = x[:, -1, :]  # keep only the last time step
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.out(x)
        return x

In [None]:
from torch.utils.data import DataLoader
import torch
from tqdm.notebook import tqdm
from sklearn.metrics import classification_report

dataset = CustomDataset("/content/drive/MyDrive/converted_artifacts")

# Derive the split sizes from the dataset instead of hard-coding [500, 186]:
# random_split raises unless the lengths sum to len(dataset). The fraction
# reproduces the 500/186 split when the dataset holds 686 recordings.
TRAIN_FRACTION = 500 / 686
n_train = round(len(dataset) * TRAIN_FRACTION)
n_test = len(dataset) - n_train

# Seeded generator so the train/test membership is the same on every run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [n_train, n_test], generator=split_generator
)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

  0%|          | 0/24 [00:00<?, ?it/s]

In [None]:
import torch

import torch
import torch.nn as nn

class CNNModel(nn.Module):
    """1-D CNN that classifies a single-channel signal into ``num_classes`` classes.

    The notebook's data comes from the CHB-MIT scalp EEG database, so the
    inputs are EEG-derived traces. Architecture: Conv1d -> ReLU -> MaxPool1d(2)
    -> flatten -> Linear(32) -> ReLU -> Linear(num_classes).

    ``forward`` returns raw logits. ``torch.nn.CrossEntropyLoss`` applies
    log-softmax itself, so feeding it softmax outputs squashes every score
    into [0, 1] and stalls training.

    Args:
        input_length: Number of samples per signal (default 10000).
        conv_channels: Number of convolution filters (default 1024).
        kernel_size: Convolution kernel width (default 5).
        num_classes: Number of output logits (default 2).

    Raises:
        ValueError: If ``input_length`` is too short to leave at least one
            value after the convolution and pooling.
    """

    def __init__(self, input_length=10000, conv_channels=1024, kernel_size=5,
                 num_classes=2):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=conv_channels,
                               kernel_size=kernel_size)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=2)
        # NOTE(review): defined but never applied in forward(); kept so the
        # module's attributes stay the same.
        self.dropout = nn.Dropout(0.1)

        # Unpadded convolution shortens the signal by kernel_size - 1, then
        # the stride-2 pooling halves it (floor). Defaults give 1024 * 4998.
        pooled_length = (input_length - kernel_size + 1) // 2
        if pooled_length < 1:
            raise ValueError(
                f"input_length={input_length} is too short for kernel_size={kernel_size}"
            )
        self.fc1 = nn.Linear(conv_channels * pooled_length, 32)
        self.fc2 = nn.Linear(32, num_classes)
        # Not used in forward(); apply model.softmax(logits) when class
        # probabilities are needed.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for ``x`` of shape ``(batch, input_length)``."""
        x = x.unsqueeze(1)  # add the channel dimension: (batch, 1, length)
        x = self.maxpool(self.relu(self.conv1(x)))
        x = x.flatten(start_dim=1)  # (batch, conv_channels * pooled_length)
        x = self.relu(self.fc1(x))
        return self.fc2(x)




In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Swap in LSTMModel() here to train the recurrent model instead.
model = CNNModel()
model.to(device)
epochs = 10

# CrossEntropyLoss expects raw logits (it applies log-softmax internally), so
# the model's forward() must not end in a softmax.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
for _ in tqdm(range(epochs), leave=True):
    # Re-assert training mode every epoch so the loop still behaves correctly
    # when the evaluation cell (model.eval()) was run before re-running this one.
    model.train()
    running_loss = 0
    for batch in tqdm(train_dataloader, leave=True):
        x = batch['signal'].to(device)
        # Move the small label tensor to the model's device instead of copying
        # the predictions back to the CPU on every step.
        y = batch['label'].to(device)
        y_hat = model(x)
        loss = loss_fn(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the mean batch loss over the epoch.
        running_loss += loss.item()/len(train_dataloader)
    print(f"Loss: {running_loss}")

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.532220113515854


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616517543793


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616529464723


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616505622865


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616496086122


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616496086118


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.529261651992798


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616512775421


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616550922397


  0%|          | 0/125 [00:00<?, ?it/s]

Loss: 0.5292616515159605


In [None]:
model.eval()

preds = []
labels = []
# Inference only: without no_grad() every forward pass builds an autograd
# graph and keeps its activations alive for nothing.
with torch.no_grad():
    for batch in test_dataloader:
        x = batch['signal'].to(device)
        y = batch['label']
        y_hat = model(x)
        # argmax over the class dimension gives the predicted class index.
        pred = y_hat.argmax(dim=1)
        labels.extend(y.tolist())
        preds.extend(pred.tolist())

# Plain accuracy, followed by the per-class precision/recall report.
correct = [1 if p==l else 0 for p, l in zip(preds, labels)]
print(sum(correct)/ len(preds))
print(classification_report(labels, preds))

0.8225806451612904
              precision    recall  f1-score   support

           0       0.82      1.00      0.90       153
           1       0.00      0.00      0.00        33

    accuracy                           0.82       186
   macro avg       0.41      0.50      0.45       186
weighted avg       0.68      0.82      0.74       186



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
