In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import random
from glob import glob
from tqdm import tqdm
from scipy.io import loadmat

import torch
from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from audiomentations import *
import warnings
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import roc_auc_score
from sklearn import metrics
from datetime import datetime
import json

import h5py

In [2]:
data_dir = "/media/mountHDD3/data_storage/biomedical_data/ecg_data/SPH"
print(os.listdir(data_dir))

['metadata.csv', 'data_df.csv', 'data_df_no1.csv', 'records']


In [3]:
main_df = pd.read_csv(data_dir + "/data_df.csv")
main_df

Unnamed: 0.1,Unnamed: 0,File name,Label,New Label
0,0,A00002,1,0
1,1,A00003,1,0
2,2,A00004,23,3
3,3,A00005,146,19
4,4,A00006,1,0
...,...,...,...,...
20787,20787,A25765,1,0
20788,20788,A25766,146,19
20789,20789,A25767,23,3
20790,20790,A25768,147,20


In [4]:
single_fns = main_df["File name"].values.tolist()
single_mat_paths = [data_dir + f"/records/{x}.h5" for x in single_fns]

In [5]:
# single_main_df.to_csv(data_dir + "/single_label.csv")

In [6]:
ratio = [0.9, 0.1]

train_index = int(len(single_mat_paths)*ratio[0])

train_mat_paths = single_mat_paths[:train_index]
valid_mat_paths = single_mat_paths[train_index:]


In [7]:
sample_sig = torch.randn(1, 12, 32)
conv_test = nn.Conv1d(12, 12, 3, 1, 1)
print(conv_test(sample_sig).shape)

torch.Size([1, 12, 32])


In [8]:
# len_lst = [loadmat(x)['ECG'][0][0][2].shape[1] for x in single_mat_paths]
# print(f"MAX: {max(len_lst)}")
# print(f"MIN: {min(len_lst)}")
# print(f"AVG: {sum(len_lst)/len(len_lst)}")

In [9]:
class BasicBlock(nn.Module):
    def __init__(self, channel_num):
        super(BasicBlock, self).__init__()
        self.conv_block1 = nn.Sequential(
			nn.Conv1d(channel_num, channel_num, 3, padding=1),
			nn.BatchNorm1d(channel_num),
			nn.LeakyReLU(0.2),
		)
        self.conv_block2 = nn.Sequential(
			nn.Conv1d(channel_num, channel_num, 3, padding=1),
			nn.BatchNorm1d(channel_num),
		)
        self.relu = nn.LeakyReLU(0.2)
        torch.nn.init.kaiming_normal_(self.conv_block1[0].weight)
        torch.nn.init.kaiming_normal_(self.conv_block2[0].weight)
        
    def forward(self, x):
        residual = x
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x + residual
        out = self.relu(x)
        return out

In [10]:
test_basic_block = BasicBlock(2)
sample_sig = torch.randn(1, 2, 32)
print(test_basic_block(sample_sig).shape)

torch.Size([1, 2, 32])


In [11]:
class ResNet(nn.Module):
    def __init__(self, in_channels = 12, type = 18, num_classes = 31):
        super(ResNet, self).__init__()
        self.struc_dict = {
            18: {
                "num_channels" : [64, 128, 256, 512],
                "counts" : [2, 2, 2, 2]
            }
        }
        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=64, kernel_size=7, stride=2)
        torch.nn.init.kaiming_normal_(self.conv1.weight)
        self.max1 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.main = nn.Sequential()
        for idx, struc in enumerate(
            zip(
                self.struc_dict[type]["num_channels"], 
                self.struc_dict[type]["counts"]
            )
        ):
            num_channel, cnt = struc
            for i in range(cnt):
                self.main.add_module(f"conv{idx+1}_{i}", BasicBlock(num_channel))
            if idx < len(self.struc_dict[type]["num_channels"]) - 1:
                self.main.add_module(f"ext_{idx}", nn.Conv1d(num_channel, self.struc_dict[type]["num_channels"][idx+1], 3, 1))
                self.main.add_module(f"extbn_{idx}", nn.BatchNorm1d(self.struc_dict[type]["num_channels"][idx+1]))
                                     
        self.avg = torch.nn.AdaptiveAvgPool1d((1))
        self.lin = nn.Linear(self.struc_dict[type]["num_channels"][-1], num_classes)
        torch.nn.init.kaiming_normal_(self.lin.weight)
    def forward(self, x):
        x = self.conv1(x)
        x = self.max1(x)
        x = self.main(x)
        x = self.avg(x)
        x = x.reshape(x.shape[0], -1)
        x = self.lin(x)
        return x

In [12]:
model = ResNet()
sample_sig = torch.randn(1, 12, 3000)
model(sample_sig).shape

torch.Size([1, 31])

In [13]:
class ECG(Dataset):
    def __init__(self, data_paths, train):
        self.data_paths = data_paths
        random.shuffle(self.data_paths)

        self.train = train

    def __getitem__(self, idx):
        augment = Compose([
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.01, p=0.2),
            AddGaussianSNR(min_snr_db=3.0, max_snr_db=12.0, p=0.2),
            Gain(min_gain_db = -10.0, max_gain_db = 10.0, p =0.2),
            GainTransition(min_gain_db = -15.0, max_gain_db = 15.0, 
                           min_duration = 50, max_duration = 200, duration_unit = "samples", 
                           p = 0.2),
            TimeStretch(min_rate=0.8, max_rate=1.25, p=0.2),
            PitchShift(min_semitones=-4, max_semitones=4, p=0.2),
            Shift(min_shift=-0.5, max_shift=0.5, shift_unit = "fraction", p=0.2),
        ])
        
        data_path = self.data_paths[idx]        

        filename = data_path.split("/")[-1].split(".")[0]
        label = main_df[main_df["File name"] == filename]["New Label"].values.item()
          
        tensor_lst = []
        for i in range(12):
            a = h5py.File(data_path, 'r')
            data_h5 = a['ecg']
            data = np.array(data_h5)
            clip_data = data[i][300:]
            desired_length = 2500
            if self.train == True:
                pad_length = max(desired_length - len(clip_data), 0)
                if pad_length == 0:
                    padded_augment = clip_data[:desired_length]
                else:
                    padded_augment = np.pad(clip_data, (0, pad_length), mode='constant')

            else:
                augmented_samples = augment(samples=clip_data, sample_rate=500)
                pad_length = max(desired_length - len(augmented_samples), 0)
                if pad_length == 0:
                    padded_augment = augmented_samples[:desired_length]
                else:
                    padded_augment = np.pad(clip_data, (0, pad_length), mode='constant')

            augment_signal_tensor = torch.tensor(padded_augment).float()
            tensor_lst.append(augment_signal_tensor)
            
        n = torch.stack(tensor_lst, 0).float()

        return n, label

    def __len__(self):
        return len(self.data_paths)

In [14]:
# check_ds = ECG(data_paths=single_mat_paths, label_df=single_main_df)
# sample, lbl = check_ds[0]
# print(sample.shape, lbl)

In [15]:
train_ds = ECG(train_mat_paths, train = True)
valid_ds = ECG(valid_mat_paths, train = False)

In [16]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index = 0)
device = torch.device( "cpu", index = 0)
batch_size = 32

traindl = DataLoader(
    train_ds,
    batch_size=batch_size, 
    shuffle=True, 
    pin_memory=True, 
    num_workers=os.cpu_count()//2
)

validdl = DataLoader(
    valid_ds,
    batch_size=1, 
    shuffle=True, 
    pin_memory=True, 
    num_workers=os.cpu_count()//2
)

print(len(traindl))
print(len(validdl))

585
2080


In [17]:
class FocalClassifierV0(nn.Module):
    def __init__(self, gamma=0.3): #Change gamma value here in order to acquire other results
        super().__init__()
        
        self.gamma = gamma
        self.act = nn.LogSoftmax(dim=1)

    
    def forward(self, pred, target):

        logits = self.act(pred)

        B, C = tuple(logits.size())

        entropy = torch.pow(1 - logits, self.gamma) * logits * F.one_hot(target, num_classes=C).float()

        return (-1 / B) * torch.sum(entropy)

focalloss_fn = FocalClassifierV0()

In [18]:
epoch = 150
lr = 0.0005
best_acc = 0
best_ep = 0

model.to(device)
optimizer = Adam(model.parameters(), lr=lr)
scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=epoch*len(traindl))
loss_fn = nn.CrossEntropyLoss()

In [19]:
for e in range(epoch):
    model.train()
    print(f"Epoch: {e}")
    y_true_list = [] 
    pred_list = []
    batch_cnt = 0
    total_loss = 0
    correct = 0
    for batch, (train_sig, train_label) in tqdm(enumerate(traindl)):
        batch_cnt = batch
        train_sig = train_sig.to(device)
        train_label = train_label.to(device)
        
        pred = model(train_sig)
        loss = loss_fn(pred, train_label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        scheduler.step()
        
        total_loss += loss.item()
        correct += (pred.argmax(1) == train_label).type(torch.float).sum().item()
    
    total_loss /= batch_cnt
    correct /= len(traindl.dataset)
    
    print(f"train loss: {total_loss} - train acc: {100*correct}")
    
    batch_cnt = 0
    val_total_loss = 0
    val_correct = 0
    model.eval()
    with torch.no_grad():
        for batch, (valid_sig, valid_label) in tqdm(enumerate(validdl)):
            batch_cnt = batch
            valid_sig = valid_sig.to(device)
            valid_label = valid_label.to(device)
            
            pred = model(valid_sig)
            
            pred_pos = pred.argmax(1)
            y_true_list.append(valid_label)
            pred_list.append(pred_pos)
            
            loss = loss_fn(pred, valid_label)
            
            val_total_loss += loss.item()
            val_correct += (pred.argmax(1) == valid_label).type(torch.float).sum().item()
    
        val_total_loss /= batch_cnt
        val_correct /= len(validdl.dataset)
        if val_correct > best_acc:
            best_acc = val_correct
            best_ep = e
        
        print(f"valid loss: {val_total_loss} - valid acc: {100*val_correct}")
        
y_true = torch.cat(y_true_list).cpu().numpy()
pred = torch.cat(pred_list).cpu().numpy()

reports = classification_report(y_true, pred, output_dict=True) 

print(reports)
print(f"Best acuracy: {best_acc} at epoch {best_ep}")

Epoch: 0


585it [09:34,  1.02it/s]

train loss: 1.0012748044443458 - train acc: 72.25309961522018



0it [00:01, ?it/s]


NotImplementedError: Caught NotImplementedError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_766773/1458598755.py", line 41, in __getitem__
    augmented_samples = augment(samples=clip_data, sample_rate=500)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/audiomentations/core/composition.py", line 91, in __call__
    samples = transform(samples, sample_rate)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/audiomentations/core/transforms_interface.py", line 93, in __call__
    return self.apply(samples, sample_rate)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/audiomentations/augmentations/time_stretch.py", line 41, in apply
    time_stretched_samples = librosa.effects.time_stretch(
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/librosa/effects.py", line 398, in time_stretch
    y_stretch = core.istft(stft_stretch, dtype=y.dtype, length=len_stretch, **kwargs)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/librosa/core/spectrum.py", line 571, in istft
    __overlap_add(head_buffer, ytmp, hop_length)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/dispatcher.py", line 442, in _compile_for_args
    raise e
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/dispatcher.py", line 375, in _compile_for_args
    return_val = self.compile(tuple(argtypes))
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/dispatcher.py", line 905, in compile
    cres = self._compiler.compile(args, return_type)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/dispatcher.py", line 80, in compile
    status, retval = self._compile_cached(args, return_type)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/dispatcher.py", line 94, in _compile_cached
    retval = self._compile_core(args, return_type)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/dispatcher.py", line 107, in _compile_core
    cres = compiler.compile_extra(self.targetdescr.typing_context,
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler.py", line 744, in compile_extra
    return pipeline.compile_extra(func)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler.py", line 438, in compile_extra
    return self._compile_bytecode()
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler.py", line 506, in _compile_bytecode
    return self._compile_core()
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler.py", line 481, in _compile_core
    raise e
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler.py", line 472, in _compile_core
    pm.run(self.state)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 364, in run
    raise e
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 356, in run
    self._runPass(idx, pass_inst, state)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 311, in _runPass
    mutated |= check(pss.run_pass, internal_state)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 273, in check
    mangled = func(compiler_state)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/typed_passes.py", line 468, in run_pass
    lower.lower()
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/lowering.py", line 187, in lower
    self.lower_normal_function(self.fndesc)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/lowering.py", line 222, in lower_normal_function
    self.setup_function(fndesc)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/lowering.py", line 297, in setup_function
    self.function = self.context.declare_function(self.module, fndesc)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/base.py", line 413, in declare_function
    fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/callconv.py", line 805, in get_function_type
    arginfo = self._get_arg_packer(argtypes)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/callconv.py", line 165, in _get_arg_packer
    return self.context.get_arg_packer(argtypes)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/base.py", line 320, in get_arg_packer
    return datamodel.ArgPacker(self.data_model_manager, fe_args)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/packer.py", line 81, in __init__
    dm = self._dmm.lookup(ty)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/manager.py", line 37, in lookup
    model = self._cache[fetype] = handler(self, fetype)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/models.py", line 885, in __init__
    super(ArrayModel, self).__init__(dmm, fe_type, members)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/models.py", line 524, in __init__
    self._models = tuple([self._dmm.lookup(t) for t in self._members])
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/models.py", line 524, in <listcomp>
    self._models = tuple([self._dmm.lookup(t) for t in self._members])
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/manager.py", line 37, in lookup
    model = self._cache[fetype] = handler(self, fetype)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/models.py", line 377, in __init__
    self._pointee_model = dmm.lookup(fe_type.dtype)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/manager.py", line 37, in lookup
    model = self._cache[fetype] = handler(self, fetype)
  File "/media/mountHDD2/thao/git/.env/lib/python3.10/site-packages/numba/core/datamodel/models.py", line 370, in __init__
    raise NotImplementedError(fe_type)
NotImplementedError: float16
