In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import random
from glob import glob
from tqdm import tqdm
from scipy.io import loadmat
import scipy

import torch
from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

from sklearn.metrics import classification_report

In [3]:
data_dir = "/media/mountHDD3/data_storage/biomedical_data/ecg_data/SPH"
print(os.listdir(data_dir))

['metadata.csv', 'data_df.csv', 'data_df_no1.csv', 'records']


In [4]:
main_df = pd.read_csv(data_dir + "/data_df_no1.csv")
main_df.shape

(20835, 4)

In [5]:
single_fns = main_df["File name"].values.tolist()
single_mat_paths = [data_dir + f"/records/{x}.h5" for x in single_fns]

In [6]:
ratio = [0.8, 0.1]

train_index = int(len(single_mat_paths)*ratio[0])
valid_index = int(len(single_mat_paths)*(ratio[0]+ratio[1]))

train_mat_paths = single_mat_paths[:train_index]
valid_mat_paths = single_mat_paths[valid_index:]

In [7]:
import h5py

class HeartData(Dataset):
    def __init__(self, data_paths):
        self.data_paths = data_paths
        random.shuffle(self.data_paths)

    def __getitem__(self, idx):
        data_path = self.data_paths[idx]
        a = h5py.File(data_path, 'r')
        data_h5 = a['ecg']
        data = np.array(data_h5)
        clip_data = data[:, 500:5000]

        filename = data_path.split("/")[-1].split(".")[0]
        label = main_df[main_df["File name"] == filename]["New Label"].values.item()

        torch_data = torch.from_numpy(clip_data)

        return torch_data.float(), label

    def __len__(self):
        return len(self.data_paths)

In [8]:
train_ds = HeartData(train_mat_paths)
valid_ds = HeartData(valid_mat_paths)

In [9]:
train_dl = DataLoader(train_ds, batch_size = 64, shuffle = True, pin_memory = True, num_workers = 48)
valid_dl = DataLoader(valid_ds, batch_size = 64, shuffle = True, pin_memory = True, num_workers = 48)

In [10]:
class XGBoost_Data(Dataset):
    def __init__(self, data_paths):
        self.data_paths = data_paths
        random.shuffle(self.data_paths)

    def __getitem__(self, idx):
        data_path = self.data_paths[idx]
        a = h5py.File(data_path, 'r')
        data_h5 = a['ecg']
        data = np.array(data_h5)
        clip_data = data[:, 500:5000]

        data_fea = []
        
        for i in range (12):
            list_features = []
            data = clip_data[i]
            list_features.append(np.mean(data))
            list_features.append(np.median(data))
            list_features.append(np.std(data))
            list_features.append(np.max(data)-np.min(data))
            q3, q1 = np.percentile(data, [75 ,25])
            list_features.append(q3 - q1)
            sk = scipy.stats.skew(data) 
            list_features.append(sk)
            kur = scipy.stats.kurtosis(data)
            list_features.append(kur)
            data_fea.append(list_features)

        data_fea = torch.tensor(data_fea)
        data_all = torch.cat((data_fea[0], data_fea[1], data_fea[2], data_fea[3], data_fea[3], data_fea[5], 
                              data_fea[6], data_fea[7], data_fea[8], data_fea[9], data_fea[10], data_fea[11]))

        filename = data_path.split("/")[-1].split(".")[0]
        label = main_df[main_df["File name"] == filename]["New Label"].values.item()

        # torch_data = torch.from_numpy(data_all)

        return data_all.float(), label

    def __len__(self):
        return len(self.data_paths)    
    

In [11]:
class CNN_CelebA(nn.Module):
    def __init__(self):
        super(CNN_CelebA, self).__init__()

        self.encoder = nn.Sequential(
            # 12*4500
            nn.Conv1d(12, 32, kernel_size= 3, stride=2, padding=1),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(),
            nn.Conv1d(32, 64, kernel_size= 3, stride=2, padding=1),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Conv1d(64, 128, kernel_size= 3, stride=2, padding=1),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Conv1d(128, 256, kernel_size= 3, stride=2, padding=1),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Conv1d(256, 512, kernel_size= 3, stride=2, padding=1),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Flatten()
        )

        self.cls = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(72192, out_features=31, bias=True)
        ) 

        self.z_mean = nn.Linear(72192, 128)
        self.z_log_var = nn.Linear(72192, 128)

        self.decoder = nn.Sequential(
            nn.Linear(128, 72192),
            nn.Unflatten(1, (512, 141)),
            nn.ConvTranspose1d(512, 256, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.ConvTranspose1d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.ConvTranspose1d(128, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.ConvTranspose1d(64, 32, kernel_size=3, stride=2, padding=2),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(),            
            nn.ConvTranspose1d(32, 32, kernel_size=3, stride=2, padding=2, output_padding=1),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(),
            nn.Conv1d(32, 12, kernel_size= 3, padding= 1),
            nn.Tanh()
        )

    def forward(self, x):
        enc = self.encoder(x)
        logit = self.cls(enc)
        mu = self.z_mean(enc)
        lv = self.z_log_var(enc)
        lat = self.reparam(mu, lv)
        dec = self.decoder(lat)
        
        return dec, mu, lv, logit

    def reparam(self, mu, lv):
        std = torch.exp(0.5 * lv)
        eps = torch.randn_like(std)
        return mu + std * eps

    def get_latent(self, x):
        enc = self.encoder(x)
        mu = self.z_mean(enc)
        lv = self.z_log_var(enc)
        lat = self.reparam(mu, lv)

        return lat

In [12]:
# model = nn.Sequential(
#     # 12*4500
#     nn.Conv1d(12, 32, kernel_size= 3, stride=2, padding=1),
#     nn.BatchNorm1d(32),
#     nn.LeakyReLU(),
#     nn.Conv1d(32, 64, kernel_size= 3, stride=2, padding=1),
#     nn.BatchNorm1d(64),
#     nn.LeakyReLU(),
#     nn.Conv1d(64, 128, kernel_size= 3, stride=2, padding=1),
#     nn.BatchNorm1d(128),
#     nn.LeakyReLU(),
#     nn.Conv1d(128, 256, kernel_size= 3, stride=2, padding=1),
#     nn.BatchNorm1d(256),
#     nn.LeakyReLU(),
#     nn.Conv1d(256, 512, kernel_size= 3, stride=2, padding=1),
#     nn.BatchNorm1d(512),
#     nn.LeakyReLU(),
#     nn.Flatten()
# )
# signal = torch.rand(1, 12, 4500)
# a = model(signal)
# print(a.size())

In [13]:
class BasicBlock(nn.Module):
    def __init__(self, channel_num):
        super(BasicBlock, self).__init__()
        self.conv_block1 = nn.Sequential(
			nn.Conv1d(channel_num, channel_num, 3, padding=1),
			nn.BatchNorm1d(channel_num),
			nn.ReLU(),
		)
        self.conv_block2 = nn.Sequential(
			nn.Conv1d(channel_num, channel_num, 3, padding=1),
			nn.BatchNorm1d(channel_num),
		)
        self.relu = nn.ReLU()
        torch.nn.init.kaiming_normal_(self.conv_block1[0].weight)
        torch.nn.init.kaiming_normal_(self.conv_block2[0].weight)
        
    def forward(self, x):
        residual = x
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x + residual
        out = self.relu(x)
        return out

In [14]:
class ResNet(nn.Module):
    def __init__(self, in_channels = 12, type = 18, num_classes = 31):
        super(ResNet, self).__init__()
        self.struc_dict = {
            18: {
                "num_channels" : [64, 128, 256, 512],
                "counts" : [2, 2, 2, 2]
            }
        }
        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=64, kernel_size=7, stride=2)
        torch.nn.init.kaiming_normal_(self.conv1.weight)
        self.max1 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.main = nn.Sequential()
        for idx, struc in enumerate(
            zip(
                self.struc_dict[type]["num_channels"], 
                self.struc_dict[type]["counts"]
            )
        ):
            num_channel, cnt = struc
            for i in range(cnt):
                self.main.add_module(f"conv{idx+1}_{i}", BasicBlock(num_channel))
            if idx < len(self.struc_dict[type]["num_channels"]) - 1:
                self.main.add_module(f"ext_{idx}", nn.Conv1d(num_channel, self.struc_dict[type]["num_channels"][idx+1], 3, 1))
                self.main.add_module(f"extbn_{idx}", nn.BatchNorm1d(self.struc_dict[type]["num_channels"][idx+1]))
                                     
        self.avg = torch.nn.AdaptiveAvgPool1d((1))
        self.lin = nn.Linear(self.struc_dict[type]["num_channels"][-1], num_classes)
        torch.nn.init.kaiming_normal_(self.lin.weight)
    def forward(self, x):
        x = self.conv1(x)
        x = self.max1(x)
        x = self.main(x)
        x = self.avg(x)
        x = x.reshape(x.shape[0], -1)
        x = self.lin(x)
        return x

In [15]:
epoch = 150
lr = 0.0005
best_acc = 0
best_ep = 0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index = 1)

resnet = ResNet().to(device)
CVAE = CNN_CelebA().to(device)

optimizer_rn = Adam(resnet.parameters(), lr=lr)
scheduler_rn = CosineAnnealingLR(optimizer=optimizer_rn, T_max=epoch)

optimizer_ae = Adam(CVAE.parameters(), lr=lr)
scheduler_ae = CosineAnnealingLR(optimizer=optimizer_ae, T_max=epoch)

In [15]:
recon_loss = nn.MSELoss(reduction='none')

def loss_fn_sig(recon_x, x):
    return torch.mean(torch.sum(recon_loss(recon_x, x), dim=(1,2)))

def gaussian_kls(mu, logvar):
    kld_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)
    return torch.mean(kld_loss)

loss_fn_cls = nn.CrossEntropyLoss()

for e in range(epoch):
    CVAE.train()
    print(f"Epoch: {e}")
    y_true_list = [] 
    pred_list = []
    batch_cnt = 0
    total_loss = 0
    correct = 0
    for batch, (train_sig, train_label) in tqdm(enumerate(train_dl)):
        batch_cnt = batch
        train_sig = train_sig.to(device)
        train_label = train_label.to(device)
        
        res_sig, train_mu, train_lv, pred_cls = CVAE(train_sig)
        loss_cls = loss_fn_cls(pred_cls, train_label)
        loss_sig = loss_fn_sig(res_sig, train_sig)
        loss_kl = gaussian_kls(train_mu, train_lv)
        loss_tot = loss_cls + loss_sig + loss_kl
        
        optimizer_ae.zero_grad()
        loss_tot.backward()
        optimizer_ae.step()
        
        scheduler_ae.step()
        
        total_loss += loss_tot.item()
        correct += (pred_cls.argmax(1) == train_label).type(torch.float).sum().item()
    
    total_loss /= batch_cnt
    correct /= len(train_dl.dataset)
    
    print(f"train loss: {total_loss} - train acc: {100*correct}")
    
    batch_cnt = 0
    val_total_loss = 0
    val_correct = 0
    CVAE.eval()
    with torch.no_grad():
        for batch, (valid_sig, valid_label) in tqdm(enumerate(valid_dl)):
            batch_cnt = batch
            valid_sig = valid_sig.to(device)
            valid_label = valid_label.to(device)
            
            res_sig, valid_mu, valid_lv, pred_cls = CVAE(valid_sig)
            loss_cls = loss_fn_cls(pred_cls, valid_label)
            loss_sig = loss_fn_sig(res_sig, valid_sig)
            loss_kl = gaussian_kls(valid_mu, valid_lv)
            loss_tot = loss_cls + loss_sig + loss_kl
            # print(loss_cls)
            # print(loss_sig)
            # print(loss_kl)
            
            pred_pos = pred_cls.argmax(1)
            y_true_list.append(valid_label)
            pred_list.append(pred_pos)

            val_total_loss += loss_tot.item()
            val_correct += (pred_cls.argmax(1) == valid_label).type(torch.float).sum().item()
    
        val_total_loss /= batch_cnt
        val_correct /= len(valid_dl.dataset)
        if val_correct > best_acc:
            best_acc = val_correct
            best_ep = e
        
        print(f"valid loss: {val_total_loss} - valid acc: {100*val_correct}")
        
y_true = torch.cat(y_true_list).cpu().numpy()
pred = torch.cat(pred_list).cpu().numpy()

reports = classification_report(y_true, pred, output_dict=True)

print(reports)
print(f"Best acuracy: {best_acc} at epoch {best_ep}")

# pred_list_test = []
# model.eval()
# with torch.no_grad():
#     va_total_loss = {
#         "rec_loss" : 0,
#         "kl_loss" : 0
#     }
#     for test_sig, test_label in tqdm(test_dl):
#         valid_img = valid_img.to(device)

#         res_sig, valid_mu, valid_lv, pred_cls = CVAE(test_sig)
#         loss_cls = loss_fn_cls(pred_cls, test_label)
#         loss_sig = loss_fn_sig(res_sig, test_sig)
#         loss_kl = gaussian_kls(valid_mu, valid_lv)
#         loss_tot = loss_cls + loss_sig + loss_kl

#         pred_pos = pred_cls.argmax(1)
#         pred_list_test.append(pred_pos)

# print(pred_list_test)

Epoch: 0


261it [00:17, 14.69it/s]

train loss: nan - train acc: 64.18286537077034



10it [00:00, 24.82it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, de

28it [00:00, 54.01it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, de

33it [00:01, 24.96it/s]

valid loss: nan - valid acc: 65.83493282149712
Epoch: 1



261it [00:12, 21.26it/s]

train loss: nan - train acc: 66.90664746820254



1it [00:00,  3.48it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


6it [00:00, 11.39it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


8it [00:00, 13.20it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


14it [00:01, 17.18it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


19it [00:01, 18.46it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


22it [00:01, 19.10it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


27it [00:01, 19.41it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


30it [00:01, 19.83it/s]

tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')
tensor(nan, device='cuda:1')


33it [00:02, 14.86it/s]

valid loss: nan - valid acc: 65.83493282149712
Epoch: 2



81it [00:05, 14.58it/s]


KeyboardInterrupt: 

In [19]:
loss_fn = nn.CrossEntropyLoss()

for e in range(epoch):
    resnet.train()
    print(f"Epoch: {e}")
    y_true_list = [] 
    pred_list = []
    batch_cnt = 0
    total_loss = 0
    correct = 0
    for batch, (train_sig, train_label) in tqdm(enumerate(train_dl)):
        batch_cnt = batch
        train_sig = train_sig.to(device)
        train_label = train_label.to(device)
        
        pred = resnet(train_sig)
        loss = loss_fn(pred, train_label)
        
        optimizer_rn.zero_grad()
        loss.backward()
        optimizer_rn.step()
        
        scheduler_rn.step()
        
        total_loss += loss.item()
        correct += (pred.argmax(1) == train_label).type(torch.float).sum().item()
    
    total_loss /= batch_cnt
    correct /= len(train_dl.dataset)
    
    print(f"train loss: {total_loss} - train acc: {100*correct}")
    
    batch_cnt = 0
    val_total_loss = 0
    val_correct = 0
    resnet.eval()
    with torch.no_grad():
        for batch, (valid_sig, valid_label) in tqdm(enumerate(valid_dl)):
            batch_cnt = batch
            valid_sig = valid_sig.to(device)
            valid_label = valid_label.to(device)
            
            pred = resnet(valid_sig)
            
            pred_pos = pred.argmax(1)
            y_true_list.append(valid_label)
            pred_list.append(pred_pos)
            
            loss = loss_fn(pred, valid_label)
            
            val_total_loss += loss.item()
            val_correct += (pred.argmax(1) == valid_label).type(torch.float).sum().item()
    
        val_total_loss /= batch_cnt
        val_correct /= len(valid_dl.dataset)
        if val_correct > best_acc:
            best_acc = val_correct
            best_ep = e
        
        print(f"valid loss: {val_total_loss} - valid acc: {100*val_correct}")
        
y_true = torch.cat(y_true_list).cpu().numpy()
pred = torch.cat(pred_list).cpu().numpy()

# reports = classification_report(y_true, pred, output_dict=True) 

# print(reports)
print(f"Best acuracy: {best_acc} at epoch {best_ep}")

Epoch: 0


261it [01:10,  3.72it/s]

train loss: 0.8579657912254334 - train acc: 75.41996640268779



33it [00:03,  9.96it/s]

valid loss: 1.8543766140937805 - valid acc: 47.88867562380039
Epoch: 1



75it [00:12,  6.01it/s]


KeyboardInterrupt: 

In [None]:
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

XG = XGBClassifier(device = device, learning_rate = lr)

all_fea = []
all_label = []

for i in range (len(single_mat_paths)):
    data_path = single_mat_paths[i]
    a = h5py.File(data_path, 'r')
    data_h5 = a['ecg']
    data = np.array(data_h5)
    clip_data = data[:, 500:5000]

    list_features = []
    
    for i in range (12):
        # list_features = []
        data = clip_data[i]
        
        list_features.append(np.mean(data))
        list_features.append(np.median(data))
        list_features.append(np.std(data))
        
        list_features.append(np.max(data)-np.min(data))
        
        q3, q1 = np.percentile(data, [75 ,25])
        list_features.append(q3 - q1)
        
        sk = scipy.stats.skew(data) 
        list_features.append(sk)
        
        kur = scipy.stats.kurtosis(data)
        list_features.append(kur)
    all_fea.append(list_features)

    # data_fea = torch.tensor(data_fea)
    # data_all = torch.cat((data_fea[0], data_fea[1], data_fea[2], data_fea[3], data_fea[3], data_fea[5], 
    #                       data_fea[6], data_fea[7], data_fea[8], data_fea[9], data_fea[10], data_fea[11]))

    filename = data_path.split("/")[-1].split(".")[0]
    label = main_df[main_df["File name"] == filename]["New Label"].values.item()
    all_label.append(label)

# XG.fit(sig_train, label_train, eval_set=[(sig_test, label_test)])

  rel_diff = np.max(np.abs(a_zero_mean), axis=axis,
  s = s**2
  s *= a_zero_mean
  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)
  x = um.multiply(x, x, out=x)
  s = a_zero_mean**2
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)


In [None]:
print(len(all_label)