In [1]:
# Add the parent directory to the module search path so that the `src`
# package imported below can be resolved.
# NOTE(review): this is relative to the kernel's working directory —
# presumably the notebook lives one level below the project root; confirm.
import sys
sys.path.append("..")

import numpy as np
from src.utils import set_seed
from src.data import (load_data, split_labels, 
                      remove_labels, data_clean,
                      label_encode, data_split,
                      data_standardize, get_dataloader)

# Seed the random number generators using the helper's default arguments.
# NOTE(review): which libraries get seeded and with what value is decided
# inside `set_seed` — confirm it covers everything the pipeline relies on.
set_seed()

In [2]:
# Load the dataset with arguments (100, 44) and tabulate how often each
# distinct `diagnostic` label combination occurs.
# NOTE(review): 100 looks like the sampling rate and 44 the number of
# diagnostic classes (finest granularity used here) — confirm in `load_data`.
data, labels = load_data(100, 44)
labels.diagnostic.value_counts()

diagnostic
[NORM]                          7032
[]                              4578
[NDT]                           1461
[IRBBB]                          602
[LAFB]                           591
                                ... 
[LMI, LPFB, CRBBB]                 1
[LVH, IMI, ISC_, AMI, IRBBB]       1
[ISCAL, ISCIN, LAO/LAE]            1
[ISCLA, AMI, IMI, LAO/LAE]         1
[ISCIL, RAO/RAE]                   1
Name: count, Length: 852, dtype: int64

In [3]:
# Reload with 23 as the second argument and tabulate the `diagnostic`
# label combinations again; `data` and `labels` from the previous cell
# are overwritten.
# NOTE(review): 23 presumably selects a coarser class grouping — confirm.
data, labels = load_data(100, 23)
labels.diagnostic.value_counts()

diagnostic
[NORM]                        7032
[]                            4578
[STTC]                        1564
[AMI]                          641
[LAFB/LPFB]                    604
                              ... 
[LMI, STTC]                      1
[IMI, IVCD, LAFB/LPFB]           1
[ISCI, ILBBB, LAO/LAE]           1
[LAO/LAE, _AVB, IMI, IVCD]       1
[ISCI, RAO/RAE]                  1
Name: count, Length: 495, dtype: int64

In [4]:
# Reload with 5 as the second argument and tabulate the `diagnostic`
# label combinations; the recorded output only contains combinations of
# NORM, CD, STTC, MI and HYP (plus the empty list).
data, labels = load_data(100, 5)
labels.diagnostic.value_counts()

diagnostic
[NORM]                 7032
[]                     4578
[CD]                   2715
[STTC]                 2685
[MI]                   1476
[CD, MI]                763
[HYP, STTC]             554
[CD, STTC]              531
[MI, STTC]              306
[HYP, CD]               207
[HYP]                   204
[HYP, CD, STTC]         178
[CD, NORM]              138
[HYP, MI, STTC]         134
[CD, MI, STTC]          106
[HYP, CD, MI, STTC]      70
[HYP, CD, MI]            69
[HYP, MI]                51
[CD, NORM, STTC]          1
[HYP, NORM]               1
Name: count, dtype: int64

In [5]:
# Reload with 2 as the second argument (labels become NORMAL / ABNORMAL in
# the recorded output) and tabulate the `diagnostic` label combinations.
# Note that some rows carry an empty list and some carry both labels.
data, labels = load_data(100, 2)
labels.diagnostic.value_counts()

diagnostic
[ABNORMAL]            10049
[NORMAL]               7032
[]                     4578
[NORMAL, ABNORMAL]      140
Name: count, dtype: int64

In [6]:
# Start again from a fresh two-class load so this cell does not depend on
# what earlier cells left in `data` / `labels`.
data, labels = load_data(100, 2)
# Clean signals and labels together. In the recorded output the rows with
# an empty `diagnostic` list are gone after this call, while rows holding
# both NORMAL and ABNORMAL are still present.
data, labels = data_clean(data, labels)
labels.diagnostic.value_counts()

diagnostic
[ABNORMAL]            10049
[NORMAL]               7032
[NORMAL, ABNORMAL]      140
Name: count, dtype: int64

In [7]:
# Encode the cleaned labels; the helper returns the fold assignment, the
# encoded targets and the fitted encoder.
# NOTE(review): the second argument (False here) presumably switches
# between multi-label and single-label encoding — confirm in `label_encode`.
fold, y, encoder = label_encode(labels, False)
# Count the distinct encoded rows. `y` is cast to strings first and
# `axis=0` makes np.unique compare whole rows; the recorded output shows
# two-column indicator rows, including a ['1', '1'] row for dual-labelled
# records.
np.unique(y.astype(str), axis=0, return_counts=True)

(array([['0', '1'],
        ['1', '0'],
        ['1', '1']], dtype='<U21'),
 array([ 7032, 10049,   140]))

In [8]:
# Alternative handling of dual-labelled records: reload the two-class
# data and pass it through `split_labels`.
data, labels = load_data(100, 2)
# After this call `diagnostic` holds plain label values instead of lists.
# The recorded counts (10189 = 10049 + 140, 7172 = 7032 + 140) suggest a
# record with both labels is counted once per label — TODO confirm that
# the matching rows in `data` are duplicated accordingly.
data, labels = split_labels(data, labels)
labels.diagnostic.value_counts()

diagnostic
ABNORMAL    10189
NORMAL       7172
Name: count, dtype: int64

In [9]:
# Encode the labels produced by `split_labels`, this time passing True as
# the second argument.
# NOTE(review): True presumably selects single-label encoding — confirm.
fold, y, encoder = label_encode(labels, True)
# Count the distinct encoded values; the recorded output is a flat array
# with classes '0' and '1'.
np.unique(y.astype(str), axis=0, return_counts=True)

(array(['0', '1'], dtype='<U21'), array([10189,  7172]))

In [10]:
# Third variant: reload the two-class data, resolve dual-labelled records
# with `remove_labels`, then clean.
data, labels = load_data(100, 2)
# `remove_labels` only takes and returns the label table. Judging by the
# recorded counts (ABNORMAL 10189 = 10049 + 140, NORMAL unchanged at 7032)
# it appears to drop NORMAL from records that carry both labels — TODO
# confirm in `remove_labels`.
labels = remove_labels(labels)
# Clean after the label fix-up; rows with an empty `diagnostic` list are
# absent from the recorded output.
data, labels = data_clean(data, labels)
labels.diagnostic.value_counts()

diagnostic
[ABNORMAL]    10189
[NORMAL]       7032
Name: count, dtype: int64

In [11]:
# Encode the labels left by `remove_labels` + `data_clean`. The resulting
# `fold` and `y`, together with `data`, are what the split below consumes.
fold, y, encoder = label_encode(labels, False)
# Count the distinct encoded values; the recorded output shows two classes
# ('0' and '1') and no combined class.
np.unique(y.astype(str), axis=0, return_counts=True)

(array(['0', '1'], dtype='<U21'), array([10189,  7032]))

In [12]:
# Partition signals and targets into train / validation / test parts using
# the fold assignment returned by `label_encode`.
train_X, train_y, val_X, val_y, test_X, test_y = data_split(data, fold, y)
# Display the size of every part, in the same order as they were unpacked,
# to check that each X lines up with its y.
tuple(map(len, (train_X, train_y, val_X, val_y, test_X, test_y)))

(12083, 12083, 1720, 1720, 3418, 3418)

In [13]:
# Standardize the three signal arrays; the helper also hands back the
# scaler it used so it can be reused later.
# NOTE(review): presumably the scaler is fitted on the training part only —
# confirm in `data_standardize`.
train_X, val_X, test_X, scaler = data_standardize(
    train_X, val_X, test_X
)
# Display the shape of each standardized array (train, validation, test).
tuple(split.shape for split in (train_X, val_X, test_X))

((12083, 1000, 12), (1720, 1000, 12), (3418, 1000, 12))

In [14]:
# Wrap each (X, y) pair in a data loader using the helper's default settings.
train_loader, val_loader, test_loader = get_dataloader(
    train_X, train_y,
    val_X, val_y,
    test_X, test_y,
)
# Display how many samples back each loader (train, validation, test).
tuple(
    len(loader.dataset)
    for loader in (train_loader, val_loader, test_loader)
)

(12083, 1720, 3418)