In [1]:
# Notebook setup: extend the module search path, import the project helpers, seed.
import sys
# Append the parent directory to the module search path. The entry is relative,
# so it is resolved against the kernel's current working directory; presumably
# this is what lets the `src` package imported below be found -- TODO confirm
# the notebook is always launched from its own folder.
sys.path.append("..")

import numpy as np
from src.utils import set_seed, save_model
from src.data import (load_data, split_labels, 
                      remove_labels, data_clean,
                      label_encode, data_split,
                      data_standardize, get_dataloader)

# Called with its default arguments, before any data is loaded.
# NOTE(review): which RNGs are seeded, and with what value, is defined in
# src.utils.set_seed and is not visible from this notebook.
set_seed()

In [2]:
# Load the dataset and tabulate how often each distinct `diagnostic` value occurs.
# NOTE(review): the meaning of the positional arguments (100, 44) lives in
# src.data.load_data and is not visible here. Across the following cells only the
# second argument changes (44, 23, 5, 2) and the label vocabulary in the output
# gets coarser with it -- presumably a label-granularity selector; TODO confirm.
data, labels = load_data(100, 44)
# Last expression of the cell, so the counts are rendered as the cell output.
labels.diagnostic.value_counts()

diagnostic
[NORM]                            7032
[]                                4578
[NDT]                             1461
[IRBBB]                            602
[LAFB]                             591
                                  ... 
[1AVB, ILMI, IVCD, LAFB, ASMI]       1
[1AVB, IPMI, LNGQT]                  1
[ILBBB, ISCIL, LAO/LAE]              1
[IMI, IVCD, LAFB]                    1
[RAO/RAE, ISCIL]                     1
Name: count, Length: 853, dtype: int64

In [3]:
# Same inspection as the previous cell with the second load_data argument set to 23;
# the output below lists 518 distinct `diagnostic` values instead of 853.
# NOTE(review): argument semantics are defined in src.data.load_data -- TODO confirm.
data, labels = load_data(100, 23)
labels.diagnostic.value_counts()

diagnostic
[NORM]                         7032
[]                             4578
[STTC]                         1564
[AMI]                           641
[LAFB/LPFB]                     604
                               ... 
[ILBBB, ISCI]                     1
[IMI, IVCD, AMI, LAFB/LPFB]       1
[LAFB/LPFB, ISCA, ISCI]           1
[AMI, LMI, IVCD, _AVB]            1
[RAO/RAE, ISCI]                   1
Name: count, Length: 518, dtype: int64

In [4]:
# Same inspection with the second load_data argument set to 5; the output below
# only contains combinations of NORM, CD, STTC, MI and HYP (plus the empty `[]`).
# NOTE(review): argument semantics are defined in src.data.load_data -- TODO confirm.
data, labels = load_data(100, 5)
labels.diagnostic.value_counts()

diagnostic
[NORM]                 7032
[]                     4578
[CD]                   2715
[STTC]                 2685
[MI]                   1476
[CD, MI]                763
[STTC, HYP]             554
[STTC, MI]              306
[CD, STTC]              271
[STTC, CD]              260
[CD, HYP]               207
[HYP]                   204
[NORM, CD]              138
[STTC, MI, HYP]         134
[STTC, CD, HYP]         121
[CD, MI, HYP]            69
[CD, STTC, HYP]          57
[CD, MI, STTC]           54
[STTC, CD, MI]           52
[MI, HYP]                51
[STTC, CD, MI, HYP]      42
[CD, MI, STTC, HYP]      28
[NORM, CD, STTC]          1
[NORM, HYP]               1
Name: count, dtype: int64

In [5]:
# Same inspection with the second load_data argument set to 2; the output below
# shows ABNORMAL / NORMAL labels, 4578 records with an empty `[]` label and
# 140 records carrying both labels.
# NOTE(review): argument semantics are defined in src.data.load_data -- TODO confirm.
data, labels = load_data(100, 2)
labels.diagnostic.value_counts()

diagnostic
[ABNORMAL]            10049
[NORMAL]               7032
[]                     4578
[ABNORMAL, NORMAL]      140
Name: count, dtype: int64

In [6]:
# "Multi" variant: reload the two-class labels from scratch (so this cell does not
# depend on what earlier cells left in `data` / `labels`) and run them through
# data_clean, which returns a new (data, labels) pair.
data, labels = load_data(100, 2)
# In the output below the 4578 records with an empty `[]` label are gone, while the
# 140 records labelled [ABNORMAL, NORMAL] are still present.
data, labels = data_clean(data, labels)
labels.diagnostic.value_counts()

diagnostic
[ABNORMAL]            10049
[NORMAL]               7032
[ABNORMAL, NORMAL]      140
Name: count, dtype: int64

In [7]:
# Encode the cleaned labels; label_encode returns three values, unpacked here as
# `fold`, `y` and `encoder`.
# NOTE(review): the meaning of the boolean second argument is not visible here; this
# variant passes False, the split_labels variant further down passes True.
fold, y, encoder = label_encode(labels, False)
# Persist the encoder under the name "binary_multi_encoder" (the cell output reports
# the resulting .pkl path).
save_model(encoder, "binary_multi_encoder")
# Count the distinct rows of `y`, compared as strings. The three counts in the output
# (7032 / 10049 / 140) match the value_counts of the previous cell.
np.unique(y.astype(str), axis=0, return_counts=True)

Model saved to /Users/aaron/USYD/Research Pathway/models/binary_multi_encoder.pkl


(array([['0', '1'],
        ['1', '0'],
        ['1', '1']], dtype='<U21'),
 array([ 7032, 10049,   140]))

In [8]:
# Partition the records and encoded targets into train / validation / test parts
# using the project helper (it receives `fold` alongside the data and targets).
(train_X, train_y,
 val_X, val_y,
 test_X, test_y) = data_split(data, fold, y)

# Standardize the three feature sets; the helper also hands back the scaler,
# which is persisted under the name "binary_multi_scaler".
(train_X, val_X, test_X,
 scaler) = data_standardize(train_X, val_X, test_X)
save_model(scaler, "binary_multi_scaler")

# Wrap each split in a loader.
train_loader, val_loader, test_loader = get_dataloader(
    train_X, train_y,
    val_X, val_y,
    test_X, test_y,
)

# Cell output: dataset sizes as a (train, val, test) tuple.
tuple(len(loader.dataset) for loader in (train_loader, val_loader, test_loader))

Model saved to /Users/aaron/USYD/Research Pathway/models/binary_multi_scaler.pkl


(12083, 1720, 3418)

In [9]:
# "Split" variant: reload the two-class labels and pass them through split_labels,
# which returns a new (data, labels) pair.
data, labels = load_data(100, 2)
# In the output below the labels are plain ABNORMAL / NORMAL values and no empty label
# remains. Each count is the single-label count plus 140 (10049 + 140, 7032 + 140), so
# presumably every dual-labelled record is emitted once per label -- TODO confirm in
# src.data.split_labels.
data, labels = split_labels(data, labels)
labels.diagnostic.value_counts()

diagnostic
ABNORMAL    10189
NORMAL       7172
Name: count, dtype: int64

In [10]:
# Encode the split labels; label_encode returns three values, unpacked here as
# `fold`, `y` and `encoder`.
# NOTE(review): the meaning of the boolean second argument is not visible here; this
# is the only variant in the notebook that passes True.
fold, y, encoder = label_encode(labels, True)
# Persist the encoder under the name "binary_split_encoder" (the cell output reports
# the resulting .pkl path).
save_model(encoder, "binary_split_encoder")
# Count the distinct values of `y`, compared as strings. The counts in the output
# (10189 / 7172) match the value_counts of the previous cell.
np.unique(y.astype(str), axis=0, return_counts=True)

Model saved to /Users/aaron/USYD/Research Pathway/models/binary_split_encoder.pkl


(array(['0', '1'], dtype='<U21'), array([10189,  7172]))

In [11]:
# Partition the records and encoded targets into train / validation / test parts
# using the project helper (it receives `fold` alongside the data and targets).
(train_X, train_y,
 val_X, val_y,
 test_X, test_y) = data_split(data, fold, y)

# Standardize the three feature sets; the helper also hands back the scaler,
# which is persisted under the name "binary_split_scaler".
(train_X, val_X, test_X,
 scaler) = data_standardize(train_X, val_X, test_X)
save_model(scaler, "binary_split_scaler")

# Wrap each split in a loader.
train_loader, val_loader, test_loader = get_dataloader(
    train_X, train_y,
    val_X, val_y,
    test_X, test_y,
)

# Cell output: dataset sizes as a (train, val, test) tuple.
tuple(len(loader.dataset) for loader in (train_loader, val_loader, test_loader))

Model saved to /Users/aaron/USYD/Research Pathway/models/binary_split_scaler.pkl


(12130, 1715, 3516)

In [12]:
# "Remove" variant: reload the two-class labels, pass only the labels through
# remove_labels (`data` is not an argument), then clean data and labels together.
data, labels = load_data(100, 2)
labels = remove_labels(labels)
data, labels = data_clean(data, labels)
# In the output below every record carries exactly one label and no
# [ABNORMAL, NORMAL] entry remains. ABNORMAL is 10189 = 10049 + 140 while NORMAL stays
# at 7032, so the dual-labelled records appear to be kept as ABNORMAL only -- TODO
# confirm in src.data.remove_labels.
labels.diagnostic.value_counts()

diagnostic
[ABNORMAL]    10189
[NORMAL]       7032
Name: count, dtype: int64

In [13]:
# Encode the single-label lists; label_encode returns three values, unpacked here as
# `fold`, `y` and `encoder`.
# NOTE(review): the meaning of the boolean second argument is not visible here; this
# variant passes False, like the "multi" variant.
fold, y, encoder = label_encode(labels, False)
# Persist the encoder under the name "binary_remove_encoder" (the cell output reports
# the resulting .pkl path).
save_model(encoder, "binary_remove_encoder")
# Count the distinct values of `y`, compared as strings. The counts in the output
# (10189 / 7032) match the value_counts of the previous cell.
np.unique(y.astype(str), axis=0, return_counts=True)

Model saved to /Users/aaron/USYD/Research Pathway/models/binary_remove_encoder.pkl


(array(['0', '1'], dtype='<U21'), array([10189,  7032]))

In [14]:
# Partition the records and encoded targets into train / validation / test parts
# using the project helper (it receives `fold` alongside the data and targets).
(train_X, train_y,
 val_X, val_y,
 test_X, test_y) = data_split(data, fold, y)

# Standardize the three feature sets; the helper also hands back the scaler,
# which is persisted under the name "binary_remove_scaler".
(train_X, val_X, test_X,
 scaler) = data_standardize(train_X, val_X, test_X)
save_model(scaler, "binary_remove_scaler")

# Wrap each split in a loader.
train_loader, val_loader, test_loader = get_dataloader(
    train_X, train_y,
    val_X, val_y,
    test_X, test_y,
)

# Cell output: dataset sizes as a (train, val, test) tuple.
tuple(len(loader.dataset) for loader in (train_loader, val_loader, test_loader))

Model saved to /Users/aaron/USYD/Research Pathway/models/binary_remove_scaler.pkl


(12015, 1720, 3486)