# Prepare Data

In [None]:
from helper_functions import load_patient, extract_training_windows
import numpy as np
import os


#_______________________________________________
# Initializations
#_______________________________________________

# Base folder path
base_path = '../'
# Folder that receives the prepared training arrays
training_data_path = ''.join((base_path, 'training_data/'))

# Window/Segment length
l = 20 #seconds
# Window stride for testing
s = 10 #seconds

#_______________________________________________
# Don't change these values
#_______________________________________________
# Sampling frequency of the ECG signal is 400 Hz (CPSC2020 data)
fs = 400
# Window/segment length and test-window stride, converted from seconds to samples
win_size, stride = fs * l, fs * s
#_______________________________________________

# Create the output folder when it is missing. exist_ok=True makes the call
# a no-op for an existing folder and avoids the check-then-create race.
os.makedirs(training_data_path, exist_ok=True)
    
for pat_num in range(1,11):

    #_______________________________________________
    # Training Data Preparation
    #_______________________________________________
    # Load one patient's ECG and annotations, then cut them into training
    # windows of win_size samples.
    ecg, R_ann, S_ann, V_ann = load_patient(base_path, pat_num)
    X_train, y_train, R_w, S_w, V_w = extract_training_windows(ecg, R_ann, S_ann, V_ann, win_size)

    print('Total Windows : ', len(X_train))

    # Indexes of windows where S beats are present.
    s_w_idx = np.asarray([w for w, ann in enumerate(S_w) if ann.any()], dtype=np.int32)

    # Indexes of windows where V beats are present.
    v_w_idx = np.asarray([w for w, ann in enumerate(V_w) if ann.any()], dtype=np.int32)

    # Indexes of windows that contain S beats, V beats, or both (sorted, no duplicates).
    sv_idx = np.unique(np.concatenate((s_w_idx, v_w_idx)))

    # All indexes of training windows.
    idx = np.arange(len(X_train))

    # Indexes of windows without any S or V beat. (Normal R peaks)
    rem_idx = np.delete(idx, sv_idx, 0)

    # Randomly keep one third of the normal windows (drawn without replacement).
    # NOTE(review): the draw is unseeded, so the selection differs between runs.
    norm_idx = np.random.choice(rem_idx, size=int(len(rem_idx)*(1/3)), replace=False)

    # Final training set: the sampled normal windows followed by every S/V window.
    X_train = np.concatenate((X_train[norm_idx], X_train[sv_idx]))
    y_train = np.concatenate((y_train[norm_idx], y_train[sv_idx]))

    assert len(X_train) == len(y_train)

    print('Selected Windows : ', len(X_train))

    print('Saving Data')
    # One pair of files per patient, e.g. X_train_P01.npy / y_train_P01.npy
    pat_tag = str(pat_num).zfill(2)
    f_X = training_data_path + 'X_train_P' + pat_tag + '.npy'
    f_y = training_data_path + 'y_train_P' + pat_tag + '.npy'
    np.save(f_X, X_train)
    np.save(f_y, y_train)
    print('Done..')

# Prepare Data with Augmentation

In [None]:
from helper_functions import load_patient, extract_training_windows, get_noise,normalize_bound
import numpy as np
import os
from tqdm import tqdm


#_______________________________________________
# Initializations
#_______________________________________________

# Base folder path
base_path = '../'
# Folder that receives the prepared (augmented) training arrays
training_data_path = ''.join((base_path, 'training_data_aug/'))

# Window/Segment length
l = 20 #seconds
# Window stride for testing
s = 10 #seconds

#_______________________________________________
# Don't change these values
#_______________________________________________
# Sampling frequency of the ECG signal is 400 Hz (CPSC2020 data)
fs = 400
# Window/segment length and test-window stride, converted from seconds to samples
win_size, stride = fs * l, fs * s
#_______________________________________________

# Create the output folder when it is missing. exist_ok=True makes the call
# a no-op for an existing folder and avoids the check-then-create race.
os.makedirs(training_data_path, exist_ok=True)

# Noise recordings handed to get_noise() when augmenting windows.
# NOTE(review): presumably muscle-artifact (ma) and baseline-wander (bw)
# noise records - confirm against the data folder.
ma = np.load(base_path +'data/ma.npy')
bw = np.load(base_path +'data/bw.npy')

# Number of noisy copies generated for every S/V window.
aug_factor = 1



for pat_num in range(1,11):

    #_______________________________________________
    # Training Data Preparation
    #_______________________________________________
    # Load one patient's ECG and annotations, then cut them into training
    # windows of win_size samples.
    ecg, R_ann, S_ann, V_ann = load_patient(base_path, pat_num)
    X_train, y_train, R_w, S_w, V_w = extract_training_windows(ecg, R_ann, S_ann, V_ann, win_size)

    print('Total Windows : ', len(X_train))

    # Indexes of windows where S beats are present.
    s_w_idx = np.asarray([w for w, ann in enumerate(S_w) if ann.any()], dtype=np.int32)

    # Indexes of windows where V beats are present.
    v_w_idx = np.asarray([w for w, ann in enumerate(V_w) if ann.any()], dtype=np.int32)

    # Indexes of windows that contain S beats, V beats, or both (sorted, no duplicates).
    sv_idx = np.unique(np.concatenate((s_w_idx, v_w_idx)))
    # All indexes of training windows.
    idx = np.arange(len(X_train))
    # Indexes of windows without any S or V beat. (Normal R peaks)
    rem_idx = np.delete(idx, sv_idx, 0)

    #_____________________________________________________________________
    # Keep every second normal window (about 50%, chosen deterministically).
    norm_idx = rem_idx[::2]

    # Placeholders for the augmented windows: aug_factor noisy copies of
    # every S/V window, stored without the trailing axis.
    n_aug = len(sv_idx) * aug_factor
    X_aug = np.zeros((n_aug, win_size))
    y_aug = np.zeros((n_aug, win_size))

    row = 0
    for win_idx in tqdm(sv_idx):

        # Squeeze once per source window; both are reused for every copy.
        clean_window = np.squeeze(X_train[win_idx])
        label_window = np.squeeze(y_train[win_idx])

        for _ in range(aug_factor):

            # Fresh noise for each copy, then rescale the sum to [-1, 1].
            noise = get_noise(ma, bw, win_size)
            X_aug[row] = normalize_bound(clean_window + noise, lb=-1, ub=1)

            # Labels are copied unchanged from the source window.
            y_aug[row] = label_window

            row += 1

    # Add a trailing axis so the augmented arrays can be concatenated with
    # the selected rows of X_train / y_train.
    # NOTE(review): assumes X_train / y_train are (windows, win_size, 1) - confirm.
    X_aug = np.expand_dims(X_aug, axis=2)
    y_aug = np.expand_dims(y_aug, axis=2)

    # Final training set: kept normal windows, original S/V windows, noisy S/V copies.
    X_train = np.concatenate((X_train[norm_idx], X_train[sv_idx], X_aug))
    y_train = np.concatenate((y_train[norm_idx], y_train[sv_idx], y_aug))

    assert len(X_train) == len(y_train)

    # The normal-window count is taken before subsampling (len(rem_idx)),
    # so it is larger than the number of normal windows actually saved.
    print('Normal Beat Windows : ', len(rem_idx))
    print('Abnormal Beat Windows : ', len(sv_idx))
    print('Augmented abnormal Beat Windows : ', len(X_aug))

    print('Saving Data')
    # One pair of files per patient, e.g. X_train_P01.npy / y_train_P01.npy
    pat_tag = str(pat_num).zfill(2)
    f_X = training_data_path + 'X_train_P' + pat_tag + '.npy'
    f_y = training_data_path + 'y_train_P' + pat_tag + '.npy'
    np.save(f_X, X_train)
    np.save(f_y, y_train)
    print('Done..')

In [2]:
from helper_functions import train_for_patient, test_for_patient
import os
import numpy as np
import pandas as pd

# Model architecture to train and test
model_name = 'sig2sig_unet' # or sig2sig_cnn
# Number of training epochs
epochs = 30

# Base folder path
base_path = '../'
# Folder holding the prepared (augmented) training arrays
training_data_path = ''.join((base_path, 'training_data_aug/'))

# Patient numbers to process
all_pat = list(range(1, 11))

# Every CSV goes to one folder, created up front so the first write cannot
# hit a missing directory. The same lowercase spelling is used for all
# files so the paths also agree on case-sensitive file systems.
results_path = base_path + 'results/'
os.makedirs(results_path, exist_ok=True)

sv_columns = ['Patient No', 'Total Beats', 'Detected', 'Missed']
all_columns = ['Patient No', 'Total Beats', 'TP', 'FN', 'FP', 'Recall', 'Precision', 'F1']

for run in [1]:

    print('______________________________________________')
    print('Run : ', str(run))
    print('______________________________________________')

    # Result tables and row counters are created per run, so adding more
    # runs to the list above does not overflow the S/V tables.
    # results_all / perc_all hold one row per patient, indexed by pat_num - 1.
    results_all = np.zeros((10,5), dtype = np.int32)
    perc_all = np.zeros((10,3), dtype = np.float32)
    # NOTE(review): 7 and 8 rows are presumably the number of patients with
    # S beats and V beats in this data set - confirm before changing all_pat.
    results_S = np.zeros((7,4), dtype = np.int32)
    results_V = np.zeros((8,4), dtype = np.int32)
    s_count = 0
    v_count = 0

    for pat_num in all_pat:

        train_for_patient(model_name, pat_num, epochs = epochs , run = run, input_size = 8000, train_path = training_data_path)
        stats_R, stats_S, stats_V = test_for_patient(model_name, pat_num, epochs = epochs ,
                                                        run = run, threshold = 0.1,input_size = 8000)

        #_______________________________________________
        # Saving stats
        #_______________________________________________
        # The CSVs are rewritten after every patient, so partial results
        # survive if a later patient fails.

        # S-beat stats; skipped when stats_S is empty. The length check works
        # for lists and array-likes alike.
        if len(stats_S) > 0:
            results_S[s_count, 0] = pat_num
            results_S[s_count, 1:] = stats_S[:3]

            df_S = pd.DataFrame(results_S, columns=sv_columns)
            f = results_path + model_name + '_S_r' + str(run) + '.csv'
            df_S.to_csv(f, index = False, header=True)

            s_count += 1

        # V-beat stats; same layout and handling as the S-beat table.
        if len(stats_V) > 0:
            results_V[v_count, 0] = pat_num
            results_V[v_count, 1:] = stats_V[:3]

            df_V = pd.DataFrame(results_V, columns=sv_columns)
            f = results_path + model_name + '_V_r' + str(run) + '.csv'
            df_V.to_csv(f, index = False, header=True)

            v_count += 1

        # Overall stats: the first four values of stats_R fill the integer
        # count columns, the next three fill the float percentage columns.
        results_all[pat_num-1, 0] = pat_num
        results_all[pat_num-1, 1:] = stats_R[:4]
        perc_all[pat_num-1] = stats_R[4:7]

        df_all = pd.DataFrame(results_all)
        df_all = pd.concat([df_all, pd.DataFrame(perc_all, dtype = np.float32)], axis=1)
        df_all.columns = all_columns

        f = results_path + model_name + '_all_r' + str(run) + '.csv'

        df_all.to_csv(f, index = False, header=True)


______________________________________________
Run :  1
______________________________________________
______________________________________________
Loading Data for Patient : 1
______________________________________________
Total Beats :  109731
S beats :  24
V beats :  0
______________________________________________
Predicting Test Patient....
______________________________________________


100%|██████████| 110285/110285 [01:05<00:00, 1686.62it/s]


109539
______________________________________________
All Beats
______________________________________________
_________Calculating Stats____________________
______________________________________________
TP's:109402 FN's:329 FP's:116
Recall:99.7, Precision(FNR):99.89, F1-Score:99.79
Total 109731
______________________________________________
S Beats
______________________________________________
_________Calculating Stats____________________
______________________________________________
TP's:24 FN's:0 FP's:96617
Recall:100.0, Precision(FNR):0.02, F1-Score:0.04
Total 24
______________________________________________
Loading Data for Patient : 2
______________________________________________
Total Beats :  108297
S beats :  0
V beats :  4554
______________________________________________
Predicting Test Patient....
______________________________________________


OSError: Unable to open file (unable to open file: name = '../models/sig2sig_unetP02_r1.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)