# PhysioNet/Computing in Cardiology Challenge 2020
## Classification of 12-lead ECGs
### 3. Train Model

# Setup Notebook

In [1]:
# Import standard-library modules (os, sys, ast, time, json) and 3rd party libraries (numpy, pandas)
import os
import sys
import ast
import time
import json
import numpy as np
import pandas as pd

# Import local libraries
# Put the directory four levels above the current working directory at the front of
# sys.path so that `kardioml` can be resolved from there (presumably the repository
# root -- confirm if this notebook is moved). This has to stay above the kardioml
# imports below whenever the package is not installed in the environment.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))))
from kardioml import DATA_PATH, ECG_LEADS, FS, LABELS_LOOKUP, LABELS_COUNT
from kardioml.models.physionet2017.training.xgboost_model import Model
from kardioml.data.data_loader import load_challenge_data

# Configure Notebook
# NOTE(review): the 'ignore' action silences every warning category for the rest of
# the kernel session, so deprecation and numerical warnings will not be shown.
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Import Data
### Meta Data

In [2]:
# Locate the training meta data CSV under DATA_PATH and load it into a DataFrame
meta_data_path = os.path.join(DATA_PATH, 'physionet_2017', 'training', 'meta_data.csv')
meta_data = pd.read_csv(meta_data_path)

# Preview the first rows
meta_data.head()

Unnamed: 0,age,channel_order,filename,label_train,labels,labels_full,labels_int,sex,shape,label_count,length,labels_concat
0,74.0,"['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ...",A0001,"[0, 0, 0, 0, 0, 0, 1, 0, 0]",['RBBB'],['Right bundle branch block'],[6],Male,"[12, 7500]",1,15.0,Right bundle branch block
1,49.0,"['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ...",A0002,"[0, 0, 0, 1, 0, 0, 0, 0, 0]",['Normal'],['Normal sinus rhythm'],[3],Female,"[12, 5000]",1,10.0,Normal sinus rhythm
2,81.0,"['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ...",A0003,"[1, 0, 0, 0, 0, 0, 0, 0, 0]",['AF'],['Atrial fibrillation'],[0],Female,"[12, 5000]",1,10.0,Atrial fibrillation
3,45.0,"['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ...",A0004,"[1, 0, 0, 0, 0, 0, 0, 0, 0]",['AF'],['Atrial fibrillation'],[0],Male,"[12, 5974]",1,11.948,Atrial fibrillation
4,53.0,"['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ...",A0005,"[0, 0, 0, 0, 0, 1, 0, 0, 0]",['PVC'],['Premature ventricular complex'],[5],Male,"[12, 12500]",1,25.0,Premature ventricular complex


### Features

In [3]:
# Locate the training features CSV under DATA_PATH and load it into a DataFrame
features_path = os.path.join(DATA_PATH, 'physionet_2017', 'training', 'features.csv')
features = pd.read_csv(features_path)

# Preview the first rows
features.head()

Unnamed: 0,full_waveform_min,full_waveform_max,full_waveform_mean,full_waveform_median,full_waveform_std,full_waveform_skew,full_waveform_kurtosis,full_waveform_duration,swt_d_1_low_power_ratio,swt_d_1_med_power_ratio,...,rpeak_entropy,rpeak_higuchi_fractal_dimension,template_corr_coeff_mean,template_corr_coeff_std,qrs_corr_coeff_mean,qrs_corr_coeff_std,p_wave_corr_coeff_mean,p_wave_corr_coeff_std,t_wave_corr_coeff_mean,t_wave_corr_coeff_std
0,-0.976816,1.061664,0.000247,-0.001915,0.267624,0.035163,4.168482,14.996,0.423375,0.113496,...,3.317816,2.465088,0.987405,0.003816,0.995713,0.002541,0.866373,0.067781,0.942713,0.022492
1,-0.625051,1.044477,-0.000199,-0.005681,0.167347,2.358342,15.457311,9.996,0.60013,0.098219,...,3.091042,,0.979281,0.006906,0.990972,0.006404,0.886879,0.045675,0.688547,0.088916
2,-0.360254,1.081835,0.000477,-0.025866,0.196894,3.331854,12.586657,9.996,0.565341,0.08548,...,3.135494,2.286132,0.67216,0.178956,0.984642,0.007182,0.471329,0.307302,0.060507,0.327653
3,-0.595681,1.099412,0.000537,-0.015885,0.208745,2.669416,9.934822,11.944,0.60149,0.095649,...,2.877468,2.719416,0.849063,0.125237,0.977497,0.01182,0.376888,0.213236,0.121306,0.238374
4,-0.972058,1.31559,-0.000117,-0.014703,0.252693,1.051541,5.625249,24.996,0.682703,0.091991,...,3.020868,2.183621,0.840088,0.07672,0.946327,0.055893,0.433917,0.288018,0.430445,0.169273


### Labels

In [4]:
# Locate the training labels CSV under DATA_PATH and load it into a DataFrame
labels_path = os.path.join(DATA_PATH, 'physionet_2017', 'training', 'labels.csv')
labels = pd.read_csv(labels_path)

# Preview the first rows
labels.head()

Unnamed: 0,AF,I-AVB,LBBB,Normal,PAC,PVC,RBBB,STD,STE
0,0,0,0,0,0,0,1,0,0
1,0,0,0,1,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0


# Hyper-Parameter Tuning

In [5]:
# Search space: one (first, second) bound pair per hyper-parameter name
param_bounds = dict(
    learning_rate=(0.01, 1.0),
    n_estimators=(300, 1000),
    max_depth=(2, 12),
    subsample=(0.5, 1.0),
    colsample=(0.5, 1.0),
    gamma=(0.001, 10.0),
    min_child_weight=(0, 20),
    max_delta_step=(0, 10),
)

# Number of search iterations and number of cross-validation folds
n_iter = 35
cv_folds = 4

# The 'labels' column holds string-encoded lists (e.g. "['RBBB']"); parse each one
# and keep only its first entry to obtain a 1-D label for stratifying
parsed_labels = meta_data['labels'].map(ast.literal_eval)
stratifier = parsed_labels.map(lambda label_list: label_list[0])

# Initialize the model with the loaded features/labels and the CV configuration
model = Model(
    features=features,
    labels=labels,
    cv_folds=cv_folds,
    stratifier=stratifier,
)

# Run the hyper-parameter search over the bounds defined above
model.tune_hyper_parameters(param_bounds=param_bounds, n_iter=n_iter)

# Save model
model.save()

|   iter    |  target   | colsample |   gamma   | learni... | max_de... | max_depth | min_ch... | n_esti... | subsample |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6566  [0m | [0m 0.7744  [0m | [0m 7.152   [0m | [0m 0.6067  [0m | [0m 5.449   [0m | [0m 6.237   [0m | [0m 12.92   [0m | [0m 606.3   [0m | [0m 0.9459  [0m |
| [0m 2       [0m | [0m 0.6221  [0m | [0m 0.9818  [0m | [0m 3.835   [0m | [0m 0.7938  [0m | [0m 5.289   [0m | [0m 7.68    [0m | [0m 18.51   [0m | [0m 349.7   [0m | [0m 0.5436  [0m |
| [0m 3       [0m | [0m 0.6513  [0m | [0m 0.5101  [0m | [0m 8.326   [0m | [0m 0.7804  [0m | [0m 8.7     [0m | [0m 11.79   [0m | [0m 15.98   [0m | [0m 623.0   [0m | [0m 0.8903  [0m |
| [95m 4       [0m | [95m 0.6667  [0m | [95m 0.5591  [0m | [95m 6.4     [0m | [95m 0.1519  [0m | [95m 9.447   [0m | [95m 7.218   [0m | 

# Test Inference

In [9]:
# Load a single recording (A0001) from the raw training set to try out inference
sample_path = os.path.join(DATA_PATH, 'raw', 'Training_WFDB', 'A0001.mat')
data, header_data = load_challenge_data(filename=sample_path)

# Run inference; the returned value is displayed as the cell output
model.challenge_prediction(data=data, header_data=header_data)

(array([0, 0, 0, 0, 0, 0, 1, 0, 0]),
 array([2.6637601e-04, 8.3147711e-04, 7.7584069e-03, 6.2077896e-05,
        1.2449280e-03, 3.9598050e-05, 9.8802918e-01, 9.6495810e-04,
        1.3097521e-04], dtype=float32))