# PhysioNet/Computing in Cardiology Challenge 2020
## Classification of 12-lead ECGs
### 3. Train Model

# Setup Notebook

In [None]:
# Import standard-library and 3rd party libraries
import os
import sys
import ast
import time
import json
import numpy as np
import pandas as pd

# Import local libraries
# Prepend the directory four levels above the current working directory
# (presumably the repository root) to sys.path so `kardioml` can be imported.
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), *([os.pardir] * 4))))
from kardioml import DATA_PATH
from kardioml.models.physionet2017.training.xgboost_model import Model
from kardioml.data.data_loader import load_challenge_data

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Import Data
### Meta Data

In [None]:
# Read the meta data CSV from the training directory into a DataFrame
meta_data = pd.read_csv(
    os.path.join(DATA_PATH, 'training', 'physionet_2017', 'meta_data.csv'),
)

# Preview the first rows of the DataFrame
meta_data.head()

### Features

In [None]:
# Read the features CSV from the training directory into a DataFrame
features = pd.read_csv(
    os.path.join(DATA_PATH, 'training', 'physionet_2017', 'features.csv'),
)

# Preview the first rows of the DataFrame
features.head()

### Labels

In [None]:
# Read the labels CSV from the training directory into a DataFrame
labels = pd.read_csv(
    os.path.join(DATA_PATH, 'training', 'physionet_2017', 'labels.csv'),
)

# Preview the first rows of the DataFrame
labels.head()

# Hyper-Parameter Tuning

In [None]:
# Parameter bounds for the search: one 2-tuple per hyper-parameter
param_bounds = dict(
    learning_rate=(0.01, 1.0),
    n_estimators=(500, 1500),
    max_depth=(2, 8),
    subsample=(0.5, 1.0),
    colsample_bytree=(0.5, 1.0),
    gamma=(0.001, 2.0),
    min_child_weight=(0, 10),
    max_delta_step=(0, 10),
)

# Number of search iterations
n_iter = 40

# Number of cross-validation folds
cv_folds = 4

# Build 1-D labels for stratifying: parse each 'labels' entry as a Python
# literal and keep only its first element
stratifier = meta_data['labels'].map(lambda raw: ast.literal_eval(raw)[0])

# Initialize the model on the feature columns only; 'dataset', 'filename'
# and 'lead' are dropped before the features are handed over
model = Model(
    features=features.drop(columns=['dataset', 'filename', 'lead']),
    labels=labels,
    cv_folds=cv_folds,
    stratifier=stratifier,
)

# Run the hyper-parameter search
model.tune_hyper_parameters(param_bounds=param_bounds, n_iter=n_iter)

# Save the model
model.save()

# Test Inference

In [None]:
# Load a single raw recording (signal data and header) to use as test input
data, header_data = load_challenge_data(
    filename=os.path.join(DATA_PATH, 'raw', 'Training_WFDB', 'A0100.mat'),
)

# Run inference on the loaded recording with the model from the tuning cell
model.challenge_prediction(data=data, header_data=header_data)