# PhysioNet/Computing in Cardiology Challenge 2020
## Classification of 12-lead ECGs
### 1. Extract Features

# Setup Notebook

In [None]:
# Import standard library and 3rd party libraries
import os
import sys
import time
import numpy as np
from joblib import Parallel, delayed

# Import local libraries
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))))
from kardioml.models.physionet2017.features.feature_extractor import Features
from kardioml import DATA_PATH, ECG_LEADS, FILTER_BAND_LIMITS

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Get List of Files

In [None]:
# Dataset folder located under DATA_PATH; also handed to Features below
path = 'physionet_2020_1'

# Get meta data files: base names (text before the first '.') of the JSON files
# in the 'formatted' folder. Match on the '.json' suffix rather than the
# substring 'json' so entries whose names merely contain 'json' are not picked
# up, and sort because os.listdir returns entries in arbitrary order (keeps
# slices such as filenames[0:200] reproducible between runs).
filenames = sorted(
    file.split('.')[0]
    for file in os.listdir(os.path.join(DATA_PATH, path, 'formatted'))
    if file.endswith('.json')
)

# Extract Features

In [None]:
def process_lead(idx, filenames, lead):
    """Extract and save the features of one recording for a single lead.

    :param idx: position in ``filenames`` of the recording to process.
    :param filenames: sequence of recording names; only ``filenames[idx]`` is used.
    :param lead: lead forwarded to ``extract_features`` and ``save_features``.
    :return: None. Relies on the notebook-level ``path`` and ``FILTER_BAND_LIMITS``.
    """
    # Feature groups requested from the extractor
    groups = ['full_waveform_features', 'rri_features', 'template_features']

    # Extract features
    extractor = Features(filename=filenames[idx], path=path)
    extractor.extract_features(lead=lead, feature_groups=groups, filter_bandwidth=FILTER_BAND_LIMITS)

    # Save features
    extractor.save_features(lead=lead)

In [None]:
# Run the feature extraction for every selected lead
for lead in ECG_LEADS[0:1]:
    print('Processing {} lead {} signals.'.format(len(filenames), lead))
    start_time = time.time()

    # One delayed process_lead call per recording index, dispatched with n_jobs=-1
    jobs = (delayed(process_lead)(idx, filenames, lead) for idx in range(len(filenames)))
    _ = Parallel(n_jobs=-1)(jobs)

    # Report the wall-clock duration in minutes, rounded to two decimals
    elapsed_minutes = (time.time() - start_time) / 60
    print('Completed in {} minutes.'.format(np.round(elapsed_minutes, 2)))

# Extract Features DEBUG

In [None]:
# Serial (non-parallel) variant of the extraction, limited to the first 200
# recordings and using a fixed [3, 45] filter band
for lead in ECG_LEADS[0:1]:
    for filename in filenames[0:200]:

        # Extract features
        features = Features(filename=filename, path=path)
        features.extract_features(
            lead=lead,
            feature_groups=['full_waveform_features', 'rri_features', 'template_features'],
            filter_bandwidth=[3, 45],
        )

        # Save features
        features.save_features(lead=lead)