# PhysioNet/Computing in Cardiology Challenge 2020
## Classification of 12-lead ECGs
### 0. Resampling

# Setup Notebook

In [17]:
# Import standard-library and 3rd party libraries
import os
import sys
import json
import shutil
import random
import numpy as np
import pandas as pd
from scipy import signal
from joblib import Parallel, delayed

# Import local Libraries
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))))
from kardioml import DATA_PATH

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
def process_signal(path, filename, fs):
    """Rescale and resample one formatted recording and save it under ``<path>/<fs>/``.

    Reads ``<path>/formatted/<filename>.json`` (meta data) and
    ``<path>/formatted/<filename>.npy`` (waveform), rescales the waveform with
    ``scale_waveforms``, resamples it along axis 0 to the sample frequency
    ``fs`` and writes the updated meta data and waveform to
    ``<path>/<fs>/<filename>.json`` and ``<path>/<fs>/<filename>.npy``.

    Parameters
    ----------
    path : str
        Dataset directory that contains the 'formatted' sub-directory.
    filename : str
        Recording name without file extension.
    fs : int
        Target sample frequency. It is stored in the meta data under
        'fs_training' and used as the name of the output sub-directory.

    Returns
    -------
    None
        Results are written to disk.
    """
    source_dir = os.path.join(path, 'formatted')
    target_dir = os.path.join(path, str(fs))

    # Import meta data. A context manager is used so the file handle is closed
    # immediately instead of lingering in a long-lived parallel worker.
    with open(os.path.join(source_dir, '{}.json'.format(filename))) as file:
        meta_data = json.load(file)
    meta_data['fs_training'] = fs

    # Import waveform
    waveform = np.load(os.path.join(source_dir, '{}.npy'.format(filename)))

    # Rescale (done before resampling, so the rpeak indices from the meta data
    # still refer to the sample positions of the loaded waveform)
    waveform = scale_waveforms(waveform=waveform, rpeaks=meta_data['rpeaks'])

    # Resample waveform from 'fs_resampled' (meta data) to the target frequency
    samples = int(waveform.shape[0] * fs / meta_data['fs_resampled'])
    waveform = signal.resample(x=waveform, num=samples, axis=0)

    # Make sure the output directory exists so the function also works when it
    # is called on its own; exist_ok keeps this safe for concurrent workers.
    os.makedirs(target_dir, exist_ok=True)

    # Save meta data
    with open(os.path.join(target_dir, '{}.json'.format(filename)), 'w') as file:
        json.dump(meta_data, file, sort_keys=False, indent=4)

    # Save waveform data npy file
    np.save(os.path.join(target_dir, '{}.npy'.format(filename)), waveform)
    
def scale_waveforms(waveform, rpeaks):
    """Scale the waveform by the median rpeak amplitude of lead I.

    The first non-empty entry of ``rpeaks`` supplies the sample indices at
    which column 0 (lead I) of ``waveform`` is read; the whole waveform is
    divided by the median of those amplitudes. If that median is zero or not
    finite, the next non-empty entry is tried. When no usable rpeaks exist,
    the waveform is standardised instead (global mean removed, divided by the
    global standard deviation).

    Parameters
    ----------
    waveform : numpy.ndarray
        2-D array indexed as ``waveform[sample, channel]``.
    rpeaks : list of lists of int, or None
        One list of rpeak sample indices per channel; entries may be empty.

    Returns
    -------
    numpy.ndarray
        The rescaled waveform, same shape as the input.
    """
    if rpeaks:
        for rpeak_array in rpeaks:
            # Skip channels without detected rpeaks.
            if rpeak_array is None or len(rpeak_array) == 0:
                continue

            # Index with the non-empty array that was found. Always indexing
            # with rpeaks[0] yields the median of an empty selection (NaN)
            # whenever lead I itself has no rpeaks but another lead does.
            scale = np.median(waveform[rpeak_array, 0])

            # A zero or non-finite scale would fill the output with inf/NaN.
            if np.isfinite(scale) and scale != 0:
                return waveform / scale

    # Fallback: no usable rpeaks, standardise over the whole recording.
    return (waveform - waveform.mean()) / waveform.std()

In [19]:
# Set sample frequencies
sample_frequencies = [350]

# Set datasets
datasets = ['A', 'B', 'D', 'E', 'F']

# Loop through sample frequencies
for fs in sample_frequencies:

    # Loop through datasets
    for dataset in datasets:

        dataset_path = os.path.join(DATA_PATH, dataset)

        # Get recording names from the meta data files. Match on the actual
        # '.json' extension (a substring test would also pick up names such as
        # 'x.json.bak') and strip only the extension so that names containing
        # dots stay intact. Sorted for a deterministic processing order.
        filenames = sorted(
            os.path.splitext(filename)[0]
            for filename in os.listdir(os.path.join(dataset_path, 'formatted'))
            if filename.endswith('.json')
        )

        # Create directory for the resampled data
        os.makedirs(os.path.join(dataset_path, str(fs)), exist_ok=True)

        # Process all recordings in parallel (n_jobs=-1 uses all CPUs);
        # process_signal writes to disk, so the returned list is discarded.
        _ = Parallel(n_jobs=-1)(
            delayed(process_signal)(dataset_path, filename, fs)
            for filename in filenames
        )