# PhysioNet/Computing in Cardiology Challenge 2020
## Classification of 12-lead ECGs
### 0. Resampling

# Setup Notebook

In [None]:
# Import 3rd party libraries
import os
import sys
import json
import shutil
import random
import numpy as np
import pandas as pd
from scipy import signal
from joblib import Parallel, delayed
from scipy.signal.windows import blackmanharris

# Import local Libraries
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))))
from kardioml import DATA_PATH

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
def process_signal(path, filename, fs):
    """Resample one formatted recording to ``fs`` and save it with peak channels.

    Reads ``<path>/formatted/<filename>.json`` and ``<filename>.npy``, rescales
    the waveform with ``scale_waveforms``, resamples it along axis 0 from
    ``meta_data['fs_resampled']`` to ``fs``, appends three extra columns built
    by ``create_peak_array`` (from the ``rpeaks``, ``p_waves`` and ``t_waves``
    meta data entries, in that order) and writes the updated meta data and
    waveform to ``<path>/<fs>/``.

    :param path: dataset directory containing the ``formatted`` sub-directory;
        the output directory ``<path>/<fs>`` must already exist.
    :param filename: recording name without file extension.
    :param fs: target sampling frequency; stored in the meta data under
        ``fs_training`` and used as the output directory name.
    :return: None. Results are written to disk.
    """
    formatted_dir = os.path.join(path, 'formatted')
    output_dir = os.path.join(path, str(fs))

    # Import meta data (context manager so the file handle is always closed)
    with open(os.path.join(formatted_dir, '{}.json'.format(filename))) as file:
        meta_data = json.load(file)
    meta_data['fs_training'] = fs
    fs_old = meta_data['fs_resampled']

    # Import waveform
    waveform = np.load(os.path.join(formatted_dir, '{}.npy'.format(filename)))

    # Rescale before resampling: the R-peak indices in the meta data refer to
    # the waveform at its original sampling frequency
    waveform = scale_waveforms(waveform=waveform, rpeaks=meta_data['rpeaks'])

    # Resample waveform
    samples = int(waveform.shape[0] * fs / fs_old)
    waveform = signal.resample(x=waveform, num=samples, axis=0)

    # Get one peak column per annotation type (R, P, and T waves)
    peak_columns = [
        create_peak_array(waveform=waveform, peaks=meta_data[key],
                          fs_old=fs_old, fs_new=fs).reshape([-1, 1])
        for key in ('rpeaks', 'p_waves', 't_waves')
    ]

    # Add R, P, and T waves as additional columns
    features = np.concatenate(peak_columns, axis=1)
    waveform = np.append(waveform, features, axis=1)

    # Save meta data
    with open(os.path.join(output_dir, '{}.json'.format(filename)), 'w') as file:
        json.dump(meta_data, file, sort_keys=False, indent=4)

    # Save waveform data npy file
    np.save(os.path.join(output_dir, '{}.npy'.format(filename)), waveform)
    
    
def scale_waveforms(waveform, rpeaks):
    """Scale waveform amplitude by the median lead I amplitude at the R-peaks.

    ``rpeaks`` is a sequence of per-lead lists of R-peak sample indices. The
    first non-empty list is used to sample column 0 of ``waveform`` and the
    whole waveform is divided by the median of those samples. If ``rpeaks`` is
    empty/``None`` or every list in it is empty, the waveform is standardised
    (zero mean, unit standard deviation over all values) instead.

    :param waveform: 2-D array indexed as ``[sample, lead]``.
    :param rpeaks: sequence of lists of R-peak sample indices, or ``None``.
    :return: the rescaled waveform array.
    """
    # Pick the first lead that actually has detections. Always indexing with
    # rpeaks[0] would take the median of an empty selection (NaN) whenever the
    # first list is empty but a later one is not.
    first_detected = next((peak_ids for peak_ids in (rpeaks or []) if peak_ids), None)
    if first_detected is None:
        return (waveform - waveform.mean()) / waveform.std()
    return waveform / np.median(waveform[first_detected, 0])

def create_peak_array(waveform, peaks, fs_old, fs_new):
    """Return a smoothed, normalised peak-indicator array for a resampled waveform.

    Every peak index in ``peaks`` is mapped from ``fs_old`` to ``fs_new`` and a
    21-sample Blackman-Harris window is added around the mapped position.
    Peaks whose window would not fit entirely inside the array are skipped.
    Accumulated values ``<= 1`` are then set to zero and the result is divided
    by its maximum, so a non-zero output has a maximum of 1.

    :param waveform: array whose first dimension is the output length.
    :param peaks: sequence of lists of peak sample indices at ``fs_old``
        (empty or ``None`` gives an all-zero array).
    :param fs_old: sampling frequency the peak indices refer to.
    :param fs_new: sampling frequency of ``waveform``.
    :return: 1-D ``float32`` array of length ``waveform.shape[0]``.
    """
    num_samples = waveform.shape[0]
    peak_array = np.zeros(num_samples, dtype=np.float32)
    if not peaks or num_samples == 0:
        return peak_array

    window = blackmanharris(21)
    half_width = len(window) // 2

    for peak_ids in peaks:
        if not peak_ids:
            continue
        for peak_id in peak_ids:
            peak_time = peak_id / fs_old
            peak_id_new = int(peak_time * fs_new)
            start = peak_id_new - half_width
            stop = peak_id_new + half_width + 1
            # Only add windows that fit completely inside the array
            if start >= 0 and stop <= num_samples:
                peak_array[start:stop] += window

    # Discard everything that does not exceed the threshold of 1
    peak_array[peak_array <= 1] = 0

    # Normalise to a maximum of 1; skip when nothing survived the threshold,
    # otherwise 0 / 0 would turn the whole array into NaN
    max_value = np.max(peak_array)
    if max_value > 0:
        peak_array /= max_value
    return peak_array

In [None]:
# Set sample frequencies
sample_frequencies = [500, 550, 600]

# Set datasets
datasets = ['A', 'B', 'C', 'D', 'E', 'F']

# Loop through sample frequencies
for fs in sample_frequencies:

    # Loop through datasets
    for dataset in datasets:

        dataset_path = os.path.join(DATA_PATH, dataset)

        # Get recording names from the meta data files. Match on the '.json'
        # extension only and strip just that extension, so that names which
        # merely contain 'json' are ignored and names with extra dots are not
        # truncated.
        filenames = [os.path.splitext(filename)[0]
                     for filename in os.listdir(os.path.join(dataset_path, 'formatted'))
                     if filename.endswith('.json')]

        # Create output directory for the resampled data
        os.makedirs(os.path.join(dataset_path, str(fs)), exist_ok=True)

        # Process all recordings of this dataset in parallel
        _ = Parallel(n_jobs=-1)(delayed(process_signal)(dataset_path, filename, fs)
                                for filename in filenames)

In [None]:
fs = 350

# Locate the example recording below the project data directory instead of a
# machine-specific absolute path
formatted_dir = os.path.join(DATA_PATH, 'A', 'formatted')

# Import meta data (context manager so the file handle is always closed)
with open(os.path.join(formatted_dir, 'A0001.json')) as file:
    meta_data = json.load(file)
meta_data['fs_training'] = fs

# Import waveform
waveform = np.load(os.path.join(formatted_dir, 'A0001.npy'))

# Rescale
waveform = scale_waveforms(waveform=waveform, rpeaks=meta_data['rpeaks'])

# Resample waveform
samples = int(waveform.shape[0] * fs / meta_data['fs_resampled'])
waveform = signal.resample(x=waveform, num=samples, axis=0)

# Get peak arrays (the keyword is `waveform`, matching create_peak_array's signature)
rpeak_array = create_peak_array(waveform=waveform, peaks=meta_data['rpeaks'],
                                fs_old=meta_data['fs_resampled'], fs_new=fs)
p_wave_array = create_peak_array(waveform=waveform, peaks=meta_data['p_waves'],
                                 fs_old=meta_data['fs_resampled'], fs_new=fs)
t_wave_array = create_peak_array(waveform=waveform, peaks=meta_data['t_waves'],
                                 fs_old=meta_data['fs_resampled'], fs_new=fs)

# Add the R-peak column only; the P- and T-wave arrays are computed above but
# deliberately not appended in this cell
# NOTE(review): process_signal appends all three columns - confirm that
# appending only the R-peak column is still intended here
features = np.concatenate([rpeak_array.reshape([-1, 1])], axis=1)
waveform = np.append(waveform, features, axis=1)

In [None]:
import matplotlib.pylab as plt

In [None]:
# Plot column 10 of the waveform array as a black line on a white 15x5 figure
fig = plt.figure(facecolor='w', figsize=(15, 5))
trace = waveform[:, 10]
plt.plot(trace, '-k')
plt.show()

In [None]:
# Plot column 14 of the waveform array as a black line on a white 15x5 figure
fig = plt.figure(facecolor='w', figsize=(15, 5))
trace = waveform[:, 14]
plt.plot(trace, '-k')
plt.show()

In [None]:
# Locate the saved 350 Hz version of the example recording below the project
# data directory instead of a machine-specific absolute path
resampled_dir = os.path.join(DATA_PATH, 'A', '350')

# Import meta data (context manager so the file handle is always closed)
with open(os.path.join(resampled_dir, 'A0001.json')) as file:
    meta_data = json.load(file)

# Import waveform
waveform = np.load(os.path.join(resampled_dir, 'A0001.npy'))

In [None]:
# Plot column 0 of the waveform array as a black line on a white 15x5 figure
fig = plt.figure(facecolor='w', figsize=(15, 5))
trace = waveform[:, 0]
plt.plot(trace, '-k')
plt.show()

In [None]:
# Plot column 11 of the waveform array as a black line on a white 15x5 figure
fig = plt.figure(facecolor='w', figsize=(15, 5))
trace = waveform[:, 11]
plt.plot(trace, '-k')
plt.show()

In [None]:
# Display the shape of the currently loaded waveform array
waveform.shape

In [None]:
# NOTE(review): presumably the expected sample count for 60 s at 350 Hz - confirm
60*350