# ECG DB 3 - MIT-BIH Normal Sinus Rhythm 
### Sebastian D. Goodfellow, Ph.D.

# Setup Notebook

In [1]:
# Import 3rd party libraries
import os
import sys
import wfdb
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pylab as plt

# Import local Libraries
sys.path.insert(0, r'C:\Users\sebig\Documents\Code\ecg_db')
from ecgdb.config.config import DATA_DIR

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Download Dataset

In [2]:
# Set database name
db_name = 'db3'

# Set raw data path
raw_path = os.path.join(DATA_DIR, db_name, 'raw')

# Download data set (one-off step; uncomment to fetch 'nsrdb' into raw_path)
# wfdb.dl_database('nsrdb', raw_path)

# Get list of recordings: one ID per '.dat' signal file.
# Match on the file suffix (not a substring) and sort, because os.listdir
# returns entries in arbitrary order and later cells index into this list.
recording_ids = sorted(file.split('.')[0] for file in os.listdir(raw_path) if file.endswith('.dat'))

# Fail early with a clear message instead of an IndexError further down
if not recording_ids:
    raise FileNotFoundError('No .dat recordings found in {}'.format(raw_path))

# Print list of recordings
print(recording_ids)

# Get sample frequency (Hz).
# Only the header is read here; the signal itself is not needed for fs.
record = wfdb.rdheader(os.path.join(raw_path, recording_ids[0]))
fs = record.fs

['16265', '16272', '16273', '16420', '16483', '16539', '16773', '16786', '16795', '17052', '17453', '18177', '18184', '19088', '19090', '19093', '19140', '19830']


# Get Continuous NSR Sections

In [3]:
def contiguous_regions(condition):
    """Get start and stop indices of contiguous True regions in a 1-D mask.

    Parameters
    ----------
    condition : array_like
        One-dimensional sequence. Values are interpreted by truthiness, so a
        boolean mask, a list of bools or an integer array are all accepted.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (n_regions, 2). Each row is ``[start, stop]``
        with ``stop`` exclusive, i.e. ``condition[start:stop]`` is all True.
        An empty input, or one without any True value, gives shape (0, 2).
    """
    condition = np.asarray(condition, dtype=bool)

    # Pad with False on both sides so that a region touching either end of the
    # array still produces both a rising and a falling edge.
    padded = np.concatenate(([False], condition, [False]))

    # Positions where the padded mask changes value. Because the padding is
    # False, changes alternate rising/falling and there is an even number of
    # them; position i refers to index i of the original array.
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    # Pair the edges up as (start, stop) rows
    return edges.reshape(-1, 2)

In [4]:
# Empty list for NSR sections (one dict per contiguous 'N' region and channel)
sections = list()

# Set minimum labels per section
min_labels = 10

# Loop through recordings
# NOTE(review): the [0:1] slice restricts processing to the first recording
# only - confirm whether that is intended before building the full dataset.
for recording_id in recording_ids[0:1]:

    # Path shared by the signal and annotation files of this recording
    record_path = os.path.join(raw_path, recording_id)

    # Import recording
    record = wfdb.rdrecord(record_path)

    # Import annotations
    annotation = wfdb.rdann(record_path, 'atr')

    # Get waveform data, indexed below as [sample point, channel]
    data = record.p_signal

    # Annotation symbols as an array so they can be compared element-wise
    labels = np.array(annotation.symbol)

    # Sample index of each annotation
    sample = annotation.sample

    # Loop through contiguous runs of 'N' annotations
    for start, stop in contiguous_regions(labels == 'N'):

        # Skip runs with fewer than min_labels annotations
        if stop - start < min_labels:
            continue

        # The waveform is cut from the first to the last annotation of the run
        first, last = sample[start], sample[stop - 1]

        # One section per channel; 'waveform' is the 1-based channel number
        for channel in range(data.shape[1]):
            sections.append({'label': 'N', 'waveform': channel + 1, 'record': recording_id,
                             'db': 'nsrdb', 'data': data[first:last, channel]})

# Get Samples

In [5]:
# Empty list for NSR samples (fixed-length chunks cut from the sections)
samples = list()

# Set sample length in seconds
sample_length = 60

# Get sample length in sample points.
# Cast to int because the value is used as a slice bound and range step below;
# a float fs would otherwise make those operations fail.
sample_length_sp = int(round(sample_length * fs))

# Loop through sections
for section in sections:

    # Step through the section one sample length at a time. The slice of the
    # final chunk runs past the end of the data, so it simply keeps whatever
    # remainder is left (it may be shorter than sample_length_sp).
    for sample_id, idx in enumerate(range(0, len(section['data']), sample_length_sp)):
        samples.append({'label': section['label'], 'waveform': section['waveform'], 'record': section['record'],
                        'sample': sample_id, 'db': section['db'], 'data': section['data'][idx:idx + sample_length_sp]})

# Resample

In [6]:
# Set resample frequency (Hz)
fs_rs = 300

# Loop through samples
for sample in samples:

    # This cell rewrites sample['data'] in place, so the rate the waveform is
    # currently stored at is kept under 'fs'. Samples already at fs_rs are
    # skipped, which makes re-running the cell safe; samples without the key
    # have not been resampled yet and are still at the recording rate fs.
    if sample.get('fs') == fs_rs:
        continue
    fs_in = sample.get('fs', fs)

    # Get time array (seconds) of the stored waveform
    times = np.arange(len(sample['data'])) / fs_in

    # Generate new resampling time array (stops before the last original time
    # point, so every new time lies inside the interpolation range)
    times_rs = np.arange(0, times[-1], 1 / fs_rs)

    # Setup interpolation function
    interp_func = interpolate.interp1d(x=times, y=sample['data'], kind='linear')

    # Interpolate the waveform and save it together with its new rate
    sample['data'] = interp_func(times_rs)
    sample['fs'] = fs_rs

# Save Training Dataset

In [7]:
# Set processed data path
processed_path = os.path.join(DATA_DIR, db_name, 'processed')

# Output folders; created up front so saving works on a fresh data directory
waveforms_path = os.path.join(processed_path, 'waveforms')
labels_path = os.path.join(processed_path, 'labels')
os.makedirs(waveforms_path, exist_ok=True)
os.makedirs(labels_path, exist_ok=True)

# Column order of the labels file
label_columns = ['db', 'record', 'label', 'sample', 'signal_id', 'train_label', 'file_name']

# Collect one row per sample and build the DataFrame once at the end
# (DataFrame.append was removed in pandas 2.0 and copies the frame on each call)
label_rows = list()

# Loop through samples
for sample in samples:

    # Set file name
    file_name = '{}_{}_signal{}_sample{}.npy'.format(sample['db'], sample['record'], sample['waveform'], sample['sample'])

    # Get labels
    label_rows.append({'db': sample['db'], 'record': sample['record'], 'label': sample['label'],
                       'sample': sample['sample'], 'signal_id': sample['waveform'],
                       'train_label': 'N', 'file_name': file_name})

    # Save waveform as .npy
    np.save(os.path.join(waveforms_path, file_name), sample['data'])

# Create labels DataFrame (header-only if there are no samples)
labels = pd.DataFrame(data=label_rows, columns=label_columns)

# Save labels
labels.to_csv(os.path.join(labels_path, 'labels.csv'), index=False)