In [1]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from utils import (
    load_example,
    HealthChecker,
    plot_array,
    show_beats,
    isolate_beats,
    get_beats,
    plot_beats,
    flatten,
    normalize,
)

%matplotlib widget

In [2]:
# NOTE(review): machine-specific absolute path — point this at your local copy
# of the ptbdb files before running the notebook.
data_folder = "/home/edent/Projects/Demos/ECG-Classification-CNN/data/physionet.org/files/ptbdb/1.0.0/"

# rglob yields files in filesystem order, which can differ between machines;
# sort so positional lookups such as paths[0] always select the same record.
paths = sorted(Path(data_folder).rglob('*.hea'))
if not paths:
    # Fail with a clear message instead of an IndexError on paths[0] below.
    raise FileNotFoundError(f"No '.hea' header files found under {data_folder!r}")

# First record, used as the running example in the cells that follow.
test_data, test_info = load_example(paths[0])

In [3]:
# Re-running this cell would otherwise pile up widget figures, so dispose of
# the one created by the previous run first.
try:
    plt.close(fig)
except NameError:
    pass  # first run: no earlier figure exists yet
fig = plot_array(
    test_data[:10000, ...],  # only the first 10000 rows, to keep the plot readable
    title='Sample plot of 15 lead ECG data'.title(),
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [4]:
# Overlay the flattened signal (green) on the raw one (blue), one axes at a time.
x = test_data[:10_000, ...]
try:
    plt.close(ffig)
except NameError:
    pass  # first run: nothing to close
ffig = plot_array(
    x,
    c='b',
    title="Sample plot of 15 lead ECG data with flattened data (green)".title(),
)
fx = flatten(x)
# Column i of the flattened data is drawn on the i-th axes of the figure.
for i, ax in enumerate(ffig.axes):
    ax.plot(range(len(fx)), fx[:, i], c='forestgreen')
ffig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
# Visualise beat detection on the first n rows of the example record.
n = int(1e4)
# NOTE(review): the meaning of the second argument (50) is defined by
# utils.show_beats — confirm there before tuning it.
x = show_beats(test_data[:n,...], 50)
if 'fig1' in locals():
    plt.close(fig1)  # drop the figure left over from a previous run of this cell
fig1, ax = plt.subplots(figsize=(15, 5))
ax.plot(range(n), x[:n], label='Aggregated Processed Data')
# Horizontal guide at one third of the peak-to-peak range.
# np.ptp(x) replaces x.ptp(): the ndarray method was removed in NumPy 2.0.
ax.hlines(np.ptp(x)/3, 0, n, color='g', label="Lower Bound for Peak")
# Capture the y-limits before drawing so the markers span the plotted range.
ymin, ymax = ax.get_ylim()
for i, peak in enumerate(isolate_beats(x[:n])):
    # Label only the first marker so the legend shows a single entry.
    ax.vlines(peak, ymin, ymax, colors='r', label='Identified Peak' if i == 0 else None)
ax.margins(x=0)
ax.set_title("Located peaks and identifiers".title())
ax.legend()
fig1.tight_layout()
fig1.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
try:
    plt.close(fig2)
except NameError:
    pass  # first run: no earlier figure to discard
# Plot the whole recording; slice it (e.g. test_data[:10000, ...]) for a
# quicker preview.
plot_data = test_data
fig2 = plot_beats(plot_data)
fig2.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Investigate Best Padding
It appears we've discovered a suitable way of isolating the peaks. We want to take a snapshot of each peak as a sample and add some padding on either side. We'll investigate the spacing between peaks and decide on the best strategy.

In [12]:
def period_stats(paths):
    """Summarise the spacing between consecutive beats over many records.

    Each path is loaded with ``load_example``, its beat positions are found
    with ``get_beats``, and the differences between neighbouring positions
    are accumulated across all records.

    Parameters
    ----------
    paths : iterable
        Record paths accepted by ``load_example``.

    Returns
    -------
    tuple
        ``(period_min, period_max, period_mean)`` in the units returned by
        ``get_beats`` (presumably sample indices — confirm in utils). If no
        record contributes a beat-to-beat interval, the result is
        ``(inf, 0, nan)``.
    """
    period_sum, n_periods = 0, 0
    period_min = np.inf
    period_max = 0
    for path in tqdm(paths, leave=False):
        data = load_example(path)[0]
        diffs = np.diff(get_beats(data))
        if diffs.size == 0:
            # Fewer than two beats: no interval to measure, and min()/max()
            # on an empty array would raise ValueError.
            continue
        period_min = min(period_min, diffs.min())
        period_max = max(period_max, diffs.max())
        period_sum += diffs.sum()
        n_periods += diffs.size
    # Avoid ZeroDivisionError when nothing was accumulated.
    period_mean = period_sum / n_periods if n_periods else float('nan')
    return period_min, period_max, period_mean

In [16]:
# Inspect one specific record: the entry at index 505 of `paths`.
data = load_example(paths[505])[0]

try:
    plt.close(tfig)
except NameError:
    pass  # first run: no earlier figure to discard
tfig = plot_beats(data)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [17]:
# Scan every record in `paths` (each file is loaded in turn, hence the progress
# bar) and unpack the (min, max, mean) spacing between consecutive beats.
mins, maxs, means = period_stats(paths)

HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))

In [18]:
# Display (shortest, longest, mean) beat-to-beat spacing across all records.
mins, maxs, means

(100, 9508, 743.4597841247503)