In [None]:
import os

import warnings
# NOTE(review): this installs a blanket 'ignore' filter, so every warning
# category is silenced for all cells that run after this one.
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
import neurokit2 as nk

In [None]:
import multiprocessing
import joblib
from joblib import Parallel, delayed
# CPU count as reported by multiprocessing.cpu_count(); no cell shown in this
# part of the notebook reads it.
num_cores = multiprocessing.cpu_count()

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import BoundaryNorm
import matplotlib.dates as md

In [None]:
def get_csv_file_paths(target):
    """Return the CSV file path(s) designated by ``target``.

    Parameters
    ----------
    target : str
        Either a directory, which is scanned non-recursively for entries whose
        name ends with ``.csv``, or the path of a single file.

    Returns
    -------
    list of str
        For a directory: ``os.path.join(target, name)`` for every regular file
        directly inside it whose name ends with ``.csv``, sorted by name.
        For a file: a one-element list holding ``target`` unchanged (the
        extension of a single file is not checked).

    Raises
    ------
    FileNotFoundError
        If ``target`` is neither an existing directory nor an existing file.
        ``FileNotFoundError`` is an ``Exception`` subclass, so callers that
        catch ``Exception`` keep working.
    """
    if os.path.isdir(target):
        # os.listdir() returns entries in arbitrary order and also lists
        # sub-directories, so sort the names and keep regular files only.
        candidates = (
            os.path.join(target, name)
            for name in sorted(os.listdir(target))
            if name.endswith('.csv')
        )
        return [path for path in candidates if os.path.isfile(path)]

    if os.path.isfile(target):
        return [target]

    raise FileNotFoundError('Target is not a file or directory.')

In [None]:
# Directory of the CSV that is read into `preprocessed_df`.
PRE_PROCESSED_DATA_PATH = './data/ecg_preprocessed'
# Directory used by the `processed_df` read (same file name, different folder).
PROCESSED_DATA_PATH = './data/ecg_processed'
# Base name (without ".csv") of the file to load; the plotting cells also use
# it as the prefix of every saved figure file.
FILE_NAME = '30100'

In [None]:
# Sampling rate in Hz (samples per second); at 1000 Hz consecutive samples are
# 1 ms apart.
SAMPLE_RATE = 1000
# Number of rows kept per category by the windowing cell: 2 seconds of samples.
WINDOW_SIZE = SAMPLE_RATE * 2

In [None]:
# Column name -> dtype for the preprocessed ECG CSV. The insertion order of this
# mapping is the positional column order handed to `names` below, so keep it in
# sync with the file layout.
_preprocessed_column_dtypes = {
    'timestamp': str,
    'signal': float,
    'signal_normalised': float,
    'subject_id': str,
    'category': str,
    'code': str,
    'ECG_Raw': float,
    'ECG_Clean': float,
    'ECG_Rate': float,
    'ECG_Quality': float,
    'ECG_R_Peaks': float,
    'ECG_P_Peaks': float,
    'ECG_P_Onsets': float,
    'ECG_P_Offsets': float,
    'ECG_Q_Peaks': float,
    'ECG_R_Onsets': float,
    'ECG_R_Offsets': float,
    'ECG_S_Peaks': float,
    'ECG_T_Peaks': float,
    'ECG_T_Onsets': float,
    'ECG_T_Offsets': float,
    'ECG_Phase_Atrial': float,
    'ECG_Phase_Completion_Atrial': float,
    'ECG_Phase_Ventricular': float,
    'ECG_Phase_Completion_Ventricular': float,
    'Index': 'Int64',  # pandas nullable integer dtype
    'Label': str,
}

# The first line of the file is skipped (presumably its own header row) and the
# column names are supplied explicitly instead.
preprocessed_df = pd.read_csv(
    f'{PRE_PROCESSED_DATA_PATH}/{FILE_NAME}.csv',
    skiprows=[0],
    names=list(_preprocessed_column_dtypes),
    dtype=_preprocessed_column_dtypes,
)

# processed_df = pd.read_csv(
#     f'{PROCESSED_DATA_PATH}/{FILE_NAME}.csv',
#     skiprows=[0],
#     names=['timestamp','signal','signal_normalised','subject_id','category','code'],
#     dtype={
#         'timestamp': str, 
#         'signal': float, 
#         'signal_normalised': float, 
#         'subject_id': str, 
#         'category': str, 
#         'code': str
#     }
# )

In [None]:
preprocessed_window = preprocessed_df.sort_values(['timestamp']).groupby('category').tail(WINDOW_SIZE)[['timestamp','category','ECG_Clean']]
preprocessed_window = preprocessed_window.drop(preprocessed_window[preprocessed_window['category'] == '0'].index)
preprocessed_categories = preprocessed_window['category'].unique()

# processed_window = processed_df.sort_values(['timestamp']).groupby('category').tail(WINDOW_SIZE)[['timestamp','category','signal']]
# processed_window = processed_window.drop(processed_window[processed_window['category'] == '0'].index)
# processed_categories = processed_window['category'].unique()

In [None]:
plt.rcParams["figure.dpi"] = 200

with tqdm(total=len(preprocessed_categories)) as progress_bar:
    # Loop over the categories and read the data in batches
    for category in preprocessed_categories:
        progress_bar.set_description(f'Graphing category {category}...')

        data = preprocessed_window[preprocessed_window['category'] == category]
        fig, ax = plt.subplots(figsize=(30, 10), dpi=300)

        ax.set_xlabel('Timestamp')
        ax.set_title(f'Cleaned ECG Data of Category "{category}"')

        y_minor_ticks = np.arange(data['ECG_Clean'].min(), data['ECG_Clean'].max(), 0.0001)
        y_major_ticks = np.arange(data['ECG_Clean'].min(), data['ECG_Clean'].max(), 0.0005) 

        ax.set_yticks(y_minor_ticks, minor=True)
        ax.set_yticks(y_major_ticks)

        ax.axes.yaxis.set_ticklabels([])

        x_minor_ticks = np.arange(0, len(data), SAMPLE_RATE * 0.04) 
        x_major_ticks = np.arange(0, len(data), SAMPLE_RATE * 0.2)
        ax.set_xticks(x_minor_ticks, minor=True)
        ax.set_xticks(x_major_ticks)
        ax.axes.xaxis.set_ticklabels([])
        ax.set_xlim(0, len(data))

        # Or if you want different settings for the grids:
        ax.grid(which='minor', alpha=0.2, color='red')
        ax.grid(which='major', alpha=0.5, color='red', linewidth=2)

        ax.plot(data.timestamp, data['ECG_Clean'], color='black', linewidth=2)    

        # Plot the average line
        plt.axhline(y=0, color='black', linestyle='--')
        ax.set_ylabel('Processed Signal')
        
        # Save the plot
        if category != '':
            path = f'./figures/category/{category}'
        else :
            path = f'./figures/category/_'

        os.makedirs(path, exist_ok=True)
        path += f'/{FILE_NAME}'

        plt.savefig(f'{path}_preprocessed_{int(WINDOW_SIZE / 1000)}s.png', dpi=300, bbox_inches='tight', pad_inches=0)

        plt.clf()
        plt.close(fig)

        progress_bar.update(1)

In [None]:
plt.rcParams["figure.dpi"] = 200

with tqdm(total=len(processed_categories)) as progress_bar:
    # Loop over the categories and read the data in batches
    for category in processed_categories:
        progress_bar.set_description(f'Graphing category {category}...')

        data = processed_window[processed_window['category'] == category]
        fig, ax = plt.subplots(figsize=(30, 10), dpi=300)

        ax.set_xlabel('Timestamp')
        ax.set_title(f'ECG Data of Category "{category}"')

        y_minor_ticks = np.arange(data['signal'].min(), data['signal'].max(), 0.0001)
        y_major_ticks = np.arange(data['signal'].min(), data['signal'].max(), 0.0005) 

        ax.set_yticks(y_minor_ticks, minor=True)
        ax.set_yticks(y_major_ticks)

        ax.axes.yaxis.set_ticklabels([])

        x_minor_ticks = np.arange(0, len(data), SAMPLE_RATE * 0.04) 
        x_major_ticks = np.arange(0, len(data), SAMPLE_RATE * 0.2)
        ax.set_xticks(x_minor_ticks, minor=True)
        ax.set_xticks(x_major_ticks)
        ax.axes.xaxis.set_ticklabels([])
        ax.set_xlim(0, len(data))

        # Or if you want different settings for the grids:
        ax.grid(which='minor', alpha=0.2, color='red')
        ax.grid(which='major', alpha=0.5, color='red', linewidth=2)

        ax.plot(data.timestamp, data['signal'], color='black', linewidth=2)    

        # Plot the average line
        plt.axhline(y=0, color='black', linestyle='--')
        ax.set_ylabel('Signal')
        
        # Save the plot
        if category != '':
            path = f'./figures/category/{category}'
        else :
            path = f'./figures/category/_'

        os.makedirs(path, exist_ok=True)
        path += f'/{FILE_NAME}'

        plt.savefig(f'{path}_processed_{int(WINDOW_SIZE / 1000)}s.png', dpi=300, bbox_inches='tight', pad_inches=0)

        plt.clf()
        plt.close(fig)

        progress_bar.update(1)

In [None]:
plt.rcParams["figure.dpi"] = 200

with tqdm(total=len(preprocessed_categories)) as progress_bar:
    # Loop over the categories and read the data in batches
    for category in preprocessed_categories:
        progress_bar.set_description(f'Graphing category {category}...')

        preprocessed_data = preprocessed_window[preprocessed_window['category'] == category]
        processed_data = processed_window[processed_window['category'] == category]
        fig, ax = plt.subplots(figsize=(30, 10), dpi=300)

        ax.set_xlabel('Timestamp')
        ax.set_title(f'Comparison ECG Data of Category "{category}"')

        y_minor_ticks = np.arange(preprocessed_data['ECG_Clean'].min(), preprocessed_data['ECG_Clean'].max(), 0.0001)
        y_major_ticks = np.arange(preprocessed_data['ECG_Clean'].min(), preprocessed_data['ECG_Clean'].max(), 0.0005) 

        ax.set_yticks(y_minor_ticks, minor=True)
        ax.set_yticks(y_major_ticks)

        ax.axes.yaxis.set_ticklabels([])

        x_minor_ticks = np.arange(0, len(preprocessed_data), SAMPLE_RATE * 0.04) 
        x_major_ticks = np.arange(0, len(preprocessed_data), SAMPLE_RATE * 0.2)
        ax.set_xticks(x_minor_ticks, minor=True)
        ax.set_xticks(x_major_ticks)
        ax.axes.xaxis.set_ticklabels([])
        ax.set_xlim(0, len(processed_data))

        # Or if you want different settings for the grids:
        ax.grid(which='minor', alpha=0.2, color='red')
        ax.grid(which='major', alpha=0.5, color='red', linewidth=2)

        ax.plot(preprocessed_data.timestamp, preprocessed_data['ECG_Clean'], color='red', linewidth=2, label='pre-processed')    
        ax.plot(processed_data.timestamp, processed_data['signal'], color='blue', linewidth=2, label='raw')    

        # Plot the average line
        plt.axhline(y=0, color='black', linestyle='--')
        ax.set_ylabel('Signal')
        
        # Save the plot
        if category != '':
            path = f'./figures/category/{category}'
        else :
            path = f'./figures/category/_'

        os.makedirs(path, exist_ok=True)
        path += f'/{FILE_NAME}'

        plt.legend()

        plt.savefig(f'{path}_comparison_{int(COMPARISON_WINDOW_SIZE / 1000)}s.png', dpi=300, bbox_inches='tight', pad_inches=0)

        plt.clf()
        plt.close(fig)

        progress_bar.update(1)

In [None]:
plt.rcParams["figure.dpi"] = 200

COMPARISON_WINDOW_SIZE = SAMPLE_RATE * 20
# Categories overlaid in the single comparison figure.
COMPARISON_CATEGORIES = ['Recov1', 'Recov2', 'Recov3', 'Recov4', 'Recov5', 'SSST_Sing_countdown']

# NOTE(review): these assignments replace the shorter windows built earlier in
# the notebook. Re-running the per-category plotting cells after this cell
# will therefore plot COMPARISON_WINDOW_SIZE rows per category.
preprocessed_window = preprocessed_df.sort_values(['timestamp']).groupby('category').tail(COMPARISON_WINDOW_SIZE)[['timestamp','category','ECG_Clean']]
preprocessed_window = preprocessed_window.drop(preprocessed_window[preprocessed_window['category'] == '0'].index)
preprocessed_categories = preprocessed_window['category'].unique()

processed_window = processed_df.sort_values(['timestamp']).groupby('category').tail(COMPARISON_WINDOW_SIZE)[['timestamp','category','signal']]
processed_window = processed_window.drop(processed_window[processed_window['category'] == '0'].index)
processed_categories = processed_window['category'].unique()

fig, ax = plt.subplots(figsize=(120, 10), dpi=300)
ax.set_xlabel('Timestamp')
ax.set_title(f'Category Comparison')

# Each category is plotted against its own 0-based sample index so that the
# traces overlap on a common x axis.
plotted_signals = []
with tqdm(COMPARISON_CATEGORIES) as category_bar:
    for category in category_bar:
        category_bar.set_description(f'Graphing category {category}...')

        data = preprocessed_window[preprocessed_window['category'] == category]
        data = data.reset_index(drop=True)
        if data.empty:
            # Category not present in this file: nothing to draw.
            continue

        ax.plot(data.index, data['ECG_Clean'], linewidth=2, label=category)
        plotted_signals.append(data['ECG_Clean'])

if not plotted_signals:
    plt.close(fig)
    raise ValueError(f'None of the categories {COMPARISON_CATEGORIES} is present in the loaded data.')

# Derive the grid extent from every plotted trace, not just the last one.
y_low = min(signal.min() for signal in plotted_signals)
y_high = max(signal.max() for signal in plotted_signals)
n_samples = max(len(signal) for signal in plotted_signals)

y_minor_ticks = np.arange(y_low, y_high, 0.0001)
y_major_ticks = np.arange(y_low, y_high, 0.0005)
ax.set_yticks(y_minor_ticks, minor=True)
ax.set_yticks(y_major_ticks)
ax.yaxis.set_ticklabels([])

# Minor x tick every 0.04 s, major x tick every 0.2 s worth of samples.
x_minor_ticks = np.arange(0, n_samples, SAMPLE_RATE * 0.04)
x_major_ticks = np.arange(0, n_samples, SAMPLE_RATE * 0.2)
ax.set_xticks(x_minor_ticks, minor=True)
ax.set_xticks(x_major_ticks)
ax.xaxis.set_ticklabels([])
ax.set_xlim(0, n_samples)

# Thin grid on the minor ticks, heavier grid on the major ticks.
ax.grid(which='minor', alpha=0.2, color='red')
ax.grid(which='major', alpha=0.5, color='red', linewidth=2)

# Dashed zero baseline.
ax.axhline(y=0, color='black', linestyle='--')
ax.set_ylabel('Signal')

ax.legend()

# savefig does not create missing directories.
os.makedirs('./figures', exist_ok=True)
path = f'./figures'
path += f'/{FILE_NAME}'

# The label is the window length in seconds.
fig.savefig(f'{path}_{COMPARISON_WINDOW_SIZE // SAMPLE_RATE}s.png', dpi=300, bbox_inches='tight', pad_inches=0)

plt.close(fig)