# Exploratory Overview of the ECG Channel Data

## Import libraries

In [1]:
import os
import mne
import matplotlib.pyplot as plt
import neurokit2 as nk
import pandas as pd

import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (12, 6)
mpl.rcParams['figure.dpi'] = 120

%matplotlib tk

# Part 1: Data Loading

## Load ECG Signal from Parquet

In [2]:
# Constants
sampling_rate = 1000  # Hz; passed to every NeuroKit2 call below
data_root = './task_data'
subject_id = "032"

# Build subject-specific folder
data_folder = os.path.join(data_root, f"sub-{subject_id}")

# Load single subject data for exploration
parquet_file = f"sub-{subject_id}_task-memory_ecg_task.parquet"
full_path = os.path.join(data_folder, parquet_file)

# Fail early with a readable message instead of a deep pyarrow traceback.
if not os.path.isfile(full_path):
    raise FileNotFoundError(f"ECG parquet file not found: {full_path}")
data = pd.read_parquet(full_path)

# Both channels are read below; report a schema mismatch up front.
missing_columns = {'ecg_nk', 'ecg_raw'} - set(data.columns)
if missing_columns:
    raise KeyError(f"Missing expected column(s) in {full_path}: {sorted(missing_columns)}")

# The 'ecg_nk' channel is analysed with its sign flipped.
# NOTE(review): presumably the lead was recorded with inverted polarity, so
# R-peaks must point upward for peak detection -- confirm with the recording setup.
ecg_signal = -data['ecg_nk']
raw_signal = data['ecg_raw']

print("✅ Data loaded successfully")
print(f"   Subject: sub-{subject_id}")
print(f"   Path:    {full_path}")
print(f"   Shape: {data.shape}")
print(f"   Columns: {list(data.columns)}")

✅ Data loaded successfully
   Subject: sub-032
   Path:    ./task_data/sub-032/sub-032_task-memory_ecg_task.parquet
   Shape: (7395320, 4)
   Columns: ['time', 'ecg_raw', 'ecg_mod', 'ecg_nk']


# Part 2: Analysis & Metrics Extraction

## Process ECG Signal

Clean the signal and extract R-peaks for wave delineation

In [3]:
# Run the NeuroKit2 ECG pipeline on the polarity-flipped signal.
# `signals` is a per-sample DataFrame; `info` is a dict holding R-peak
# locations, peak-correction diagnostics and delineated wave positions.
signals, info = nk.ecg_process(ecg_signal, sampling_rate=sampling_rate)

print("✅ ECG processing complete")
print(f"   Signals shape: {signals.shape}")
print(f"   Available metrics: {list(info.keys())}")

✅ ECG processing complete
   Signals shape: (7395320, 19)
   Available metrics: ['method_peaks', 'method_fixpeaks', 'ECG_R_Peaks', 'ECG_R_Peaks_Uncorrected', 'ECG_fixpeaks_ectopic', 'ECG_fixpeaks_missed', 'ECG_fixpeaks_extra', 'ECG_fixpeaks_longshort', 'ECG_fixpeaks_method', 'ECG_fixpeaks_rr', 'ECG_fixpeaks_drrs', 'ECG_fixpeaks_mrrs', 'ECG_fixpeaks_s12', 'ECG_fixpeaks_s22', 'ECG_fixpeaks_c1', 'ECG_fixpeaks_c2', 'sampling_rate', 'ECG_P_Peaks', 'ECG_P_Onsets', 'ECG_P_Offsets', 'ECG_Q_Peaks', 'ECG_R_Onsets', 'ECG_R_Offsets', 'ECG_S_Peaks', 'ECG_T_Peaks', 'ECG_T_Onsets', 'ECG_T_Offsets']


In [4]:
# Extract R-peaks and clean signal for delineation
rpeaks = info['ECG_R_Peaks']

# nk.ecg_process already cleaned the signal with the default method and stored
# it in the 'ECG_Clean' column, so reuse that instead of filtering the full
# recording a second time. to_numpy() keeps `clean` a plain 1-D array, as
# nk.ecg_clean would have returned.
clean = signals['ECG_Clean'].to_numpy()

print("✅ R-peaks extracted and signal cleaned")
print(f"   Number of R-peaks: {len(rpeaks)}")
print(f"   Clean signal shape: {clean.shape}")

✅ R-peaks extracted and signal cleaned
   Number of R-peaks: 12644
   Clean signal shape: (7395320,)


In [None]:
# Perform wave delineation to identify P, QRS, and T waves
signals_delineate, waves = nk.ecg_delineate(
    clean,
    rpeaks=rpeaks,
    sampling_rate=sampling_rate,
    method="dwt",
    show=False,
)

print("✅ Wave delineation complete")
# The lists in `waves` may contain NaN placeholders for beats where a wave
# could not be located, so count only the non-NaN entries rather than using
# len(), which would report the placeholders as detections.
for wave_label in ("P", "Q", "S", "T"):
    detected = int(pd.notna(waves[f"ECG_{wave_label}_Peaks"]).sum())
    print(f"   {wave_label}-waves: {detected}")

## Calculate Heart Rate Variability (HRV)

Compute time, frequency, and non-linear domain HRV indices

In [None]:
# Calculate HRV metrics (Time, Frequency, and Non-Linear domains).
# `info` is the dict returned by nk.ecg_process; show=True also draws the
# HRV summary figure.
hrv_indices = nk.hrv(info, sampling_rate=sampling_rate, show=True)

print("✅ HRV analysis complete")
print(f"   Computed {len(hrv_indices.columns)} HRV indices")
display(hrv_indices)

# Part 3: Visualization & Results

## ECG Signal with Delineated Waves

Visualize processed ECG with P, QRS, and T waves identified

In [6]:
# Draw NeuroKit2's ECG summary figure from the `signals` DataFrame and `info`
# dict returned by nk.ecg_process.
# NOTE(review): this plots the entire recording (~7.4M samples); consider
# slicing `signals` first if rendering is slow -- confirm acceptable runtime.
nk.ecg_plot(signals, info)


In [5]:
# Re-detect R-peaks directly on the cleaned signal and plot them.
# No artifact correction is applied here (method_fixpeaks is 'None'), so the
# peak count can differ from info['ECG_R_Peaks'] produced by nk.ecg_process.
# Use the notebook-wide `sampling_rate` constant instead of a literal so this
# cell cannot drift from the rest of the analysis.
nk.ecg_peaks(clean, sampling_rate=sampling_rate, show=True)

(         ECG_R_Peaks
 0                  0
 1                  0
 2                  0
 3                  0
 4                  0
 ...              ...
 7395315            0
 7395316            0
 7395317            0
 7395318            0
 7395319            0
 
 [7395320 rows x 1 columns],
 {'method_peaks': 'neurokit',
  'method_fixpeaks': 'None',
  'ECG_R_Peaks': array([    349,     975,    1588, ..., 7394085, 7394649, 7395219],
        shape=(12622,)),
  'sampling_rate': 1000})

## Segment the ECG into Individual Heartbeats

In [None]:
# Cut the cleaned signal into one epoch per heartbeat around each R-peak and
# plot the overlaid beats. The shared `sampling_rate` constant replaces the
# hard-coded 1000 so all cells stay consistent.
cqrs_epochs = nk.ecg_segment(clean, rpeaks=rpeaks, sampling_rate=sampling_rate, show=True)

## Summary Statistics

Key metrics and diagnostic information

In [None]:
# Display comprehensive summary of analysis.
# Reads the notebook globals `ecg_signal`, `sampling_rate`, `signals`, `info`
# and `hrv_indices` produced by the earlier cells.
banner = "=" * 70
n_samples = len(ecg_signal)
heart_rate = signals['ECG_Rate']

print("\n" + banner)
print("📊 ECG ANALYSIS SUMMARY")
print(banner)

print("\n📈 Recording Details:")
print(f"   Duration: {n_samples / sampling_rate:.2f} seconds")
print(f"   Sampling Rate: {sampling_rate} Hz")
print(f"   Total Samples: {n_samples:,}")

print("\n❤️  Heart Rate Analysis:")
print(f"   R-peaks Detected: {len(info['ECG_R_Peaks'])}")
print(f"   Average HR: {heart_rate.mean():.1f} bpm")
print(f"   HR Min: {heart_rate.min():.1f} bpm")
print(f"   HR Max: {heart_rate.max():.1f} bpm")

print("\n🌊 Wave Delineation:")
# The delineation lists may hold NaN placeholders for beats where a wave was
# not found; count only the non-NaN entries so the numbers reflect actual
# detections rather than the number of beats.
for wave_label in ("P", "Q", "S", "T"):
    n_detected = int(pd.notna(info[f"ECG_{wave_label}_Peaks"]).sum())
    print(f"   {wave_label}-waves: {n_detected} detected")

print("\n📋 HRV Indices Calculated:")
print(f"   Total: {len(hrv_indices.columns)} indices")
print("\n   Top HRV Metrics:")
# "Top" here simply means the first five columns of the HRV table.
for col in list(hrv_indices.columns)[:5]:
    print(f"   - {col}: {hrv_indices[col].values[0]:.4f}")

print("\n" + banner)