In [None]:
import pyarrow as pa
import pyarrow.parquet as pq
import polars as pl
import numpy as np
from pathlib import Path

In [None]:
# Recordings available for analysis; stems look like
# "<label>_<rate>Hz_<date>_<time>".
FILE_NAMES = [
    "red_100Hz_2024-04-01_11-29-27",
    "red_100Hz_2024-04-01_14-40-56",
]
FILE_NAME = Path(FILE_NAMES[1] + ".parquet")
table = pq.read_table(FILE_NAME)
# read sample rate from filename: second "_"-separated token, e.g. "100Hz"
sample_rate_str = FILE_NAME.stem.split("_")[1]
_hz_idx = sample_rate_str.find("Hz")
# str.find returns -1 when "Hz" is absent; slicing with -1 would silently drop
# the last digit ("100" -> 10), so fail loudly instead of mis-scaling the time axis.
if _hz_idx <= 0:
    raise ValueError(
        f"Cannot parse sample rate from {FILE_NAME.name!r}: "
        f"expected a token like '100Hz', got {sample_rate_str!r}"
    )
sample_rate = int(sample_rate_str[:_hz_idx])
SAMPLE_RATE = sample_rate
# seconds between two consecutive samples
SAMPLE_INTERVAL = 1 / SAMPLE_RATE
display(f"Sample rate: {SAMPLE_RATE} Hz")

In [None]:
# Extract the "red" column of the table as a NumPy array and show its shape
data = table.column("red").to_numpy()
data.shape

In [None]:
import matplotlib.pyplot as plt

# Build the time axis from SAMPLE_INTERVAL (derived from the file name) instead
# of a hard-coded 1.25 ms (800 Hz) step, which disagreed with SAMPLE_RATE.
# Scaling an integer index also guarantees len(xs) == len(data); np.arange with
# a float step can be off by one element.
xs = np.arange(data.shape[0]) * SAMPLE_INTERVAL
# set axis labels
plt.xlabel("Time (s)")
plt.ylabel("Red LED Reading (ADC Value)")
plt.plot(xs, data)

In [None]:
from enum import Enum, auto

# Level cut-off passed to segment_data: samples strictly greater than this
# value count as HIGH, everything else as LOW.
THRESHOLD = 1.5e6


class Level(Enum):
    LOW = auto()
    HIGH = auto()


Segment = tuple[int, int, Level]


def segment_data(data: np.ndarray, threshold: float | int) -> list[Segment]:
    last_index = 0
    last_state = Level.HIGH if data[0] > threshold else Level.LOW
    segments: list[Segment] = []
    for i, n in enumerate(data):
        if n > threshold:
            if last_state == Level.LOW:
                segments.append((last_index, i, Level.LOW))
                last_index = i
                last_state = Level.HIGH
            else:
                continue
        else:
            if last_state == Level.HIGH:
                segments.append((last_index, i, Level.HIGH))
                last_index = i
                last_state = Level.LOW
            else:
                continue
        if i == len(data) - 1:
            segments.append((last_index, i, last_state))
    return segments

# Split the whole recording into (start, end, Level) runs around THRESHOLD
segments = segment_data(data, THRESHOLD)

In [None]:
def segment_length(segment: Segment) -> int:
    """Return the end index of ``segment`` minus its start index."""
    start, end = segment[0], segment[1]
    return end - start

# Length of every segment, followed by the 75th percentile of those lengths
segment_lens = list(map(segment_length, segments))
np.percentile(segment_lens, 75)

In [None]:
# Keep only the segments whose length exceeds 100; shorter ones are discarded
real_segments = list(filter(lambda seg: segment_length(seg) > 100, segments))
display(real_segments)

In [None]:
# high plot as red, low plot as blue
# Sample indices are converted to seconds with SAMPLE_INTERVAL (derived from
# the file name) rather than a hard-coded 1.25 ms (800 Hz) step.
for start, end, level in real_segments:
    color = "red" if level == Level.HIGH else "blue"
    plt.axvspan(start * SAMPLE_INTERVAL, end * SAMPLE_INTERVAL, color=color, alpha=0.5)

In [None]:
import random
import plotly.express as px
import plotly.graph_objects as go
# we're only interested in the high segments
high_segments_idx = [seg for seg in real_segments if seg[2] == Level.HIGH]
display(high_segments_idx)
high_segments = [data[start:end] for start, end, _ in high_segments_idx]

# Which high segment to look at. Random picks that were tried before:
# lucky = random.sample(high_segments, 1)[0]
# lucky_idx = random.randint(0, len(high_segments) - 1)
lucky_idx = 0
display(f"lucky index: {lucky_idx}")
# 2 might be a good one
# 1484 : 70_000
lucky = high_segments[lucky_idx]
# filter out below 1 percentile and above 99 percentile
# filtered_lucky = np.clip(lucky, np.percentile(lucky, 1),
#                          np.percentile(lucky, 99))
# TODO: maybe doing some edge detection
# like 1D canny
# I don't feel the necessity if DC offset is removed (we have different significant DC offset)

# Two x coordinates for the same samples: index within the segment and seconds
xs = np.arange(len(lucky))
xs_time = xs * SAMPLE_INTERVAL
# px.line(y=lucky, x=xs).show()
trace = go.Scatter(x=xs, y=lucky, mode="lines")
trace_time = go.Scatter(x=xs_time, y=lucky, mode="lines")
fig = go.Figure(data=[trace_time, trace])
# Second x axis on top, see
# https://community.plotly.com/t/can-plotly-support-2-x-axis-and-2-y-axis-in-one-graph/38303/2
fig.update_layout(
    xaxis={"title": "Sample Index"},
    yaxis={"title": "Red LED Reading (ADC Value)"},
    xaxis2={"title": "Time (s)", "overlaying": "x", "side": "top"},
    showlegend=False,
)
# The time trace is attached to the top axis and drawn fully transparent
fig.data[0].update(xaxis="x2", yaxis="y", line={"color": "rgba(0,0,0,0)"})  # type: ignore
fig.show()

In [None]:
from typing import Optional


# Samples handed to the filtering cells; currently the whole selected segment.
workable_data: Optional[np.ndarray] = lucky
# Manual trims tried for individual segments of one recording:
# if FILE_NAME.stem == "red_100Hz_2024-04-01_11-29-27":
#     if lucky_idx == 1:
#         workable_data = lucky[4299:-100]
#     if lucky_idx == 2:
#         workable_data = lucky[765:-50]
#     if lucky_idx == 6:
#         workable_data = lucky[1678:-200]

# Time axis in seconds for the (possibly trimmed) samples
xs_time = np.arange(len(workable_data)) * SAMPLE_INTERVAL  # type: ignore
px.line(x=xs_time, y=workable_data).show()

In [None]:
import heartpy as hp
from scipy.signal import butter, detrend, filtfilt, iirnotch, savgol_filter, wiener, sosfilt, sosfiltfilt
from scipy.io import loadmat
from heartpy import filter_signal

# Load the .mat file; its "b" and "a" entries are later passed to filtfilt as
# filter coefficients. Display the loaded dict for inspection.
mat = loadmat("HR_filter_ba.mat")
display(mat)

In [None]:
# 0.4Hz to 100Hz
# HeartPy examples and documentation used as reference:
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/examples/1_regular_PPG/Analysing_a_PPG_signal.ipynb
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/examples/5_noisy_ECG/Analysing_Noisy_ECG.ipynb
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/docs/algorithmfunctioning.rst
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/docs/heartrateanalysis.rst

# Notes on the HeartPy helpers:
# remove_baseline_wander is just a notch filter applied to low frequency (to remove DC offset)
# enhance_ecg_peaks is useless
# the high pass/low pass/band pass filter here are all butterworth filter

# We will use the bandpass variant.
# The passband used below is 0.75 Hz to 6.5 Hz (45 bpm to 390 bpm, bpm = 60 x Hz).
# A second filter is applied from the b/a coefficients stored in the .mat file
# (exported from MATLAB) so both results can be compared.

# 2nd-order Butterworth band-pass in second-order-sections form
scipy_bp = butter(N=2, Wn=[0.75, 6.5], btype="band", output="sos", fs=SAMPLE_RATE)
filtered_scipy = sosfiltfilt(scipy_bp, workable_data)

# Flatten the loaded coefficient arrays to 1-D before passing them to filtfilt
b = np.asarray(mat["b"]).flatten()
a = np.asarray(mat["a"]).flatten()
filtered_mat = filtfilt(b, a, workable_data)

# drop the ridiculously high values
# I'm not sure about the value range
filtered_scipy = filtered_scipy.clip(min=-255, max=255 - 1)
filtered_mat = filtered_mat.clip(min=-255, max=255 - 1)

# scale the data (maybe not necessary)
# filtered_scipy = hp.scale_data(filtered_scipy)
# filtered_mat = hp.scale_data(filtered_mat)

# Overlay both filtered signals on the time axis for comparison
trace_bp = go.Scatter(
    x=xs_time,
    y=filtered_scipy,
    mode="lines",
    name="Bandpass Filtered (Scipy)",
)
trace_bp_matlab = go.Scatter(
    x=xs_time,
    y=filtered_mat,
    mode="lines",
    name="Bandpass Filtered (MATLAB)",
)
fig = go.Figure(data=[trace_bp, trace_bp_matlab])
fig.update_layout(
    xaxis={"title": "Time (s)"},
    yaxis={"title": "Red LED Reading (ADC Value)"},
)
fig.show()

In [None]:
# Notes on hp.process options:
# - calc_freq: whether to calculate frequency domain measures
# - interp_threshold: the amplitude threshold beyond which will be checked for
#   clipping. Recommended is to take this as the maximum value of the ADC with
#   some margin for signal noise
# - reject_segmentwise: whether to reject segments with more than 30% rejected
#   beats. By default looks at segments of 10 beats at a time.
# - clean_rr uses by default quotient-filtering, which is a bit aggressive.
#   You can set 'iqr' or 'z-score' with the clean_rr_method flag.
working, measures = hp.process(
    filtered_mat,
    sample_rate=SAMPLE_RATE,
    freq_method="welch",
    interp_clipping=False,
    clean_rr_method="quotient-filtering",
)

# Take into consideration that the scale for RMSSD doesn't typically exceed +/-
# 130, SDSD doesn't differ by much. This means that even a few incorrectly
# detected peaks are already introducing large measurement errors into the output
# variables. The algorithm described here is specifically designed to handle noisy
# PPG data from cheap sensors. The main design criteria was to minimise the number
# of incorrectly placed peaks as to minimise the error introduced into the output
# measures.

# Show the computed measures, then HeartPy's plot of the analysed signal
display(measures)
hp.plotter(working, measures, figsize=(18, 4), moving_average=True)

In [None]:
# HeartPy's plot_breathing, drawn from the hp.process results
hp.plot_breathing(working, measures, figsize=(18, 4))

In [None]:
# HeartPy's plot_poincare, drawn from the hp.process results
hp.plot_poincare(working, measures, figsize=(4, 4))