In [None]:
import pyarrow as pa
import pyarrow.parquet as pq
import polars as pl
import numpy as np

In [None]:
# Load the recording stored in "red.parquet" into an Arrow table.
table = pq.read_table("red.parquet")

In [None]:
# Pull the "red" column out of the table as a NumPy array and show its shape.
red_column = table.column("red")
data = red_column.to_numpy()
data.shape

In [None]:
import matplotlib.pyplot as plt

# Sampling rate of the recording in Hz (one sample every 1.25 ms).
SAMPLE_RATE = 800

# Time stamp, in seconds, of every sample. It is derived from integer sample
# indices so that xs always has exactly len(data) entries: np.arange with a
# fractional step can come out one element too long or too short because of
# floating-point rounding, which would make plt.plot(xs, data) fail.
xs = np.arange(data.shape[0]) / SAMPLE_RATE

plt.xlabel("Time (s)")
plt.ylabel("Red LED Reading (ADC Value)")
# Trailing semicolon suppresses the list-of-Line2D repr in the cell output.
plt.plot(xs, data);

In [None]:
from enum import Enum, auto

THRESHOLD = 1.5e6


class Level(Enum):
    LOW = auto()
    HIGH = auto()


Segment = tuple[int, int, Level]


def segment_data(data: np.ndarray, threshold: float | int) -> list[Segment]:
    last_index = 0
    last_state = Level.HIGH if data[0] > threshold else Level.LOW
    segments: list[Segment] = []
    for i, n in enumerate(data):
        if n > threshold:
            if last_state == Level.LOW:
                segments.append((last_index, i, Level.LOW))
                last_index = i
                last_state = Level.HIGH
            else:
                continue
        else:
            if last_state == Level.HIGH:
                segments.append((last_index, i, Level.HIGH))
                last_index = i
                last_state = Level.LOW
            else:
                continue
        if i == len(data) - 1:
            segments.append((last_index, i, last_state))
    return segments

# Segment the whole recording into runs above / not above THRESHOLD.
segments = segment_data(data, THRESHOLD)

In [None]:
def segment_length(segment: Segment) -> int:
    """Return the span of ``segment``: its end index minus its start index."""
    start, end = segment[0], segment[1]
    return end - start

# Length of every detected segment, followed by the 75th percentile of those
# lengths (shown as the cell output).
segment_lens = list(map(segment_length, segments))
np.percentile(segment_lens, 75)

In [None]:
# Segments must be strictly longer than this many samples to be kept
# (100 samples is 0.125 s at 800 Hz).
MIN_SEGMENT_SAMPLES = 100

real_segments = []
for candidate in segments:
    if segment_length(candidate) > MIN_SEGMENT_SAMPLES:
        real_segments.append(candidate)
display(real_segments)

In [None]:
# Shade the time span of every retained segment: HIGH in red, LOW in blue.
SEGMENT_COLORS = {Level.HIGH: "red", Level.LOW: "blue"}

plt.xlabel("Time (s)")
for start, end, level in real_segments:
    # Convert sample indices to seconds with the shared SAMPLE_RATE constant
    # instead of repeating the 1.25 ms sample period as a literal.
    plt.axvspan(
        start / SAMPLE_RATE,
        end / SAMPLE_RATE,
        color=SEGMENT_COLORS[level],
        alpha=0.5,
    )

In [None]:
import random
import plotly.express as px
import plotly.graph_objects as go

# Only the HIGH segments are analysed from here on.
high_segments_idx = list(filter(lambda seg: seg[2] == Level.HIGH, real_segments))
display(high_segments_idx)
# Sample values of each HIGH segment, in the same order as high_segments_idx.
high_segments = [data[start:stop] for start, stop, _level in high_segments_idx]

# Index of the HIGH segment to inspect. Fixed at 0 here; random alternatives:
#   lucky = random.sample(high_segments, 1)[0]
#   lucky_idx = random.randint(0, len(high_segments) - 1)
lucky_idx = 0
display(f"lucky index: {lucky_idx}")
# Notes: index 2 might be a good one (1484 : 70_000).
lucky = high_segments[lucky_idx]

# Ideas that are not applied:
# - clip to the 1st..99th percentile:
#     filtered_lucky = np.clip(lucky, np.percentile(lucky, 1),
#                              np.percentile(lucky, 99))
# - TODO: maybe some edge detection (like a 1D Canny). Probably unnecessary
#   once the DC offset is removed (segments have significantly different DC
#   offsets).
# px.line(y=lucky).show()

In [None]:
# Hand-picked window of one HIGH segment that the rest of the notebook analyses.
# The commented-out assignments are alternative windows, grouped by the index of
# the HIGH segment they slice; only one assignment is active.
# 0
# workable_data = high_segments[0][1192:67_300]
# workable_data = high_segments[0][12192:67_300]
# Active window: 10_599 samples, i.e. about 13.2 s at 800 Hz.
workable_data = high_segments[0][68_401:79_000]
# 1
# workable_data = high_segments[1][1009:9530]
# workable_data = high_segments[1][11_387:42_097]
# workable_data = high_segments[1][14_000:14_000+16_000]
# 2 is unusable (maybe?)
# workable_data = high_segments[2][5885:70_000]
# workable_data = high_segments[2][2994:2994+16_000]
# 3 is unusable (can almost confirm there's no valid signal)
# workable_data = high_segments[3][1536:19_000]

# a window of 10k samples at 800Hz seems to be an optimal window size
# HF power is usually computed over a minimum of 1 minute of good signal
# workable_data = high_segments[0][40_000:40_000+10_000]
# workable_data = high_segments[0][30_000:30_000+10_000]

# remove DC offset
# detrended = detrend(workable_data)
# Interactive plot of the selected (unfiltered) window.
px.line(y=workable_data).show()

In [None]:
from scipy.signal import detrend
import heartpy as hp
from scipy.signal import wiener

# Background reading (HeartPy examples and docs):
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/examples/1_regular_PPG/Analysing_a_PPG_signal.ipynb
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/examples/5_noisy_ECG/Analysing_Noisy_ECG.ipynb
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/docs/algorithmfunctioning.rst
# https://github.com/paulvangentcom/heartrate_analysis_python/blob/master/docs/heartrateanalysis.rst
# (earlier note kept from the original cell: "0.4Hz to 100Hz")

# Stage 1 - notch filter at a very low frequency to remove the DC offset.
# hp.remove_baseline_wander is just this kind of notch filter;
# hp.enhance_ecg_peaks turned out to be useless here.
notch_filtered = hp.filter_signal(
    workable_data,
    sample_rate=SAMPLE_RATE,
    filtertype="notch",
    order=2,
    cutoff=0.005,
    return_top=False,
)

# Stage 2 - band-pass (HeartPy's high/low/band-pass filters are all
# Butterworth filters). The pass band used is 0.7 Hz to 6 Hz, i.e. 42 to
# 360 bpm (bpm = 60 x Hz).
# NOTE(review): earlier notes quoted 0.8 Hz (48 bpm) and 3 Hz (180 bpm) as the
# band edges; the values actually passed are 0.7 and 6 - confirm which is
# intended.
band_passed = hp.filter_signal(
    notch_filtered,
    sample_rate=SAMPLE_RATE,
    filtertype="bandpass",
    order=3,
    cutoff=(0.7, 6),
    return_top=False,
)
# Low-pass-only alternative:
# band_passed = hp.filter_signal(notch_filtered, sample_rate=SAMPLE_RATE, filtertype="lowpass", order=3, cutoff=6, return_top=False)

# Stage 3 - rescale with HeartPy's default scaling, then plot the result.
w_filtered = hp.scale_data(band_passed)
px.line(y=w_filtered).show()

In [None]:
# Notes on hp.process options, taken from the HeartPy documentation:
# - calc_freq: whether to calculate frequency-domain measures.
# - interp_threshold: amplitude above which samples are checked for clipping;
#   recommended is the maximum ADC value with some margin for signal noise.
# - reject_segmentwise: whether to reject segments with more than 30% rejected
#   beats (looks at segments of 10 beats at a time by default).
# - clean_rr uses quotient filtering by default, which is a bit aggressive;
#   'iqr' or 'z-score' can be selected with clean_rr_method.
# NOTE(review): neither calc_freq nor clean_rr is passed below, so
# freq_method / clean_rr_method may have no effect - confirm against the
# hp.process defaults.
working, measures = hp.process(
    w_filtered,
    sample_rate=SAMPLE_RATE,
    freq_method="welch",
    interp_clipping=True,
    clean_rr_method="quotient-filtering",
)

# Reading the output (from the HeartPy docs): RMSSD does not typically exceed
# +/- 130 and SDSD does not differ by much, so even a few incorrectly detected
# peaks already introduce large errors into the output measures. The algorithm
# is designed for noisy PPG data from cheap sensors and aims to minimise the
# number of incorrectly placed peaks.

display(measures)
hp.plotter(working, measures, figsize=(18, 4), moving_average=True)

In [None]:
# Breathing plot built from the hp.process results (working, measures).
hp.plot_breathing(working, measures, figsize=(18, 4))

In [None]:
# Poincare plot built from the hp.process results (working, measures).
hp.plot_poincare(working, measures, figsize=(4, 4))