Data Loading & Parsing

Resampling (140 Hz→50 Hz)

Body‑Movement & Bed‑Occupancy Detection

DWT S4 Extraction (J‑peak emphasis)

J‑Peak Detection

BCG‑based HR Calculation

Reference HR Extraction (RR files via Pan–Tompkins)

Synchronization of BCG & ECG HR

Error Metrics & Statistics

Plots & Reporting



### This notebook runs and tests each pipeline function separately to verify its correctness

In [4]:
import os
import pandas as pd

DATA_ROOT = "../dataset/data"

def parse_filename(fn):
    """
    Split a data-file name into its three underscore-separated fields.

    Example: "01_20231104_BCG.csv" → ("01", "20231104", "BCG"),
    i.e. (subject, date, data type). The extension is dropped first.
    Raises ValueError when the stem does not contain exactly three fields.
    """
    stem = os.path.splitext(fn)[0]
    fields = stem.split("_")
    subject, day, kind = fields
    return subject, day, kind

def discover_files(data_root=DATA_ROOT):
    """
    Walk data_root and index every CSV recording by subject and date.

    Directories that are looked at (missing ones are skipped):
        <data_root>/<subj>/BCG/*.csv
        <data_root>/<subj>/Reference/RR/*.csv
        <data_root>/<subj>/Reference/resp/*.csv

    Returns a dict:
        files[subj][date] = {"BCG": [paths...], "RR": [paths...], "resp": [...]}
    A type key exists only when at least one file of that type was found.
    Every subject directory gets an entry, even if it contains no CSV files.

    Directory listings are sorted because os.listdir returns names in
    arbitrary order; this keeps the path lists reproducible across platforms.

    Raises ValueError (from parse_filename) for a .csv file whose name does
    not consist of exactly three underscore-separated fields.
    """
    def _csv_names(folder):
        # Names of the .csv files directly inside `folder`, in sorted order.
        return sorted(fn for fn in os.listdir(folder) if fn.endswith(".csv"))

    files = {}
    for subj in sorted(os.listdir(data_root)):
        subj_dir = os.path.join(data_root, subj)
        if not os.path.isdir(subj_dir):
            continue
        by_date = files[subj] = {}

        # BCG recordings sit directly in <subj>/BCG.
        bcg_dir = os.path.join(subj_dir, "BCG")
        if os.path.isdir(bcg_dir):
            for fn in _csv_names(bcg_dir):
                _, date, _ = parse_filename(fn)
                by_date.setdefault(date, {}).setdefault("BCG", []).append(
                    os.path.join(bcg_dir, fn)
                )

        # Reference signals sit one level deeper: <subj>/Reference/{RR,resp}.
        for ref_type in ("RR", "resp"):
            ref_dir = os.path.join(subj_dir, "Reference", ref_type)
            if not os.path.isdir(ref_dir):
                continue
            for fn in _csv_names(ref_dir):
                # The dict key is the type taken from the file name,
                # not the name of the folder the file was found in.
                _, date, dtype = parse_filename(fn)
                by_date.setdefault(date, {}).setdefault(dtype, []).append(
                    os.path.join(ref_dir, fn)
                )
    return files

# Build the index and dump it raw
files = discover_files()
print(files)
print("____________*******************&&&&&&&&&&&&&&&&^^^^^^^^^^$$$$$$$$$$")
# Per-subject overview: number of dates and how many of them carry BCG / RR files
for subj, dates in files.items():
    entries = list(dates.values())
    n_bcg = sum("BCG" in entry for entry in entries)
    n_rr = sum("RR" in entry for entry in entries)
    print(f"Subject {subj}: {len(dates)} dates, {n_bcg} have BCG, {n_rr} have RR reference")

# Per-date file counts for subject "01"
print("\nExample entries for subject 01:")
for date, types in files["01"].items():
    counts = {kind: len(types.get(kind, [])) for kind in ("BCG", "RR", "resp")}
    print(f"  Date {date}: has BCG×{counts['BCG']}, RR×{counts['RR']}, resp×{counts['resp']}")


{'01': {'20231104': {'BCG': ['../dataset/data\\01\\BCG\\01_20231104_BCG.csv'], 'RR': ['../dataset/data\\01\\Reference\\RR\\01_20231104_RR.csv']}, '20231105': {'BCG': ['../dataset/data\\01\\BCG\\01_20231105_BCG.csv'], 'RR': ['../dataset/data\\01\\Reference\\RR\\01_20231105_RR.csv']}, '20231106': {'BCG': ['../dataset/data\\01\\BCG\\01_20231106_BCG.csv']}, '20231107': {'BCG': ['../dataset/data\\01\\BCG\\01_20231107_BCG.csv']}, '20231108': {'BCG': ['../dataset/data\\01\\BCG\\01_20231108_BCG.csv']}, '20231109': {'BCG': ['../dataset/data\\01\\BCG\\01_20231109_BCG.csv']}}, '02': {'20231103': {'BCG': ['../dataset/data\\02\\BCG\\02_20231103_BCG.csv'], 'RR': ['../dataset/data\\02\\Reference\\RR\\02_20231103_RR.csv']}, '20231104': {'BCG': ['../dataset/data\\02\\BCG\\02_20231104_BCG.csv'], 'RR': ['../dataset/data\\02\\Reference\\RR\\02_20231104_RR.csv']}, '20231105': {'BCG': ['../dataset/data\\02\\BCG\\02_20231105_BCG.csv']}, '20231106': {'BCG': ['../dataset/data\\02\\BCG\\02_20231106_BCG.csv']}, 

In [9]:
import os
import pandas as pd
import numpy as np
from scipy.signal import resample_poly
from math import gcd

#— parse filenames —
def parse_filename(fn):
    """Return (subject, date, kind) parsed from a name such as "01_20231104_BCG.csv"."""
    stem, _ext = os.path.splitext(fn)
    subject, day, kind = stem.split("_")
    return subject, day, kind  # e.g. "01","20231104","BCG"

#— load raw BCG CSV —
def load_bcg_csv(path):
    """
    Read one raw BCG recording from a CSV file.

    The signal is the whole first column cast to float; the sampling rate
    is the value in the first row of the third column.

    Returns:
      (signal, fs): 1-d float array and the sampling rate as a float
    """
    table = pd.read_csv(path)
    samples = table.iloc[:, 0].astype(float).to_numpy()
    rate = float(table.iloc[0, 2])
    return samples, rate

#— load RR CSV (reference) —
def load_rr_csv(path):
    """
    Load Reference RR CSV with columns:
      Timestamp (yyyy/MM/dd H:mm:ss), Heart Rate (bpm), RR Interval in seconds
    Only the first two columns are read.
    Returns:
      times: 1-d array of seconds (float) elapsed since the first row's timestamp
      hr:    1-d array of heart-rate (bpm)
    Raises:
      ValueError if a timestamp does not match the expected format,
      IndexError if the file contains no data rows.
    """
    df = pd.read_csv(path)
    # parse the Timestamp strings:
    stamps = pd.to_datetime(df.iloc[:, 0], format="%Y/%m/%d %H:%M:%S")
    # Subtract as timestamps and let pandas convert the timedelta to seconds.
    # Casting to int64 and dividing by 1e9 is only correct when the column has
    # nanosecond resolution, which is not guaranteed across pandas versions.
    t_secs = (stamps - stamps.iloc[0]).dt.total_seconds()
    hr = df.iloc[:, 1].astype(float).to_numpy()
    return t_secs.to_numpy(), hr

#— rational resample via polyphase (no aliasing) —
def resample_signal(sig, orig_fs, target_fs):
    """
    Resample `sig` from orig_fs to target_fs with scipy's polyphase resampler.

    The rate change is expressed as a reduced integer ratio up/down.
    Rates do not have to be integers: each is approximated by a fraction
    with a denominator of at most 1000 (exact for values such as 62.5 or
    44.1), so a non-integer rate is no longer truncated before the ratio
    is formed.

    Parameters:
      sig:       1-d sequence or array of samples
      orig_fs:   sampling rate of `sig`, must be finite and > 0
      target_fs: desired sampling rate, must be finite and > 0
    Returns:
      1-d float array resampled by the factor target_fs / orig_fs
    Raises:
      ValueError if either rate is not a positive finite number.
    """
    from fractions import Fraction  # local import: only needed here

    orig_fs, target_fs = float(orig_fs), float(target_fs)
    if not (np.isfinite(orig_fs) and np.isfinite(target_fs)
            and orig_fs > 0 and target_fs > 0):
        raise ValueError(
            f"sampling rates must be positive and finite, got "
            f"orig_fs={orig_fs}, target_fs={target_fs}"
        )

    src = Fraction(orig_fs).limit_denominator(1000)
    dst = Fraction(target_fs).limit_denominator(1000)
    if src == 0 or dst == 0:
        # rate so small that it rounds to zero at the chosen precision
        raise ValueError(
            f"sampling rates too small to form a ratio: "
            f"orig_fs={orig_fs}, target_fs={target_fs}"
        )

    ratio = dst / src  # Fraction keeps this in lowest terms
    samples = np.asarray(sig, dtype=float)
    return resample_poly(samples, ratio.numerator, ratio.denominator)


#### Loading each subject's BCG recordings and pairing them with their RR reference files

In [10]:
# discover all BCG & RR paths
DATA = "../dataset/data"
pairs = []   # list of (subj, date, bcg_path, rr_path_or_None)
for subj in sorted(os.listdir(DATA)):
    bdir = os.path.join(DATA, subj, "BCG")
    rdir = os.path.join(DATA, subj, "Reference", "RR")
    if not os.path.isdir(bdir):
        continue
    # os.listdir returns names in arbitrary order; sort so that the order of
    # `pairs` (and which file wins when two share a date) is reproducible.
    bcg_files = sorted(f for f in os.listdir(bdir) if f.endswith("_BCG.csv"))
    if os.path.isdir(rdir):
        rr_files = sorted(f for f in os.listdir(rdir) if f.endswith("_RR.csv"))
    else:
        rr_files = []
    # date → RR path, built once per subject instead of re-parsing every RR
    # file name for each BCG file; a later file with the same date overrides
    # an earlier one.
    rr_by_date = {parse_filename(rfn)[1]: os.path.join(rdir, rfn) for rfn in rr_files}
    for bfn in bcg_files:
        date = parse_filename(bfn)[1]
        # rr path is None when no RR file exists for this date
        pairs.append((subj, date,
                      os.path.join(bdir, bfn),
                      rr_by_date.get(date)))
# quick summary
print(f"Found {len(pairs)} BCG nights, of which {sum(1 for p in pairs if p[3])} have RR.")
print(pairs)


Found 212 BCG nights, of which 42 have RR.
[('01', '20231104', '../dataset/data\\01\\BCG\\01_20231104_BCG.csv', '../dataset/data\\01\\Reference\\RR\\01_20231104_RR.csv'), ('01', '20231105', '../dataset/data\\01\\BCG\\01_20231105_BCG.csv', '../dataset/data\\01\\Reference\\RR\\01_20231105_RR.csv'), ('01', '20231106', '../dataset/data\\01\\BCG\\01_20231106_BCG.csv', None), ('01', '20231107', '../dataset/data\\01\\BCG\\01_20231107_BCG.csv', None), ('01', '20231108', '../dataset/data\\01\\BCG\\01_20231108_BCG.csv', None), ('01', '20231109', '../dataset/data\\01\\BCG\\01_20231109_BCG.csv', None), ('02', '20231103', '../dataset/data\\02\\BCG\\02_20231103_BCG.csv', '../dataset/data\\02\\Reference\\RR\\02_20231103_RR.csv'), ('02', '20231104', '../dataset/data\\02\\BCG\\02_20231104_BCG.csv', '../dataset/data\\02\\Reference\\RR\\02_20231104_RR.csv'), ('02', '20231105', '../dataset/data\\02\\BCG\\02_20231105_BCG.csv', None), ('02', '20231106', '../dataset/data\\02\\BCG\\02_20231106_BCG.csv', None)

##### Paired BCG recordings and their corresponding RR reference files

In [12]:
# keep only the nights for which an RR reference path was found
paired = [entry for entry in pairs if entry[3] is not None]
paired

[('01',
  '20231104',
  '../dataset/data\\01\\BCG\\01_20231104_BCG.csv',
  '../dataset/data\\01\\Reference\\RR\\01_20231104_RR.csv'),
 ('01',
  '20231105',
  '../dataset/data\\01\\BCG\\01_20231105_BCG.csv',
  '../dataset/data\\01\\Reference\\RR\\01_20231105_RR.csv'),
 ('02',
  '20231103',
  '../dataset/data\\02\\BCG\\02_20231103_BCG.csv',
  '../dataset/data\\02\\Reference\\RR\\02_20231103_RR.csv'),
 ('02',
  '20231104',
  '../dataset/data\\02\\BCG\\02_20231104_BCG.csv',
  '../dataset/data\\02\\Reference\\RR\\02_20231104_RR.csv'),
 ('03',
  '20231103',
  '../dataset/data\\03\\BCG\\03_20231103_BCG.csv',
  '../dataset/data\\03\\Reference\\RR\\03_20231103_RR.csv'),
 ('03',
  '20231105',
  '../dataset/data\\03\\BCG\\03_20231105_BCG.csv',
  '../dataset/data\\03\\Reference\\RR\\03_20231105_RR.csv'),
 ('04',
  '20231103',
  '../dataset/data\\04\\BCG\\04_20231103_BCG.csv',
  '../dataset/data\\04\\Reference\\RR\\04_20231103_RR.csv'),
 ('04',
  '20231104',
  '../dataset/data\\04\\BCG\\04_20231104

In [14]:
# parameters: common sampling rate and window length (seconds → samples)
TARGET_FS = 50.0
WIN_SEC = 6 * 60
WIN_SAMPS = int(WIN_SEC * TARGET_FS)

# (subj, date) → list of equal-length windows, one dict per signal
bcg_windows = {}
rr_windows = {}

for subj, date, bcg_path, rr_path in paired:
    night = (subj, date)

    # BCG: load at its native rate, then bring it to TARGET_FS
    bcg, fs0 = load_bcg_csv(bcg_path)
    bcg50 = resample_signal(bcg, fs0, TARGET_FS)

    # reference HR: linear interpolation onto a uniform TARGET_FS time grid
    # running from 0 up to (not including) the last reference timestamp
    rr_t, rr_hr = load_rr_csv(rr_path)
    uni_t = np.arange(0, rr_t.max(), 1.0 / TARGET_FS)
    rr50 = np.interp(uni_t, rr_t, rr_hr)

    # only windows that are complete in BOTH signals are kept
    nwin = min(len(bcg50) // WIN_SAMPS, len(rr50) // WIN_SAMPS)
    bounds = [(k * WIN_SAMPS, (k + 1) * WIN_SAMPS) for k in range(nwin)]

    bcg_windows[night] = [bcg50[start:stop] for start, stop in bounds]
    rr_windows[night] = [rr50[start:stop] for start, stop in bounds]

# sanity check: both signals should report the same window count per night
for key, bcg_list in bcg_windows.items():
    print(key,
          "→", len(bcg_list), "windows BCG  |",
          len(rr_windows.get(key, [])), "windows RR")


('01', '20231104') → 71 windows BCG  | 71 windows RR
('01', '20231105') → 108 windows BCG  | 108 windows RR
('02', '20231103') → 89 windows BCG  | 89 windows RR
('02', '20231104') → 83 windows BCG  | 83 windows RR
('03', '20231103') → 66 windows BCG  | 66 windows RR
('03', '20231105') → 94 windows BCG  | 94 windows RR
('04', '20231103') → 92 windows BCG  | 92 windows RR
('04', '20231104') → 90 windows BCG  | 90 windows RR
('05', '20231103') → 82 windows BCG  | 82 windows RR
('05', '20231104') → 98 windows BCG  | 98 windows RR
('06', '20231110') → 77 windows BCG  | 77 windows RR
('06', '20231111') → 78 windows BCG  | 78 windows RR
('07', '20231111') → 128 windows BCG  | 128 windows RR
('07', '20231112') → 98 windows BCG  | 98 windows RR
('08', '20231110') → 72 windows BCG  | 72 windows RR
('08', '20231111') → 86 windows BCG  | 86 windows RR
('09', '20231110') → 100 windows BCG  | 100 windows RR
('09', '20231111') → 87 windows BCG  | 87 windows RR
('10', '20231110') → 75 windows BCG  | 7

In [17]:
# Display the list of BCG windows stored under the key for subject "01", date "20231104"
bcg_windows[('01', '20231104')]

[array([-60.30089056, -94.25285993, -84.24473276, ..., -60.89721647,
        -64.7920215 , -70.48334942], shape=(18000,)),
 array([ -72.07307946,  -69.09050382,  -67.21229853, ...,   22.15137356,
         -35.45521505, -200.64668552], shape=(18000,)),
 array([-193.72211869,   19.266572  ,   19.77280663, ...,  666.05085011,
        2822.9315012 , 2614.52511731], shape=(18000,)),
 array([  3815.49787698,    217.8640663 ,   -179.04170609, ...,
        -32766.74349705, -32766.74349705, -32768.00890243], shape=(18000,)),
 array([-3.27704952e+04, -3.27680089e+04, -3.27667435e+04, ...,
        -8.03053510e+01, -2.98553752e+01,  5.81656274e+01], shape=(18000,)),
 array([  30.98162537, -122.39239502,  -96.46124788, ...,  -94.1578171 ,
         -28.18945986, -106.01293337], shape=(18000,)),
 array([ -203.43321156,  -124.55347158,  -122.44123821, ...,
         -979.36274855, -2780.15150837, -3564.40914342], shape=(18000,)),
 array([-4344.41358667, -2793.03597029,   706.90834879, ...,
         -15