In [15]:
import os
import numpy as np
import sys
import h5py
import pandas as pd
from IPython.display import display
from random import choice

ROOT_DIR = '../'
sys.path.insert(0, os.path.abspath(f'/{ROOT_DIR}/heart_rhythm_analysis/'))    # make repo root importable

from heart_rhythm_analysis.get_data.CapnoBaseETL import CapnoBaseETL
from heart_rhythm_analysis.get_data.MimicETL import MimicETL
from heart_rhythm_analysis.utils.timeseries_viewer import make_window_figure, create_time_series_viewer

bSetUpDB = True
WINDOW_LEN = 8

import dash
app = dash.Dash(__name__)

In [16]:
def load_as_df(file_path, filename,
               store_signals=True,
               group_by_record=False):
    h5_path = f"{file_path}/{filename}.h5"
    print(f'Loading: {h5_path}')
    hf = h5py.File(h5_path, "r")
    rows = []
    for subj in hf.keys():
        subj_grp = hf[subj]
        # print(subj)
        window_count = 0
        for win_id in subj_grp.keys():
            window_count = window_count+1
            win_grp = subj_grp[win_id]
            # pull out everything you need
            rec_id      = win_grp.attrs['rec_id']
            label       = win_grp.attrs['label']
            raw_ppg     = win_grp["raw_ppg"][:]
            proc_ppg    = win_grp["proc_ppg"][:]
            raw_ekg     = win_grp["raw_ekg"][:]
            raw_abp     = win_grp["raw_abp"][:]
            raw_ppg_fs  = win_grp.attrs["raw_ppg_fs"]
            ekg_fs      = win_grp.attrs["ekg_fs"]
            ppg_fs      = win_grp.attrs["ppg_fs"]
            abp_fs      = win_grp.attrs["abp_fs"]
            notes       = win_grp.attrs.get("notes", "")
            print(notes)
            # build the window row
            row = {
                "subject": subj,
                "window_id": win_id,
                "window_count": window_count,
                "rec_id": rec_id,
                "label": label,
                "raw_ppg_fs": raw_ppg_fs,
                "ppg_fs_out": ppg_fs,
                "ekg_fs_out": ekg_fs,
                "abp_fs_out": abp_fs,
                "raw_len": len(raw_ppg),
                "proc_len": len(proc_ppg),
                "duration_raw_s": len(raw_ppg) / ppg_fs,
                "duration_proc_s": len(proc_ppg) / ppg_fs,
                "notes": notes
            }
            if store_signals:
                row.update({
                    "raw_ppg": raw_ppg,
                    "proc_ppg": proc_ppg,
                    "raw_ekg": raw_ekg,
                    "raw_abp": raw_abp
                })
            rows.append(row)

    df = pd.DataFrame(rows)

    if group_by_record and store_signals:
        # define how to aggregate each column
        agg_dict = {
            "subject":     "first",
            "label":       "first",
            "raw_ppg_fs":  "first",
            "ppg_fs_out":  "first",
            "ekg_fs_out":  "first",
            "abp_fs_out":  "first",
            "notes":       lambda x: list(x),
            "raw_len":     lambda x: np.sum(x),
            "proc_len":    lambda x: np.sum(x),
            "duration_raw_s":  lambda x: np.sum(x),
            "duration_proc_s": lambda x: np.sum(x),
            # now the key: concatenate all windows into one long array
            "raw_ppg":  lambda series: np.concatenate(series.values),
            "proc_ppg": lambda series: np.concatenate(series.values),
            "raw_ekg":  lambda series: np.concatenate(series.values),
            "raw_abp":  lambda series: np.concatenate(series.values),
        }
        df = (
            df
            .groupby("rec_id", as_index=False)
            .agg(agg_dict)
        )
    return df

# MIMIC Exploration

In [13]:
def main():
    mimic_num = "4"
    root_path = os.path.join(f'../data/raw/mimic3_data/mimic{mimic_num}_struct.mat')
    out_filename = f'mimic{mimic_num}_db'
    out_path = os.path.join(f'../data/processed/length_full/{out_filename}')
    df = load_as_df(out_path,out_filename)
    # display(df.head(10))
    return df


In [14]:
if __name__ == "__main__":
    df_mimic = main()
    # first = df_mimic3_gen.iloc[np.random.randint(0, len(df_mimic3_gen))] 
    chosen_subj = df_mimic['subject'].unique()[0]

    surr_subject_df = df_mimic[df_mimic['subject'] == chosen_subj]

    display(surr_subject_df.head())

    specs = [
        {"key":"raw_ppg",  "fs_key":"raw_ppg_fs",  "subplot":1, "legend":"Raw PPG"},
        {"key":"proc_ppg", "fs_key":"ppg_fs_out",  "subplot":1, "legend":"Proc PPG"},
        {"key":"raw_ekg",  "fs_key":"ekg_fs_out",  "subplot":2,               },
        {"key":"raw_abp",  "fs_key":"abp_fs_out",  "subplot":3,               },
    ]

    fig_fn = lambda window: make_window_figure(window, specs)
    
    app = create_time_series_viewer(
    surr_subject_df,
    fig_fn,
    specs,
    index_label_fn=lambda idx, row: f"{row.subject}-{row.window_count}"
)
    print(f'Total # of Subjects: {len(df_mimic['subject'].unique())}')
    print(f'Total # of Windows: {df_mimic.shape[0]}')
    print("Launching Dash at http://127.0.0.1:8050 …")
    app.run(debug=True,port = 8080)

Loading: ../data/processed/length_full/mimic4_db/mimic4_db.h5












































































































































































































































































































































































































































































































































































































































































































































































































































































































































































Unnamed: 0,subject,window_id,window_count,rec_id,label,raw_ppg_fs,ppg_fs_out,ekg_fs_out,abp_fs_out,raw_len,proc_len,duration_raw_s,duration_proc_s,notes,raw_ppg,proc_ppg,raw_ekg,raw_abp
0,p10020306,00360aeb-282c-4403-b70f-14ab34f1b7ea,1,83404654,-1,62.5,20.83,62.5,62.5,499,167,23.955833,8.017283,,"[0.37060546875, 0.366455078125, 0.358154296875...","[-0.5895205, -0.22575125, -0.25129005, -0.0213...","[-0.025, -0.02, -0.02, -0.005, 0.01, 0.01, -0....","[93.6875, 88.8125, 82.125, 76.25, 72.75, 71.0,..."
1,p10020306,015fd36b-3ac6-43f5-a181-6171b48f6b94,2,83404654,-1,62.5,20.83,62.5,62.5,499,167,23.955833,8.017283,,"[0.466552734375, 0.458251953125, 0.4541015625,...","[-0.5543456, 0.02814585, -0.17611787, -0.02600...","[-0.005, -0.02, -0.025, -0.02, -0.015, -0.02, ...","[66.125, 65.6875, 64.9375, 64.0, 63.0625, 62.2..."
2,p10020306,01d6d4cd-138d-4d83-8b9b-75076a8f82e2,3,83404654,-1,62.5,20.83,62.5,62.5,499,167,23.955833,8.017283,,"[0.75048828125, 0.740966796875, 0.734375, 0.72...","[-0.20644447, 0.2584164, 0.07711214, 0.1138857...","[-0.06, -0.045, -0.03, -0.025, -0.02, -0.01, -...","[62.1875, 73.3125, 83.875, 90.875, 94.0, 94.0,..."
3,p10020306,029017e6-0907-4f2b-81ee-1fc6ebfd7a9e,4,83404654,-1,62.5,20.83,62.5,62.5,499,167,23.955833,8.017283,,"[0.49560546875, 0.4873046875, 0.483154296875, ...","[-0.4486087, -0.057038963, -0.23012866, -0.209...","[-0.015, -0.03, -0.04, -0.035, 0.025, 0.255, 0...","[60.875, 60.5625, 60.4375, 60.125, 59.9375, 59..."
4,p10020306,02a6163c-803b-4ace-90a7-8bd2aac2c48a,5,83404654,-1,62.5,20.83,62.5,62.5,499,167,23.955833,8.017283,,"[0.387451171875, 0.379150390625, 0.37060546875...","[-0.5267661, -0.24221948, -0.3754885, -0.34613...","[2.205, 1.505, 1.27, 1.17, 0.985, 0.735, 0.465...","[100.5625, 114.5625, 120.3125, 123.1875, 125.3..."


Total # of Subjects: 51
Total # of Windows: 22899
Launching Dash at http://127.0.0.1:8050 …


In [10]:
subject_agg = {
    # sampling & metadata: just take the first (they're constant per subject)
    "raw_ppg_fs": "first",
    "ppg_fs_out": "first",
    "ekg_fs_out": "first",
    "abp_fs_out": "first",
    # lengths & durations: sum across windows
    "raw_len":       "sum",
    "proc_len":      "sum",
    "duration_raw_s":  "sum",
    "duration_proc_s": "sum",
    # signals: concatenate all windows end-to-end
    "raw_ppg":  lambda s: np.concatenate(s.values),
    "proc_ppg": lambda s: np.concatenate(s.values),
    "raw_ekg":  lambda s: np.concatenate(s.values),
    "raw_abp":  lambda s: np.concatenate(s.values),
    # if you want to keep track of window_ids or rec_ids:
    "window_id": lambda s: list(s.values),
    "window_count": lambda s: list(s.values),
    "rec_id":    lambda s: list(s.values),
    "label":     lambda s: list(s.values),
    "notes": lambda s: list(s.values),
}

# ── 2) group by subject ─
df_by_subject = (
    df_mimic
    .groupby("subject", as_index=False)
    .agg(subject_agg)
)

# ── 3) now df_by_subject has one row per subject, with each signal a long array
print(df_by_subject.shape)
print(df_by_subject.raw_ppg.iloc[0].shape)  # e.g. (sum of all its windows,) 

# bind specs & figure fn exactly as before
fig_fn = lambda row: make_window_figure(row, specs)

# subject‐level dashboard (labels are just the subject IDs)
app = create_time_series_viewer(
    df_by_subject,
    fig_fn,
    specs,
    index_label_fn=lambda idx, row: str(row.subject)
)
print(df_by_subject['notes'])
print("Launching Dash at http://127.0.0.1:8080 …")
app.run(debug=True,port=8020)

(51, 18)
(224051,)
0     [, , , , , , , , , , , , , , , , , , , , , , ,...
1     [, , , , , , , , , , , , , , , , , , , , , , ,...
2     [, , , , , , , , , , , , , , , , , , , , , , ,...
3     [, , , , , , , , , , , , , , , , , , , , , , ,...
4     [, , , , , , , , , , , , , , , , , , , , , , ,...
5     [, , , , , , , , , , , , , , , , , , , , , , ,...
6     [, , , , , , , , , , , , , , , , , , , , , , ,...
7     [, , , , , , , , , , , , , , , , , , , , , , ,...
8     [, , , , , , , , , , , , , , , , , , , , , , ,...
9     [, , , , , , , , , , , , , , , , , , , , , , ,...
10    [, , , , , , , , , , , , , , , , , , , , , , ,...
11    [, , , , , , , , , , , , , , , , , , , , , , ,...
12    [, , , , , , , , , , , , , , , , , , , , , , ,...
13    [, , , , , , , , , , , , , , , , , , , , , , ,...
14    [, , , , , , , , , , , , , , , , , , , , , , ,...
15    [, , , , , , , , , , , , , , , , , , , , , , ,...
16    [, , , , , , , , , , , , , , , , , , , , , , ,...
17    [, , , , , , , , , , , 

In [None]:

plt.close()

NameError: name 'plt' is not defined