In [2]:
from src.utils import init_notebook
import pandas as pd

# Project-local setup helper imported from src.utils; what it configures is not
# visible in this notebook (NOTE(review): confirm it must run before the data loads).
init_notebook()

In [3]:
# Read the three MIMIC-IV hosp CSVs used by the cells below.
d_labitems = pd.read_csv('data/mimiciv/mimic-iv-2.2/hosp/d_labitems.csv')
labevents = pd.read_csv('data/mimiciv/mimic-iv-2.2/hosp/labevents.csv')
admissions = pd.read_csv('data/mimiciv/mimic-iv-2.2/hosp/admissions.csv')

# Inner-join every lab event with the d_labitems row sharing its itemid.
labevents_with_description = pd.merge(labevents, d_labitems, how='inner', on='itemid')

In [6]:
# Cohort table: read it, attach admission/discharge times from `admissions`
# (inner join on hadm_id), then discard the cohort file's own charttime column.
abd_pain = (
    pd.read_csv('data/reasoning/abdominal_pain/hosp_10_match_categories.csv')
    .merge(admissions[['hadm_id', 'admittime', 'dischtime']], on='hadm_id')
    .drop(columns=['charttime'])
)

In [None]:
# One row per distinct (hadm_id, admittime, dischtime). Without de-duplicating,
# any hadm_id that appears on several cohort rows would multiply every one of
# its lab events in the merge below.
cohort_times = abd_pain[['hadm_id', 'admittime', 'dischtime']].drop_duplicates()

# Filter to cohort admissions first so the merge only touches the rows it needs.
abd_pain_labevents = labevents_with_description[
    labevents_with_description['hadm_id'].isin(cohort_times['hadm_id'])
]
abd_pain_labevents = abd_pain_labevents.merge(cohort_times, on='hadm_id')

# The CSV-sourced timestamps must be real datetimes before they can be subtracted/compared.
for time_col in ('storetime', 'admittime', 'dischtime'):
    abd_pain_labevents[time_col] = pd.to_datetime(abd_pain_labevents[time_col])

# Elapsed time from admission until the lab result was stored.
abd_pain_labevents['time_diff'] = abd_pain_labevents['storetime'] - abd_pain_labevents['admittime']

In [8]:
# function for extracting lab events within first n hours of admission for given hadm_id
# (expects a frame with a precomputed `time_diff` column, such as abd_pain_labevents above)
def extract_events_within_hours(labevents, hadm_id_list, hours, first_only=False):
    """
    Extract lab events within the first `hours` of admission for given hadm_ids.

    An event is kept when its `time_diff` lies in [0, `hours`] and its
    `storetime` is not later than `dischtime`.

    Parameters
    ----------
    labevents : pandas.DataFrame
        Lab events with at least the columns `hadm_id`, `itemid`, `storetime`,
        `dischtime` and `time_diff` (a Timedelta; computed upstream as
        `storetime - admittime`).
    hadm_id_list : list-like
        List of hospital admission IDs to filter.
    hours : int or float
        Time window (in hours) from admission.
    first_only : bool, optional (default=False)
        If True, keep only the first event per (hadm_id, itemid).

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows. With `first_only=True` the result holds
        exactly one complete original row per (hadm_id, itemid) -- the one with
        the earliest `storetime` -- sorted by those keys, with `hadm_id` and
        `itemid` as the leading columns and a fresh 0..n-1 index.
    """
    window_start = pd.Timedelta(0)
    window_end = pd.Timedelta(hours=hours)

    mask = (
        labevents['hadm_id'].isin(hadm_id_list)
        & (labevents['time_diff'] >= window_start)
        & (labevents['time_diff'] <= window_end)
        & (labevents['storetime'] <= labevents['dischtime'])
    )
    labevents_within_hours = labevents.loc[mask].copy()

    if first_only:
        key_cols = ["hadm_id", "itemid"]
        # GroupBy.first() takes the first NON-NULL value of each column separately,
        # so it can blend values from different events into one row. Sorting and
        # dropping duplicates keeps the earliest row intact, missing values included.
        labevents_within_hours = (
            labevents_within_hours.sort_values(key_cols + ["storetime"])
            .drop_duplicates(subset=key_cols, keep="first")
            .reset_index(drop=True)
        )
        # Preserve the key-columns-first layout the previous groupby output had.
        other_cols = [col for col in labevents_within_hours.columns if col not in key_cols]
        labevents_within_hours = labevents_within_hours[key_cols + other_cols]

    return labevents_within_hours

In [None]:
# First stored result per (hadm_id, itemid) within 12 hours of admission, saved as parquet.
abd_pain_labevents_12h = extract_events_within_hours(
    labevents=abd_pain_labevents,
    hadm_id_list=abd_pain['hadm_id'],
    hours=12,
    first_only=True,
)
abd_pain_labevents_12h.to_parquet(
    'data/reasoning/abdominal_pain/abd_pain_labevents_12h.pq'
)