In [None]:
import sys 
import os
import polars as pl 
import matplotlib.pyplot as plt
project_root = os.path.dirname(os.getcwd())
sys.path.insert(0, project_root)
from utils.config import config
from utils.io import read_data
from utils.strobe_diagram import create_consort_diagram
from clifpy.utils.stitching_encounters import stitch_encounters

In [None]:
# Pull the site-specific settings this notebook relies on out of the project config.
site_name, tables_path, file_type = (
    config['site_name'],
    config['tables_path'],
    config['file_type'],
)

# Echo the settings so the executed notebook records which site and data location were used.
print(
    f"Site Name: {site_name}",
    f"Tables Path: {tables_path}",
    f"File Type: {file_type}",
    sep="\n",
)

In [None]:
# Load the CLIF tables used by this notebook.
def _clif_table_path(table_name):
    """Return the path `<tables_path>/clif_<table_name>.<file_type>` for one CLIF table."""
    return f"{tables_path}/clif_{table_name}.{file_type}"


# File locations, one per table.
adt_filepath = _clif_table_path("adt")
hospitalization_filepath = _clif_table_path("hospitalization")
patient_filepath = _clif_table_path("patient")
resp_filepath = _clif_table_path("respiratory_support")
labs_filepath = _clif_table_path("labs")
micro_culture_filepath = _clif_table_path("microbiology_culture")

# Read each table with the project's reader (same order as the paths above).
adt_df = read_data(adt_filepath, file_type)
hospitalization_df = read_data(hospitalization_filepath, file_type)
patient_df = read_data(patient_filepath, file_type)
resp_df = read_data(resp_filepath, file_type)
labs_df = read_data(labs_filepath, file_type)
micro_culture = read_data(micro_culture_filepath, file_type)

# Starting denominator for the flow diagram: distinct patient_id values in the hospitalization table.
total_patients = hospitalization_df["patient_id"].n_unique()

In [None]:
# Stitch encounters with clifpy. The helper is handed pandas copies of the polars
# frames, and each of its three outputs is converted straight back to polars.
hosp_stitched, adt_stitched, encounter_mapping = map(
    pl.from_pandas,
    stitch_encounters(
        hospitalization=hospitalization_df.to_pandas(),
        adt=adt_df.to_pandas(),
        time_interval=12,  # presumably the linking gap in hours -- TODO confirm against clifpy docs
    ),
)

In [None]:
# Step 1: Identify the hospitalizations that ended in death.
# Only rows whose discharge_category is 'expired' (compared case-insensitively) are kept;
# hospice discharges are NOT included by this filter.
is_expired = pl.col('discharge_category').str.to_lowercase() == 'expired'
expired_encounters_df = hosp_stitched.filter(is_expired)

# Identifiers plus admission/discharge timestamps of the death hospitalization.
death_hosp_columns = [
    'patient_id',
    'hospitalization_id',
    'encounter_block',
    'admission_dttm',
    'discharge_dttm',  # discharge datetime for the death hospitalization
]
expired_hospitalizations = expired_encounters_df.select(death_hosp_columns).unique()

# Number of distinct patients who expired (used as a flow-diagram count).
expired_patients_n = expired_hospitalizations["patient_id"].n_unique()
expired_hospitalizations.head()

In [None]:
# Step 2 - On invasive mechanical ventilation (IMV) at death or within 48h before death.

# Look-back window, in hours before discharge_dttm of the death hospitalization,
# in which the last IMV record must fall.
IMV_LOOKBACK_HOURS = 48

# Respiratory-support rows recorded as IMV.
# NOTE(review): this match is case-sensitive, unlike the lower-cased discharge_category
# match used for the expired filter -- confirm device_category casing is uniform across sites.
imv_resp_encounters = resp_df.filter(pl.col("device_category") == "IMV")

# Expired hospitalizations that have at least one IMV row ("ever on IMV").
imv_expired = expired_hospitalizations.join(
    imv_resp_encounters.select(["hospitalization_id", "recorded_dttm"]),
    on="hospitalization_id",
    how="inner",
)

# Most recent IMV record per patient. Rows without a timestamp are removed first so
# that a missing recorded_dttm can never be selected as the "latest" record (which
# would make the hours-to-death value null and silently drop the patient).
resp_expired_latest_recorded_imv = (
    imv_expired
    .drop_nulls(subset=["recorded_dttm"])
    .sort("recorded_dttm", descending=True)
    .group_by("patient_id")
    .agg(pl.all().first())
)

# Hours from the last IMV record to discharge_dttm of the death hospitalization.
# Positive: last IMV record precedes death; negative: it was recorded after death.
resp_expired_imv_hrs = resp_expired_latest_recorded_imv.with_columns(
    (
        (pl.col("discharge_dttm") - pl.col("recorded_dttm")).dt.total_seconds() / 3600
    ).alias("hr_2death_last_imv")
)

# Patients whose last IMV record is no more than 48h before death
# (this also keeps records timestamped after death; they are counted separately below).
resp_expired_cohort = resp_expired_imv_hrs.filter(
    pl.col('hr_2death_last_imv') <= IMV_LOOKBACK_HOURS
)

# Counts for the flow diagram.
imv_expired_patients = imv_expired["patient_id"].n_unique()
# Strictly negative only: a record exactly at the death time (0h) counts as
# "at death", not "after recorded death time".
imv_after_expire = resp_expired_imv_hrs.filter(
    pl.col('hr_2death_last_imv') < 0
)["patient_id"].n_unique()
imv_48hr_expire = resp_expired_cohort["patient_id"].n_unique()


In [None]:
# Step 3 - Pass the potential organ quality assessment check (independent assessment) using labs from the 48 hours prior to death

# Step 3A - Kidney: creatinine <4  AND not on CRRT

# Step 3B - Liver: Total bilirubin < 4, AST < 700, AND ALT< 700

In [None]:
# Step 4 - CONTRAINDICATIONS

# Step 4A - No positive blood cultures within 48hrs (no code for this step in this cell)

# Step 4B - History of Cancer or Sepsis (no code for this step in this cell)

# Step 4C - Age at death between 18 and 75 years, both bounds inclusive
birth_dates = patient_df.select(['patient_id', 'birth_date'])
relevant_cohort_with_birth = resp_expired_cohort.join(
    birth_dates, on='patient_id', how='left'
)

# Age at death in years: whole days between birth_date and discharge_dttm divided by 365.25.
# NOTE(review): assumes birth_date and discharge_dttm have compatible temporal dtypes -- confirm.
age_at_death_expr = (
    (pl.col('discharge_dttm') - pl.col('birth_date')).dt.total_days() / 365.25
).alias('age_at_death')
relevant_cohort_with_deathage = relevant_cohort_with_birth.with_columns(age_at_death_expr)

# Keep adults up to and including 75 years old.
age_relevant_cohort = relevant_cohort_with_deathage.filter(
    pl.col('age_at_death').is_between(18, 75, closed="both")
)
age_relevant_cohort_n = age_relevant_cohort["patient_id"].n_unique()

# Final cohort

In [None]:
# relevant_cohort_with_deathage.select([
#     "patient_id", 
#     "hospitalization_id", 
#     "encounter_block", 
#     "age_at_death"
# ]).unique().write_parquet("../output/intermediate/relevant_cohort_with_deathage.parquet")

In [None]:
# Breakdown shown under the "On IMV at death or 48hrs prior" step: patients whose
# last IMV record falls after the recorded death time versus the remainder.
imv_timing_split = [
    dict(
        label='Patients on IMV\n after recorded death time\n',
        n=imv_after_expire,
        color='red',
    ),
    dict(
        label='Patients on IMV within\n 48hrs before or at death\n',
        note="Deceased Patients who were IMV on since 48hrs before death",
        n=imv_48hr_expire - imv_after_expire,
        color='red',
    ),
]

# Cohort-selection steps, in the order they are handed to the diagram helper.
# Each entry carries a label, a patient count and a color; some add a note or a split.
steps = [
    dict(label='All Patients', n=total_patients, color='blue'),
    dict(
        label='Deceased Patients',
        note='(not including Hospice)',
        n=expired_patients_n,
        color='blue',
    ),
    dict(label='Ever on IMV', n=imv_expired_patients, color='blue'),
    dict(
        label='On IMV at death \n or 48hrs prior\n',
        n=imv_48hr_expire,
        color='blue',
        split=imv_timing_split,
    ),
    dict(
        label='Patients aged 75 or less\n at death\n',
        n=age_relevant_cohort_n,
        color='blue',
    ),
]


# Render the cohort-selection flow diagram from the step definitions.
fig = create_consort_diagram(
    steps,
    title="COHORT SELECTION Flow Diagram",
    subtitle="Potential Organ Donors"
)

# savefig does not create missing folders, so make sure the target directory
# exists first (e.g. on a fresh checkout where output/ has not been created yet).
cohort_diagram_path = "../output/final/cohort_strobe.png"
os.makedirs(os.path.dirname(cohort_diagram_path), exist_ok=True)
fig.savefig(cohort_diagram_path, bbox_inches="tight", dpi=300)
plt.show()

