In [1]:
import sys 
import os
import polars as pl 
import matplotlib.pyplot as plt
# Take the directory three levels above the current working directory as the
# project root and put it first on sys.path, presumably so the `utils` imports
# below resolve.
# NOTE(review): this depends on the directory the kernel was started in, not on
# the notebook's own location — confirm the notebook is always launched from
# its own folder.
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
sys.path.insert(0, project_root)
from utils.config import config
from utils.io import read_data
from utils.strobe_diagram import create_consort_diagram

Loaded configuration from config.json


In [None]:
# Display the kernel's working directory; project_root in the first cell is
# derived from it, so this is the value to check if the `utils` imports fail.
os.getcwd()

In [None]:
# Pull the site-specific settings used throughout the notebook out of the
# loaded configuration, then echo them so the run records what it used.
site_name, tables_path, file_type = (
    config['site_name'],
    config['tables_path'],
    config['file_type'],
)
print(
    f"Site Name: {site_name}",
    f"Tables Path: {tables_path}",
    f"File Type: {file_type}",
    sep="\n",
)

In [None]:
# Build the path of each required table from the configured directory and
# file type.
adt_filepath = f"{tables_path}/clif_adt.{file_type}"
hospitalization_filepath = f"{tables_path}/clif_hospitalization.{file_type}"
patient_filepath = f"{tables_path}/clif_patient.{file_type}"

# Load the three tables in the same order: ADT, hospitalization, patient.
adt_df, hospitalization_df, patient_df = (
    read_data(table_path, file_type)
    for table_path in (adt_filepath, hospitalization_filepath, patient_filepath)
)

# Starting population for the flow diagram: distinct patient_id values in the
# hospitalization table.
total_patients = hospitalization_df.get_column("patient_id").n_unique()

In [None]:
## Inclusion Criteria - "Were immediately followed (same discharge and admission date) by readmission and subsequent death"
# Identify hospitalizations that were followed, within readmit_gap_hours of
# their discharge, by a readmission of the same patient that ended in death.

# Step 1: Hospitalizations that ended in death. Only the 'Expired' discharge
# category is used here; hospice discharges are not part of this step.
death_encounters_df = hospitalization_df.filter(
    pl.col('discharge_category').is_in(['Expired'])
)
death_hospitalizations = (
    death_encounters_df
    .select([
        'patient_id',
        'hospitalization_id',
        'admission_dttm',
        'discharge_dttm',  # discharge datetime for the death hospitalization
        'discharge_category'
    ])
    .rename({
        'hospitalization_id': 'death_hosp_id',
        'admission_dttm': 'admission_death_dttm',
        'discharge_dttm': 'death_discharge_dttm'
    })
    .with_columns(
        # Length of the death hospitalization in whole hours.
        (pl.col('death_discharge_dttm') - pl.col('admission_death_dttm'))
        .dt.total_hours()
        .alias('hours_to_death')
    )
)

# Step 2: Every hospitalization of the patients found in step 1. This still
# contains the death hospitalization itself; it is removed after the join.
prior_hospitalizations = (
    hospitalization_df
    .filter(
        pl.col('patient_id').is_in(death_hospitalizations['patient_id'].implode())
    )
    .select([
        'patient_id',
        'hospitalization_id',
        'discharge_dttm'  # discharge datetime
    ])
    .rename({'hospitalization_id': 'prior_hosp_id', 'discharge_dttm': 'prior_discharge_dttm'})
)

# Step 3: Pair each death hospitalization with the patient's other
# hospitalizations and keep the pairs where the death admission happened
# within readmit_gap_hours after the other hospitalization's discharge.
readmit_gap_hours = 24
readmit_to_death = (
    death_hospitalizations
    .join(
        prior_hospitalizations,
        on='patient_id',
        how='inner'
    )
    .filter(
        # Make sure it's a different hospitalization
        pl.col('prior_hosp_id') != pl.col('death_hosp_id')
    )
)

# Time elapsed between the earlier discharge and the death admission.
readmit_gap = pl.col('admission_death_dttm') - pl.col('prior_discharge_dttm')

# hrs_bfr_readm keeps its original meaning (gap in whole hours) for inspection.
immediate_readmit_to_death = (
    readmit_to_death
    .with_columns(readmit_gap.dt.total_hours().alias('hrs_bfr_readm'))
    .sort(['patient_id', 'prior_discharge_dttm'])
)

# The window is checked on the raw timestamps rather than on hrs_bfr_readm:
#   * lower bound - the other hospitalization must have been discharged at or
#     before the death admission; without it every negative gap (a stay
#     discharged after the death admission) satisfied `<= readmit_gap_hours`;
#   * upper bound - total_hours() yields whole hours, so e.g. a 24h59m gap
#     would read as 24 and slip through; seconds keep the cut-off at the limit.
death_readmit_within_gap = immediate_readmit_to_death.filter(
    (pl.col('prior_discharge_dttm') <= pl.col('admission_death_dttm'))
    & (readmit_gap.dt.total_seconds() <= readmit_gap_hours * 3600)
)
patients_readmitted = death_readmit_within_gap["patient_id"].n_unique()

In [None]:
# Hospitalizations that ended in death or in a discharge to hospice.
death_or_hospice = hospitalization_df.filter(
    pl.col('discharge_category').is_in(["Expired", "Hospice"])
)
death_or_hospice_n = death_or_hospice["patient_id"].n_unique()

# The cohort is those hospitalizations plus the earlier hospitalizations that
# were followed by a readmission-to-death within the gap window.
relevant_cohort = hospitalization_df.filter(
    (
        (pl.col('hospitalization_id').is_in(death_readmit_within_gap['prior_hosp_id'].unique().implode()))
        |
        (pl.col('hospitalization_id').is_in(death_or_hospice["hospitalization_id"].unique().implode()))
    )
)

# Count patients from the cohort itself. The original counted death_or_hospice
# again; the two numbers are expected to match today (the added prior
# hospitalizations come from patients who also have an 'Expired' stay), but
# counting relevant_cohort keeps the figure correct if the cohort definition
# changes.
relevant_cohort_n = relevant_cohort["patient_id"].n_unique()

In [None]:
# Attach each patient's birth_date to the cohort hospitalizations and derive
# the age at discharge, stored as age_at_death.
birth_dates = patient_df.select(['patient_id', 'birth_date'])

# Age in years: days between birth_date and discharge_dttm divided by 365.25.
age_in_years = (pl.col('discharge_dttm') - pl.col('birth_date')).dt.total_days() / 365.25

relevant_cohort_with_birth = (
    relevant_cohort
    # Left join keeps every cohort row even when no patient record matches.
    .join(birth_dates, on='patient_id', how='left')
    .with_columns(age_in_years.alias('age_at_death'))
)

In [None]:
# Upper age limit (years) for the cohort, kept as a named value like
# readmit_gap_hours so it can be changed in one place.
max_age_years = 75

# Keep hospitalizations whose age_at_death does not exceed the limit. Rows with
# a null age_at_death (no birth_date after the left join) are dropped by the
# filter as well.
# NOTE(review): age_at_death is fractional, so `<=` excludes a patient who is
# 75 years and a few days old. If "aged 75 or less" means completed years,
# the condition should be `< max_age_years + 1` — confirm with the protocol.
cohort_df = relevant_cohort_with_birth.filter(pl.col("age_at_death") <= max_age_years)

In [None]:
# Patient counts shown in the diagram's split box.
deceased_patients_n = death_encounters_df["patient_id"].n_unique()
# Hospice count is the remainder of the cohort once deceased patients are
# taken out.
hospice_patients_n = relevant_cohort_n - deceased_patients_n

# Step 1: everyone in the hospitalization table.
all_patients_step = {
    'label': 'All Patients',
    'n': total_patients,
    'color': 'blue'
}

# Step 2: the deceased/hospice cohort, split into its two groups.
deceased_hospice_step = {
    'label': 'Deceased/Hospice Patients',
    'note': f'(inclu. {patients_readmitted} patients readmitted\nwithin {readmit_gap_hours} hours where death occurs)',
    'n': relevant_cohort_n,
    'color': 'blue',
    'split': [
        {
            'label': 'Deceased Patients',
            'n': deceased_patients_n,
            'color': 'red'
        },
        {
            'label': 'Hospice Patients',
            'n': hospice_patients_n,
            'color': 'green'
        }
    ]
}

# Step 3: the cohort after the age filter.
age_filtered_step = {
    'label': 'Patients aged 75 or less\n at death/hospice',
    'n': cohort_df["patient_id"].n_unique(),
    'color': 'blue'
}

steps = [all_patients_step, deceased_hospice_step, age_filtered_step]

# Render the cohort-selection flow diagram.
fig = create_consort_diagram(
    steps,
    title="COHORT SELECTION Flow Diagram",
    subtitle="Potential Organ Donors"
)
plt.show()
