In [1]:
import pandas as pd

from tableone import TableOne

In [2]:
data = pd.read_gbq("""
WITH ed AS (
    SELECT
          stays.hadm_id
        , SUM(TIMESTAMP_DIFF(stays.outtime, stays.intime, HOUR)) AS ed_los
        , MAX(triage.acuity) AS ed_acuity
    FROM `lcp-internal.mimic_ed.edstays` stays
    LEFT JOIN `lcp-internal.mimic_ed.triage` triage
        ON stays.stay_id = triage.stay_id
    LEFT JOIN `lcp-internal.mimic_ed.diagnosis` diag
        ON stays.stay_id = diag.stay_id
        AND diag.seq_num = 1
    GROUP BY stays.hadm_id
)
SELECT
      pat.subject_id
    , adm.hadm_id
    , icu.stay_id
    , CASE
        WHEN FIRST_VALUE(icu.stay_id) OVER icustay_window = icu.stay_id THEN 1
        ELSE 0
      END AS pat_count
    , CASE
        WHEN FIRST_VALUE(adm.hadm_id) OVER hadm_window = adm.hadm_id THEN 1
        ELSE 0
      END AS hadm_count
    , pat.anchor_age + (EXTRACT(YEAR FROM icu.intime) - pat.anchor_year) AS age
    , pat.gender
    , adm.insurance
    , ed.ed_acuity
    , ed.ed_los
    , icu.first_careunit
    , icu.los AS icu_los
    , TIMESTAMP_DIFF(adm.dischtime, adm.admittime, HOUR) / 24 AS hosp_los
    , adm.hospital_expire_flag
FROM `lcp-internal.mimic_hosp.patients` pat
INNER JOIN `lcp-internal.mimic_hosp.admissions` adm
    ON pat.subject_id = adm.subject_id
INNER JOIN `lcp-internal.mimic_icu.icustays` icu
    ON adm.hadm_id = icu.hadm_id
LEFT JOIN ed
    ON adm.hadm_id = ed.hadm_id
WINDOW hadm_window AS (PARTITION BY pat.subject_id ORDER BY adm.admittime)
     , icustay_window AS (PARTITION BY pat.subject_id ORDER BY icu.intime)
""", "lcp-internal")

In [3]:
int_cols = data.dtypes.values=="Int64"
data.loc[:, int_cols] = data.loc[:, int_cols].astype(float)
data.loc[:, int_cols] = data.loc[:, int_cols].astype(int, errors="ignore")

In [4]:
columns = [
    "pat_count", "hadm_count",
    "age", "gender", "insurance",
    "first_careunit",
    "ed_acuity", "ed_los",
    "icu_los", "hosp_los"
]

categorical = [
    "pat_count", "hadm_count",
    "gender", "insurance",
    "first_careunit",
    "ed_acuity",
]

order = {
    "pat_count": [1, 0], "hadm_count": [1, 0],
    "gender": ["F", "M"]
}

limit = {
    "pat_count": 1, "hadm_count": 1,
    "gender": 1
}

rename = {
    "pat_count": "Distinct patients", "hadm_count": "Distinct hospitalizations",
    "age": "Age", "gender": "Gender", "insurance": "Insurance",
    "first_careunit": "First ICU stay, unit type",
    "ed_acuity": "ED acuity", "ed_los": "ED length of stay",
    "icu_los": "ICU length of stay", "hosp_los": "Hospital length of stay"
}

table = TableOne(data, columns=columns, categorical=categorical, order=order, limit=limit, rename=rename,
                 groupby="hospital_expire_flag")
table

  df['percent'] = df['freq'].div(df.freq.sum(level=0),
  df['percent'] = df['freq'].div(df.freq.sum(level=0),
  df['percent'] = df['freq'].div(df.freq.sum(level=0),


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by hospital_expire_flag,Grouped by hospital_expire_flag,Grouped by hospital_expire_flag,Grouped by hospital_expire_flag
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1
n,,,76943,68149,8794
"Distinct patients, n (%)",1,0.0,53569 (69.6),48065 (70.5),5504 (62.6)
"Distinct hospitalizations, n (%)",1,0.0,58881 (76.5),52403 (76.9),6478 (73.7)
"Age, mean (SD)",,0.0,64.7 (16.9),63.9 (16.9),70.5 (15.2)
"Gender, n (%)",F,0.0,34091 (44.3),30106 (44.2),3985 (45.3)
"Insurance, n (%)",Medicaid,0.0,5825 (7.6),5292 (7.8),533 (6.1)
"Insurance, n (%)",Medicare,,34740 (45.2),29995 (44.0),4745 (54.0)
"Insurance, n (%)",Other,,36378 (47.3),32862 (48.2),3516 (40.0)
"First ICU stay, unit type, n (%)",Cardiac Vascular Intensive Care Unit (CVICU),0.0,12148 (15.8),11633 (17.1),515 (5.9)
"First ICU stay, unit type, n (%)",Coronary Care Unit (CCU),,8730 (11.3),7602 (11.2),1128 (12.8)
