In [2]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np

# Projekt-Root automatisch finden (sucht Ordner, der "src" enthält)
cwd = Path().resolve()
PROJECT_ROOT = next(p for p in [cwd] + list(cwd.parents) if (p / "src").exists())
sys.path.insert(0, str(PROJECT_ROOT))

from src.db import q  # deine q()-Funktion


In [3]:
import importlib
from src.db import q
import src.cohort as cohort

importlib.reload(cohort)  # wichtig, falls du cohort.py geändert hast

aki = q(cohort.AKI_ICU_SQL)

# Datetime + MIMIC age cap + LOS
import numpy as np
import pandas as pd

for c in ["intime","outtime","admittime","dischtime","deathtime"]:
    if c in aki.columns:
        aki[c] = pd.to_datetime(aki[c], errors="coerce")

if "age" in aki.columns:
    aki.loc[aki["age"] > 89, "age"] = 90

aki["icu_los_days"]  = (aki["outtime"] - aki["intime"]).dt.total_seconds() / 86400
aki["hosp_los_days"] = (aki["dischtime"] - aki["admittime"]).dt.total_seconds() / 86400
aki.loc[aki["icu_los_days"] < 0, "icu_los_days"] = np.nan
aki.loc[aki["hosp_los_days"] < 0, "hosp_los_days"] = np.nan

aki.head()



Unnamed: 0,subject_id,hadm_id,icustay_id,intime,outtime,gender,dob,admittime,dischtime,deathtime,ethnicity,age,hospital_mortality,icu_los_days,hosp_los_days
0,268,110404,280836,2198-02-14 23:27:38,2198-02-18 05:26:11,F,2132-02-21,2198-02-11 13:40:00,2198-02-18 03:55:00,2198-02-18 03:55:00,HISPANIC OR LATINO,65.0,1,3.248993,6.59375
1,275,129886,219649,2170-10-07 11:28:53,2170-10-14 14:38:07,M,2088-08-07,2170-10-06 03:09:00,2170-10-19 15:35:00,2170-10-19 15:35:00,WHITE,82.0,1,7.131412,13.518056
2,279,192224,204407,2164-06-14 21:18:27,2164-06-18 22:53:12,M,2090-02-27,2164-06-13 19:19:00,2164-06-19 17:15:00,NaT,WHITE,74.0,0,4.065799,5.913889
3,281,111199,257572,2101-10-18 04:45:22,2101-10-25 22:29:25,F,2041-10-12,2101-10-18 04:42:00,2101-10-25 19:10:00,2101-10-25 19:10:00,BLACK/AFRICAN AMERICAN,60.0,1,7.738924,7.602778
4,294,152578,222074,2118-01-17 21:45:05,2118-01-20 11:12:45,M,2039-05-21,2118-01-17 21:44:00,2118-02-02 16:07:00,NaT,UNKNOWN/NOT SPECIFIED,78.0,0,2.56088,15.765972


In [3]:
tables_to_check = [
    "procedureevents_mv",
    "inputevents_mv",
    "inputevents_cv",
    "chartevents",
    "d_items"
]

for t in tables_to_check:
    try:
        q(f"SELECT 1 FROM {t} LIMIT 1;")
        print(f"OK: {t}")
    except Exception as e:
        print(f"NO: {t} -> {str(e)[:80]}")


OK: procedureevents_mv
OK: inputevents_mv
OK: inputevents_cv
OK: chartevents
OK: d_items


In [4]:
pressor_items = q("""
SELECT itemid, label
FROM d_items
WHERE lower(label) SIMILAR TO '%(norepinephrine|noradrenaline|epinephrine|adrenaline|vasopressin|dopamine|phenylephrine)%'
ORDER BY label;
""")

rrt_items = q("""
SELECT itemid, label
FROM d_items
WHERE lower(label) SIMILAR TO '%(dialysis|cvvh|crrt|hemofiltration|hemodialysis|renal replacement)%'
ORDER BY label;
""")

vent_items = q("""
SELECT itemid, label
FROM d_items
WHERE lower(label) SIMILAR TO '%(mechanical ventilation|ventilator|ventilation mode)%'
ORDER BY label;
""")

print(pressor_items)
print(rrt_items)
print(vent_items)


    itemid                 label
0    30043              Dopamine
1   221662              Dopamine
2     5329              Dopamine
3    30307         Dopamine Drip
4     4501         DOPAMINE DRIP
5     5805  DOPAMINE MICS/KG/MIN
6   221289           Epinephrine
7    30044           Epinephrine
8    30119         Epinephrine-k
9    30309      Epinephrine Drip
10    3112   epinephrine mcg/min
11  221906        Norepinephrine
12    5656         phenylephrine
13  221749         Phenylephrine
14    5461   phenylephrine drops
15    6512     Phenylephrine gtt
16    6690    phenylephrine gtts
17    6395    Phenylephrine gtts
18    6217   phenylephrine nasal
19    7210  Phenylephrine nose s
20    1136           vasopressin
21  222315           Vasopressin
22    2445           Vasopressin
23   30051           Vasopressin
24    1222           VASOPRESSIN
25    2765  VASOPRESSIN   UNIT/R
26   42802   VASOPRESSIN  CC/HR.
27    7341     Vasopressin  u/hr
28    6255  VAsopressin 0.04   s
29    2334

In [5]:
vaso_counts_aki = q("""
SELECT di.label AS intervention,
       COUNT(DISTINCT ie.icustay_id) AS n_icustays
FROM inputevents_mv ie
JOIN d_items di
  ON di.itemid = ie.itemid
JOIN diagnoses_icd d
  ON ie.subject_id = d.subject_id
 AND ie.hadm_id    = d.hadm_id
WHERE d.icd9_code LIKE '584%'
  AND lower(di.label) SIMILAR TO
      '%(norepinephrine|noradrenaline|epinephrine|adrenaline|vasopressin|dopamine|phenylephrine)%'
GROUP BY di.label
ORDER BY n_icustays DESC;
""")

vaso_counts_aki


Unnamed: 0,intervention,n_icustays
0,Norepinephrine,1756
1,Phenylephrine,1425
2,Vasopressin,634
3,Dopamine,527
4,Epinephrine,220


In [6]:
rrt_counts_aki = q("""
SELECT di.label AS intervention,
       COUNT(DISTINCT pe.icustay_id) AS n_icustays
FROM procedureevents_mv pe
JOIN d_items di
  ON di.itemid = pe.itemid
JOIN diagnoses_icd d
  ON pe.subject_id = d.subject_id
 AND pe.hadm_id    = d.hadm_id
WHERE d.icd9_code LIKE '584%'
  AND lower(di.label) SIMILAR TO
      '%(dialysis|cvvh|crrt|hemofiltration|hemodialysis|renal replacement)%'
GROUP BY di.label
ORDER BY n_icustays DESC;
""")

rrt_counts_aki


Unnamed: 0,intervention,n_icustays
0,Dialysis Catheter,704
1,Dialysis - CRRT,430
2,Hemodialysis,397
3,CRRT Filter Change,67
4,Peritoneal Dialysis,2


In [7]:
# vent_counts_aki = q("""
# SELECT di.label AS intervention,
#        COUNT(DISTINCT ce.icustay_id) AS n_icustays
# FROM chartevents ce
# JOIN d_items di
#   ON di.itemid = ce.itemid
# JOIN diagnoses_icd d
#   ON ce.subject_id = d.subject_id
#  AND ce.hadm_id    = d.hadm_id
# WHERE d.icd9_code LIKE '584%'
#   AND lower(di.label) SIMILAR TO
#       '%(mechanical ventilation|ventilator|ventilation mode)%'
# GROUP BY di.label
# ORDER BY n_icustays DESC;
# """)

# vent_counts_aki


In [8]:
furo_aki = q("""
SELECT
  di.label AS drug,
  COUNT(*) AS n_events,
  COUNT(DISTINCT ie.icustay_id) AS n_icustays
FROM inputevents_mv ie
JOIN d_items di ON di.itemid = ie.itemid
JOIN diagnoses_icd d
  ON ie.subject_id = d.subject_id AND ie.hadm_id = d.hadm_id
WHERE d.icd9_code LIKE '584%'
  AND lower(di.label) SIMILAR TO '%(furosemide|lasix)%'
GROUP BY di.label
ORDER BY n_icustays DESC;
""")

furo_aki


Unnamed: 0,drug,n_events,n_icustays
0,Furosemide (Lasix),26359,2743
1,Furosemide (Lasix) 500/100,1675,195


In [10]:
diuretics_aki = q("""
SELECT
  di.label AS drug,
  COUNT(*) AS n_events,
  COUNT(DISTINCT ie.icustay_id) AS n_icustays
FROM inputevents_mv ie
JOIN d_items di ON di.itemid = ie.itemid
JOIN diagnoses_icd d
  ON ie.subject_id = d.subject_id AND ie.hadm_id = d.hadm_id
WHERE d.icd9_code LIKE '584%'
  AND lower(di.label) SIMILAR TO '%(furosemide|lasix|bumetanide|torsemide|ethacrynic|hydrochlorothiazide|chlorthalidone|metolazone|spironolactone|eplerenone|amiloride|triamterene)%'
GROUP BY di.label
ORDER BY n_icustays DESC;
""")

diuretics_aki


Unnamed: 0,drug,n_events,n_icustays
0,Furosemide (Lasix),26359,2743
1,Furosemide (Lasix) 500/100,1675,195


In [11]:
n_aki_stays = q("""
SELECT COUNT(DISTINCT i.icustay_id) AS n
FROM icustays i
JOIN diagnoses_icd d
  ON i.subject_id = d.subject_id AND i.hadm_id = d.hadm_id
WHERE d.icd9_code LIKE '584%';
""")["n"].iloc[0]

n_furo_aki_stays = q("""
SELECT COUNT(DISTINCT ie.icustay_id) AS n
FROM inputevents_mv ie
JOIN d_items di ON di.itemid = ie.itemid
JOIN diagnoses_icd d
  ON ie.subject_id = d.subject_id AND ie.hadm_id = d.hadm_id
WHERE d.icd9_code LIKE '584%'
  AND lower(di.label) SIMILAR TO '%(furosemide|lasix)%';
""")["n"].iloc[0]

rate = n_furo_aki_stays / n_aki_stays * 100
rate


np.float64(21.430235266713254)

In [12]:
aki_furo_flag = q("""
WITH aki_icu AS (
  SELECT DISTINCT i.icustay_id, i.subject_id, i.hadm_id
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id AND i.hadm_id = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
furo AS (
  SELECT DISTINCT ie.icustay_id
  FROM inputevents_mv ie
  JOIN d_items di ON di.itemid = ie.itemid
  WHERE lower(di.label) SIMILAR TO '%(furosemide|lasix)%'
)
SELECT
  a.icustay_id,
  CASE WHEN f.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS furosemid,
  CASE WHEN adm.deathtime IS NOT NULL THEN 1 ELSE 0 END AS hospital_mortality
FROM aki_icu a
JOIN admissions adm
  ON a.subject_id = adm.subject_id AND a.hadm_id = adm.hadm_id
LEFT JOIN furo f
  ON a.icustay_id = f.icustay_id;
""")

aki_furo_flag.head()


Unnamed: 0,icustay_id,furosemid,hospital_mortality
0,200024,0,1
1,200030,0,0
2,200036,0,0
3,200049,1,0
4,200053,1,0


In [13]:
mortality_by_furo = (
    aki_furo_flag
    .groupby("furosemid")["hospital_mortality"]
    .agg(
        n="count",
        deaths="sum",
        mortality_rate=lambda x: x.mean() * 100
    )
)

mortality_by_furo


Unnamed: 0_level_0,n,deaths,mortality_rate
furosemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10119,2446,24.172349
1,2760,572,20.724638


Mortalität schonmal gereinger, jetzt noch LOS checken

In [14]:
aki_furo_los = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime,
    i.outtime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
furo AS (
  SELECT DISTINCT ie.icustay_id
  FROM inputevents_mv ie
  JOIN d_items di ON di.itemid = ie.itemid
  WHERE lower(di.label) SIMILAR TO '%(furosemide|lasix)%'
)
SELECT
  a.icustay_id,
  CASE WHEN f.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS furosemid,
  EXTRACT(EPOCH FROM (a.outtime - a.intime)) / 86400.0 AS icu_los_days,
  EXTRACT(EPOCH FROM (adm.dischtime - adm.admittime)) / 86400.0 AS hosp_los_days
FROM aki_icu a
JOIN admissions adm
  ON a.subject_id = adm.subject_id
 AND a.hadm_id    = adm.hadm_id
LEFT JOIN furo f
  ON a.icustay_id = f.icustay_id;
""")

aki_furo_los.head()


Unnamed: 0,icustay_id,furosemid,icu_los_days,hosp_los_days
0,200024,0,0.381238,10.167361
1,200030,0,6.155521,21.159028
2,200036,0,2.993322,2.95
3,200049,1,1.420428,23.210417
4,200053,1,5.990938,17.274306


In [15]:
icu_los_by_furo = (
    aki_furo_los
    .dropna(subset=["icu_los_days"])
    .groupby("furosemid")["icu_los_days"]
    .agg(
        n="count",
        median="median",
        p25=lambda x: x.quantile(0.25),
        p75=lambda x: x.quantile(0.75),
        mean="mean"
    )
)

icu_los_by_furo.rename(
    index={0: "No furosemid", 1: "Furosemid"},
    inplace=True
)

icu_los_by_furo


Unnamed: 0_level_0,n,median,p25,p75,mean
furosemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
No furosemid,10119,2.645961,1.461476,5.377813,5.373661
Furosemid,2760,5.212749,2.834835,10.515969,8.267742


In [16]:
hosp_los_by_furo = (
    aki_furo_los
    .dropna(subset=["hosp_los_days"])
    .groupby("furosemid")["hosp_los_days"]
    .agg(
        n="count",
        median="median",
        p25=lambda x: x.quantile(0.25),
        p75=lambda x: x.quantile(0.75),
        mean="mean"
    )
)

hosp_los_by_furo.rename(
    index={0: "No furosemid", 1: "Furosemid"},
    inplace=True
)

hosp_los_by_furo


Unnamed: 0_level_0,n,median,p25,p75,mean
furosemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
No furosemid,10119,9.797917,5.170486,19.752083,15.519763
Furosemid,2760,13.194444,7.935764,22.347396,17.780293


In [17]:
aki_furo_rrt = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime,
    i.outtime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
furo AS (
  SELECT DISTINCT ie.icustay_id
  FROM inputevents_mv ie
  JOIN d_items di ON di.itemid = ie.itemid
  WHERE lower(di.label) SIMILAR TO '%(furosemide|lasix)%'
),
rrt AS (
  SELECT DISTINCT pe.icustay_id
  FROM procedureevents_mv pe
  JOIN d_items di ON di.itemid = pe.itemid
  WHERE lower(di.label) SIMILAR TO '%(dialysis|cvvh|crrt|hemofiltration|hemodialysis|renal replacement)%'
)
SELECT
  a.icustay_id,
  CASE WHEN f.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS furosemid,
  CASE WHEN r.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS rrt,
  CASE WHEN adm.deathtime IS NOT NULL THEN 1 ELSE 0 END AS hospital_mortality,
  EXTRACT(EPOCH FROM (a.outtime - a.intime)) / 86400.0 AS icu_los_days,
  EXTRACT(EPOCH FROM (adm.dischtime - adm.admittime)) / 86400.0 AS hosp_los_days
FROM aki_icu a
JOIN admissions adm
  ON a.subject_id = adm.subject_id
 AND a.hadm_id    = adm.hadm_id
LEFT JOIN furo f
  ON a.icustay_id = f.icustay_id
LEFT JOIN rrt r
  ON a.icustay_id = r.icustay_id;
""")

aki_furo_rrt.head()


Unnamed: 0,icustay_id,furosemid,rrt,hospital_mortality,icu_los_days,hosp_los_days
0,200024,0,0,1,0.381238,10.167361
1,200030,0,0,0,6.155521,21.159028
2,200036,0,0,0,2.993322,2.95
3,200049,1,0,0,1.420428,23.210417
4,200053,1,0,0,5.990938,17.274306


In [18]:
df = aki_furo_rrt.copy()

df["group"] = np.select(
    [
        (df["furosemid"] == 0) & (df["rrt"] == 0),
        (df["furosemid"] == 1) & (df["rrt"] == 0),
        (df["furosemid"] == 0) & (df["rrt"] == 1),
        (df["furosemid"] == 1) & (df["rrt"] == 1),
    ],
    ["None", "Furo only", "RRT only", "Furo + RRT"],
    default="Unknown"
)

df["group"].value_counts()


group
None          9742
Furo only     2390
RRT only       377
Furo + RRT     370
Name: count, dtype: int64

In [19]:
mort_by_group = (
    df.groupby("group")["hospital_mortality"]
      .agg(n="count", deaths="sum", mortality_rate=lambda x: x.mean()*100)
      .sort_values("mortality_rate", ascending=False)
)

mort_by_group


Unnamed: 0_level_0,n,deaths,mortality_rate
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RRT only,377,160,42.440318
Furo + RRT,370,129,34.864865
,9742,2286,23.465408
Furo only,2390,443,18.535565


In [20]:
def los_agg(s):
    s = pd.to_numeric(s, errors="coerce").dropna()
    return pd.Series({
        "n": len(s),
        "median": s.median(),
        "p25": s.quantile(0.25),
        "p75": s.quantile(0.75),
        "mean": s.mean()
    })

icu_los_by_group = df.groupby("group")["icu_los_days"].apply(los_agg).unstack()
icu_los_by_group.loc[["None","Furo only","RRT only","Furo + RRT"]]


Unnamed: 0_level_0,n,median,p25,p75,mean
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,9742.0,2.591094,1.434358,5.198354,5.260006
Furo only,2390.0,4.746713,2.580584,8.953027,7.165541
RRT only,377.0,4.289514,2.104225,9.918333,8.310603
Furo + RRT,370.0,12.224144,6.704271,19.611652,15.387365


führt furo häufiger zu Dialysepflicht?

In [21]:
aki_furo_rrt_flag = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
furo AS (
  SELECT DISTINCT ie.icustay_id
  FROM inputevents_mv ie
  JOIN d_items di ON di.itemid = ie.itemid
  WHERE lower(di.label) SIMILAR TO '%(furosemide|lasix)%'
),
rrt AS (
  SELECT DISTINCT pe.icustay_id
  FROM procedureevents_mv pe
  JOIN d_items di ON di.itemid = pe.itemid
  WHERE lower(di.label) SIMILAR TO
        '%(dialysis|cvvh|crrt|hemofiltration|hemodialysis|renal replacement)%'
)
SELECT
  a.icustay_id,
  CASE WHEN f.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS furosemid,
  CASE WHEN r.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS rrt
FROM aki_icu a
LEFT JOIN furo f ON a.icustay_id = f.icustay_id
LEFT JOIN rrt  r ON a.icustay_id = r.icustay_id;
""")

aki_furo_rrt_flag.head()


Unnamed: 0,icustay_id,furosemid,rrt
0,200024,0,0
1,200030,0,0
2,200036,0,0
3,200049,1,0
4,200053,1,0


In [22]:
rrt_rate_by_furo = (
    aki_furo_rrt_flag
    .groupby("furosemid")["rrt"]
    .agg(
        n="count",
        rrt_cases="sum",
        rrt_rate=lambda x: x.mean() * 100
    )
)

rrt_rate_by_furo.rename(
    index={0: "No furosemid", 1: "Furosemid"},
    inplace=True
)

rrt_rate_by_furo


Unnamed: 0_level_0,n,rrt_cases,rrt_rate
furosemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
No furosemid,10119,377,3.725665
Furosemid,2760,370,13.405797


In [23]:
aki_furo_base = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
furo AS (
  SELECT DISTINCT ie.icustay_id
  FROM inputevents_mv ie
  JOIN d_items di ON di.itemid = ie.itemid
  WHERE lower(di.label) SIMILAR TO '%(furosemide|lasix)%'
)
SELECT
  a.icustay_id,
  a.subject_id,
  a.hadm_id,
  a.intime,
  CASE WHEN f.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS furosemid
FROM aki_icu a
LEFT JOIN furo f
  ON a.icustay_id = f.icustay_id;
""")

aki_furo_base.head()


Unnamed: 0,icustay_id,subject_id,hadm_id,intime,furosemid
0,200024,76603,179633,2127-03-03 16:09:07,0
1,200030,14987,165426,2150-11-13 14:08:02,0
2,200036,9960,159243,2181-08-26 16:38:25,0
3,200049,73241,149216,2118-08-28 08:56:44,1
4,200053,78895,100696,2166-02-27 18:45:49,1


In [26]:
urine_24h = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
uo AS (
  SELECT
    oe.icustay_id,
    SUM(oe.value) AS urine_ml_24h
  FROM outputevents oe
  JOIN aki_icu a ON a.icustay_id = oe.icustay_id
  WHERE oe.charttime >= a.intime
    AND oe.charttime <  a.intime + interval '24 hours'
  GROUP BY oe.icustay_id
)
SELECT * FROM uo;
""")

urine_24h.head()


Unnamed: 0,icustay_id,urine_ml_24h
0,200024,500.0
1,200030,4210.0
2,200036,1160.0
3,200053,4482.0
4,200063,1155.0


In [27]:
creat_48h = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
creat_item AS (
  SELECT itemid
  FROM d_labitems
  WHERE lower(label) = 'creatinine'
  LIMIT 1
),
creat AS (
  SELECT
    a.icustay_id,
    le.charttime,
    le.valuenum
  FROM labevents le
  JOIN aki_icu a
    ON a.subject_id = le.subject_id
   AND a.hadm_id    = le.hadm_id
  WHERE le.itemid = (SELECT itemid FROM creat_item)
    AND le.valuenum IS NOT NULL
    AND le.charttime >= a.intime
    AND le.charttime <  a.intime + interval '48 hours'
),
agg AS (
  SELECT
    icustay_id,
    MIN(valuenum) AS creat_min_48h,
    MAX(valuenum) AS creat_max_48h
  FROM creat
  GROUP BY icustay_id
),
firstval AS (
  SELECT DISTINCT ON (icustay_id)
    icustay_id,
    valuenum AS creat_first_48h
  FROM creat
  ORDER BY icustay_id, charttime
)
SELECT
  a.icustay_id,
  a.creat_min_48h,
  a.creat_max_48h,
  f.creat_first_48h
FROM agg a
JOIN firstval f USING (icustay_id);
""")

creat_48h.head()


Unnamed: 0,icustay_id,creat_min_48h,creat_max_48h,creat_first_48h
0,233338,0.3,0.7,0.7
1,235102,1.5,1.8,1.7
2,277169,2.9,3.8,2.9
3,297372,1.7,5.4,5.4
4,257858,0.9,1.8,1.8


In [28]:
df = aki_furo_base.merge(urine_24h, on="icustay_id", how="left").merge(creat_48h, on="icustay_id", how="left")
df.head()


Unnamed: 0,icustay_id,subject_id,hadm_id,intime,furosemid,urine_ml_24h,creat_min_48h,creat_max_48h,creat_first_48h
0,200024,76603,179633,2127-03-03 16:09:07,0,500.0,,,
1,200030,14987,165426,2150-11-13 14:08:02,0,4210.0,0.9,1.0,1.0
2,200036,9960,159243,2181-08-26 16:38:25,0,1160.0,1.0,1.4,1.4
3,200049,73241,149216,2118-08-28 08:56:44,1,,4.4,5.4,4.4
4,200053,78895,100696,2166-02-27 18:45:49,1,4482.0,2.4,4.4,2.4


In [29]:
def median_iqr(s):
    s = pd.to_numeric(s, errors="coerce").dropna()
    return pd.Series({
        "n": len(s),
        "median": s.median(),
        "p25": s.quantile(0.25),
        "p75": s.quantile(0.75),
        "mean": s.mean()
    })

summary_uo = df.groupby("furosemid")["urine_ml_24h"].apply(median_iqr).unstack()
summary_creat_first = df.groupby("furosemid")["creat_first_48h"].apply(median_iqr).unstack()
summary_creat_max = df.groupby("furosemid")["creat_max_48h"].apply(median_iqr).unstack()

summary_uo.rename(index={0:"No furosemid", 1:"Furosemid"}, inplace=True)
summary_creat_first.rename(index={0:"No furosemid", 1:"Furosemid"}, inplace=True)
summary_creat_max.rename(index={0:"No furosemid", 1:"Furosemid"}, inplace=True)

print("Urine output first 24h (ml):")
display(summary_uo)

print("Creatinine first value within 48h:")
display(summary_creat_first)

print("Creatinine max within 48h:")
display(summary_creat_max)


Urine output first 24h (ml):


Unnamed: 0_level_0,n,median,p25,p75,mean
furosemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
No furosemid,9523.0,1710.0,925.0,2815.0,2247.13952
Furosemid,2751.0,1891.0,1126.0,3035.0,2451.219157


Creatinine first value within 48h:


Unnamed: 0_level_0,n,median,p25,p75,mean
furosemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
No furosemid,9955.0,1.7,1.2,2.6,2.200211
Furosemid,2756.0,1.5,1.1,2.3,1.917235


Creatinine max within 48h:


Unnamed: 0_level_0,n,median,p25,p75,mean
furosemid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
No furosemid,9955.0,1.9,1.3,2.9,2.438825
Furosemid,2756.0,1.9,1.3,2.7,2.254136


In [30]:
aki_furo_vaso = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime,
    i.outtime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
furo AS (
  SELECT DISTINCT ie.icustay_id
  FROM inputevents_mv ie
  JOIN d_items di ON di.itemid = ie.itemid
  WHERE lower(di.label) SIMILAR TO '%(furosemide|lasix)%'
),
vaso AS (
  SELECT DISTINCT ie.icustay_id
  FROM inputevents_mv ie
  JOIN d_items di ON di.itemid = ie.itemid
  WHERE lower(di.label) SIMILAR TO
        '%(norepinephrine|noradrenaline|epinephrine|adrenaline|vasopressin|dopamine|phenylephrine)%'
),
rrt AS (
  SELECT DISTINCT pe.icustay_id
  FROM procedureevents_mv pe
  JOIN d_items di ON di.itemid = pe.itemid
  WHERE lower(di.label) SIMILAR TO
        '%(dialysis|cvvh|crrt|hemofiltration|hemodialysis|renal replacement)%'
)
SELECT
  a.icustay_id,
  CASE WHEN f.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS furosemid,
  CASE WHEN v.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS vasopressor,
  CASE WHEN r.icustay_id IS NOT NULL THEN 1 ELSE 0 END AS rrt,
  CASE WHEN adm.deathtime IS NOT NULL THEN 1 ELSE 0 END AS hospital_mortality,
  EXTRACT(EPOCH FROM (a.outtime - a.intime)) / 86400.0 AS icu_los_days,
  EXTRACT(EPOCH FROM (adm.dischtime - adm.admittime)) / 86400.0 AS hosp_los_days
FROM aki_icu a
JOIN admissions adm
  ON a.subject_id = adm.subject_id
 AND a.hadm_id    = adm.hadm_id
LEFT JOIN furo f ON a.icustay_id = f.icustay_id
LEFT JOIN vaso v ON a.icustay_id = v.icustay_id
LEFT JOIN rrt  r ON a.icustay_id = r.icustay_id;
""")

aki_furo_vaso.head()


Unnamed: 0,icustay_id,furosemid,vasopressor,rrt,hospital_mortality,icu_los_days,hosp_los_days
0,200024,0,1,0,1,0.381238,10.167361
1,200030,0,0,0,0,6.155521,21.159028
2,200036,0,0,0,0,2.993322,2.95
3,200049,1,0,0,0,1.420428,23.210417
4,200053,1,0,0,0,5.990938,17.274306


In [31]:
import numpy as np
import pandas as pd

df = aki_furo_vaso.copy()

df["group"] = np.select(
    [
        (df["furosemid"] == 0) & (df["vasopressor"] == 0),
        (df["furosemid"] == 1) & (df["vasopressor"] == 0),
        (df["furosemid"] == 0) & (df["vasopressor"] == 1),
        (df["furosemid"] == 1) & (df["vasopressor"] == 1),
    ],
    ["None", "Furo only", "Vaso only", "Furo + Vaso"],
    default="Unknown"
)

df["group"].value_counts()


group
None           9021
Furo + Vaso    1430
Furo only      1330
Vaso only      1098
Name: count, dtype: int64

In [32]:
rates_by_group = df.groupby("group").agg(
    n=("icustay_id", "count"),
    mortality_rate=("hospital_mortality", lambda x: x.mean() * 100),
    rrt_rate=("rrt", lambda x: x.mean() * 100)
).sort_values("mortality_rate", ascending=False)

rates_by_group.loc[["None","Furo only","Vaso only","Furo + Vaso"]]


Unnamed: 0_level_0,n,mortality_rate,rrt_rate
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
,9021,21.760337,1.662787
Furo only,1330,12.781955,6.992481
Vaso only,1098,43.989071,20.673953
Furo + Vaso,1430,28.111888,19.370629


In [33]:
def los_agg(s):
    s = pd.to_numeric(s, errors="coerce").dropna()
    return pd.Series({
        "n": len(s),
        "median": s.median(),
        "p25": s.quantile(0.25),
        "p75": s.quantile(0.75),
        "mean": s.mean()
    })

icu_los = df.groupby("group")["icu_los_days"].apply(los_agg).unstack()
hosp_los = df.groupby("group")["hosp_los_days"].apply(los_agg).unstack()

print("ICU LOS (days):")
display(icu_los.loc[["None","Furo only","Vaso only","Furo + Vaso"]])

print("Hospital LOS (days):")
display(hosp_los.loc[["None","Furo only","Vaso only","Furo + Vaso"]])


ICU LOS (days):


Unnamed: 0_level_0,n,median,p25,p75,mean
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,9021.0,2.620289,1.43103,5.468461,5.417296
Furo only,1330.0,3.436742,1.967705,6.105926,4.988285
Vaso only,1098.0,2.855775,1.698223,5.13798,5.01517
Furo + Vaso,1430.0,8.25816,4.364389,14.892034,11.317866


Hospital LOS (days):


Unnamed: 0_level_0,n,median,p25,p75,mean
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,9021.0,9.961806,5.406944,19.883333,15.692696
Furo only,1330.0,10.712847,6.708333,18.066146,15.260515
Vaso only,1098.0,7.681597,4.113368,16.959549,14.098976
Furo + Vaso,1430.0,15.757639,9.835243,24.95625,20.123863


In [None]:
import pandas as pd

aki_seq_landmark = q("""
WITH aki_icu AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime,
    i.outtime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),

params AS (
  SELECT interval '24 hours' AS lm
),

-- only keep stays that are still in ICU at least until the landmark time
lm_cohort AS (
  SELECT
    a.*,
    a.intime + (SELECT lm FROM params) AS landmark_time
  FROM aki_icu a
  WHERE a.outtime >= a.intime + (SELECT lm FROM params)
),

-- IMPORTANT: itemids vary by build. Replace these with your verified lists.
vaso_items AS (
  SELECT unnest(ARRAY[30047,30120,30119,30044,30043,30309,30307,30306])::int AS itemid
),
furo_items AS (
  SELECT unnest(ARRAY[221794,30123,221795])::int AS itemid
),

first_vaso AS (
  SELECT
    c.icustay_id,
    MIN(ie.starttime) AS t_vaso
  FROM lm_cohort c
  JOIN inputevents_mv ie
    ON ie.icustay_id = c.icustay_id
   AND ie.hadm_id    = c.hadm_id
   AND ie.subject_id = c.subject_id
  WHERE ie.statusdescription != 'Rewritten'
    AND ie.starttime >= c.intime
    AND ie.starttime <  c.landmark_time
    AND ie.itemid IN (SELECT itemid FROM vaso_items)
  GROUP BY c.icustay_id
),

first_furo AS (
  SELECT
    c.icustay_id,
    MIN(ie.starttime) AS t_furo
  FROM lm_cohort c
  JOIN inputevents_mv ie
    ON ie.icustay_id = c.icustay_id
   AND ie.hadm_id    = c.hadm_id
   AND ie.subject_id = c.subject_id
  WHERE ie.statusdescription != 'Rewritten'
    AND ie.starttime >= c.intime
    AND ie.starttime <  c.landmark_time
    AND ie.itemid IN (SELECT itemid FROM furo_items)
  GROUP BY c.icustay_id
),

times AS (
  SELECT
    c.icustay_id,
    c.subject_id,
    c.hadm_id,
    c.intime,
    c.outtime,
    c.landmark_time,
    v.t_vaso,
    f.t_furo
  FROM lm_cohort c
  LEFT JOIN first_vaso v USING (icustay_id)
  LEFT JOIN first_furo f USING (icustay_id)
),

seq AS (
  SELECT
    t.*,
    CASE
      WHEN t.t_vaso IS NULL AND t.t_furo IS NULL THEN 'None'
      WHEN t.t_vaso IS NOT NULL AND t.t_furo IS NULL THEN 'Vaso only'
      WHEN t.t_vaso IS NULL AND t.t_furo IS NOT NULL THEN 'Furo only'
      WHEN abs(extract(epoch from (t.t_vaso - t.t_furo))) <= 3600 THEN 'Near-simultaneous (<=1h)'
      WHEN t.t_vaso < t.t_furo THEN 'Vaso-first'
      WHEN t.t_furo < t.t_vaso THEN 'Furo-first'
      ELSE 'Other'
    END AS grp
  FROM times t
),

mortality AS (
  SELECT
    s.icustay_id,
    CASE
      WHEN a.deathtime IS NOT NULL AND a.deathtime > s.landmark_time THEN 1 ELSE 0
    END AS death_in_hosp_after_lm,
    CASE
      WHEN p.dod IS NOT NULL
       AND p.dod > s.landmark_time
       AND p.dod <= s.landmark_time + interval '30 day' THEN 1 ELSE 0
    END AS death_30d_after_lm
  FROM seq s
  JOIN admissions a
    ON a.subject_id = s.subject_id
   AND a.hadm_id    = s.hadm_id
  JOIN patients p
    ON p.subject_id = s.subject_id
),

rrt AS (
  SELECT
    s.icustay_id,
    CASE WHEN EXISTS (
      SELECT 1
      FROM procedures_icd pr
      WHERE pr.subject_id = s.subject_id
        AND pr.hadm_id    = s.hadm_id
        AND pr.icd9_code IN ('3995','5498','5497')
    ) THEN 1 ELSE 0 END AS rrt_any_hosp
  FROM seq s
),

final AS (
  SELECT
    s.grp,
    s.icustay_id,
    m.death_in_hosp_after_lm,
    m.death_30d_after_lm,
    r.rrt_any_hosp
  FROM seq s
  LEFT JOIN mortality m USING (icustay_id)
  LEFT JOIN rrt r USING (icustay_id)
)

SELECT
  grp,
  COUNT(*) AS n,
  100.0 * AVG(death_in_hosp_after_lm::float) AS mortality_in_hosp_rate_after_lm,
  100.0 * AVG(death_30d_after_lm::float)     AS mortality_30d_rate_after_lm,
  100.0 * AVG(rrt_any_hosp::float)           AS rrt_rate_any_hosp
FROM final
GROUP BY grp
ORDER BY
  CASE grp
    WHEN 'None' THEN 1
    WHEN 'Furo only' THEN 2
    WHEN 'Vaso only' THEN 3
    WHEN 'Near-simultaneous (<=1h)' THEN 4
    WHEN 'Furo-first' THEN 5
    WHEN 'Vaso-first' THEN 6
    ELSE 99
  END;
""")

aki_seq_landmark.head()


In [4]:
# Step 1) Find dopamine itemids (do this once, then hardcode them)
dop_items = q("""
SELECT itemid, label
FROM d_items
WHERE lower(label) LIKE '%dopamine%';
""")

display(dop_items.sort_values("itemid"))


Unnamed: 0,itemid,label
0,4501,DOPAMINE DRIP
2,5329,Dopamine
1,5805,DOPAMINE MICS/KG/MIN
3,30043,Dopamine
4,30307,Dopamine Drip
5,221662,Dopamine


In [5]:
# Step 2) Define AKI-ICU cohort (ICD9 584.*) with ICU times
aki_icu = q("""
SELECT DISTINCT
  i.icustay_id,
  i.subject_id,
  i.hadm_id,
  i.intime,
  i.outtime
FROM icustays i
JOIN diagnoses_icd d
  ON i.subject_id = d.subject_id
 AND i.hadm_id    = d.hadm_id
WHERE d.icd9_code LIKE '584%';
""")

display(aki_icu.head())
print("AKI ICU stays:", len(aki_icu))


Unnamed: 0,icustay_id,subject_id,hadm_id,intime,outtime
0,200024,76603,179633,2127-03-03 16:09:07,2127-03-04 01:18:06
1,200030,14987,165426,2150-11-13 14:08:02,2150-11-19 17:51:59
2,200036,9960,159243,2181-08-26 16:38:25,2181-08-29 16:28:48
3,200049,73241,149216,2118-08-28 08:56:44,2118-08-29 19:02:09
4,200053,78895,100696,2166-02-27 18:45:49,2166-03-05 18:32:46


AKI ICU stays: 12879


In [None]:
# Step 3) Pull dopamine infusions within first 24h (MetaVision inputevents_mv)
# IMPORTANT:
# - MIMIC stores rates in different units depending on item. We'll treat "rate" as the dose proxy.
# - Later you can restrict to items with units like 'mcg/kg/min' if you want stricter.
dopamine_24h = q("""
WITH aki AS (
  SELECT DISTINCT
    i.icustay_id,
    i.subject_id,
    i.hadm_id,
    i.intime,
    i.outtime
  FROM icustays i
  JOIN diagnoses_icd d
    ON i.subject_id = d.subject_id
   AND i.hadm_id    = d.hadm_id
  WHERE d.icd9_code LIKE '584%'
),
dop_items AS (
  SELECT itemid
  FROM d_items
  WHERE lower(label) LIKE '%dopamine%'
)
SELECT
  a.icustay_id,
  MIN(ie.starttime) AS t_dop_first,
  AVG(ie.rate)      AS rate_mean_24h,
  MAX(ie.rate)      AS rate_max_24h,
  COUNT(*)          AS n_admins_24h
FROM aki a
JOIN inputevents_mv ie
  ON ie.icustay_id = a.icustay_id
JOIN dop_items di
  ON di.itemid = ie.itemid
WHERE ie.statusdescription != 'Rewritten'
  AND ie.starttime >= a.intime
  AND ie.starttime <  a.intime + interval '24 hours'
  AND ie.rate IS NOT NULL
GROUP BY a.icustay_id;
""")

display(dopamine_24h.head())
print("Stays with dopamine in first 24h:", len(dopamine_24h))
