In [1]:
import sys
from pathlib import Path
import pandas as pd

PROJECT_ROOT = Path.cwd().resolve().parents[0]
sys.path.insert(0, str(PROJECT_ROOT))

In [9]:
from src.cohort import load_aki_cohort
from src.utils import (
    add_icu_los_days,
    add_dialysis_flag,
    add_early_dopamine_flag,
    add_sofa_score,
    add_vasopressor_flags,
    add_mechanical_ventilation_flag,
    add_early_late_dialysis_flags,
    extract_dialysis_timing,              # falls in src.utils vorhanden
    add_dialysis_near_icu_discharge_flag,
    recode_ethnicity,
    add_rrt_persistence_near_discharge,
)

# 1) Load cohort
df_aki = load_aki_cohort()

# 2) Ensure time columns are datetime (super wichtig!)
for c in ["intime", "outtime", "admittime", "dischtime", "deathtime"]:
    if c in df_aki.columns:
        df_aki[c] = df_aki[c].astype("datetime64[ns]")

# 3) Add baseline / outcomes
df_aki = add_icu_los_days(df_aki)

# 4) Dialysis "ever" flag (pragmatic; ICD + MV)
df_aki = add_dialysis_flag(df_aki)

# 5) SOFA + interventions
df_aki = add_sofa_score(df_aki)
df_aki = add_mechanical_ventilation_flag(df_aki)

# 6) Vasopressors / dopamine early flags
df_aki = add_vasopressor_flags(df_aki, window_hours=24)
df_aki = add_early_dopamine_flag(df_aki, window_hours=24)

# 7) Ethnicity grouping
df_aki = recode_ethnicity(df_aki)   # sollte eine neue Spalte wie ethnicity_grp erzeugen

# 8) Timing-aware dialysis (nur MV Events)
df_aki = add_early_late_dialysis_flags(
    df_aki,
    window_hours=24,
    include_inputevents=True,
    allow_negative_hours=False
)

# 9) Optional: Dialysis start/end/duration (falls du die Funktion hast)
# df_aki = extract_dialysis_timing(df_aki)

# 10) Dialysis near ICU discharge (last 6h)
df_aki = add_dialysis_near_icu_discharge_flag(
    df_aki,
    hours_before_discharge=6,
    include_inputevents=True
)
df_aki = extract_dialysis_timing(df_aki)
df_aki = add_rrt_persistence_near_discharge(
    df_aki,
    hours_before_discharge=6
) 

# 11) Quick sanity checks
print("Rows:", len(df_aki))
print(df_aki[[
    "dialysis", "dialysis_timed", "dialysis_icd_only",
    "early_dialysis", "late_dialysis",
    "any_vasopressor", "mechanical_ventilation"
]].value_counts().head(15))

df_aki.head()


Rows: 10485
dialysis  dialysis_timed  dialysis_icd_only  early_dialysis  late_dialysis  any_vasopressor  mechanical_ventilation
0         0               0                  0               0              0                0                         4691
                                                                                             1                         3145
                                                                            1                1                          902
1         0               1                  0               0              0                1                          449
0         0               0                  0               0              1                0                          357
1         0               1                  0               0              0                0                          311
          1               0                  0               1              1                1                          137
    

Unnamed: 0,subject_id,hadm_id,icustay_id,intime,outtime,gender,dob,admittime,dischtime,deathtime,...,late_dialysis,dialysis_icd_only,dialysis_last_6h,dialysis_start,dialysis_end,dialysis_duration_hours,rrt_any_in_last6h,rrt_active_at_outtime,rrt_persistent_last6h,rrt_max_overlap_hours_in_window
0,3,145834,211552,2101-10-20 19:10:11,2101-10-26 20:43:09,M,2025-04-11,2101-10-20 19:08:00,2101-10-31 13:58:00,NaT,...,0,0,0,NaT,NaT,,0,0,0,0.0
1,9,150750,220597,2149-11-09 13:07:02,2149-11-14 20:52:14,M,2108-01-26,2149-11-09 13:06:00,2149-11-14 10:15:00,2149-11-14 10:15:00,...,0,0,0,NaT,NaT,,0,0,0,0.0
2,21,109451,217847,2134-09-11 20:50:04,2134-09-17 18:28:32,M,2047-04-04,2134-09-11 12:17:00,2134-09-24 16:15:00,NaT,...,0,1,0,NaT,NaT,,0,0,0,0.0
3,38,185910,248910,2166-08-10 00:29:36,2166-09-04 13:39:23,M,2090-08-31,2166-08-10 00:28:00,2166-09-04 11:30:00,NaT,...,0,0,0,NaT,NaT,,0,0,0,0.0
4,52,190797,261857,2191-01-10 02:12:55,2191-01-11 22:37:31,M,2152-11-26,2191-01-10 02:12:00,2191-01-19 16:10:00,NaT,...,0,0,0,NaT,NaT,,0,0,0,0.0


In [15]:
sub = df_aki[
    (df_aki["sofa"] >= 8) &
    (df_aki["sofa_renal"] >= 3) 
].copy()

print(sub["ethnicity_grp"].value_counts())
sub["sofa"].describe()


ethnicity_grp
White       1062
Other        226
Black        172
Hispanic      39
Asian         30
Name: count, dtype: int64


count    1529.000000
mean       11.302158
std         2.946335
min         8.000000
25%         9.000000
50%        11.000000
75%        13.000000
max        22.000000
Name: sofa, dtype: float64

In [14]:
out = (
    sub
    .groupby(["ethnicity_grp", "any_vasopressor"])
    ["rrt_persistent_last6h"]
    .mean()
    .reset_index()
)

out["rate_pct"] = out["rrt_persistent_last6h"] * 100
print(out)


  ethnicity_grp  any_vasopressor  rrt_persistent_last6h   rate_pct
0         Asian                0               0.142857  14.285714
1         Asian                1               0.142857  14.285714
2         Black                0               0.088235   8.823529
3         Black                1               0.193548  19.354839
4      Hispanic                0               0.200000  20.000000
5      Hispanic                1               0.666667  66.666667
6         Other                0               0.061856   6.185567
7         Other                1               0.400000  40.000000
8         White                0               0.089850   8.985025
9         White                1               0.232044  23.204420


In [18]:
sub.groupby(["ethnicity_grp", "any_vasopressor"])["rrt_persistent_last6h"].mean()
print(sub.groupby(["ethnicity_grp", "any_vasopressor"])["rrt_persistent_last6h"].value_counts())


ethnicity_grp  any_vasopressor  rrt_persistent_last6h
Asian          0                0                         14
                                1                          3
               1                0                         10
                                1                          3
Black          0                0                        112
                                1                         12
               1                0                         37
                                1                         11
Hispanic       0                0                         23
                                1                          5
               1                0                          6
                                1                          5
Other          0                0                        145
                                1                          7
               1                0                         45
                               