In [3]:
import pandas as pd


In [4]:
import duckdb
con = duckdb.connect('mimiciv.duckdb')


In [5]:
# Scr 项目编号（MIMIC-IV 中默认为 50912）
scr_itemid = 50912

# 提取 Scr 数据（入 ICU 后 7 天内）
query_scr = f"""
SELECT
    l.subject_id,
    l.hadm_id,
    l.charttime,
    l.valuenum AS scr,
    c.stay_id,
    c.intime
FROM labevents l
INNER JOIN dka_cohort c
    ON l.hadm_id = c.hadm_id
WHERE l.itemid = {scr_itemid}
"""
scr_df = con.execute(query_scr).df()


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [6]:
# 时间处理
scr_df['charttime'] = pd.to_datetime(scr_df['charttime'])
scr_df['intime'] = pd.to_datetime(scr_df['intime'])
scr_df['hours_from_icu'] = (scr_df['charttime'] - scr_df['intime']).dt.total_seconds() / 3600

# 筛选 ICU 后 0-168 小时（7 天）内的值
scr_df = scr_df[(scr_df['hours_from_icu'] >= 0) & (scr_df['hours_from_icu'] <= 168)]


In [7]:
aki_list = []

for stay_id, group in scr_df.groupby('stay_id'):
    baseline = group[group['hours_from_icu'] <= 12]['scr'].min()  # baseline 取最早 12h 最低值
    peak_48h = group[group['hours_from_icu'] <= 48]['scr'].max()  # 48h 峰值

    # AKI 判定逻辑
    if pd.notnull(baseline) and pd.notnull(peak_48h):
        if (peak_48h - baseline >= 0.3) or (peak_48h >= 1.5 * baseline):
            aki_list.append((stay_id, 1))
        else:
            aki_list.append((stay_id, 0))
    else:
        aki_list.append((stay_id, 0))  # 缺失数据视为未发生


In [9]:
aki_df = pd.DataFrame(aki_list, columns=['stay_id', 'aki_label'])

# 保存 AKI 标签临时表（供后续使用）
con.register('aki_temp', aki_df)

# # 构建新 cohort，附带 AKI 标签
# query_merge = """
# CREATE OR REPLACE VIEW dka_with_aki AS
# SELECT c.*, a.aki_label
# FROM dka_cohort c
# LEFT JOIN aki_temp a ON c.stay_id = a.stay_id
# """
# con.execute(query_merge)


# 将 AKI 标签写入临时 DuckDB 表（替代 register）
aki_df.to_parquet("aki_table.parquet")  # ✅ 持久化成文件
con.execute("CREATE OR REPLACE TABLE aki_table AS SELECT * FROM read_parquet('aki_table.parquet');")


# 删除已有的 View
con.execute("DROP VIEW IF EXISTS dka_with_aki")

# 构建并保存最终 dka_with_aki 表
query_merge = """
CREATE OR REPLACE TABLE dka_with_aki AS
SELECT c.*, a.aki_label
FROM dka_cohort c
LEFT JOIN aki_table a ON c.stay_id = a.stay_id
"""
con.execute(query_merge)



<duckdb.duckdb.DuckDBPyConnection at 0x7f6ccfd61630>

In [10]:
# 查看新 cohort 表前几行
con.execute("SELECT * FROM dka_with_aki LIMIT 5").df()

# 查看标签分布
con.execute("SELECT aki_label, COUNT(*) FROM dka_with_aki GROUP BY aki_label").df()


Unnamed: 0,aki_label,count_star()
0,,13
1,0.0,927
2,1.0,460
