In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import random
import seaborn as sns
pd.options.display.max_rows=300
pd.set_option('display.max_columns', None)

In [2]:
df_type = pd.read_csv("../../02.AUMCdb_SAKI_trajCluster/df_mixAK_fea3_C3_aumc.csv")
df_type_filt = df_type.loc[:,["stay_id","groupHPD"]]
df_type_filt = df_type_filt.drop_duplicates()
all_lst = df_type_filt.stay_id.unique().tolist()
print("纳入患者数量：",len(all_lst))

df_type_filt_count = pd.DataFrame(df_type_filt["groupHPD"].value_counts()).reset_index()
df_type_filt_count.columns = ["groupHPD","count_all"]
df_type_filt_count

纳入患者数量： 2183


Unnamed: 0,groupHPD,count_all
0,2,1400
1,1,564
2,3,219


In [3]:
df_time = pd.read_csv("/public/hanl/jupyter_dir/kidney_sepsis_penotype_v3/00.data_aumc/disease_definition/AKI/aumcdb_sk_event_time.csv")
df_time = df_time[["stay_id","sepsis_onset", "saki_onset"]]
df_time = pd.merge(df_type_filt, df_time, how="inner", on="stay_id")
df_time.head(2)

Unnamed: 0,stay_id,groupHPD,sepsis_onset,saki_onset
0,5,2,0.0,3.0
1,15,2,1.0,3.0


# 获取输入数据

## 体重 + age + gender

In [4]:
df_demo = pd.read_csv("../../00.data_aumc//feature_data/df_aumc_basicinfo.csv")
df_demo = df_demo[["stay_id","weight", 'gender', 'age',"height"]]
df_demo = df_demo[df_demo["stay_id"].isin(all_lst)].drop_duplicates()
df_demo.loc[df_demo["weight"]<10,"weight"] = np.nan
df_demo["weight"].fillna(df_demo["weight"].mean(), inplace=True)

df_demo.loc[df_demo["gender"]=="M","gender"] = 1
df_demo.loc[df_demo["gender"]=="F","gender"] = 0
df_demo.loc[df_demo["gender"].isnull(),"gender"] = 2
df_demo.gender = df_demo.gender.astype("int64")

df_base_crea = pd.read_csv("/public/hanl/jupyter_dir/kidney_sepsis_penotype_v3/00.data_aumc//disease_definition/AKI/baseline_creatinine.csv")
df_base_crea.columns = ["stay_id","baseline_Scr"]
df_base_crea["baseline_Scr"] *= 0.01131
df_demo = pd.merge(df_demo,df_base_crea,how="left",on="stay_id")
print(len(df_demo.stay_id.unique()))
df_demo.head(1)

2183


Unnamed: 0,stay_id,weight,gender,age,height,baseline_Scr
0,5,65.0,1,55,165.0,0.63336


In [5]:
import miceforest as mf
kernel = mf.ImputationKernel(
    df_demo,
    datasets=3,
    save_all_iterations=True,
    random_state=10
)
kernel.mice(iterations = 3, n_jobs=-1)
df_demo = kernel.complete_data(dataset=1)
df_demo.head(1)

Unnamed: 0,stay_id,weight,gender,age,height,baseline_Scr
0,5,65.0,1,55,165.0,0.63336


## 利尿剂
- 单位转换
'Furosemide (Lasix)', 'Bumetanide (Burinex)'
![image.png](attachment:image.png)

In [6]:
df_di = pd.read_csv("/public/hanl/jupyter_dir/database/AMUCdb/clean/medication/AUMCdb_diuretics.csv")
df_di = df_di[df_di.stay_id.isin(all_lst)]
print(df_di.drug.unique(),df_di.drug.value_counts())
# 药物单位转换
df_di.loc[df_di["drug"]=='Bumetanide (Burinex)',"administered"] *= 80
df_di = df_di[['stay_id','starttime', 'endtime', 'administered']]
df_di.columns = ['stay_id', 'starttime', 'endtime','amount']
df_di.head(1)

Unnamed: 0,stay_id,starttime,endtime,amount
0,4,2212.0,2213.0,20.0


In [7]:
import datetime
def dateHourRange(beginDateHour, endDateHour):
    dhours = []
    dhour = beginDateHour 
    date = beginDateHour
    while date <= endDateHour:
        dhours.append(date)
        dhour = dhour + 61
        date = dhour 
    return dhours

tem_di = df_di
tem_di["time_lst"] = tem_di.apply(lambda x: dateHourRange(x['starttime'],x['endtime']),axis=1)
tem_di["time_num"] = tem_di["time_lst"].map(lambda x: len(x))
tem_di["amount_hourly"] =  tem_di["amount"]/tem_di["time_num"] 

tem_di['idx'] = range(len(tem_di))
tem_add = tem_di.set_index('idx').time_lst.apply(pd.Series).stack().reset_index(level=0).rename(columns={0:'diu_starttime'})
tem_di_hourly = pd.merge(tem_di, tem_add,how="inner", on="idx")
tem_di_hourly["diu_endtime"] = tem_di_hourly["diu_starttime"] + 60
tem_di_hourly['diu_endtime'] = tem_di_hourly[['diu_endtime', 'endtime']].min(axis=1) 
tem_di_hourly = tem_di_hourly.loc[:,['stay_id', 'diu_starttime','diu_endtime',  'amount_hourly']] # 'starttime', 'endtime', 'amount',
tem_di_hourly = tem_di_hourly.rename(columns={"amount_hourly":"amount"})
tem_di_hourly.head(2)

Unnamed: 0,stay_id,diu_starttime,diu_endtime,amount
0,4,2212.0,2213.0,20.0
1,5,2570.0,2571.0,20.0


In [8]:
# 24h hour window 
df_di = tem_di_hourly
df_di["diu_starttime"] = df_di["diu_starttime"]/60
df_di = df_di[df_di["stay_id"].isin(all_lst)]
df_time_cut = df_time[['stay_id', 'saki_onset']]
df_di = pd.merge(df_di,df_time_cut, how="right",on="stay_id")

df_di["time"] = (df_di["diu_starttime"] - df_di["saki_onset"])/24 #.map(lambda x:x/np.timedelta64(24, 'h')).tolist()
df_di["time"] = df_di["time"].map(lambda x:x//1) # -1指sepsis前6小时的数据，0指sepsis后第一个6小时的数据
df_di["time"] = df_di["time"].map(lambda x: x+1 if x>=0 else x)
df_di = df_di[(df_di["time"]>=-2) &(df_di["time"]<=7)] # 前七天与后7天
df_di = df_di.groupby(["stay_id","time"]).agg({"amount":"sum"}).reset_index()
df_di = df_di.rename(columns={"amount":"furosemide_amount"})
df_di["is_diu"] = 0
df_di.loc[df_di["furosemide_amount"]!=0,"is_diu"] = 1
df_di.head(1)

Unnamed: 0,stay_id,time,furosemide_amount,is_diu
0,5,2.0,20.0,1


## 液体给入

In [9]:
df_fluid = pd.read_csv("/public/hanl/jupyter_dir/database/AMUCdb/clean/treatment//AUMCdb_fluid_NoNut_hourly.csv")
df_fluid = df_fluid[df_fluid["stay_id"].isin(all_lst)]
df_fluid = df_fluid[df_fluid["amount"]<9000]

df_fluid = df_fluid[['stay_id', 'fluid_starttime', 'fluid_endtime','amount_hourly']]
df_fluid["fluid_starttime"] = df_fluid["fluid_starttime"]/60
df_fluid["fluid_endtime"] = df_fluid["fluid_endtime"]/60

df_time_cut = df_time[['stay_id', 'saki_onset']]
df_fluid = pd.merge(df_fluid, df_time_cut, how="right",on="stay_id")

df_fluid["time"] = (df_fluid["fluid_starttime"] - df_fluid["saki_onset"])/24
df_fluid["time"] = df_fluid["time"].map(lambda x:x//1) # -1指sepsis前6小时的数据，0指sepsis后第一个6小时的数据
df_fluid["time"] = df_fluid["time"].map(lambda x: x+1 if x>=0 else x)
df_fluid = df_fluid[(df_fluid["time"]>=-2) &(df_fluid["time"]<=7)] # 前七天与后7天
df_fluid = df_fluid.groupby(["stay_id", "time"]).agg({"amount_hourly":"sum"}).reset_index()
df_fluid = df_fluid.rename(columns={"amount_hourly":"colloid_bolus"})
print(len(df_fluid.stay_id.unique()))
df_fluid.head(1)

2173


Unnamed: 0,stay_id,time,colloid_bolus
0,5,-1.0,1206.381429


## 尿量

In [10]:
df_uo = pd.read_csv("../../00.data_aumc/feature_data/aumc_icu_feature.csv")
df_uo = df_uo[['stay_id', 'charttime','urineoutput']]

df_uo = df_uo[df_uo["stay_id"].isin(all_lst)]

df_time_cut = df_time[['stay_id','saki_onset']]
df_uo = pd.merge(df_uo,df_time_cut, how="inner",on="stay_id")

df_uo["time"] = (df_uo["charttime"] - df_uo["saki_onset"])/24
df_uo["time"] = df_uo["time"].map(lambda x:x//1) # -1指sepsis前6小时的数据，0指sepsis后第一个6小时的数据
df_uo["time"] = df_uo["time"].map(lambda x: x+1 if x>=0 else x)
df_uo = df_uo[(df_uo["time"]>=-2) &(df_uo["time"]<=7)] # 前七天与后7天
df_uo = df_uo.groupby(["stay_id","time"]).agg({"urineoutput":"sum"}).reset_index()
print(len(df_uo.stay_id.unique()))
df_uo.head(1)

2183


Unnamed: 0,stay_id,time,urineoutput
0,5,-1.0,40.0


## 液体平衡 

In [11]:
df_fb_fluid = pd.read_csv("/public/hanl/jupyter_dir/database/AMUCdb/clean/treatment//AUMCdb_fluid_addNut_hourly.csv")
df_fb_fluid = df_fb_fluid[df_fb_fluid["stay_id"].isin(all_lst)]
df_fb_fluid = df_fb_fluid[['stay_id', 'fluid_starttime', 'fluid_endtime','amount_hourly']]
df_fb_fluid["fluid_starttime"] = df_fb_fluid["fluid_starttime"]/60
df_fb_fluid["fluid_endtime"] = df_fb_fluid["fluid_endtime"]/60

df_time_cut = df_time[['stay_id', 'saki_onset']]
df_fb_fluid = pd.merge(df_fb_fluid, df_time_cut, how="right",on="stay_id")

df_fb_fluid["time"] = (df_fb_fluid["fluid_starttime"] - df_fb_fluid["saki_onset"])/24
df_fb_fluid["time"] = df_fb_fluid["time"].map(lambda x:x//1) # -1指sepsis前6小时的数据，0指sepsis后第一个6小时的数据
df_fb_fluid["time"] = df_fb_fluid["time"].map(lambda x: x+1 if x>=0 else x)
df_fb_fluid = df_fb_fluid[(df_fb_fluid["time"]>=-2) &(df_fb_fluid["time"]<=7)] # 前七天与后7天
df_fb_fluid = df_fb_fluid.groupby(["stay_id", "time"]).agg({"amount_hourly":"sum"}).reset_index()
df_fb_fluid = df_fb_fluid.rename(columns={"amount_hourly":"colloid_bolus"})

In [12]:
# 生成累计液体信息
df_fb = pd.merge(df_fb_fluid, df_uo, how="outer",on=["stay_id","time"])
df_fb= df_fb.sort_values(["stay_id","time"])
df_fb = df_fb.fillna(0)
df_fb["fluidbance"] = round(df_fb["colloid_bolus"] - df_fb["urineoutput"], 2)
df_fb["tmp_fluidbance"] = df_fb["fluidbance"]
df_fb.loc[df_fb["time"]<=0,"tmp_fluidbance"] = 0
df_fb["cum_fluidbance"] = df_fb.groupby('stay_id')['tmp_fluidbance'].cumsum()
df_fb = df_fb[["stay_id","time","fluidbance","cum_fluidbance"]]
df_fb.head(1)

Unnamed: 0,stay_id,time,fluidbance,cum_fluidbance
0,5,-1.0,1166.38,0.0


## 特征-psm

In [13]:
df_fea = pd.read_csv("../../00.data_aumc/feature_data/aumc_icu_feature.csv")
df_fea = df_fea.drop(["urineoutput"],axis=1)
df_fea = df_fea[['stay_id', 'charttime', 'baseexcess', 'rbc', 'mch', 'mcv', 'mchc',
       'rdw', 'wbc', 'hematocrit', 'hemoglobin', 'pt', 'ptt', 'platelet',
       'dbp', 'mbp', 'sbp', 'glucose', 'lactate', 'ph', 'temperature',
       'resp_rate', 'heart_rate', 'spo2', 'bicarbonate', 'aniongap', 'calcium',
       'sodium', 'potassium', 'chloride', 'creatinine', 'gcs', 'gcs_eyes',
       'gcs_motor', 'gcs_verbal', 'po2', 'pco2', 'fio2' ]]

df_time_cut = df_time[['stay_id','saki_onset']]
df_fea = pd.merge(df_fea,df_time_cut, how="inner",on="stay_id")

df_fea["time"] = (df_fea["charttime"] - df_fea["saki_onset"])/24
df_fea["time"] = df_fea["time"].map(lambda x:x//1) # -1指sepsis前6小时的数据，0指sepsis后第一个6小时的数据
df_fea["time"] = df_fea["time"].map(lambda x: x+1 if x>=0 else x)
df_fea = df_fea[(df_fea["time"]>=-2) &(df_fea["time"]<=7)] # 前七天与后7天

df_fea = df_fea.groupby(["stay_id","time"]).agg('mean').reset_index()
df_fea.head(1)

Unnamed: 0,stay_id,time,charttime,baseexcess,rbc,mch,mcv,mchc,rdw,wbc,hematocrit,hemoglobin,pt,ptt,platelet,dbp,mbp,sbp,glucose,lactate,ph,temperature,resp_rate,heart_rate,spo2,bicarbonate,aniongap,calcium,sodium,potassium,chloride,creatinine,gcs,gcs_eyes,gcs_motor,gcs_verbal,po2,pco2,fio2,saki_onset
0,5,-1.0,0.5,4.2,,30.100952,96.0,30.6147,,20.600001,0.443333,14.28686,10.3,34.0,,70.2341,84.676544,114.899234,76.200002,1.0,7.33,35.8,19.124155,118.30542,98.000002,21.0,11.5,11.1,140.0,4.0,105.0,0.6554,15.0,4.0,6.0,5.0,162.0,42.0,41.0,3.0


## 特征-psm sofa 

In [14]:
df_sofa = pd.read_csv("../../04.other_feature_in_three_dataset/03.sofa_feature/aumc_sofa_clean.csv")
df_sofa.head(1)

Unnamed: 0,stay_id,time,respiration_sofa,coagulation_sofa,liver_sofa,cardiovascular_sofa,cns_sofa,renal_sofa,sofa,groupHPD
0,5,-2,0.0,0.0,0.0,4.0,0.0,0.0,4.0,2.0


In [15]:
df_sofa = pd.read_csv("../../04.other_feature_in_three_dataset/03.sofa_feature/aumc_sofa_clean.csv")
df_sofa = df_sofa.rename(columns={"time":"charttime"})

df_time_cut = df_time[['stay_id','saki_onset']]
df_sofa = pd.merge(df_sofa,df_time_cut, how="inner",on="stay_id")

df_sofa["time"] = (df_sofa["charttime"] - df_sofa["saki_onset"])/24
df_sofa["time"] = df_sofa["time"].map(lambda x:x//1) # -1指sepsis前6小时的数据，0指sepsis后第一个6小时的数据
df_sofa["time"] = df_sofa["time"].map(lambda x: x+1 if x>=0 else x)
df_sofa = df_sofa[(df_sofa["time"]>=-2) &(df_sofa["time"]<=7)] # 前七天与后7天

df_sofa = df_sofa.groupby(["stay_id","time"]).agg('mean').reset_index()
df_sofa = df_sofa.drop(['groupHPD',"charttime","saki_onset"],axis=1)
df_sofa.head(1)

Unnamed: 0,stay_id,time,respiration_sofa,coagulation_sofa,liver_sofa,cardiovascular_sofa,cns_sofa,renal_sofa,sofa
0,5,-1.0,0.5,0.0,0.0,2.5,0.0,0.75,3.75


## 生存

In [16]:
df_sur = pd.read_csv("../../02.AUMCdb_SAKI_trajCluster/sk_survival.csv")
df_sur = df_sur[['stay_id','groupHPD', 'mortality_28d', 'survival_28day']]
df_sur.head(1)

Unnamed: 0,stay_id,groupHPD,mortality_28d,survival_28day
0,5,2,0,2.0


In [17]:
df_dia = pd.read_csv("/public/hanl/jupyter_dir/database/AMUCdb/raw/diagnoses/combined_diagnoses.csv")
df_dia = df_dia[["admissionid","diagnosis"]] # Congestief hartfalen  'CHF, congestive heart failure'
df_dia = df_dia[~df_dia["diagnosis"].isnull()]
df_CHF = df_dia[df_dia['diagnosis'].str.contains('CHF',case=False)]#.diagnosis.unique()
df_CHF["Congestive_heart_failure"] = 1
df_CHF = df_CHF[["admissionid","Congestive_heart_failure"]]
df_CHF.columns = ["stay_id","Congestive_heart_failure"]
df_CHF.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_CHF["Congestive_heart_failure"] = 1


Unnamed: 0,stay_id,Congestive_heart_failure
82,231,1


## 合并
分类型
- df_disease
- df_basic
- df_weight
- df_sur

连续型
- df_fb
- df_uo
- df_fluid
- df_fea
- df_di

In [18]:
# 连续型变量合并
df_m1 = pd.merge(df_fea,df_fluid,how="outer",on=["stay_id","time"])
df_m1 = pd.merge(df_m1,df_di,how="outer",on=["stay_id","time"])
df_m1 = pd.merge(df_m1, df_uo, how="outer",on=["stay_id","time"])
df_m1 = pd.merge(df_m1, df_fb, how="outer",on=["stay_id","time"])
df_m1 = pd.merge(df_m1, df_sofa, how="outer",on=["stay_id","time"])

df_m1 = df_m1[df_m1["stay_id"].isin(all_lst)]

df_m1.loc[df_m1["time"]==-1,"time"] = 0
df_m1.loc[df_m1["time"]==-2,"time"] = -1

com_lst = []
for i in df_m1.stay_id.unique().tolist():
    max_t = int(max(df_m1[df_m1.stay_id==i].time))
    for j in range(-1,max_t+1,1):
        com_lst.append([i,j])
df_complete = pd.DataFrame(com_lst,columns=["stay_id", "time"])
df_m1 = pd.merge(df_complete,df_m1,how="left",on=["stay_id","time"])
df_m1[["colloid_bolus","urineoutput","furosemide_amount","is_diu","fluidbance","cum_fluidbance"]] = df_m1[["colloid_bolus","urineoutput","furosemide_amount","is_diu","fluidbance","cum_fluidbance"]].fillna(0)

df_m1['stay_id_tmp'] = df_m1['stay_id']
df_m1 = df_m1.groupby('stay_id_tmp').fillna(method='ffill').fillna(method='bfill')
df_m1.loc[df_m1["time"]==-1,"time"] = -2
df_m1.loc[df_m1["time"]==0,"time"] = -1
df_m1.head(1)

Unnamed: 0,stay_id,time,charttime,baseexcess,rbc,mch,mcv,mchc,rdw,wbc,hematocrit,hemoglobin,pt,ptt,platelet,dbp,mbp,sbp,glucose,lactate,ph,temperature,resp_rate,heart_rate,spo2,bicarbonate,aniongap,calcium,sodium,potassium,chloride,creatinine,gcs,gcs_eyes,gcs_motor,gcs_verbal,po2,pco2,fio2,saki_onset,colloid_bolus,furosemide_amount,is_diu,urineoutput,fluidbance,cum_fluidbance,respiration_sofa,coagulation_sofa,liver_sofa,cardiovascular_sofa,cns_sofa,renal_sofa,sofa
0,5,-2,0.5,4.2,3.0,30.100952,96.0,30.6147,13.9,20.600001,0.443333,14.28686,10.3,34.0,,70.2341,84.676544,114.899234,76.200002,1.0,7.33,35.8,19.124155,118.30542,98.000002,21.0,11.5,11.1,140.0,4.0,105.0,0.6554,15.0,4.0,6.0,5.0,162.0,42.0,41.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,2.5,0.0,0.75,3.75


In [19]:
# 分类变量合并
df_m2 = pd.merge(df_sur,df_demo,how="left",on=["stay_id"])
df_m2 = pd.merge(df_m2,df_CHF,how="left",on=["stay_id"])
df_m2 = df_m2.fillna(0)
df_m2.head(1)

Unnamed: 0,stay_id,groupHPD,mortality_28d,survival_28day,weight,gender,age,height,baseline_Scr,Congestive_heart_failure
0,5,2,0,2.0,65.0,1,55,165.0,0.63336,0.0


In [20]:
df_m = pd.merge(df_m1, df_m2, how="inner",on="stay_id")
df_sur_add = df_m 
df_sur_add["furosemide_amount_kg"] = round(df_sur_add["furosemide_amount"] / df_sur_add["weight"],2)
df_sur_add['sofa_norenal'] = df_sur_add['sofa'] - df_sur_add['renal_sofa']

tmp = df_sur_add.groupby(["stay_id"]).agg({"furosemide_amount":"sum"}).reset_index()
tmp["is_diu_byid"] = 1
tmp.loc[tmp["furosemide_amount"]==0,"is_diu_byid"] = 0
tmp = tmp[["stay_id","is_diu_byid"]]
df_sur_add = pd.merge(df_sur_add,tmp,how="inner",on="stay_id")


tmp = df_sur_add[(df_sur_add["is_diu"]!=0)]  
tmp = tmp.sort_values(["stay_id","time"])
tmp = tmp.groupby("stay_id").first().reset_index()
tmp =  tmp[["stay_id","time","furosemide_amount"]]
tmp.columns = ["stay_id", "first_use_time","first_use_amount"]
df_sur_add = pd.merge(df_sur_add,tmp,how="left",on="stay_id")
df_sur_add.to_csv("./aumcdb_diuretic_survival_all.csv",index=False)

# 取和mimic交集的特征
aumc_fea = df_sur_add.columns
tmp_df = pd.read_csv("../01.mimic/mimic_diuretic_survival_all.csv") 
tmp_df = tmp_df.rename(columns={'admission_age':'age'})
mimic_fea = tmp_df.columns
inter_lst = list(set(aumc_fea).intersection(set(mimic_fea)))
df_sur_add = df_sur_add[inter_lst] 
df_sur_add[["first_use_time","first_use_amount"]] = df_sur_add[["first_use_time","first_use_amount"]].fillna(0)
df_sur_add["rdw"] = df_sur_add["rdw"].fillna(df_sur_add["rdw"].mean())
df_sur_add.to_csv("./aumcdb_diuretic_survival.csv",index=False)
df_sur_add.head(2)


Unnamed: 0,time,heart_rate,weight,survival_28day,rdw,first_use_time,platelet,sbp,gcs_verbal,liver_sofa,furosemide_amount_kg,mch,po2,potassium,mortality_28d,first_use_amount,creatinine,stay_id,mchc,temperature,spo2,gcs_motor,furosemide_amount,baseline_Scr,gcs_eyes,Congestive_heart_failure,cardiovascular_sofa,glucose,groupHPD,rbc,dbp,fluidbance,is_diu_byid,hemoglobin,cns_sofa,resp_rate,calcium,sodium,aniongap,height,coagulation_sofa,is_diu,bicarbonate,pt,gender,mcv,urineoutput,wbc,chloride,colloid_bolus,pco2,baseexcess,respiration_sofa,renal_sofa,sofa_norenal,hematocrit,mbp,ptt,ph,lactate,sofa,cum_fluidbance,gcs,fio2,age
0,-2,118.30542,65.0,2.0,13.9,2.0,,114.899234,5.0,0.0,0.0,30.100952,162.0,4.0,0,20.0,0.6554,5,30.6147,35.8,98.000002,6.0,0.0,0.63336,4.0,0.0,2.5,76.200002,2,3.0,70.2341,0.0,1,14.28686,0.0,19.124155,11.1,140.0,11.5,165.0,0.0,0.0,21.0,10.3,1,96.0,0.0,20.600001,105.0,0.0,42.0,4.2,0.5,0.75,3.0,0.443333,84.676544,34.0,7.33,1.0,3.75,0.0,15.0,41.0,55
1,-1,118.30542,65.0,2.0,13.9,2.0,,114.899234,5.0,0.0,0.0,30.100952,162.0,4.0,0,20.0,0.6554,5,30.6147,35.8,98.000002,6.0,0.0,0.63336,4.0,0.0,2.5,76.200002,2,3.0,70.2341,1166.38,1,14.28686,0.0,19.124155,11.1,140.0,11.5,165.0,0.0,0.0,21.0,10.3,1,96.0,40.0,20.600001,105.0,1206.381429,42.0,4.2,0.5,0.75,3.0,0.443333,84.676544,34.0,7.33,1.0,3.75,0.0,15.0,41.0,55


In [21]:
df_sur_add.columns

Index(['time', 'heart_rate', 'weight', 'survival_28day', 'rdw',
       'first_use_time', 'platelet', 'sbp', 'gcs_verbal', 'liver_sofa',
       'furosemide_amount_kg', 'mch', 'po2', 'potassium', 'mortality_28d',
       'first_use_amount', 'creatinine', 'stay_id', 'mchc', 'temperature',
       'spo2', 'gcs_motor', 'furosemide_amount', 'baseline_Scr', 'gcs_eyes',
       'Congestive_heart_failure', 'cardiovascular_sofa', 'glucose',
       'groupHPD', 'rbc', 'dbp', 'fluidbance', 'is_diu_byid', 'hemoglobin',
       'cns_sofa', 'resp_rate', 'calcium', 'sodium', 'aniongap', 'height',
       'coagulation_sofa', 'is_diu', 'bicarbonate', 'pt', 'gender', 'mcv',
       'urineoutput', 'wbc', 'chloride', 'colloid_bolus', 'pco2', 'baseexcess',
       'respiration_sofa', 'renal_sofa', 'sofa_norenal', 'hematocrit', 'mbp',
       'ptt', 'ph', 'lactate', 'sofa', 'cum_fluidbance', 'gcs', 'fio2', 'age'],
      dtype='object')