### 入排标准确定患者

In [1]:
import pandas as pd
# Build the pneumonia diagnosis table: one row per matching ICD diagnosis,
# labelled per admission (hadm_id) in the "mixed_infection" column.
diagnoses = pd.read_csv("mimic-iv-2/hosp/diagnoses_icd.csv", header=0)

# ICD prefixes (anchored at the start of the code by str.match).
# NOTE(review): both patterns require a digit after the three-character prefix,
# so a code stored as exactly "481", "J13" or "J14" would NOT match. Confirm
# whether those codes should be part of the cohort before widening the patterns.
BACTERIAL_PN_PATTERN = r"48[123]\d|J1[345]\d"
VIRAL_PN_PATTERN = r"48[078]\d|J1[012]\d|J09\d"

# Select patients diagnosed with pneumonia.
# .copy() gives each subset its own data, so adding a column below does not
# raise SettingWithCopyWarning; na=False keeps the mask boolean if a code is missing.
is_bacterial = diagnoses["icd_code"].str.match(BACTERIAL_PN_PATTERN, na=False)
diagnoses_bpn = diagnoses.loc[is_bacterial, :].copy()
diagnoses_bpn["infection_type"] = 'BACT'

is_viral = diagnoses["icd_code"].str.match(VIRAL_PN_PATTERN, na=False)
diagnoses_vpn = diagnoses.loc[is_viral, :].copy()
diagnoses_vpn["infection_type"] = 'VIRUS'

diagnoses_pn = pd.concat([diagnoses_bpn, diagnoses_vpn], axis=0)

# Classify mixed infections per admission:
#   - a single pneumonia diagnosis row        -> "BACT" / "VIRUS"
#   - several rows, all of the same type      -> "MIXED_BACT" / "MIXED_VIRUS"
#   - several rows, both types present        -> "MIXED_BV"
# Computed with group-wise transforms instead of rescanning the frame for
# every row, which was quadratic in the number of diagnosis rows.
type_by_adm = diagnoses_pn.groupby("hadm_id")["infection_type"]
rows_per_adm = type_by_adm.transform("size")
types_per_adm = type_by_adm.transform("nunique")

diagnoses_pn["mixed_infection"] = diagnoses_pn["infection_type"]
both_types = (rows_per_adm > 1) & (types_per_adm > 1)
same_type = (rows_per_adm > 1) & (types_per_adm == 1)
diagnoses_pn.loc[both_types, "mixed_infection"] = "MIXED_BV"
diagnoses_pn.loc[same_type, "mixed_infection"] = (
    "MIXED_" + diagnoses_pn.loc[same_type, "infection_type"]
)

diagnoses_pn = diagnoses_pn.drop("infection_type", axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  diagnoses_bpn["infection_type"] = 'BACT'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  diagnoses_vpn["infection_type"] = 'VIRUS'


### 结局-入住ICU

In [4]:
# Load hospital admission and ICU stay records.
hosp_adm = pd.read_csv("mimic-iv-2/hosp/admissions.csv")
icu_adm = pd.read_csv("mimic-iv-2/icu/icustays.csv")

# Left-join ICU stays onto admissions (pandas joins on every shared column name),
# so admissions without an ICU stay are kept with missing ICU fields.
admission_cols = ["subject_id", "hadm_id", "admittime", "dischtime", "deathtime"]
hosp_icu = hosp_adm[admission_cols].merge(icu_adm, how="left")
# ICU_adm is 1 when an ICU "intime" is present and 0 otherwise.
hosp_icu["ICU_adm"] = hosp_icu["intime"].notna().astype("int64")

# Remove admissions that have more than one ICU stay.
stays_per_adm = icu_adm["hadm_id"].value_counts()
repeat_icu_hadm = stays_per_adm.index[stays_per_adm > 1]
hosp_icu = hosp_icu.loc[~hosp_icu["hadm_id"].isin(repeat_icu_hadm)]

# Keep only pneumonia admissions, carrying their infection label along.
pn_admissions = diagnoses_pn[['subject_id', 'hadm_id', 'mixed_infection']].drop_duplicates()
adm_pn = pd.merge(pn_admissions, hosp_icu)

# Write the cohort and the admission / ICU outcome tables (de-duplicated).
for export_cols, export_path in (
    (["subject_id", "hadm_id", "mixed_infection"], "pneumonia/patients_pn.csv"),
    (["subject_id", "hadm_id", "admittime", "dischtime"], "pneumonia/outcome_admission.csv"),
    (["subject_id", "hadm_id", "stay_id", "ICU_adm", "intime", "outtime"], "pneumonia/outcome_admission_icu.csv"),
):
    adm_pn[export_cols].drop_duplicates().to_csv(export_path)

In [5]:
# Attach the ICD dictionary to the pneumonia diagnoses of the final cohort.
diagnoses_code = pd.read_csv("mimic-iv-2/hosp/d_icd_diagnoses.csv")

# Keep only diagnosis rows whose admission survived the cohort filters.
in_cohort = diagnoses_pn["hadm_id"].isin(adm_pn["hadm_id"])
# Inner join on every column name shared with the ICD dictionary.
diagnoses_pn = diagnoses_pn.loc[in_cohort].merge(diagnoses_code)

diagnoses_pn.drop_duplicates().to_csv("pneumonia/diagnoses_pn.csv")

### 结局-Sepsis

In [33]:
# Sepsis outcome: left-join the derived sepsis-3 table onto the cohort so that
# every cohort row is kept, with missing values where no sepsis row matched.
sepsis3 = pd.read_csv("concepts/sepsis/sepsis3.csv")
sepsis3 = pd.merge(
    adm_pn[["subject_id", "hadm_id", "stay_id"]],
    sepsis3[['subject_id', 'stay_id', 'sofa_time', 'sofa_score', 'sepsis3']],
    how="left",
)

# Flag is 1 when a sepsis3 value was joined in and 0 when it is missing.
# It is derived from a single mask: filling the missing values with 0 first and
# then setting every non-missing value to 1 turned the whole column into 1.
# NOTE(review): any non-missing value counts as sepsis; if the source column can
# hold explicit negatives, map those to 0 instead - confirm against the table.
sepsis3["sepsis3"] = sepsis3["sepsis3"].notna().astype(int)

sepsis3.drop_duplicates().to_csv("pneumonia/outcome_sepsis3.csv")

### 结局-院内死亡和院外死亡

In [3]:
# Death outcomes: in-hospital death comes from the admission "deathtime",
# overall death additionally uses the patient-level date of death ("dod").
patients = pd.read_csv("mimic-iv-2/hosp/patients.csv")
# Render dod as a "YYYY-MM-DD 00:00:00" string; missing dates stay missing.
patients['dod'] = pd.to_datetime(patients['dod']).dt.strftime('%Y-%m-%d') + " 00:00:00"

death = adm_pn[["subject_id", "hadm_id", "deathtime"]].merge(
    patients[['subject_id', 'dod']], how='left'
)
# Prefer the in-hospital death time and fall back to dod when it is missing.
death["dod"] = death['deathtime'].combine_first(death['dod'])

# 0/1 indicators: a time being present means the death was recorded.
death["inhospital_death"] = death["deathtime"].notna().astype("int64")
death["overall_death"] = death["dod"].notna().astype("int64")

death = death.rename(
    columns={"deathtime": "inhospital_deathtime", "dod": "overall_deathtime"}
)
death.drop_duplicates().to_csv("pneumonia/outcome_death.csv")