# readMe


定义：脓毒性休克指： 脓毒症患者 尽管充分的液体复苏仍存在持续的低血压,需要用升压药维持平均动脉压在 65 mmHg 以上,血乳酸在 2 mmol / L 以上,符合这一标准临床病死率超过 40% 。              



输入数据：由 '1_获取MIMICIV脓毒症休克定义需要的指标.ipynb' 生成的 'pickle_mimic_iv_derived_table_v5.pkl'



输出数据为 pickle_mimiciv_septic_shock_v6.pkl


注意： 基于患者已经是脓毒症的情况下：                    
1. 液体复苏：  3h窗口 至少1000ml ,          
2. mbp/vaso: 3h窗口后 (map最近一个值 小于等于 65) 或 vaso 使用
3. lactate:  3h窗口后 6H内 最近一个值 大于等于 2.0      
4. 以上条件全部满足的时间点，即该次乳酸测量的时间点。
5. onset time（发病时间）：取该乳酸测量时间（lactate_charttime）。


In [4]:
import os
import pandas as pd
import time
import datetime as dt
import nbimporter
from tqdm import tqdm, trange
import matplotlib.pyplot as plt

# Switch to the project folder first so that the project-local helper module
# `my_save_load_v2` can be imported from there.
os.chdir(r'D:\my_dir_jupyter\final_predict_septic_shock')
import my_save_load_v2
# Let notebook previews print up to 200 rows before pandas truncates them.
pd.set_option('display.max_rows',200)

%matplotlib inline

In [5]:
def splite_data(sepsis3_tem,step=100):
    """Cut a DataFrame into consecutive row chunks of at most ``step`` rows.

    Args:
        sepsis3_tem: DataFrame to split; rows are taken by position.
        step: maximum number of rows per chunk.

    Returns:
        A list of positional slices that together cover the frame in its
        original row order. The last chunk may be shorter than ``step``; an
        empty frame yields an empty list.
    """
    total_rows = sepsis3_tem.shape[0]
    chunks = []
    for first_row in range(0, total_rows, step):
        last_row = first_row + step
        chunks.append(sepsis3_tem.iloc[first_row:last_row, :])
    return chunks



## Fluid volume infused during the `nh_bef` hours before each vasopressor start.
def fluid_3h_pre_vaso(sub_tem_sepsis3,tem_vasoactive_vasopressor,tem_fluids_bolus,nh_bef,nh_aft):
    """Sum the fluid given in the window preceding each vasopressor start.

    For every vasopressor record that starts at or after the stay's
    ``sofa_time``, the window ``[vaso_starttime - nh_bef h, vaso_starttime]``
    is intersected with every infusion of the same stay. Each infusion
    contributes ``colloid_bolus`` scaled by the fraction of its duration that
    falls inside the window (i.e. a constant infusion rate is assumed).

    Args:
        sub_tem_sepsis3: DataFrame with ``stay_id`` and ``sofa_time``.
        tem_vasoactive_vasopressor: DataFrame with ``stay_id``,
            ``vaso_starttime`` and ``vaso_endtime``.
        tem_fluids_bolus: DataFrame with ``stay_id``, ``fluid_starttime``,
            ``fluid_endtime`` and ``colloid_bolus``.
        nh_bef: window length in hours before the vasopressor start.
        nh_aft: hours after the vasopressor start; only used to compute the
            ``vaso_nh_aft`` column returned to the caller.

    Returns:
        One row per (stay_id, sofa_time, vaso_starttime, vaso_endtime,
        vaso_nh_pre, vaso_nh_aft) with the summed ``fluids_bolus_3h``. Any
        additional columns present in the inputs are summed as well. The
        input frames are not modified.
    """
    vaso = pd.merge(sub_tem_sepsis3, tem_vasoactive_vasopressor, on='stay_id', how='inner')
    # .copy(): the filtered frame gets new columns below, so it must not be a
    # view/slice of the merge result (avoids SettingWithCopy problems).
    vaso = vaso.loc[vaso.vaso_starttime >= vaso.sofa_time, :].copy()
    vaso['vaso_nh_pre'] = vaso.vaso_starttime - dt.timedelta(hours=nh_bef)
    vaso['vaso_nh_aft'] = vaso.vaso_starttime + dt.timedelta(hours=nh_aft)

    vaso_fluid = pd.merge(vaso, tem_fluids_bolus, on=['stay_id'], how='inner')

    # Discard infusions lasting one minute or less.
    duration = vaso_fluid['fluid_endtime'] - vaso_fluid['fluid_starttime']
    vaso_fluid = vaso_fluid.loc[duration > dt.timedelta(minutes=1), :].copy()
    duration = vaso_fluid['fluid_endtime'] - vaso_fluid['fluid_starttime']

    # Overlap between the infusion interval and the pre-vasopressor window;
    # negative when the two intervals do not intersect.
    overlap_end = vaso_fluid[['vaso_starttime', 'fluid_endtime']].min(axis=1)
    overlap_start = vaso_fluid[['vaso_nh_pre', 'fluid_starttime']].max(axis=1)
    # The overlap can never exceed the infusion duration, so only the lower
    # bound needs clipping (non-overlapping infusions contribute 0).
    fraction = ((overlap_end - overlap_start) / duration).clip(lower=0)

    vaso_fluid['fluids_bolus_3h'] = fraction * vaso_fluid['colloid_bolus']
    vaso_fluid = vaso_fluid.drop(columns=['fluid_starttime', 'fluid_endtime', 'colloid_bolus'])

    # The string alias selects pandas' own (NaN-skipping) group sum instead of
    # handing the Python builtin `sum` to agg().
    return vaso_fluid.groupby(
        ['stay_id', 'sofa_time', 'vaso_starttime', 'vaso_endtime', 'vaso_nh_pre', 'vaso_nh_aft'],
        as_index=False).agg('sum')


# First lactate measurement at or after each vasopressor start.
def lactate_aft_vaso(tem_vaso_fluid,tem_lactate):
    """Attach to each vasopressor event the earliest lactate taken afterwards.

    Args:
        tem_vaso_fluid: DataFrame with at least ``stay_id``, ``sofa_time``,
            ``vaso_starttime`` and ``fluids_bolus_3h``.
        tem_lactate: DataFrame with ``stay_id``, ``lactate_charttime`` and
            ``lactate``.

    Returns:
        One row per (stay_id, sofa_time, vaso_starttime, fluids_bolus_3h)
        holding the lactate row with the smallest ``lactate_charttime`` that
        is >= ``vaso_starttime``. Events without such a measurement are
        dropped. Rows are ordered by the four key columns and carry a fresh
        0..n-1 index; all input columns and their dtypes are kept, also when
        the result is empty.
    """
    group_keys = ['stay_id', 'sofa_time', 'vaso_starttime', 'fluids_bolus_3h']

    merged = pd.merge(tem_vaso_fluid, tem_lactate, on=['stay_id'], how='inner')
    merged = merged.loc[merged.lactate_charttime >= merged.vaso_starttime, :]

    # A groupby on these keys would silently discard rows with a missing key;
    # keep that behaviour explicit.
    merged = merged.dropna(subset=group_keys)

    # Sorting by the keys plus the lactate time puts the earliest lactate of
    # every event first, so "first row per event" is a plain de-duplication
    # instead of one Python-level apply() call per group.
    merged = merged.sort_values(by=group_keys + ['lactate_charttime'])
    first_lactate = merged.drop_duplicates(subset=group_keys, keep='first')

    return first_lactate.reset_index(drop=True)


## Fluid volume infused during the `nh_bef` hours before each MBP reading.
def fluid_3h_pre_mbp(sub_tem_sepsis3,tem_mbp,tem_fluids_bolus,nh_bef,nh_aft):
    """Sum the fluid given in the window preceding each MBP reading.

    For every MBP reading charted at or after the stay's ``sofa_time``, the
    window ``[mbp_charttime - nh_bef h, mbp_charttime]`` is intersected with
    every infusion of the same stay. Each infusion contributes
    ``colloid_bolus`` scaled by the fraction of its duration that falls inside
    the window (i.e. a constant infusion rate is assumed).

    Args:
        sub_tem_sepsis3: DataFrame with ``stay_id`` and ``sofa_time``.
        tem_mbp: DataFrame with ``stay_id``, ``mbp_charttime`` and ``mbp``.
            This function does not threshold ``mbp``; pass only the readings
            of interest.
        tem_fluids_bolus: DataFrame with ``stay_id``, ``fluid_starttime``,
            ``fluid_endtime`` and ``colloid_bolus``.
        nh_bef: window length in hours before the reading.
        nh_aft: hours after the reading; only used to compute the
            ``mbp_nh_aft`` column returned to the caller.

    Returns:
        One row per (stay_id, sofa_time, mbp_charttime, mbp, mbp_nh_pre,
        mbp_nh_aft) with the summed ``fluids_bolus_3h``. Any additional
        columns present in the inputs are summed as well. The input frames
        are not modified.
    """
    mbp = pd.merge(sub_tem_sepsis3, tem_mbp, on='stay_id', how='inner')
    # .copy(): the filtered frame gets new columns below, so it must not be a
    # view/slice of the merge result (avoids SettingWithCopy problems).
    mbp = mbp.loc[mbp.mbp_charttime >= mbp.sofa_time, :].copy()
    mbp['mbp_nh_pre'] = mbp.mbp_charttime - dt.timedelta(hours=nh_bef)
    mbp['mbp_nh_aft'] = mbp.mbp_charttime + dt.timedelta(hours=nh_aft)

    mbp_fluid = pd.merge(mbp, tem_fluids_bolus, on=['stay_id'], how='inner')

    # NOTE(review): unlike fluid_3h_pre_vaso, infusions lasting <= 1 minute are
    # NOT removed here. A zero-length infusion divides by a zero duration; the
    # resulting NaN / -inf fraction ends up contributing nothing to the sum.
    # Confirm whether the two functions are meant to differ.
    duration = mbp_fluid['fluid_endtime'] - mbp_fluid['fluid_starttime']

    # Overlap between the infusion interval and the pre-reading window;
    # negative when the two intervals do not intersect.
    overlap_end = mbp_fluid[['mbp_charttime', 'fluid_endtime']].min(axis=1)
    overlap_start = mbp_fluid[['mbp_nh_pre', 'fluid_starttime']].max(axis=1)
    # The overlap can never exceed the infusion duration, so only the lower
    # bound needs clipping (non-overlapping infusions contribute 0).
    fraction = ((overlap_end - overlap_start) / duration).clip(lower=0)

    mbp_fluid['fluids_bolus_3h'] = fraction * mbp_fluid['colloid_bolus']
    mbp_fluid = mbp_fluid.drop(columns=['fluid_starttime', 'fluid_endtime', 'colloid_bolus'])

    # The string alias selects pandas' own (NaN-skipping) group sum instead of
    # handing the Python builtin `sum` to agg().
    return mbp_fluid.groupby(
        ['stay_id', 'sofa_time', 'mbp_charttime', 'mbp', 'mbp_nh_pre', 'mbp_nh_aft'],
        as_index=False).agg('sum')


# First lactate measurement at or after each MBP reading.
def lactate_aft_mbp(sub_tem_mbp_fluid,tem_lactate):
    """Attach to each MBP reading the earliest lactate taken afterwards.

    Args:
        sub_tem_mbp_fluid: DataFrame with at least ``stay_id``, ``sofa_time``,
            ``mbp_charttime``, ``mbp``, ``mbp_nh_pre``, ``mbp_nh_aft`` and
            ``fluids_bolus_3h``.
        tem_lactate: DataFrame with ``stay_id``, ``lactate_charttime`` and
            ``lactate``.

    Returns:
        One row per (stay_id, sofa_time, mbp_charttime, mbp, mbp_nh_pre,
        mbp_nh_aft, fluids_bolus_3h) holding the lactate row with the smallest
        ``lactate_charttime`` that is >= ``mbp_charttime``. Readings without
        such a measurement are dropped. Rows are ordered by the key columns
        and carry a fresh 0..n-1 index; all input columns and their dtypes are
        kept, also when the result is empty.
    """
    group_keys = ['stay_id', 'sofa_time', 'mbp_charttime', 'mbp',
                  'mbp_nh_pre', 'mbp_nh_aft', 'fluids_bolus_3h']

    merged = pd.merge(sub_tem_mbp_fluid, tem_lactate, on=['stay_id'], how='inner')
    merged = merged.loc[merged.lactate_charttime >= merged.mbp_charttime, :]

    # A groupby on these keys would silently discard rows with a missing key;
    # keep that behaviour explicit.
    merged = merged.dropna(subset=group_keys)

    # Sorting by the keys plus the lactate time puts the earliest lactate of
    # every reading first, so "first row per reading" is a plain
    # de-duplication instead of one Python-level apply() call per group.
    merged = merged.sort_values(by=group_keys + ['lactate_charttime'])
    first_lactate = merged.drop_duplicates(subset=group_keys, keep='first')

    return first_lactate.reset_index(drop=True)



def main_shock(nh_bef,nh_aft,bolus,lac,tem_sepsis3_split,tem_fluids_bolus,tem_mbp,tem_vasopressor,tem_lactate):
    """Find the first qualifying shock event for every sepsis episode.

    The cohort is processed batch by batch. Within a batch two kinds of
    trigger events are evaluated independently: vasopressor starts and MBP
    readings (``tem_mbp`` is used as passed in; no MBP threshold is applied
    here). An event qualifies when

    1. at least ``bolus`` of fluid was given in the ``nh_bef`` hours before
       it, and
    2. the *first* lactate measured at or after the event is >= ``lac`` and
       was charted no later than ``nh_aft`` hours after the event. A later
       lactate is never considered, even if the first one fails the test.

    For each (stay_id, sofa_time) the qualifying event with the earliest
    ``lactate_charttime`` is kept.

    Args:
        nh_bef: hours before the event in which fluid is summed.
        nh_aft: hours after the event in which the lactate must be charted.
        bolus: minimum fluid volume required in the ``nh_bef`` window.
        lac: minimum lactate value.
        tem_sepsis3_split: sequence of DataFrames (``stay_id``,
            ``sofa_time``), one per batch.
        tem_fluids_bolus: infusions (``stay_id``, ``fluid_starttime``,
            ``fluid_endtime``, ``colloid_bolus``).
        tem_mbp: MBP readings (``stay_id``, ``mbp_charttime``, ``mbp``).
        tem_vasopressor: vasopressor records (``stay_id``,
            ``vaso_starttime``, ``vaso_endtime``).
        tem_lactate: lactate values (``stay_id``, ``lactate_charttime``,
            ``lactate``).

    Returns:
        DataFrame with one row per (stay_id, sofa_time). Columns of the
        trigger type that did not fire are missing (NaT/NaN) for that row.
        The window columns are renamed to ``vaso_{nh_bef}h_bef``,
        ``vaso_{nh_aft}h_aft``, ``mbp_{nh_bef}h_bef`` and
        ``mbp_{nh_aft}h_aft``. The index restarts at 0 for every batch. An
        empty frame is returned when there are no batches.
    """
    # Collect per-batch results and concatenate once at the end; growing a
    # DataFrame with concat inside the loop copies all earlier rows each time.
    batch_results = []

    for i in trange(len(tem_sepsis3_split)):
        sub_tem_sepsis3 = tem_sepsis3_split[i]

        # Vasopressor events with enough fluid beforehand ...
        vaso_fluid = fluid_3h_pre_vaso(sub_tem_sepsis3, tem_vasopressor, tem_fluids_bolus, nh_bef, nh_aft)
        vaso_fluid = vaso_fluid.loc[vaso_fluid.fluids_bolus_3h >= bolus]

        # ... whose first subsequent lactate is high and inside the window.
        vaso_lac = lactate_aft_vaso(vaso_fluid, tem_lactate)
        vaso_lac = vaso_lac.loc[vaso_lac.lactate >= lac, :]
        vaso_lac = vaso_lac.query('vaso_starttime <= lactate_charttime <= vaso_nh_aft')

        # MBP readings with enough fluid beforehand ...
        mbp_fluid = fluid_3h_pre_mbp(sub_tem_sepsis3, tem_mbp, tem_fluids_bolus, nh_bef, nh_aft)
        mbp_fluid = mbp_fluid.loc[mbp_fluid.fluids_bolus_3h >= bolus]

        # ... whose first subsequent lactate is high and inside the window.
        mbp_lac = lactate_aft_mbp(mbp_fluid, tem_lactate)
        mbp_lac = mbp_lac.loc[mbp_lac.lactate >= lac, :]
        mbp_lac = mbp_lac.query('mbp_charttime <= lactate_charttime <= mbp_nh_aft')

        # Pool both trigger types and keep, per sepsis episode, the candidate
        # with the earliest lactate time.
        candidates = pd.concat([vaso_lac, mbp_lac])
        candidates = candidates.sort_values(by=['stay_id', 'sofa_time', 'lactate_charttime'])
        shock = candidates.drop_duplicates(subset=['stay_id', 'sofa_time'], keep='first')

        batch_results.append(shock.reset_index(drop=True))

    if batch_results:
        septic_shock = pd.concat(batch_results, axis=0)
    else:
        septic_shock = pd.DataFrame()

    return septic_shock.rename(columns={'vaso_nh_pre': f'vaso_{nh_bef}h_bef',
                                        'vaso_nh_aft': f'vaso_{nh_aft}h_aft',
                                        'mbp_nh_pre' : f'mbp_{nh_bef}h_bef',
                                        'mbp_nh_aft' : f'mbp_{nh_aft}h_aft',
                                        })
  

# 数据导入

In [6]:
# Work inside the data folder so the pickle file names below are relative.
path=r'E:\data_phenotype_for_septic_shock'
os.chdir(path)

# Presumably restores the saved tables (sepsis3_original, fluids_bolus,
# vasoactive_vasopressor, shock_define_mbp, shock_define_lactate2, ...) into
# this notebook's global namespace -- my_load is project-local; confirm.
# NOTE(review): the .pkl extension suggests pickle; only load trusted files.
my_save_load_v2.my_load('pickle_mimic_iv_derived_table_v5.pkl', globals())

In [7]:
# Sepsis-3 cohort ordered by stay; only the stay id and the SOFA time are
# needed for the shock definition.
sepsis3 = sepsis3_original.sort_values(by='stay_id')
tem_sepsis3 = sepsis3[['stay_id', 'sofa_time']]

# Fluid infusions, with the time columns prefixed so they stay unambiguous
# after merging with the other tables.
tem_fluids_bolus = fluids_bolus[['stay_id', 'starttime', 'endtime', 'colloid_bolus']].rename(
    columns={'starttime': 'fluid_starttime', 'endtime': 'fluid_endtime'})

# Vasopressor administrations, time columns prefixed the same way.
tem_vasopressor = vasoactive_vasopressor[['stay_id', 'starttime', 'endtime']].rename(
    columns={'starttime': 'vaso_starttime', 'endtime': 'vaso_endtime'})

# MBP readings: drop the patient id, then name the remaining columns by position.
tem_mbp = shock_define_mbp.drop(columns='subject_id')
tem_mbp.columns = ['stay_id', 'mbp_charttime', 'mbp']

# Lactate values: drop the patient id, then name the remaining columns by position.
tem_lactate = shock_define_lactate2.drop(columns='subject_id')
tem_lactate.columns = ['stay_id', 'lactate_charttime', 'lactate']


In [8]:
# Notebook display: preview the fluid-infusion table.
tem_fluids_bolus

Unnamed: 0,stay_id,fluid_starttime,fluid_endtime,colloid_bolus
0,39553978,2180-07-23 17:00:00,2180-07-23 17:30:00,50.0
1,39553978,2180-07-23 17:33:00,2180-07-23 18:03:00,50.0
2,39765666,2189-06-27 12:54:00,2189-06-27 20:37:00,69.0
3,37067082,2157-11-20 19:32:00,2157-11-21 08:49:00,1000.0
4,37067082,2157-11-21 00:51:00,2157-11-21 02:06:00,250.0
...,...,...,...,...
2714425,34670930,2126-10-21 18:32:00,2126-10-21 19:07:00,22.0
2714426,34670930,2126-10-21 19:00:00,2126-10-21 21:16:00,7.0
2714427,34670930,2126-10-21 19:07:00,2126-10-21 21:21:00,82.0
2714428,34670930,2126-10-21 20:20:00,2126-10-21 20:53:00,9.0


In [9]:
# Notebook display: preview the vasopressor table.
tem_vasopressor

Unnamed: 0,stay_id,vaso_starttime,vaso_endtime
0,37510196,2131-01-11 04:50:00,2131-01-11 05:04:00
1,37510196,2131-01-11 05:04:00,2131-01-11 06:54:00
2,37510196,2131-01-11 06:54:00,2131-01-11 07:40:00
3,37510196,2131-01-11 07:40:00,2131-01-11 08:17:00
4,37510196,2131-01-11 08:17:00,2131-01-11 09:14:00
...,...,...,...
678291,38978960,2164-09-16 18:43:00,2164-09-17 00:10:00
678292,38978960,2164-09-17 00:10:00,2164-09-17 05:44:00
678293,38978960,2164-09-17 10:00:00,2164-09-17 10:18:00
678294,38978960,2164-09-17 10:18:00,2164-09-17 11:37:00


In [10]:
# Notebook display: preview the lactate table.
tem_lactate

Unnamed: 0,stay_id,lactate_charttime,lactate
0,39765666,2189-06-27 07:52:00,1.7
1,37510196,2131-01-11 06:37:00,1.5
2,37510196,2131-01-11 11:33:00,1.1
3,37510196,2131-01-12 21:04:00,1.1
4,37510196,2131-01-13 02:28:00,1.2
...,...,...,...
244161,38978960,2164-09-15 02:46:00,0.9
244162,38978960,2164-09-17 08:11:00,1.5
244163,38978960,2164-09-17 13:18:00,7.0
244164,38978960,2164-09-17 13:34:00,4.0


In [11]:
# Notebook display: preview the MBP table before thresholding.
tem_mbp

Unnamed: 0,stay_id,mbp_charttime,mbp
0,39553978,2180-07-23 14:11:00,56.0
1,39553978,2180-07-23 14:30:00,67.0
2,39553978,2180-07-23 15:00:00,64.0
3,39553978,2180-07-23 16:01:00,64.0
4,39553978,2180-07-23 17:00:00,67.0
...,...,...,...
6680969,36195440,2145-11-04 09:00:00,60.0
6680970,36195440,2145-11-04 13:48:00,84.0
6680971,36195440,2145-11-04 16:53:00,102.0
6680972,36195440,2145-11-04 19:31:00,131.0


In [12]:
# Keep only the readings with MBP at or below 65; the shock search treats
# every remaining reading as a hypotension event.
tem_mbp = tem_mbp.loc[tem_mbp['mbp'] <= 65]
tem_mbp

Unnamed: 0,stay_id,mbp_charttime,mbp
0,39553978,2180-07-23 14:11:00,56.0
2,39553978,2180-07-23 15:00:00,64.0
3,39553978,2180-07-23 16:01:00,64.0
5,39553978,2180-07-23 18:00:00,60.0
6,39553978,2180-07-23 19:00:00,56.0
...,...,...,...
6680930,38978960,2164-09-17 13:05:00,51.0
6680931,38978960,2164-09-17 13:10:00,33.0
6680932,38978960,2164-09-17 13:17:00,44.0
6680943,36195440,2145-11-03 09:00:00,65.0


# main 

In [13]:
# Parameters of the shock definition.
nh_bef = 3      # hours before the event in which fluid is summed
nh_aft = 6      # hours after the event in which the lactate must be charted
bolus = 1000    # ml
lac = 2         # lactate threshold

# Process the cohort in batches of 300 sepsis rows.
tem_sepsis3_split = splite_data(tem_sepsis3, step=300)

septic_shock = main_shock(
    nh_bef=nh_bef,
    nh_aft=nh_aft,
    bolus=bolus,
    lac=lac,
    tem_sepsis3_split=tem_sepsis3_split,
    tem_fluids_bolus=tem_fluids_bolus,
    tem_mbp=tem_mbp,
    tem_vasopressor=tem_vasopressor,
    tem_lactate=tem_lactate,
)

# The shock onset is defined as the time of the qualifying lactate.
septic_shock['onset_time'] = septic_shock['lactate_charttime']
septic_shock

100%|████████████████████████████████████████████████████████████████████████████████| 116/116 [02:47<00:00,  1.44s/it]


Unnamed: 0,stay_id,sofa_time,vaso_starttime,vaso_endtime,vaso_3h_bef,vaso_6h_aft,fluids_bolus_3h,lactate_charttime,lactate,mbp_charttime,mbp,mbp_3h_bef,mbp_6h_aft,onset_time
0,30002654,2154-10-18 03:00:00,NaT,NaT,NaT,NaT,1014.981459,2154-10-18 14:15:00,2.5,2154-10-18 14:00:00,59.0,2154-10-18 11:00:00,2154-10-18 20:00:00,2154-10-18 14:15:00
1,30003749,2120-11-05 17:00:00,NaT,NaT,NaT,NaT,1561.853256,2120-11-05 19:05:00,12.8,2120-11-05 19:00:00,64.0,2120-11-05 16:00:00,2120-11-06 01:00:00,2120-11-05 19:05:00
2,30005707,2144-01-07 01:00:00,2144-01-07 11:12:00,2144-01-07 11:32:00,2144-01-07 08:12:00,2144-01-07 17:12:00,1061.348902,2144-01-07 11:50:00,2.4,NaT,,NaT,NaT,2144-01-07 11:50:00
3,30006983,2159-10-12 04:00:00,2159-10-12 22:18:00,2159-10-12 23:25:00,2159-10-12 19:18:00,2159-10-13 04:18:00,1009.623459,2159-10-13 01:06:00,9.3,NaT,,NaT,NaT,2159-10-13 01:06:00
4,30009123,2188-04-28 14:00:00,2188-04-28 15:40:00,2188-04-28 17:07:00,2188-04-28 12:40:00,2188-04-28 21:40:00,2294.658929,2188-04-28 16:03:00,2.3,NaT,,NaT,NaT,2188-04-28 16:03:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,39980432,2176-01-01 12:00:00,2176-01-01 16:14:00,2176-01-01 17:06:00,2176-01-01 13:14:00,2176-01-01 22:14:00,1211.000000,2176-01-01 19:41:00,2.2,NaT,,NaT,NaT,2176-01-01 19:41:00
17,39981641,2131-05-29 21:00:00,2131-05-29 23:56:00,2131-05-30 02:25:00,2131-05-29 20:56:00,2131-05-30 05:56:00,1099.305944,2131-05-29 23:57:00,3.9,NaT,,NaT,NaT,2131-05-29 23:57:00
18,39986775,2123-10-07 23:00:00,2123-10-08 02:04:00,2123-10-08 03:01:00,2123-10-07 23:04:00,2123-10-08 08:04:00,1522.972061,2123-10-08 02:34:00,3.7,NaT,,NaT,NaT,2123-10-08 02:34:00
19,39995735,2124-08-15 18:00:00,2124-08-15 19:28:00,2124-08-15 19:37:00,2124-08-15 16:28:00,2124-08-16 01:28:00,1271.970299,2124-08-15 19:41:00,4.7,NaT,,NaT,NaT,2124-08-15 19:41:00


# 保存数据

In [14]:
# ---- Save the results ----

# Work inside the data folder so the output file name below is relative.
path=r'E:\data_phenotype_for_septic_shock'  
os.chdir(path)


# Free-text description (in Chinese) of the septic-shock definition and of the
# selection rules applied above; it is stored next to the result table.
readMe_shock_defined= """
定义：脓毒性休克指 脓毒症患者 尽管充分的液体复苏仍存在持续的低血压,需要用升压药维持平均动脉压在 65 mmHg 以上,
        血乳酸在 2 mmol / L 以上,符合这一标准临床病死率超过 40% 。

基于患者已经是脓毒症的情况下：                    
1. 液体复苏：  3h窗口 至少1000ml ,          
2. mbp/vaso: 3h窗口后 (map最近一个值 小于等于 65) 或 vaso 使用
3. lactate:  3h窗口后 6H内 最近一个值 大于等于 2.0      
4. 所有条件都满足的点,即乳酸的点，
5. onset time. lac time

"""


out_name = 'pickle_mimiciv_septic_shock_v6.pkl' 

# Names of the global variables to store: the description, the new
# septic_shock table and the source tables it was derived from.
l = ['readMe_shock_defined','septic_shock', 'age', 'fluids_bolus','icustays',
     'sepsis3_original', 'shock_define_lactate2', 'shock_define_mbp', 'vasoactive_vasopressor']
# Presumably looks each name up in globals() and writes the objects to
# `out_name` -- my_save is project-local; confirm.
my_save_load_v2.my_save(List=l, filename=out_name, Global=globals())

