In [13]:
import pandas as pd
from tqdm import tqdm
import numpy as np

In [14]:
# Load the raw table and give it a clean 0..n-1 index.
df = pd.read_csv('./data/mimic_data/full.csv').reset_index(drop=True)

# Discard the helper columns when they are present; 'total' has to go first
# because 'total2' takes over its name in the rename below.
df = df.drop(columns=['select_id', 'total'], errors='ignore')

# Give the columns used downstream their short names.
df = df.rename(columns={
    'avg PEEP': 'PEEP',
    'avg FiO2': 'FiO2',
    'total2': 'total',
})

# Strip the 'avg ' marker from every remaining column name.
df.columns = df.columns.str.replace('avg ', '')


In [15]:
# Collect every distinct stay_id once and report how many there are.
distinct_stay_id = df.loc[:, 'stay_id'].unique()
print(distinct_stay_id.shape[0])

2893


In [16]:
# Running totals accumulated over every call to calculate_missing_vent().
# They are only reset when this cell is executed again.
total_vent = 0
missing_peep = 0
missing_fio2 = 0


def calculate_missing_vent(df_P):
    """Add one frame's ventilated-row statistics to the module-level totals.

    Parameters
    ----------
    df_P : pandas.DataFrame
        Must provide the columns 'use_vent', 'PEEP' and 'FiO2'.

    Side effects
    ------------
    For the rows where ``use_vent == 1``:
      * ``total_vent``   grows by the number of such rows,
      * ``missing_peep`` grows by how many of them have a null 'PEEP',
      * ``missing_fio2`` grows by how many of them have a null 'FiO2'.

    Returns None; the frame itself is not modified.
    """
    global total_vent, missing_peep, missing_fio2

    if len(df_P) == 0:
        return

    on_vent = df_P['use_vent'] == 1
    if not on_vent.any():
        # No ventilated rows: nothing to count.
        return

    # Boolean masks replace the per-row iterrows() loop; the counts are the same.
    # int() keeps the counters plain Python ints rather than numpy scalars.
    total_vent += int(on_vent.sum())
    missing_peep += int(df_P.loc[on_vent, 'PEEP'].isna().sum())
    missing_fio2 += int(df_P.loc[on_vent, 'FiO2'].isna().sum())

In [17]:
# Running totals over every call to Get_Weaning_vent_Label(): how many
# ventilator-stop rows were labelled 0 because `dod == 1` appeared inside the
# look-ahead window. They are only reset when this cell is executed again.
dod_weaning_count = 0
dod_weaning_successful_count = 0

# Thresholds used by Get_Weaning_vent_Label().
PEEP_MIN = 5                     # lower clamp for PEEP, also the fill value for off-vent rows
FIO2_MIN = 30                    # lower clamp for FiO2, also the fill value for off-vent rows
PEAK_PRESSURE_MIN = 10           # 'Peak Airway Pressure' below this is blanked to NaN
MAX_BRIDGED_GAP_ROWS = 3         # longest off-vent run that is bridged back to "on"
DEATH_LOOKAHEAD_ROWS = 4         # rows i .. i+3 are inspected for dod == 1
REINTUBATION_LOOKAHEAD_ROWS = 4  # rows i+1 .. i+4 are inspected for renewed ventilation


def Get_Weaning_vent_Label(df_in):
    """Derive ventilator-weaning labels for the rows of one stay.

    The rows are processed strictly in the order given, and each row is
    compared with the rows that follow it. The original comments call the
    rows "days"; this function assumes they are consecutive time steps of a
    single stay (TODO confirm against the upstream extraction).

    Parameters
    ----------
    df_in : pandas.DataFrame
        Must contain 'PEEP', 'FiO2', 'Peak Airway Pressure', 'InvasiveVent'
        and 'dod'. Any index is accepted; it is preserved on the result.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_in`` with these changes:

        * 'PEEP' and 'FiO2' are raised to at least PEEP_MIN / FIO2_MIN, and
          'Peak Airway Pressure' below PEAK_PRESSURE_MIN becomes NaN.
        * 'use_vent' is inserted as column 1. It is 'InvasiveVent' cast to
          int, with off-vent runs of at most MAX_BRIDGED_GAP_ROWS rows
          between two ventilated rows set to 1.
        * 'Weaning' (from raw 'InvasiveVent') is 1 when ventilation stops on
          the next row and no ``dod == 1`` occurs in rows i..i+3. It is 0
          when ventilation continues or such a death occurs, and -1 otherwise.
        * 'Reintubation' is set only where ``Weaning == 1`` and a full
          4-row follow-up exists. It is 1 if 'InvasiveVent' is 1 again in
          rows i+1..i+4, else 0. All other rows are -1.
        * 'Weaning_successful' follows the same rule as 'Weaning' but is
          evaluated on the bridged 'use_vent'.
        * Null 'PEEP' / 'FiO2' on rows with ``use_vent`` in (0, -1) are
          filled with PEEP_MIN / FIO2_MIN.

        The last row is never labelled because it has no following row.

    Side effects
    ------------
    Increments the module-level ``dod_weaning_count`` and
    ``dod_weaning_successful_count``, and calls ``calculate_missing_vent``
    on the result.
    """
    global dod_weaning_count, dod_weaning_successful_count

    df_out = df_in.copy()
    n_rows = len(df_out)

    # --- Clamp implausibly small ventilator settings ------------------------
    df_out.loc[df_out['PEEP'] < PEEP_MIN, 'PEEP'] = PEEP_MIN
    df_out.loc[df_out['FiO2'] < FIO2_MIN, 'FiO2'] = FIO2_MIN
    df_out.loc[df_out['Peak Airway Pressure'] < PEAK_PRESSURE_MIN,
               'Peak Airway Pressure'] = np.nan

    # All row-to-row logic below works on positional arrays, so the result
    # does not depend on what index the caller passed in.
    invasive = df_out['InvasiveVent'].to_numpy()
    use_vent = df_out['InvasiveVent'].astype(int).to_numpy(copy=True)

    # --- Bridge short interruptions of ventilation --------------------------
    # NOTE(review): the original comment said "less than or equal to four
    # days", but the code has always bridged gaps of at most 3 rows. The coded
    # behaviour is kept; confirm which one is intended.
    vent_rows = np.flatnonzero(use_vent == 1)
    for start, end in zip(vent_rows[:-1], vent_rows[1:]):
        gap = use_vent[start + 1:end]
        if 0 < gap.size <= MAX_BRIDGED_GAP_ROWS and (gap == 0).all():
            use_vent[start + 1:end] = 1
    df_out.insert(1, 'use_vent', use_vent)

    # --- Death look-ahead, shared by both weaning labels --------------------
    # NOTE(review): the window starts at the row itself (i .. i+3), although
    # one original comment described it as [i+1, i+2, i+3]. Kept as coded.
    dod = df_out['dod']
    died_soon = [
        dod.iloc[i:i + DEATH_LOOKAHEAD_ROWS].max() == 1
        for i in range(n_rows - 1)
    ]

    # --- Weaning: based on the raw InvasiveVent flag ------------------------
    weaning = np.full(n_rows, -1, dtype=np.int64)
    for i in range(n_rows - 1):
        if invasive[i] != 1:
            continue
        if invasive[i + 1] != 0:
            weaning[i] = 0  # still ventilated on the next row
        elif died_soon[i]:
            weaning[i] = 0  # death inside the window counts as failure
            dod_weaning_count += 1
        else:
            weaning[i] = 1
    df_out['Weaning'] = weaning

    # --- Reintubation: only for weaning rows with a complete follow-up ------
    reintubation = np.full(n_rows, -1, dtype=np.int64)
    for i in range(n_rows - REINTUBATION_LOOKAHEAD_ROWS):
        if weaning[i] != 1:
            continue
        follow_up = df_out['InvasiveVent'].iloc[i + 1:i + 1 + REINTUBATION_LOOKAHEAD_ROWS]
        reintubation[i] = int(follow_up.max() == 1)
    df_out['Reintubation'] = reintubation

    # --- Weaning_successful: same idea, on the bridged use_vent flag --------
    successful = np.full(n_rows, -1, dtype=np.int64)
    for i in range(n_rows - 1):
        if use_vent[i] != 1:
            continue
        if use_vent[i + 1] == 1:
            successful[i] = 0
        elif use_vent[i + 1] == 0:
            if died_soon[i]:
                successful[i] = 0  # death inside the window counts as failure
                dod_weaning_successful_count += 1
            else:
                successful[i] = 1
    df_out['Weaning_successful'] = successful

    # --- Default settings for rows without ventilation ----------------------
    off_vent = np.isin(use_vent, (0, -1))
    df_out.loc[off_vent & df_out['PEEP'].isna().to_numpy(), 'PEEP'] = PEEP_MIN
    df_out.loc[off_vent & df_out['FiO2'].isna().to_numpy(), 'FiO2'] = FIO2_MIN

    # Update the notebook-wide missing-value statistics.
    calculate_missing_vent(df_out)

    return df_out

In [18]:
# Label every stay independently, then stitch the pieces back together.
#
# groupby(sort=False) partitions the table in a single pass and yields the
# stays in order of first appearance, so there is no need to re-scan the
# whole frame once per stay_id with a boolean filter.
df_result_list = []

stay_groups = tqdm(
    df.groupby('stay_id', sort=False),
    total=df['stay_id'].nunique(),
    desc="Processing stay_ids",
)
for stay_id, stay_rows in stay_groups:
    # Each stay gets a fresh 0..n-1 index before labelling.
    df_p = Get_Weaning_vent_Label(stay_rows.reset_index(drop=True))
    df_result_list.append(df_p)

df_result = pd.concat(df_result_list, ignore_index=True)

Processing stay_ids: 100%|████████████████████████████████████████████████████████| 2893/2893 [00:12<00:00, 239.07it/s]


In [19]:
# Write the labelled rows to disk, leaving out the index column.
df_result.to_csv('./data/mimic_data/full_step1.csv', index=False)