In [59]:
import pandas as pd

In [60]:
# Load the pipe-delimited, already-cleaned train and validation splits.
train_df = pd.read_csv(filepath_or_buffer='../dataset/train_cleaned.csv', sep='|')
val_df = pd.read_csv(filepath_or_buffer='../dataset/val_cleaned.csv', sep='|')

### HR

In [61]:
def get_hr_label(df):
    """Add an ``hr_label`` column derived from the ``HR`` column.

    ``hr_label`` is 1 where HR is above 90 and 0 where HR is 90 or below.
    Rows whose HR is NaN match neither condition and are left unlabelled.

    The frame is modified in place and the same object is returned.
    """
    heart_rate = df['HR']
    above_threshold = heart_rate > 90
    at_or_below_threshold = heart_rate <= 90

    # NOTE(review): a numeric placeholder for missing data (e.g. -999) would
    # fall in the "<= 90" branch and be labelled 0 -- confirm HR has none.
    df.loc[above_threshold, 'hr_label'] = 1
    df.loc[at_or_below_threshold, 'hr_label'] = 0

    return df

In [62]:
# Label both splits with the same heart-rate rule.
train_df, val_df = map(get_hr_label, (train_df, val_df))

### Age

In [63]:
def get_age_label(df):
    """Add an ``age_label`` column derived from the ``Age`` column.

    ``age_label`` is 1 ("old") where Age is 80 or more and 0 ("adult") where
    Age is at least 18 but below 80. Rows with Age below 18, or with a NaN
    Age, match neither condition and are left unlabelled.

    The frame is modified in place and the same object is returned.
    """
    age = df['Age']
    is_old = age >= 80
    is_adult = (age < 80) & (age >= 18)

    df.loc[is_old, 'age_label'] = 1  # old
    df.loc[is_adult, 'age_label'] = 0  # adult

    return df

In [64]:
# Label both splits with the same age rule.
train_df, val_df = map(get_age_label, (train_df, val_df))

### Temp

In [65]:
def get_temp_label(df):
    """Add a ``temp_label`` column derived from the ``Temp`` column.

    ``temp_label`` is 0 where Temp is 36 or lower, or 38 or higher, and 1
    where Temp is strictly between 36 and 38. Rows whose Temp is NaN match
    neither condition and are left unlabelled.

    The frame is modified in place and the same object is returned.
    """
    temperature = df['Temp']
    outside_band = (temperature <= 36) | (temperature >= 38)
    inside_band = (temperature > 36) & (temperature < 38)

    # NOTE(review): here 1 marks the in-range reading, whereas get_map_label
    # and get_resp_label in this notebook use 1 for the out-of-range case --
    # confirm this polarity is intended.
    # NOTE(review): a numeric placeholder for missing data (e.g. -999) would
    # count as "<= 36" and be labelled 0 -- confirm Temp has none.
    df.loc[outside_band, 'temp_label'] = 0
    df.loc[inside_band, 'temp_label'] = 1

    return df

In [66]:
# Label both splits with the same temperature rule.
train_df, val_df = map(get_temp_label, (train_df, val_df))

### Resp

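Normal respiration rate (breaths per minute) by age (years), written as age range → normal rate range:
- 0 ~ 1 years: 30 ~ 60
- 1 ~ 3 years: 24 ~ 40
- 3 ~ 6 years: 22 ~ 34
- 6 ~ 12 years: 18 ~ 30
- 12 ~ 18 years: 12 ~ 16
- 18+ years: 12 ~ 20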

In [67]:
def get_resp_label(df):
    """Add a ``resp_label`` column flagging respiration rates by age band.

    Each row is compared against the normal respiration range (inclusive at
    both ends) of the age band its ``Age`` falls into:

        Age <= 1          30 - 60
        1 < Age <= 3      24 - 40
        3 < Age <= 6      22 - 34
        6 < Age <= 12     18 - 30
        12 < Age < 18     12 - 16
        Age >= 18         12 - 20

    The bands are contiguous, so fractional ages such as 1.5 or 17.5 are
    labelled as well. Whole-number ages fall into the bands 0-1, 2-3, 4-6,
    7-12, 13-17 and 18+.

    ``resp_label`` holds the string '0' when Resp lies inside the band's
    range and the string '1' when it lies outside. The labels are strings,
    not numbers; this is kept so existing consumers see the same values.
    Rows whose Resp or Age is NaN match no condition and are left unlabelled.

    The frame is modified in place and the same object is returned.
    """
    age = df['Age']
    resp = df['Resp']

    # (rows belonging to the age band, lowest normal rate, highest normal rate)
    age_bands = [
        (age <= 1, 30, 60),
        ((age > 1) & (age <= 3), 24, 40),
        ((age > 3) & (age <= 6), 22, 34),
        ((age > 6) & (age <= 12), 18, 30),
        ((age > 12) & (age < 18), 12, 16),
        (age >= 18, 12, 20),
    ]

    # NOTE(review): a numeric placeholder for missing data (e.g. -999) in Resp
    # would be below every lower bound and be labelled '1' -- confirm Resp
    # has none.
    for in_band, lowest, highest in age_bands:
        within_range = resp.between(lowest, highest)
        # Explicit comparisons (rather than ~within_range) keep NaN rates
        # unlabelled instead of marking them abnormal.
        outside_range = (resp < lowest) | (resp > highest)

        df.loc[in_band & within_range, 'resp_label'] = '0'
        df.loc[in_band & outside_range, 'resp_label'] = '1'

    return df

In [68]:
# Label both splits with the same respiration rule.
train_df, val_df = map(get_resp_label, (train_df, val_df))

### PaCO2

In [69]:
def get_paco2_label(df):
    """Add a ``paco2_label`` column derived from the ``PaCO2`` column.

    ``paco2_label`` is 1 where PaCO2 is below 32 and 0 where PaCO2 is 32 or
    above. Rows whose PaCO2 is NaN match neither condition and are left
    unlabelled.

    The frame is modified in place and the same object is returned.
    """
    paco2 = df['PaCO2']
    below_threshold = paco2 < 32
    at_or_above_threshold = paco2 >= 32

    # NOTE(review): a numeric placeholder for missing data (e.g. -999) would
    # satisfy "< 32" and be labelled 1 -- confirm PaCO2 has none.
    df.loc[below_threshold, 'paco2_label'] = 1
    df.loc[at_or_above_threshold, 'paco2_label'] = 0

    return df

In [70]:
# Label both splits with the same PaCO2 rule.
train_df, val_df = map(get_paco2_label, (train_df, val_df))

### SBP

In [71]:
def get_sbp_label(df):
    """Add an ``sbp_label`` column derived from the ``SBP`` column.

    ``sbp_label`` is 1 where SBP is 100 or below and 0 where SBP is above
    100. Rows whose SBP is NaN match neither condition and are left
    unlabelled.

    The frame is modified in place and the same object is returned.
    """
    systolic = df['SBP']
    at_or_below_threshold = systolic <= 100
    above_threshold = systolic > 100

    # NOTE(review): a numeric placeholder for missing data (e.g. -999) would
    # satisfy "<= 100" and be labelled 1 -- confirm SBP has none.
    df.loc[at_or_below_threshold, 'sbp_label'] = 1
    df.loc[above_threshold, 'sbp_label'] = 0

    return df

In [72]:
# Label both splits with the same systolic-pressure rule.
train_df, val_df = map(get_sbp_label, (train_df, val_df))

### MAP

In [89]:
def get_map_label(df):
    """Add a ``map_label`` column derived from the ``MAP`` column.

    MAP is the mean arterial pressure; values from 70 to 100 inclusive are
    treated as normal. ``map_label`` is 0 for those rows and 1 where MAP is
    below 70 or above 100. Rows whose MAP is NaN match neither condition and
    are left unlabelled.

    The frame is modified in place and the same object is returned.
    """
    arterial_pressure = df['MAP']
    is_normal = (arterial_pressure >= 70) & (arterial_pressure <= 100)
    is_abnormal = (arterial_pressure < 70) | (arterial_pressure > 100)

    # NOTE(review): a numeric placeholder for missing data (e.g. -999) would
    # satisfy "< 70" and be labelled 1 -- confirm MAP has none.
    df.loc[is_normal, 'map_label'] = 0
    df.loc[is_abnormal, 'map_label'] = 1

    return df

In [90]:
# Label both splits with the same mean-arterial-pressure rule.
train_df, val_df = map(get_map_label, (train_df, val_df))

In [91]:
# Preview the first rows to check that the new *_label columns are present.
train_df.head(n=5)

Unnamed: 0,patient_id,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,BaseExcess,HCO3,...,HospAdmTime,ICULOS,SepsisLabel,age_label,hr_label,temp_label,resp_label,paco2_label,sbp_label,map_label
0,1,87.0,98.0,36.3,94.5,71.5,-999.0,21.0,-999.0,-999.0,...,-0.03,1.0,0.0,1.0,0.0,1.0,1,1.0,1.0,0.0
1,1,87.0,98.0,36.3,94.5,71.5,-999.0,21.0,-999.0,-999.0,...,-0.03,2.0,0.0,1.0,0.0,1.0,1,1.0,1.0,0.0
2,1,85.0,97.0,36.3,89.0,62.0,-999.0,22.0,-999.0,-999.0,...,-0.03,3.0,0.0,1.0,0.0,1.0,1,1.0,1.0,1.0
3,1,83.0,97.0,36.28,104.0,66.0,-999.0,22.0,-999.0,30.0,...,-0.03,4.0,0.0,1.0,0.0,1.0,1,1.0,0.0,1.0
4,1,81.0,98.0,36.28,87.0,67.0,-999.0,18.0,-999.0,-999.0,...,-0.03,5.0,0.0,1.0,0.0,1.0,0,1.0,1.0,1.0


In [92]:
# Persist the labelled splits. These are the same paths the notebook loads
# from at the top, so the input files are overwritten with the added columns.
train_df.to_csv(path_or_buf='../dataset/train_cleaned.csv', sep='|', index=False)
val_df.to_csv(path_or_buf='../dataset/val_cleaned.csv', sep='|', index=False)