In [None]:
# In[ ]:
# ** import package **
import os
import sys
import json
import pathlib
sys.path.append("..")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import traceback
from tqdm import tqdm
from datetime import timedelta
from _utils.customlogger import customlogger as CL

# Show full cell contents instead of truncating wide columns. None is the
# documented "no limit" value; the old -1 sentinel was deprecated in pandas 1.0.
pd.set_option('display.max_colwidth', None)
# Display at most 50 rows of a frame.
pd.set_option('display.max_rows', 50)

In [None]:
# In[ ]:
# ** resolve the directories this notebook works with **
# Working directory of the kernel and the directory one level above it.
current_dir = pathlib.Path.cwd()
parent_dir = current_dir.parent
# os.path.abspath('') resolves to the working directory, so this is the
# working directory's base name with any extension stripped.
_cwd_base = os.path.basename(os.path.abspath(''))
curr_file_name, _ = os.path.splitext(_cwd_base)
# Data directory sits under the parent directory; create it when missing.
data_dir = pathlib.Path('{}/data/'.format(parent_dir))
data_dir.mkdir(mode=0o777, parents=True, exist_ok=True)

In [None]:
# ** load the MICU and SICU frames from feather files in data_dir **
_micu_path = '{}/MICU_df.feather'.format(data_dir)
_sicu_path = '{}/SICU_df.feather'.format(data_dir)
MICU_df = pd.read_feather(_micu_path)
SICU_df = pd.read_feather(_sicu_path)

In [None]:
# ** build the column-rename dictionary from the mapping table **
eicu_mapping = pd.read_csv('eicu_mapping.csv')
# Strip line breaks embedded in the cell text. regex=True is required here:
# without it DataFrame.replace only matches cells whose ENTIRE value equals
# '\r\n' or '\n', so a name containing a stray newline would be left as-is.
eicu_mapping = eicu_mapping.replace(r'\r?\n', '', regex=True)
# Rows missing either name cannot describe a rename; keeping them would map a
# column to NaN (NaN != anything is True), so they are excluded first.
_named_rows = eicu_mapping.dropna(subset=['concept_name', 'feature_new_name'])
# Keep only the rows where the name actually changes.
eicu_mapping_diff = _named_rows[_named_rows['concept_name'] != _named_rows['feature_new_name']]
rename_dict = dict(zip(eicu_mapping_diff.concept_name, eicu_mapping_diff.feature_new_name))
print(rename_dict)

# ** rename columns **
# Labels absent from a frame are ignored by rename (default errors='ignore').
MICU_df.rename(columns=rename_dict, inplace=True)
SICU_df.rename(columns=rename_dict, inplace=True)

In [None]:
# ** calc Glasgow Coma Scale **
def calculate_gcs(eyes, motor, verbal, meds):
    """Return the Glasgow Coma Scale total for one observation.

    The total is ``eyes + motor + verbal``. When ``meds`` equals 1 the
    function returns ``np.nan`` instead of a sum.
    NOTE(review): presumably meds == 1 flags a score that cannot be
    assessed because of medication -- confirm against the data dictionary.
    """
    suppressed = meds == 1
    return np.nan if suppressed else eyes + motor + verbal

# Add the total 'gcs' column to each ICU frame, then drop the component
# columns it was built from. Both frames get the same treatment, MICU first.
def _row_gcs(row):
    """Pass one row's GCS component values to calculate_gcs."""
    return calculate_gcs(row['eyes'], row['motor'], row['verbal'], row['meds'])

for _icu_df in (MICU_df, SICU_df):
    _icu_df.loc[:, 'gcs'] = _icu_df.apply(_row_gcs, axis=1)
    _icu_df.drop(columns=['eyes', 'motor', 'verbal', 'meds'], inplace=True)

In [None]:
# ** list column labels of SICU_df that occur more than once **
# Every occurrence after the first one is reported, in column order.
cols = list(SICU_df.columns)
_seen_labels = set()
dup = []
for _label in cols:
    if _label in _seen_labels:
        dup.append(_label)
    else:
        _seen_labels.add(_label)
dup

In [None]:
# Write the processed frames to data_dir as feather files (SICU first).
_sicu_out = '{}/SICU_ps_df.feather'.format(data_dir)
_micu_out = '{}/MICU_ps_df.feather'.format(data_dir)
SICU_df.to_feather(_sicu_out)
MICU_df.to_feather(_micu_out)