In [None]:
#!/usr/bin/env python
# coding: utf-8

'''
import SQL
'''

# In[ ]:
# ** import package **
import os
import sys
import json
import pathlib
sys.path.append("..")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import traceback
from tqdm import tqdm
from datetime import timedelta
from _utils.customlogger import customlogger as CL

# None disables truncation of cell contents; the legacy value -1 is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)  # show every column at full width
pd.set_option('display.max_rows', 50)        # display at most 50 rows

In [None]:
# In[ ]:
# ** loading config **
# Parse config.json from the parent directory into the `cfg` dictionary.
config_path = './../{}'.format("config.json")
with open(config_path) as config_file:
    cfg = json.load(config_file)

In [None]:
# In[ ]:
# ** loading info **
# Working directory, its parent, and the working date taken from the config.
current_dir = pathlib.Path.cwd()
parent_dir = current_dir.parent
current_date = cfg["working_date"]
# abspath('') resolves to the current working directory, so this is the directory's
# base name with any extension-like suffix stripped.
_cwd_base_name = os.path.basename(os.path.abspath(''))
curr_file_name = os.path.splitext(_cwd_base_name)[0]

In [None]:
# In[ ]:
# **create Logger**
# Make sure <parent_dir>/_log/ exists before the logger is pointed at a file inside it.
log_dir = pathlib.Path('{}/_log/'.format(parent_dir))
log_dir.mkdir(mode=0o777, parents=True, exist_ok=True)
# NOTE(review): CL.create_logger is project code; presumably mode="a" appends to the log file - confirm.
log = CL("custom_logger").create_logger(file_name="../_log/{}.log".format(curr_file_name), mode="a", level="DEBUG")
log.debug('start {}'.format(curr_file_name))

In [None]:
# In[ ]:
# ** connection DataBase **
# Open `conn` against the DBMS selected by cfg["dbms"]. Each driver is imported inside its
# own branch, so only the driver that is actually selected has to be installed.
if (cfg["dbms"]=="postgresql"):
    db_cfg = cfg["postgresql"]
    import psycopg2 as pg
    conn = pg.connect(host=db_cfg['@server'], user=db_cfg['@user'], password=db_cfg['@password'], port=db_cfg['@port'], dbname=db_cfg['@database'])
    log.debug("postgresql connect")

elif (cfg["dbms"]=="mssql"):
    db_cfg = cfg["mssql"]
    import pymssql
    conn = pymssql.connect(server=db_cfg['@server'], user=db_cfg['@user'], password=db_cfg['@password'], port=db_cfg['@port'], database=db_cfg['@database'], as_dict=False)
    log.debug("mssql connect")

elif (cfg["dbms"]=="mysql"):
    db_cfg = cfg["mysql"]
    import pymysql
    conn = pymysql.connect(host=db_cfg['@server'], user=db_cfg['@user'], password=db_cfg['@password'], port=db_cfg['@port'], db=db_cfg['@database'], charset='utf8', autocommit=True)
    log.debug("mysql connect")

else:
    # Without a recognised DBMS no connection exists; stop here instead of letting the
    # following cells fail on an undefined (or stale) `conn`.
    log.warning("set config.json - sql - dbms : mssql, postgresql or mysql")
    raise ValueError("unsupported dbms in config.json: {!r}".format(cfg["dbms"]))

In [None]:
# Load every row and column of eicu.APACHEAPSVAR into memory.
apachepsvar_df = pd.read_sql(sql="SELECT * FROM eicu.APACHEAPSVAR", con=conn)

In [None]:
# Load every row and column of eicu.PATIENT into memory.
patient_df = pd.read_sql(sql="SELECT * FROM eicu.PATIENT", con=conn)

In [None]:
# Load every row and column of eicu.MICROLAB into memory.
microlab_df = pd.read_sql(sql="SELECT * FROM eicu.MICROLAB", con=conn)

In [10]:
# Load every row and column of eicu.LAB into memory.
lab_df = pd.read_sql(sql="SELECT * FROM eicu.LAB", con=conn)

In [11]:
# Load every row and column of eicu.MEDICATION into memory.
medication_df = pd.read_sql(sql="SELECT * FROM eicu.MEDICATION", con=conn)

In [17]:
# Load every row and column of eicu.ADMISSIONDRUG into memory.
admissiondrug_df = pd.read_sql(sql="SELECT * FROM eicu.ADMISSIONDRUG", con=conn)

In [15]:
# Cache each raw table as a Feather file in the working directory.
for raw_table, feather_path in (
    (apachepsvar_df, 'apachepsvar_df.feather'),
    (patient_df, 'patient_df.feather'),
    (microlab_df, 'microlab_df.feather'),
    (lab_df, 'lab_df.feather'),
    (medication_df, 'medication_df.feather'),
    (admissiondrug_df, 'admissiondrug_df.feather'),
):
    raw_table.to_feather(feather_path)

In [None]:
# Reload the six raw tables from their Feather caches, in the same order they were written.
(
    apachepsvar_df,
    patient_df,
    microlab_df,
    lab_df,
    medication_df,
    admissiondrug_df,
) = (
    pd.read_feather(feather_path)
    for feather_path in (
        'apachepsvar_df.feather',
        'patient_df.feather',
        'microlab_df.feather',
        'lab_df.feather',
        'medication_df.feather',
        'admissiondrug_df.feather',
    )
)

In [21]:
# Inner-join each clinical table to the patient table on the stay identifier, so stays
# that lack a matching row on either side are dropped.
stay_key = 'patientunitstayid'
patient_units = patient_df[[stay_key, 'unittype']]
patient_apc_df = patient_df.merge(apachepsvar_df, on=stay_key, how='inner')
patient_med_df = patient_units.merge(medication_df[[stay_key, 'drugname']], on=stay_key, how='inner')
patient_lab_df = patient_units.merge(lab_df[[stay_key, 'labname', 'labresult']], on=stay_key, how='inner')
patient_adm_df = patient_units.merge(admissiondrug_df[[stay_key, 'drugname']], on=stay_key, how='inner')

In [35]:
# Bring the three tables to one schema: concept_name / concept_value / concept_table.
# Medication and admission-drug rows carry no measurement, so their value is the flag 1;
# lab rows keep the lab result as the value.
patient_med_df = (
    patient_med_df
    .rename(columns={'drugname':'concept_name'})
    .assign(concept_value=1, concept_table='med')
)
patient_lab_df = (
    patient_lab_df
    .rename(columns={'labname':'concept_name','labresult':'concept_value'})
    .assign(concept_table='lab')
)
patient_adm_df = (
    patient_adm_df
    .rename(columns={'drugname':'concept_name'})
    .assign(concept_value=1, concept_table='adm')
)

In [36]:
# Stack medication, lab and admission-drug rows into one long table with a fresh 0..n-1 index.
concat_df = pd.concat((patient_med_df, patient_lab_df, patient_adm_df), ignore_index=True)

In [72]:
def filter_with_missing_rate(df, threshold):
    """Summarise, per concept, the share of patients in ``df`` that have it recorded.

    Despite its name, the ``fMissingRate`` column holds the fraction of patients for
    whom the concept is *present* (distinct patients with the concept divided by
    distinct patients in ``df``), not the fraction for whom it is missing.

    Args:
        df: Long-format frame with ``patientunitstayid``, ``unittype``,
            ``concept_name`` and ``concept_table`` columns.
        threshold: Kept for interface compatibility. The threshold check is disabled
            in this function, so every concept is returned regardless of its rate.

    Returns:
        DataFrame with one row per ``concept_name`` and the columns ``concept_name``,
        ``concept_table``, ``unittype``, ``nConceptPatients``, ``nTotalPatients`` and
        ``fMissingRate``. The frame is empty (with those columns) when ``df`` has no
        concepts.
    """
    summary_columns = [
        'concept_name', 'concept_table', 'unittype',
        'nConceptPatients', 'nTotalPatients', 'fMissingRate',
    ]
    fEpsilon = 1.0e-08  # keeps the division defined when df contains no patients
    nTotalPatients = len(df.patientunitstayid.unique())

    # Collect plain records and build the frame once: DataFrame.append was removed in
    # pandas 2.0, and growing a frame row by row is quadratic anyway.
    records = []
    for _, concept_rows in df.groupby('concept_name'):
        if concept_rows.empty:
            # Possible for unobserved categories; there is nothing to summarise.
            continue
        nConceptPatients = len(concept_rows.patientunitstayid.unique())
        records.append({
            'concept_name': concept_rows['concept_name'].iloc[0],
            'concept_table': concept_rows['concept_table'].iloc[0],
            'unittype': concept_rows['unittype'].iloc[0],
            'nConceptPatients': nConceptPatients,
            'nTotalPatients': nTotalPatients,
            'fMissingRate': nConceptPatients / (nTotalPatients + fEpsilon),
        })

    features_df = pd.DataFrame(records, columns=summary_columns)
    return features_df

# Compute the per-concept patient coverage separately inside every ICU unit type.
feature_df = (
    concat_df
    .groupby('unittype')
    .apply(lambda unit_rows: filter_with_missing_rate(unit_rows, threshold=0.05))
    .reset_index(drop=True)
)


In [73]:
# Restrict the comparison to MICU and SICU and print how many concept rows each unit has.
feature_df = feature_df.loc[feature_df['unittype'].isin(['MICU', 'SICU'])]
print( len(feature_df.loc[feature_df['unittype']=='MICU']), len(feature_df.loc[feature_df['unittype']=='SICU']) )
# One row per (concept_name, concept_table) with one rate column per unit type; a concept
# that does not occur in a unit gets a rate of 0.
feature_df2 = pd.pivot_table(data=feature_df, index=['concept_name', 'concept_table'], columns='unittype', values='fMissingRate', fill_value=0).reset_index()
feature_df2['gap'] = (feature_df2['MICU'] - feature_df2['SICU']).abs()
# sort_values returns a new frame; the result has to be assigned for the CSV to be
# written in gap order (the original call discarded it).
feature_df2 = feature_df2.sort_values('gap')
feature_df2.to_csv('feature_df2.csv')

2942 2814


In [91]:
# Minimum rate a concept must exceed to be kept, keyed by source table.
# NOTE(review): this name shadows the built-in `filter` for the rest of the notebook.
filter = dict(med=0.05, adm=0.05, lab=0.7)

index_list = list()
def filterwithmissingrate(df, unittype, filter):
    """Keep the rows of ``df`` whose rate for one unit type exceeds a per-table threshold.

    Args:
        df: Frame with a ``concept_table`` column and one numeric column per unit type.
        unittype: Name of the rate column to compare. Any column label works,
            including labels containing spaces or dashes such as ``'Med-Surg ICU'``.
        filter: Mapping from ``concept_table`` value to the minimum rate; a row is
            kept only when its rate is strictly greater than that threshold.

    Returns:
        The matching rows, grouped in the iteration order of ``filter`` and with their
        original index labels. An empty DataFrame is returned when nothing matches.
    """
    selected_frames = []
    for tName in filter:
        print('concept_table == \'{}\' and {} > {}'.format(tName, unittype, filter[tName]))
        # Boolean masks instead of a string-built query: the query expression cannot
        # be parsed when the unit-type column name is not a valid identifier.
        keep_rows = (df['concept_table'] == tName) & (df[unittype] > filter[tName])
        feature_table_df = df.loc[keep_rows]
        if not feature_table_df.empty:
            selected_frames.append(feature_table_df)
    if not selected_frames:
        return pd.DataFrame()
    return pd.concat(selected_frames, axis=0)
   
# Apply the per-table thresholds to the MICU column first, then to the SICU column.
MICU_feature, SICU_feature = (
    filterwithmissingrate(feature_df2, unit_name, filter) for unit_name in ('MICU', 'SICU')
)


concept_table == 'med' and MICU > 0.05
concept_table == 'adm' and MICU > 0.05
concept_table == 'lab' and MICU > 0.7
concept_table == 'med' and SICU > 0.05
concept_table == 'adm' and SICU > 0.05
concept_table == 'lab' and SICU > 0.7


In [92]:
# Number of distinct selected concepts: (SICU, MICU).
tuple(len(selected['concept_name'].unique()) for selected in (SICU_feature, MICU_feature))

(86, 50)

In [94]:
# Show the rows of the long table that came from the lab table.
concat_df.loc[concat_df['concept_table'].eq('lab')]

Unnamed: 0,patientunitstayid,unittype,concept_name,concept_value,concept_table
7301853,141168,Med-Surg ICU,fibrinogen,177.00,lab
7301854,141168,Med-Surg ICU,PT - INR,2.50,lab
7301855,141168,Med-Surg ICU,magnesium,2.00,lab
7301856,141168,Med-Surg ICU,PT,26.60,lab
7301857,141168,Med-Surg ICU,pH,7.20,lab
...,...,...,...,...,...
46434379,3353263,MICU,WBC x 1000,6.40,lab
46434380,3353263,MICU,RBC,4.67,lab
46434381,3353263,MICU,-monos,10.00,lab
46434382,3353263,MICU,WBC x 1000,6.60,lab


In [96]:
# Wide SICU table: one row per stay, one column per selected concept. pivot_table uses
# its default aggregation (mean) when a stay has several values for the same concept.
sicu_concept_names = SICU_feature.concept_name.unique()
is_selected_sicu_row = concat_df['unittype'].eq('SICU') & concat_df['concept_name'].isin(sicu_concept_names)
SICU_sub_df = (
    concat_df
    .loc[is_selected_sicu_row]
    .pivot_table(index=['patientunitstayid', 'unittype'], columns='concept_name', values='concept_value')
    .reset_index()
)
SICU_sub_df

concept_name,patientunitstayid,unittype,1000 ML - SODIUM CHLORIDE 0.9 % IV SOLN,1000 ML FLEX CONT : SODIUM CHLORIDE 0.9 % IV SOLN,1000 ML FLEX CONT: SODIUM CHLORIDE 0.9 % IV SOLN,2 ML VIAL : ONDANSETRON HCL 4 MG/2ML IJ SOLN,2 ML: ONDANSETRON HCL 4 MG/2ML IJ SOLN,50 ML SYRINGE : DEXTROSE 50 % IV SOLN,ACETAMINOPHEN,ACETAMINOPHEN 325 MG PO TABS,...,creatinine,fentaNYL,fentaNYL citrate (PF),glucose,hydrALAZINE,magnesium,morphine,platelets x 1000,potassium,sodium
0,141296,SICU,1.0,,,1.0,,,,1.0,...,5.311667,,,117.833333,,,,437.833333,5.050000,137.333333
1,141297,SICU,,,,,,1.0,,,...,5.413333,,,166.666667,,3.200000,,440.000000,7.066667,138.000000
2,141314,SICU,1.0,,,1.0,,1.0,,,...,1.806667,,,241.333333,,2.350000,,172.500000,6.533333,155.666667
3,141675,SICU,1.0,,,,,,,1.0,...,0.630000,,,88.000000,,,,232.000000,4.200000,142.000000
4,141708,SICU,,,,1.0,,,,1.0,...,1.055000,,,134.500000,,,,188.000000,3.800000,140.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11887,3353007,SICU,,,,,,,1.0,,...,0.756667,,,139.666667,,1.625000,,224.333333,4.366667,134.333333
11888,3353015,SICU,,,,,,,,,...,0.675161,,,122.096774,,1.838095,,385.358974,4.470968,134.612903
11889,3353031,SICU,,,,,,,1.0,,...,0.847500,,,144.000000,,1.766667,,226.750000,5.283333,138.250000
11890,3353043,SICU,,,,,,,,,...,0.630000,,,90.000000,,,,257.666667,4.400000,136.000000


In [97]:
# Wide MICU table: one row per stay, one column per selected concept. pivot_table uses
# its default aggregation (mean) when a stay has several values for the same concept.
micu_concept_names = MICU_feature.concept_name.unique()
is_selected_micu_row = concat_df['unittype'].eq('MICU') & concat_df['concept_name'].isin(micu_concept_names)
MICU_sub_df = (
    concat_df
    .loc[is_selected_micu_row]
    .pivot_table(index=['patientunitstayid', 'unittype'], columns='concept_name', values='concept_value')
    .reset_index()
)
MICU_sub_df

concept_name,patientunitstayid,unittype,-basos,-eos,-lymphs,-monos,1000 ML - SODIUM CHLORIDE 0.9 % IV SOLN,1000 ML FLEX CONT : SODIUM CHLORIDE 0.9 % IV SOLN,1000 ML FLEX CONT: SODIUM CHLORIDE 0.9 % IV SOLN,50 ML SYRINGE : DEXTROSE 50 % IV SOLN,...,bicarbonate,calcium,chloride,creatinine,glucose,magnesium,platelets x 1000,potassium,sodium,total protein
0,141328,MICU,0.00,4.000000,18.000000,7.500,1.0,,,,...,30.000000,8.550000,95.000000,3.600000,113.000000,2.100000,174.500000,4.150000,132.000000,7.300000
1,141366,MICU,0.00,0.333333,9.333333,12.000,1.0,,,1.0,...,32.600000,8.440000,102.000000,1.115000,123.800000,2.100000,295.833333,4.540000,139.000000,6.300000
2,141392,MICU,0.25,2.750000,12.125000,9.375,1.0,,,1.0,...,32.875000,8.925000,100.000000,1.110000,117.375000,1.900000,172.625000,3.635714,138.125000,6.633333
3,141462,MICU,0.00,1.400000,11.000000,6.400,1.0,,,1.0,...,28.833333,8.566667,100.500000,1.217500,223.500000,2.016667,368.500000,3.855556,135.500000,6.400000
4,141475,MICU,0.00,0.666667,10.000000,5.800,1.0,,,1.0,...,25.400000,7.820000,114.600000,0.896000,119.200000,,182.500000,3.566667,145.600000,6.150000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16784,3353190,MICU,1.00,1.000000,12.000000,12.000,,,,,...,25.000000,8.600000,107.750000,1.388000,74.000000,1.833333,262.750000,4.200000,139.750000,7.300000
16785,3353198,MICU,0.10,2.300000,13.300000,4.800,,,,,...,34.217391,8.228261,103.673913,1.087826,133.065217,2.347619,264.615385,3.980769,143.434783,5.316667
16786,3353213,MICU,0.50,0.500000,11.500000,3.500,,,,,...,20.750000,8.175000,114.000000,0.582500,143.500000,2.200000,222.000000,4.025000,141.250000,5.250000
16787,3353237,MICU,0.00,4.000000,18.000000,9.000,,,,,...,31.500000,8.716667,101.000000,0.831667,103.000000,2.000000,144.875000,3.600000,138.500000,5.900000


In [98]:
# main : hospital + apache
# sub : lab, admission_drug, med
# Split the patient + APACHE table by unit type, then attach the wide concept columns.
# The inner join keeps only stays that appear in both the main and the sub table.
SICU_main_df = patient_apc_df.loc[patient_apc_df['unittype'].eq('SICU')]
MICU_main_df = patient_apc_df.loc[patient_apc_df['unittype'].eq('MICU')]
SICU_df = SICU_main_df.merge(SICU_sub_df, on='patientunitstayid', how='inner')
MICU_df = MICU_main_df.merge(MICU_sub_df, on='patientunitstayid', how='inner')

In [100]:
# Column counts of the final tables: (SICU, MICU).
tuple(len(unit_table.columns) for unit_table in (SICU_df, MICU_df))

(141, 105)

In [104]:
# Persist the merged per-unit tables as Feather files in the working directory.
for unit_table, feather_path in ((SICU_df, 'SICU_df.feather'), (MICU_df, 'MICU_df.feather')):
    unit_table.to_feather(feather_path)

In [None]:
def makegap(df):
    """Return the absolute gap between the first two ``fMissingRate`` values of one concept.

    NOTE(review): the original cell was unfinished (it referenced undefined names,
    returned nothing and ended in a syntax error). The output chosen here - one row
    with the concept, its table and the gap - is an assumption; confirm it matches
    the intended analysis.

    Args:
        df: Rows of the per-unit feature table for a single concept. Needs the
            ``concept_name``, ``concept_table`` and ``fMissingRate`` columns.

    Returns:
        A one-row DataFrame with ``concept_name``, ``concept_table`` and ``gap``, or an
        empty DataFrame carrying ``df``'s columns when fewer than two rows are given.
    """
    if(len(df) < 2):
        return pd.DataFrame(columns=df.columns)

    # Positional access: the group keeps the parent's index labels, so label 0 / 1
    # are generally not present.
    rates = df['fMissingRate'].to_numpy()
    var_temp = {
        'concept_name': df['concept_name'].iloc[0],
        'concept_table': df['concept_table'].iloc[0],
        'gap': abs(rates[0] - rates[1]),
    }
    return pd.DataFrame([var_temp])

# Concepts with fewer than two rows yield an empty frame and are left out of the result.
gap_frames = [makegap(concept_rows) for _, concept_rows in feature_df.groupby('concept_name')]
gap_frames = [gap_frame for gap_frame in gap_frames if not gap_frame.empty]
gap_df = (
    pd.concat(gap_frames, ignore_index=True)
    if gap_frames
    else pd.DataFrame(columns=['concept_name', 'concept_table', 'gap'])
)
gap_df

In [None]:

# NOTE(review): `df` and `features_df` are not assigned at notebook level anywhere in the
# visible cells (they only exist as locals of filter_with_missing_rate), so this cell
# presumably depends on leftover kernel state and fails on a fresh run - confirm or remove.
# Blank out concept names that are not in features_df, flag every row with 1, and pivot to
# one row per stay with one column per remaining concept.
df.loc[~df['concept_name'].isin(list(features_df.concept_name)), 'concept_name'] = None
df['value'] = 1
df = pd.pivot_table(data=df, index=['patientunitstayid', 'unittype'], columns='concept_name', values='value').reset_index()
df

In [42]:
# One set of feature names per unit type, in the order Med-Surg ICU, MICU, SICU.
# The patient and APACHE column sets are the same for every unit; the med / lab / adm
# sets hold the distinct concept names observed in that unit.
pat_feature_list = []
apc_feature_list = []
med_feature_list = []
lab_feature_list = []
adm_feature_list = []
for icu in ['Med-Surg ICU','MICU','SICU']:
    unit_rows = concat_df.loc[concat_df['unittype']==icu]
    pat_feature_list.append(set(patient_df.columns))
    apc_feature_list.append(set(apachepsvar_df.columns))
    for table_name, table_features in (('med', med_feature_list), ('lab', lab_feature_list), ('adm', adm_feature_list)):
        in_table = unit_rows['concept_table']==table_name
        table_features.append(set(unit_rows.loc[in_table, 'concept_name'].unique()))

In [43]:
# Per unit type: the size of each feature set on one line, then their total on the next.
for i in range(3):
    feature_counts = [
        len(feature_sets[i])
        for feature_sets in (pat_feature_list, apc_feature_list, med_feature_list, lab_feature_list, adm_feature_list)
    ]
    print(*feature_counts)
    print(sum(feature_counts))

29 26 1327 158 3759
5299
29 26 1129 154 1660
2998
29 26 981 151 1683
2870


In [30]:
import pickle

# Index 1 is the 'MICU' entry of the per-unit feature lists; merge all five sets for it.
total_set = set().union(
    pat_feature_list[1],
    apc_feature_list[1],
    med_feature_list[1],
    lab_feature_list[1],
    adm_feature_list[1],
)

with open('micu_set.pickle', 'wb') as pickle_file:
    pickle.dump(total_set, pickle_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Distinct unit types among the medication rows. The column is spelled 'unittype';
# the original 'unitype' raised a KeyError.
med_features = set(patient_med_df['unittype'])

In [None]:
# Scale every drughiclseqno by 1000/10000 and truncate the result to an integer.
medication_df['drughiclseqno'].map(lambda seqno: int(seqno*1000/10000))

In [None]:
# Distinct value counts in the medication table: (drughiclseqno, drugname).
tuple(len(medication_df[column].unique()) for column in ('drughiclseqno', 'drugname'))

In [None]:
# Rebuild the stay-level medication table: unit type plus drug name for every medication row.
patient_med_df = patient_df[['patientunitstayid', 'unittype']].merge(
    medication_df[['patientunitstayid', 'drugname']],
    on='patientunitstayid',
    how='inner',
)
patient_med_df

In [None]:
def filter_with_missing_rate_concept(x, nPatientInTotal, threshold):
    """Summarise one drug's patient coverage, dropping drugs below ``threshold``.

    Despite its name, ``fMissingRate`` is the fraction of patients who *have* the
    drug: distinct ``patientunitstayid`` values in ``x`` divided by ``nPatientInTotal``.

    Args:
        x: Rows of the medication table for a single ``drugname``.
        nPatientInTotal: Number of patients used as the denominator.
        threshold: Minimum rate; drugs with a lower rate are dropped.

    Returns:
        A one-row DataFrame with ``drugname``, ``nPatientInConceptId``,
        ``nPatientInTotal`` and ``fMissingRate``, or an empty DataFrame carrying
        ``x``'s columns when the rate is below ``threshold``.
    """
    nPatientInConceptId = len(x.patientunitstayid.unique())
    fEpsilon = 1.0e-08  # avoids division by zero
    fMissingRate = nPatientInConceptId / (nPatientInTotal + fEpsilon)
    if fMissingRate < threshold:
        return pd.DataFrame(columns=x.columns)

    drugname = list(x.drugname)[0]
    # The template must go through str.format; passing it to print as a separate
    # argument printed the raw "{}" placeholders.
    print("{}, {}, {}, {:.2}".format(drugname, nPatientInConceptId, nPatientInTotal, fMissingRate))

    # Build the row in one step; DataFrame.append was removed in pandas 2.0.
    var_temp = {
        'drugname': drugname,
        'nPatientInConceptId': nPatientInConceptId,
        'nPatientInTotal': nPatientInTotal,
        'fMissingRate': fMissingRate,
    }
    return pd.DataFrame([var_temp], columns=['drugname', 'nPatientInConceptId', 'nPatientInTotal', 'fMissingRate'])

concept_list = []
nPatientInTotal = len(patient_df.patientunitstayid.unique())
# Keep at most the first 30 drugs whose patient coverage reaches 5 %.
drugs_df = (
    patient_med_df
    .groupby('drugname')
    .apply(lambda drug_rows: filter_with_missing_rate_concept(drug_rows, nPatientInTotal, threshold=0.05))
    .reset_index(drop=True)
    .head(30)
)
# Blank out every other drug, flag the remaining rows with 1, and pivot to one row per stay.
is_kept_drug = patient_med_df['drugname'].isin(list(drugs_df.drugname))
patient_med_df.loc[~is_kept_drug, 'drugname'] = None
patient_med_df['value'] = 1
patient_med_df = (
    patient_med_df
    .pivot_table(index=['patientunitstayid', 'unittype'], columns='drugname', values='value')
    .reset_index()
)
patient_med_df

In [None]:
def resumetable(df):
    """Build a per-column summary of ``df``, treating -1 as a missing value.

    Prints the frame's shape as a side effect.

    Args:
        df: Frame to summarise; it is copied, so the caller's frame is not modified.

    Returns:
        DataFrame with one row per column of ``df``: the column name (``feature``
        when ``df.columns`` is unnamed, otherwise a column named after
        ``df.columns.name``), ``data_type``, ``n_missingvalues`` (null count),
        ``n_missingrates`` and ``n_eigenvalues`` (distinct non-null values).
        In this version ``n_missingrates`` is ``1 - null fraction``, i.e. the
        share of values that are present.
    """
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
    df = df.copy().replace(-1, np.nan)
    print(f'data frame shape: {df.shape}')
    n_missing = df.isnull().sum().values
    summary = (
        pd.DataFrame(df.dtypes, columns=['data_type'])
        .reset_index()
        .rename(columns={'index': 'feature'})
    )
    summary['n_missingvalues'] = n_missing
    summary['n_missingrates'] = 1 - n_missing/len(df)
    summary['n_eigenvalues'] = df.nunique().values
    return summary
# Summarise the medication pivot per unit type for the three ICU types of interest and
# draw the n_missingrates values as a drug x unit heatmap.
is_target_unit = patient_med_df['unittype'].isin(['Med-Surg ICU','MICU','SICU'])
patient_med_df = patient_med_df.loc[is_target_unit]
patient_med_summary_df = patient_med_df.groupby('unittype').apply(resumetable).reset_index()
patient_med_summary_df
# The summary's name column is 'drugname' here because the pivoted frame's column index
# carries that name.
heatmap_data = (
    patient_med_summary_df[['unittype', 'drugname', 'n_missingrates']]
    .pivot(index='drugname', columns='unittype', values='n_missingrates')
)
len(heatmap_data)
import seaborn as sns
figure_settings = {'figure.figsize':(8,16)}
sns.set(rc=figure_settings)
sns.heatmap(heatmap_data, annot=True)

In [None]:
# Rebuild the stay-level lab table: unit type plus lab name and result for every lab row.
patient_lab_df = patient_df[['patientunitstayid', 'unittype']].merge(
    lab_df[['patientunitstayid', 'labname', 'labresult']],
    on='patientunitstayid',
    how='inner',
)
patient_lab_df

def filter_with_missing_rate_concept(x, nPatientInTotal, threshold):
    """Summarise one lab's patient coverage, dropping labs below ``threshold``.

    Despite its name, ``fMissingRate`` is the fraction of patients who *have* the
    lab: distinct ``patientunitstayid`` values in ``x`` divided by ``nPatientInTotal``.

    Args:
        x: Rows of the lab table for a single ``labname``.
        nPatientInTotal: Number of patients used as the denominator.
        threshold: Minimum rate; labs with a lower rate are dropped.

    Returns:
        A one-row DataFrame with ``labname``, ``nPatientInConceptId``,
        ``nPatientInTotal`` and ``fMissingRate``, or an empty DataFrame carrying
        ``x``'s columns when the rate is below ``threshold``.
    """
    nPatientInConceptId = len(x.patientunitstayid.unique())
    fEpsilon = 1.0e-08  # avoids division by zero
    fMissingRate = nPatientInConceptId / (nPatientInTotal + fEpsilon)
    if fMissingRate < threshold:
        return pd.DataFrame(columns=x.columns)

    labname = list(x.labname)[0]
    # The template must go through str.format; passing it to print as a separate
    # argument printed the raw "{}" placeholders.
    print("{}, {}, {}, {:.2}".format(labname, nPatientInConceptId, nPatientInTotal, fMissingRate))

    # Build the row in one step; DataFrame.append was removed in pandas 2.0.
    var_temp = {
        'labname': labname,
        'nPatientInConceptId': nPatientInConceptId,
        'nPatientInTotal': nPatientInTotal,
        'fMissingRate': fMissingRate,
    }
    return pd.DataFrame([var_temp], columns=['labname', 'nPatientInConceptId', 'nPatientInTotal', 'fMissingRate'])

concept_list = []
nPatientInTotal = len(patient_df.patientunitstayid.unique())
# Keep at most the first 30 labs whose patient coverage reaches 50 %.
labs_df = (
    patient_lab_df
    .groupby('labname')
    .apply(lambda lab_rows: filter_with_missing_rate_concept(lab_rows, nPatientInTotal, threshold=0.5))
    .reset_index(drop=True)
    .head(30)
)
# Blank out every other lab and pivot to one row per stay. The lab results themselves are
# the pivoted values (they are not replaced by a presence flag).
is_kept_lab = patient_lab_df['labname'].isin(list(labs_df.labname))
patient_lab_df.loc[~is_kept_lab, 'labname'] = None
patient_lab_df = (
    patient_lab_df
    .pivot_table(index=['patientunitstayid', 'unittype'], columns='labname', values='labresult')
    .reset_index()
)
patient_lab_df

In [None]:
# NOTE(review): within the visible cells, patient_lab_summary_df is first assigned in a
# later cell, so this display presumably raises NameError on a fresh top-to-bottom run.
patient_lab_summary_df

In [None]:
def resumetable(df):
    """Build a per-column summary of ``df``, treating -1 as a missing value.

    Prints the frame's shape as a side effect.

    Args:
        df: Frame to summarise; it is copied, so the caller's frame is not modified.

    Returns:
        DataFrame with one row per column of ``df``: the column name (``feature``
        when ``df.columns`` is unnamed, otherwise a column named after
        ``df.columns.name``), ``data_type``, ``n_missingvalues`` (null count),
        ``n_missingrates`` and ``n_eigenvalues`` (distinct non-null values).
        In this version ``n_missingrates`` is ``1 - null fraction``, i.e. the
        share of values that are present.
    """
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
    df = df.copy().replace(-1, np.nan)
    print(f'data frame shape: {df.shape}')
    n_missing = df.isnull().sum().values
    summary = (
        pd.DataFrame(df.dtypes, columns=['data_type'])
        .reset_index()
        .rename(columns={'index': 'feature'})
    )
    summary['n_missingvalues'] = n_missing
    summary['n_missingrates'] = 1 - n_missing/len(df)
    summary['n_eigenvalues'] = df.nunique().values
    return summary
# Summarise the lab pivot per unit type for the three ICU types of interest and draw the
# n_missingrates values as a lab x unit heatmap.
is_target_unit = patient_lab_df['unittype'].isin(['Med-Surg ICU','MICU','SICU'])
patient_lab_df = patient_lab_df.loc[is_target_unit]
patient_lab_summary_df = patient_lab_df.groupby('unittype').apply(resumetable).reset_index()
patient_lab_summary_df
# The summary's name column is 'labname' here because the pivoted frame's column index
# carries that name.
heatmap_data = patient_lab_summary_df.pivot(index='labname', columns='unittype', values='n_missingrates')
len(heatmap_data)
import seaborn as sns
figure_settings = {'figure.figsize':(8,16)}
sns.set(rc=figure_settings)
sns.heatmap(heatmap_data, annot=True)

In [None]:
def resumetable(df):
    """Build a per-column summary of ``df``, treating -1 as a missing value.

    Prints the frame's shape as a side effect.

    Args:
        df: Frame to summarise; it is copied, so the caller's frame is not modified.

    Returns:
        DataFrame with one row per column of ``df``: the column name (``feature``
        when ``df.columns`` is unnamed, otherwise a column named after
        ``df.columns.name``), ``data_type``, ``n_missingvalues`` (null count),
        ``n_missingrates`` (null fraction) and ``n_eigenvalues`` (distinct
        non-null values).
    """
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
    df = df.copy().replace(-1, np.nan)
    print(f'data frame shape: {df.shape}')
    n_missing = df.isnull().sum().values
    summary = (
        pd.DataFrame(df.dtypes, columns=['data_type'])
        .reset_index()
        .rename(columns={'index': 'feature'})
    )
    summary['n_missingvalues'] = n_missing
    summary['n_missingrates'] = n_missing/len(df)
    summary['n_eigenvalues'] = df.nunique().values
    return summary

# Column-level summary of the joined patient + APACHE table.
summary = resumetable(df=patient_apc_df)
summary

def plot_missing_data_visualization(df, cols):
    """Draw a missingno bar chart for the selected columns, treating -1 as missing.

    Args:
        df: Frame to inspect; it is copied, so the caller's frame is not modified.
        cols: Column labels to include in the chart.
    """
    import numpy as np
    import missingno as msno
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
    df = df.copy().replace(-1, np.nan)
    msno.bar(df=df.loc[:,cols], figsize=(30, 20))
    # Alternative view: msno.matrix(df=df.loc[:,cols], figsize=(30, 6))
    
# Chart the missing-value profile of every column of the patient + APACHE table.
plot_missing_data_visualization(df=patient_apc_df, cols=patient_apc_df.columns)

In [None]:
# Show the n_missingrates column of the summary.
summary.loc[:, 'n_missingrates']

In [None]:
# Keep only stays in the three ICU unit types being compared.
is_target_unit = patient_apc_df['unittype'].isin(['Med-Surg ICU','MICU','SICU'])
patient_apc_df = patient_apc_df.loc[is_target_unit]

In [None]:
# Column-level summary computed separately for every unit type.
patient_apc_summary_df = (
    patient_apc_df
    .groupby('unittype')
    .apply(resumetable)
    .reset_index()
)
patient_apc_summary_df

In [None]:
# Reshape to feature x unit type with n_missingrates as the cell value.
rate_columns = ['unittype', 'feature', 'n_missingrates']
heatmap_data = patient_apc_summary_df[rate_columns].pivot(index='feature', columns='unittype', values='n_missingrates')
len(heatmap_data)

In [None]:
import seaborn as sns
# Tall figure so every feature row stays readable; annotate each cell with its value.
figure_settings = {'figure.figsize':(8,16)}
sns.set(rc=figure_settings)
sns.heatmap(heatmap_data, annot=True)