# Imports

In [88]:
import os
import sys
import pandas as pd
import math
from pypots.utils.random import set_random_seed
from IPython.display import display, HTML
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from pypotsModify.benchpots_statistic_tables.datasets import preprocess_physionet2012

# Loading dataset

In [89]:
set_random_seed()
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)
print(physionet2012_dataset.keys())

2025-06-22 14:14:36 [INFO]: Have set the random seed as 2022 for numpy and pytorch.
2025-06-22 14:14:36 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2025-06-22 14:14:36 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2025-06-22 14:14:36 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2025-06-22 14:14:36 [INFO]: Loaded successfully!


dict_keys(['n_classes', 'n_steps', 'n_features', 'train_X', 'train_ICUType', 'val_X', 'val_ICUType', 'test_X', 'test_ICUType'])


# Training data

<h4>Loading training dataset</h4>

In [3]:
train_X = physionet2012_dataset['train_X']

<h4>Sum of 48 hours of all patients</h4>

In [4]:
total_pacientes = train_X.groupby("RecordID").first().reset_index()
total_pacientes = total_pacientes["RecordID"].count()
total_pacientes = total_pacientes*48
total_pacientes

np.int64(368208)

<h4>Female gender missing rate</h4>

In [5]:
female_gender_ids = train_X[train_X["Gender"] == 0.0]
female_gender_ids = female_gender_ids["RecordID"]
female_gender_missing_rate = train_X[train_X["RecordID"].isin(female_gender_ids)]
female_gender_missing_rate = (female_gender_missing_rate.isna().sum()/total_pacientes)
female_gender_missing_rate = female_gender_missing_rate.round(2)
female_gender_missing_rate = female_gender_missing_rate*100
female_gender_missing_rate

RecordID        0.0
level_1         0.0
Time            0.0
ALP            44.0
ALT            44.0
AST            44.0
Age             3.0
Albumin        44.0
BUN            41.0
Bilirubin      44.0
Cholesterol    44.0
Creatinine     41.0
DiasABP        21.0
FiO2           37.0
GCS            30.0
Gender         43.0
Glucose        41.0
HCO3           41.0
HCT            40.0
HR              4.0
Height          3.0
ICUType        43.0
K              41.0
Lactate        43.0
MAP            21.0
MechVent       38.0
Mg             41.0
NIDiasABP      25.0
NIMAP          25.0
NISysABP       25.0
Na             41.0
PaCO2          40.0
PaO2           40.0
Platelets      41.0
RespRate       32.0
SaO2           43.0
SysABP         21.0
Temp           28.0
TroponinI      44.0
TroponinT      44.0
Urine          13.0
WBC            41.0
Weight         21.0
pH             39.0
dtype: float64

<h4>Female gender measurements</h4>

In [6]:
female_gender_measurements_training = train_X[train_X["RecordID"].isin(female_gender_ids)]
female_gender_measurements_training = (female_gender_measurements_training.count()/total_pacientes)
female_gender_measurements_training = female_gender_measurements_training.round(2)
female_gender_measurements_training = female_gender_measurements_training*100
female_gender_measurements_training

RecordID       44.0
level_1        44.0
Time           44.0
ALP             1.0
ALT             1.0
AST             1.0
Age            42.0
Albumin         1.0
BUN             3.0
Bilirubin       1.0
Cholesterol     0.0
Creatinine      3.0
DiasABP        23.0
FiO2            7.0
GCS            14.0
Gender          1.0
Glucose         3.0
HCO3            3.0
HCT             4.0
HR             40.0
Height         42.0
ICUType         1.0
K               3.0
Lactate         2.0
MAP            23.0
MechVent        7.0
Mg              3.0
NIDiasABP      19.0
NIMAP          19.0
NISysABP       19.0
Na              3.0
PaCO2           5.0
PaO2            5.0
Platelets       3.0
RespRate       12.0
SaO2            2.0
SysABP         23.0
Temp           16.0
TroponinI       0.0
TroponinT       0.0
Urine          31.0
WBC             3.0
Weight         24.0
pH              5.0
dtype: float64

<h4>Male gender missing rate</h4>

In [7]:
male_gender_ids = train_X[train_X["Gender"] == 1.0]
male_gender_ids = male_gender_ids["RecordID"]
male_gender_missing_rate = train_X[train_X["RecordID"].isin(male_gender_ids)]
male_gender_missing_rate = (male_gender_missing_rate.isna().sum()/total_pacientes)
male_gender_missing_rate = male_gender_missing_rate.round(2)
male_gender_missing_rate = male_gender_missing_rate*100
male_gender_missing_rate

RecordID        0.0
level_1         0.0
Time            0.0
ALP            55.0
ALT            55.0
AST            55.0
Age             3.0
Albumin        55.0
BUN            51.0
Bilirubin      55.0
Cholesterol    55.0
Creatinine     51.0
DiasABP        25.0
FiO2           47.0
GCS            38.0
Gender         54.0
Glucose        52.0
HCO3           52.0
HCT            50.0
HR              6.0
Height          3.0
ICUType        54.0
K              51.0
Lactate        53.0
MAP            25.0
MechVent       47.0
Mg             52.0
NIDiasABP      33.0
NIMAP          33.0
NISysABP       33.0
Na             52.0
PaCO2          49.0
PaO2           49.0
Platelets      51.0
RespRate       43.0
SaO2           53.0
SysABP         25.0
Temp           34.0
TroponinI      55.0
TroponinT      55.0
Urine          17.0
WBC            52.0
Weight         26.0
pH             48.0
dtype: float64

<h4>Male gender measurements</h4>

In [8]:
male_gender_measurements_training = train_X[train_X["RecordID"].isin(male_gender_ids)]
male_gender_measurements_training  = (male_gender_measurements_training.count()/total_pacientes)
male_gender_measurements_training = male_gender_measurements_training.round(2)
male_gender_measurements_training = male_gender_measurements_training*100
male_gender_measurements_training

RecordID       56.0
level_1        56.0
Time           56.0
ALP             1.0
ALT             1.0
AST             1.0
Age            52.0
Albumin         1.0
BUN             4.0
Bilirubin       1.0
Cholesterol     0.0
Creatinine      4.0
DiasABP        31.0
FiO2            9.0
GCS            18.0
Gender          1.0
Glucose         4.0
HCO3            4.0
HCT             5.0
HR             50.0
Height         52.0
ICUType         1.0
K               4.0
Lactate         2.0
MAP            31.0
MechVent        8.0
Mg              4.0
NIDiasABP      23.0
NIMAP          22.0
NISysABP       23.0
Na              4.0
PaCO2           7.0
PaO2            7.0
Platelets       4.0
RespRate       12.0
SaO2            2.0
SysABP         31.0
Temp           21.0
TroponinI       0.0
TroponinT       1.0
Urine          38.0
WBC             4.0
Weight         29.0
pH              7.0
dtype: float64

<h4>Undefined gender missing rate</h4> 

In [9]:
undefined_gender_ids = train_X[train_X["Gender"] == -1.0]
undefined_gender_ids = undefined_gender_ids["RecordID"]
undefined_gender_missing_rate = train_X[train_X["RecordID"].isin(undefined_gender_ids)]
undefined_gender_missing_rate = (undefined_gender_missing_rate.isna().sum()/total_pacientes)
undefined_gender_missing_rate = undefined_gender_missing_rate.round(2)
undefined_gender_missing_rate = undefined_gender_missing_rate*100
undefined_gender_missing_rate

RecordID       0.0
level_1        0.0
Time           0.0
ALP            0.0
ALT            0.0
AST            0.0
Age            0.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        0.0
FiO2           0.0
GCS            0.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             0.0
Height         0.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            0.0
MechVent       0.0
Mg             0.0
NIDiasABP      0.0
NIMAP          0.0
NISysABP       0.0
Na             0.0
PaCO2          0.0
PaO2           0.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         0.0
Temp           0.0
TroponinI      0.0
TroponinT      0.0
Urine          0.0
WBC            0.0
Weight         0.0
pH             0.0
dtype: float64

<h4>Undefined gender measurements</h4>

In [10]:
undefined_gender_measurements_training = train_X[train_X["RecordID"].isin(undefined_gender_ids)]
undefined_gender_measurements_training = (undefined_gender_measurements_training.count()/total_pacientes)
undefined_gender_measurements_training = undefined_gender_measurements_training.round(2)
undefined_gender_measurements_training = undefined_gender_measurements_training*100
undefined_gender_measurements_training


RecordID       0.0
level_1        0.0
Time           0.0
ALP            0.0
ALT            0.0
AST            0.0
Age            0.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        0.0
FiO2           0.0
GCS            0.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             0.0
Height         0.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            0.0
MechVent       0.0
Mg             0.0
NIDiasABP      0.0
NIMAP          0.0
NISysABP       0.0
Na             0.0
PaCO2          0.0
PaO2           0.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         0.0
Temp           0.0
TroponinI      0.0
TroponinT      0.0
Urine          0.0
WBC            0.0
Weight         0.0
pH             0.0
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [11]:
ICUType_1_training_ids = train_X[train_X['ICUType'] == 1.0]
ICUType_1_training_ids = ICUType_1_training_ids[ICUType_1_training_ids["Time"] == 0.0]
ICUType_1_training_ids = ICUType_1_training_ids["RecordID"]
ICUType_1_training = train_X[train_X["RecordID"].isin(ICUType_1_training_ids)]
ICUType_1_training_missing = (ICUType_1_training.isna().sum()/total_pacientes)
ICUType_1_training_missing = ICUType_1_training_missing.round(2)
#ICUType_1_training_missing = ICUType_1_training_missing*100
ICUType_1_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.15
ALT            0.15
AST            0.15
Age            0.01
Albumin        0.15
BUN            0.14
Bilirubin      0.15
Cholesterol    0.15
Creatinine     0.14
DiasABP        0.09
FiO2           0.13
GCS            0.11
Gender         0.15
Glucose        0.14
HCO3           0.14
HCT            0.14
HR             0.02
Height         0.01
ICUType        0.15
K              0.14
Lactate        0.14
MAP            0.09
MechVent       0.13
Mg             0.14
NIDiasABP      0.08
NIMAP          0.08
NISysABP       0.08
Na             0.14
PaCO2          0.14
PaO2           0.14
Platelets      0.14
RespRate       0.10
SaO2           0.14
SysABP         0.09
Temp           0.10
TroponinI      0.15
TroponinT      0.15
Urine          0.06
WBC            0.14
Weight         0.08
pH             0.14
dtype: float64

<h4>ICUType 1 mesurements</h4>

In [12]:
ICUType_1_measurements_training = (ICUType_1_training.count()/total_pacientes)
ICUType_1_measurements_training = ICUType_1_measurements_training.round(2)
#ICUType_1_measurements_training = ICUType_1_measurements_training*100
ICUType_1_measurements_training

RecordID       0.15
level_1        0.15
Time           0.15
ALP            0.00
ALT            0.00
AST            0.00
Age            0.14
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.06
FiO2           0.02
GCS            0.04
Gender         0.00
Glucose        0.01
HCO3           0.01
HCT            0.01
HR             0.13
Height         0.14
ICUType        0.00
K              0.01
Lactate        0.00
MAP            0.06
MechVent       0.01
Mg             0.01
NIDiasABP      0.07
NIMAP          0.07
NISysABP       0.07
Na             0.01
PaCO2          0.01
PaO2           0.01
Platelets      0.01
RespRate       0.05
SaO2           0.01
SysABP         0.06
Temp           0.04
TroponinI      0.00
TroponinT      0.00
Urine          0.08
WBC            0.01
Weight         0.07
pH             0.01
dtype: float64

<h4>ICUType missing rate</h4>

In [13]:
ICUType_2_training_ids = train_X[train_X['ICUType'] == 2.0]
ICUType_2_training_ids = ICUType_2_training_ids[ICUType_2_training_ids["Time"] == 0.0]
ICUType_2_training_ids = ICUType_2_training_ids["RecordID"]
ICUType_2_training = train_X[train_X["RecordID"].isin(ICUType_2_training_ids)]
ICUType_2_training_missing = (ICUType_2_training.isna().sum()/total_pacientes)
ICUType_2_training_missing = ICUType_2_training_missing.round(2)
#ICUType_2_training_missing = ICUType_2_training_missing*100
ICUType_2_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.21
ALT            0.21
AST            0.21
Age            0.01
Albumin        0.21
BUN            0.20
Bilirubin      0.21
Cholesterol    0.21
Creatinine     0.20
DiasABP        0.04
FiO2           0.17
GCS            0.16
Gender         0.21
Glucose        0.20
HCO3           0.20
HCT            0.19
HR             0.02
Height         0.01
ICUType        0.21
K              0.20
Lactate        0.20
MAP            0.04
MechVent       0.18
Mg             0.20
NIDiasABP      0.16
NIMAP          0.17
NISysABP       0.16
Na             0.20
PaCO2          0.17
PaO2           0.17
Platelets      0.19
RespRate       0.20
SaO2           0.19
SysABP         0.04
Temp           0.09
TroponinI      0.21
TroponinT      0.21
Urine          0.03
WBC            0.20
Weight         0.10
pH             0.16
dtype: float64

<h4>ICUType 2 measurements</h4>

In [14]:
ICUType_2_measurements_training = (ICUType_2_training.count()/total_pacientes)
ICUType_2_measurements_training = ICUType_2_measurements_training.round(2)
#ICUType_2_measurements_training = ICUType_2_measurements_training*100
ICUType_2_measurements_training

RecordID       0.21
level_1        0.21
Time           0.21
ALP            0.00
ALT            0.00
AST            0.00
Age            0.20
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.17
FiO2           0.04
GCS            0.06
Gender         0.00
Glucose        0.01
HCO3           0.01
HCT            0.02
HR             0.19
Height         0.20
ICUType        0.00
K              0.01
Lactate        0.01
MAP            0.17
MechVent       0.04
Mg             0.01
NIDiasABP      0.05
NIMAP          0.05
NISysABP       0.05
Na             0.01
PaCO2          0.04
PaO2           0.04
Platelets      0.02
RespRate       0.01
SaO2           0.02
SysABP         0.17
Temp           0.12
TroponinI      0.00
TroponinT      0.00
Urine          0.18
WBC            0.01
Weight         0.11
pH             0.05
dtype: float64

<h4>ICUType 3 missing rate</h4>

In [15]:
ICUType_3_training_ids = train_X[train_X['ICUType'] == 3.0]
ICUType_3_training_ids = ICUType_3_training_ids[ICUType_3_training_ids["Time"] == 0.0]
ICUType_3_training_ids = ICUType_3_training_ids["RecordID"]
ICUType_3_training = train_X[train_X["RecordID"].isin(ICUType_3_training_ids)]
ICUType_3_training_missing = (ICUType_3_training.isna().sum()/total_pacientes)
ICUType_3_training_missing = ICUType_3_training_missing.round(2)
#ICUType_3_training_missing = ICUType_3_training_missing*100
ICUType_3_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.35
ALT            0.35
AST            0.35
Age            0.03
Albumin        0.35
BUN            0.33
Bilirubin      0.35
Cholesterol    0.36
Creatinine     0.33
DiasABP        0.23
FiO2           0.31
GCS            0.26
Gender         0.35
Glucose        0.33
HCO3           0.33
HCT            0.33
HR             0.04
Height         0.03
ICUType        0.35
K              0.33
Lactate        0.34
MAP            0.23
MechVent       0.31
Mg             0.33
NIDiasABP      0.16
NIMAP          0.16
NISysABP       0.16
Na             0.33
PaCO2          0.33
PaO2           0.33
Platelets      0.33
RespRate       0.24
SaO2           0.35
SysABP         0.23
Temp           0.25
TroponinI      0.36
TroponinT      0.35
Urine          0.14
WBC            0.33
Weight         0.13
pH             0.33
dtype: float64

<h4>ICUType 3 measurements</h4>

In [16]:
ICUType_3_measurements_training = (ICUType_3_training.count()/total_pacientes)
ICUType_3_measurements_training = ICUType_3_measurements_training.round(2)
#ICUType_3_measurements_training = ICUType_3_measurements_training*100
ICUType_3_measurements_training

RecordID       0.36
level_1        0.36
Time           0.36
ALP            0.01
ALT            0.01
AST            0.01
Age            0.33
Albumin        0.01
BUN            0.03
Bilirubin      0.01
Cholesterol    0.00
Creatinine     0.03
DiasABP        0.13
FiO2           0.05
GCS            0.09
Gender         0.01
Glucose        0.03
HCO3           0.03
HCT            0.03
HR             0.32
Height         0.33
ICUType        0.01
K              0.03
Lactate        0.01
MAP            0.13
MechVent       0.05
Mg             0.03
NIDiasABP      0.20
NIMAP          0.20
NISysABP       0.20
Na             0.03
PaCO2          0.03
PaO2           0.03
Platelets      0.02
RespRate       0.11
SaO2           0.00
SysABP         0.13
Temp           0.10
TroponinI      0.00
TroponinT      0.00
Urine          0.22
WBC            0.02
Weight         0.23
pH             0.03
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [17]:
ICUType_4_training_ids = train_X[train_X['ICUType'] == 4.0]
ICUType_4_training_ids = ICUType_4_training_ids[ICUType_4_training_ids["Time"] == 0.0]
ICUType_4_training_ids = ICUType_4_training_ids["RecordID"]
ICUType_4_training = train_X[train_X["RecordID"].isin(ICUType_4_training_ids)]
ICUType_4_training_missing = (ICUType_4_training.isna().sum()/total_pacientes)
ICUType_4_training_missing = ICUType_4_training_missing.round(2)
#ICUType_4_training_missing = ICUType_4_training_missing*100
ICUType_4_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.28
ALT            0.28
AST            0.28
Age            0.01
Albumin        0.28
BUN            0.26
Bilirubin      0.28
Cholesterol    0.28
Creatinine     0.26
DiasABP        0.10
FiO2           0.23
GCS            0.15
Gender         0.28
Glucose        0.26
HCO3           0.26
HCT            0.26
HR             0.02
Height         0.01
ICUType        0.28
K              0.26
Lactate        0.27
MAP            0.10
MechVent       0.23
Mg             0.26
NIDiasABP      0.18
NIMAP          0.18
NISysABP       0.18
Na             0.26
PaCO2          0.25
PaO2           0.25
Platelets      0.26
RespRate       0.21
SaO2           0.28
SysABP         0.10
Temp           0.18
TroponinI      0.28
TroponinT      0.28
Urine          0.07
WBC            0.26
Weight         0.16
pH             0.25
dtype: float64

<h4>ICUType 4 measurements</h4>

In [18]:
ICUType_4_measurements_training = (ICUType_4_training.count()/total_pacientes)
ICUType_4_measurements_training = ICUType_4_measurements_training.round(2)
#ICUType_4_measurements_training = ICUType_4_measurements_training*100
ICUType_4_measurements_training

RecordID       0.28
level_1        0.28
Time           0.28
ALP            0.01
ALT            0.01
AST            0.01
Age            0.27
Albumin        0.00
BUN            0.02
Bilirubin      0.01
Cholesterol    0.00
Creatinine     0.02
DiasABP        0.19
FiO2           0.05
GCS            0.13
Gender         0.01
Glucose        0.02
HCO3           0.02
HCT            0.03
HR             0.26
Height         0.27
ICUType        0.01
K              0.02
Lactate        0.02
MAP            0.19
MechVent       0.05
Mg             0.02
NIDiasABP      0.10
NIMAP          0.10
NISysABP       0.10
Na             0.02
PaCO2          0.03
PaO2           0.03
Platelets      0.02
RespRate       0.07
SaO2           0.01
SysABP         0.19
Temp           0.10
TroponinI      0.00
TroponinT      0.00
Urine          0.21
WBC            0.02
Weight         0.12
pH             0.03
dtype: float64

<h4>+65 missing rate</h4>

In [19]:
more_than_or_equal_to_65_train_ids = train_X[train_X["Age"] >= 65]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids[more_than_or_equal_to_65_train_ids["Time"] == 0.0]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids["RecordID"]
more_than_or_equal_to_65_train = train_X[train_X["RecordID"].isin(more_than_or_equal_to_65_train_ids)]
more_than_or_equal_to_65_train_missing = (more_than_or_equal_to_65_train.isna().sum()/total_pacientes)
#more_than_or_equal_to_65_train_missing = more_than_or_equal_to_65_train_missing.apply(custom_round)
#more_than_or_equal_to_65_train_missing = more_than_or_equal_to_65_train_missing*100
more_than_or_equal_to_65_train_missing

RecordID       0.000000
level_1        0.000000
Time           0.000000
ALP            0.547815
ALT            0.547655
AST            0.547636
Age            0.030760
Albumin        0.549043
BUN            0.515836
Bilirubin      0.547443
Cholesterol    0.554195
Creatinine     0.515660
DiasABP        0.248949
FiO2           0.466970
GCS            0.382751
Gender         0.543641
Glucose        0.518617
HCO3           0.516787
HCT            0.503012
HR             0.051723
Height         0.030760
ICUType        0.543641
K              0.514275
Lactate        0.533253
MAP            0.250489
MechVent       0.472570
Mg             0.516417
NIDiasABP      0.323301
NIMAP          0.326106
NISysABP       0.323111
Na             0.517129
PaCO2          0.489723
PaO2           0.489813
Platelets      0.515078
RespRate       0.416401
SaO2           0.529644
SysABP         0.248933
Temp           0.339615
TroponinI      0.553752
TroponinT      0.547614
Urine          0.160358
WBC            0

<h4>+65 measurements</h4>

In [20]:
age_65_and_above_measurements_training = (more_than_or_equal_to_65_train.count()/total_pacientes)
#age_65_and_above_measurements_training = age_65_and_above_measurements_training.apply(custom_round)
#age_65_and_above_measurements_training = age_65_and_above_measurements_training*100
age_65_and_above_measurements_training

RecordID       0.555208
level_1        0.555208
Time           0.555208
ALP            0.007393
ALT            0.007553
AST            0.007572
Age            0.524448
Albumin        0.006165
BUN            0.039372
Bilirubin      0.007765
Cholesterol    0.001013
Creatinine     0.039548
DiasABP        0.306259
FiO2           0.088238
GCS            0.172457
Gender         0.011567
Glucose        0.036591
HCO3           0.038421
HCT            0.052196
HR             0.503484
Height         0.524448
ICUType        0.011567
K              0.040933
Lactate        0.021955
MAP            0.304719
MechVent       0.082638
Mg             0.038791
NIDiasABP      0.231907
NIMAP          0.229101
NISysABP       0.232097
Na             0.038079
PaCO2          0.065485
PaO2           0.065395
Platelets      0.040129
RespRate       0.138807
SaO2           0.025564
SysABP         0.306275
Temp           0.215593
TroponinI      0.001456
TroponinT      0.007594
Urine          0.394850
WBC            0

<h4>-65 missing rate</h4>

In [21]:
less_than_65_train_ids = train_X[train_X["Age"] < 65]
less_than_65_train_ids = less_than_65_train_ids[less_than_65_train_ids["Time"] == 0.0]
less_than_65_train_ids = less_than_65_train_ids["RecordID"]
less_than_65_train = train_X[train_X["RecordID"].isin(less_than_65_train_ids)]
less_than_65_train_missing = (less_than_65_train.isna().sum()/total_pacientes)
#less_than_65_train_missing = less_than_65_train_missing.apply(custom_round)
#less_than_65_train_missing = less_than_65_train_missing*100
less_than_65_train_missing


RecordID       0.000000
level_1        0.000000
Time           0.000000
ALP            0.435881
ALT            0.435612
AST            0.435631
Age            0.027900
Albumin        0.438467
BUN            0.411501
Bilirubin      0.435561
Cholesterol    0.444083
Creatinine     0.411368
DiasABP        0.210661
FiO2           0.375255
GCS            0.298348
Gender         0.435526
Glucose        0.412957
HCO3           0.412175
HCT            0.402042
HR             0.046311
Height         0.027900
ICUType        0.435526
K              0.409752
Lactate        0.425624
MAP            0.211981
MechVent       0.376551
Mg             0.412571
NIDiasABP      0.255709
NIMAP          0.258870
NISysABP       0.255557
Na             0.411778
PaCO2          0.394576
PaO2           0.394668
Platelets      0.411042
RespRate       0.338738
SaO2           0.429727
SysABP         0.210639
Temp           0.287555
TroponinI      0.444140
TroponinT      0.441726
Urine          0.146273
WBC            0

<h4>-65 measurements</h4>

In [22]:
age_under_65_measurements_training  = (less_than_65_train.count()/total_pacientes)
#age_under_65_measurements_training = age_under_65_measurements_training.apply(custom_round)
#age_under_65_measurements_training = age_under_65_measurements_training*100
age_under_65_measurements_training

RecordID       0.444792
level_1        0.444792
Time           0.444792
ALP            0.008911
ALT            0.009180
AST            0.009161
Age            0.416892
Albumin        0.006325
BUN            0.033291
Bilirubin      0.009231
Cholesterol    0.000709
Creatinine     0.033424
DiasABP        0.234131
FiO2           0.069537
GCS            0.146444
Gender         0.009267
Glucose        0.031835
HCO3           0.032617
HCT            0.042750
HR             0.398481
Height         0.416892
ICUType        0.009267
K              0.035040
Lactate        0.019169
MAP            0.232811
MechVent       0.068241
Mg             0.032221
NIDiasABP      0.189083
NIMAP          0.185922
NISysABP       0.189235
Na             0.033014
PaCO2          0.050216
PaO2           0.050124
Platelets      0.033750
RespRate       0.106054
SaO2           0.015065
SysABP         0.234153
Temp           0.157237
TroponinI      0.000652
TroponinT      0.003066
Urine          0.298519
WBC            0

<h4>Filtering only patients who have the same height and weight</h4>

In [23]:
filtered_train_X = train_X[(train_X['Height'] != -1) & (train_X['Weight'] != -1) & (train_X['Height'].notna()) & (train_X['Weight'].notna())] 

<h4>Classify BMI</h4>

In [35]:
def classify_BMI(BMI):
    if BMI <= 18.5:
        return "Baixo peso"
    elif BMI >= 18.6 and BMI <= 24.9:
        return "Peso normal"
    elif BMI >= 25 and BMI <= 29.9:
        return "Sobrepeso"
    elif BMI >= 30:
        return "Obesidade"

<h4>Set the height to meters</h4>

In [25]:
filtered_train_X_metros = filtered_train_X.copy()
filtered_train_X_metros["Height"] = filtered_train_X["Height"]/100
filtered_train_X_metros["Height"]

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
574989    1.524
574990    1.524
574991    1.524
575088    1.727
575184    1.727
Name: Height, Length: 101785, dtype: float64

<h4>BMI Calculation and Classification</h4>

In [26]:
bmi_data_train = filtered_train_X_metros
bmi_data_train["BMI"] = round(filtered_train_X_metros["Weight"] / (filtered_train_X_metros["Height"]**2), 1)
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.7,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.2,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.2,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.2,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.2,Sobrepeso


<h4>Taking only the first occurrence of each patient</h4>

In [27]:
bmi_data_train = bmi_data_train.groupby("RecordID").first().reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132540,0,0.0,,,,76.0,,21.0,,...,122.00,37.50,,,50.0,13.3,76.0,7.45,24.7,Peso normal
1,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.30,,,600.0,11.5,84.6,,26.0,Sobrepeso
2,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
3,132548,0,0.0,,,,68.0,,32.0,,...,205.00,36.30,0.7,,120.0,6.2,87.0,,32.9,Obesidade grau 1
4,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.00,3.5,,120.0,16.1,48.4,7.40,18.3,Baixo peso
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4025,163007,0,0.0,42.0,30.0,40.0,19.0,2.8,16.0,0.6,...,0.00,40.55,,1.00,150.0,14.1,114.3,7.36,34.2,Obesidade grau 1
4026,163008,0,0.0,,,,59.0,,24.0,,...,97.00,37.60,,,45.0,6.9,98.5,7.38,34.0,Obesidade grau 1
4027,163013,0,0.0,82.0,11.0,30.0,74.0,2.5,30.0,1.2,...,118.00,36.50,,0.03,40.0,9.6,68.6,7.35,29.5,Sobrepeso
4028,163021,0,0.0,,,,72.0,,9.0,,...,,,,,,8.6,62.0,,20.8,Peso normal


In [28]:
bmi_data_train["Classificacao"].value_counts()

Classificacao
Sobrepeso           1405
Peso normal         1178
Obesidade grau 1     747
Obesidade grau 2     318
Obesidade grau 3     266
Baixo peso           116
Name: count, dtype: int64

<h4>Classification Undefined missing rate</h4>

In [29]:
classificacao_undefined_ids = bmi_data_train["RecordID"]
classificacao_undefined = train_X[~train_X["RecordID"].isin(classificacao_undefined_ids)]
classificacao_undefined_missing = (classificacao_undefined.isna().sum()/total_pacientes)
classificacao_undefined_missing = classificacao_undefined_missing.round(2)
classificacao_undefined_missing = classificacao_undefined_missing*100
classificacao_undefined_missing

RecordID        0.0
level_1         0.0
Time            0.0
ALP            47.0
ALT            47.0
AST            47.0
Age             4.0
Albumin        47.0
BUN            44.0
Bilirubin      47.0
Cholesterol    47.0
Creatinine     44.0
DiasABP        29.0
FiO2           41.0
GCS            32.0
Gender         46.0
Glucose        44.0
HCO3           44.0
HCT            43.0
HR              5.0
Height          4.0
ICUType        46.0
K              44.0
Lactate        46.0
MAP            29.0
MechVent       42.0
Mg             44.0
NIDiasABP      22.0
NIMAP          23.0
NISysABP       22.0
Na             44.0
PaCO2          44.0
PaO2           44.0
Platelets      44.0
RespRate       31.0
SaO2           47.0
SysABP         29.0
Temp           35.0
TroponinI      47.0
TroponinT      47.0
Urine          17.0
WBC            45.0
Weight         22.0
pH             44.0
dtype: float64

<h4>Classification Undefined measurements</h4>

In [30]:
classification_undefined_measurements = (classificacao_undefined.count()/total_pacientes)
classification_undefined_measurements = classification_undefined_measurements.round(2)
classification_undefined_measurements = classification_undefined_measurements*100
classification_undefined_measurements

RecordID       47.0
level_1        47.0
Time           47.0
ALP             1.0
ALT             1.0
AST             1.0
Age            44.0
Albumin         1.0
BUN             3.0
Bilirubin       1.0
Cholesterol     0.0
Creatinine      3.0
DiasABP        19.0
FiO2            6.0
GCS            16.0
Gender          1.0
Glucose         3.0
HCO3            3.0
HCT             4.0
HR             42.0
Height         44.0
ICUType         1.0
K               4.0
Lactate         2.0
MAP            18.0
MechVent        6.0
Mg              3.0
NIDiasABP      25.0
NIMAP          25.0
NISysABP       25.0
Na              3.0
PaCO2           3.0
PaO2            3.0
Platelets       3.0
RespRate       17.0
SaO2            1.0
SysABP         19.0
Temp           13.0
TroponinI       0.0
TroponinT       1.0
Urine          30.0
WBC             3.0
Weight         25.0
pH              3.0
dtype: float64

<h4>Low weight classification missing rate</h4>

In [31]:
classificacao_baixo_peso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_ids = classificacao_baixo_peso_ids["RecordID"]
classificacao_baixo_peso = train_X[train_X["RecordID"].isin(classificacao_baixo_peso_ids)]
classificacao_baixo_peso_missing = (classificacao_baixo_peso.isna().sum()/total_pacientes)
classificacao_baixo_peso_missing = classificacao_baixo_peso_missing.round(2)
classificacao_baixo_peso_missing = classificacao_baixo_peso_missing*100
classificacao_baixo_peso_missing

RecordID       0.0
level_1        0.0
Time           0.0
ALP            1.0
ALT            1.0
AST            1.0
Age            0.0
Albumin        1.0
BUN            1.0
Bilirubin      1.0
Cholesterol    2.0
Creatinine     1.0
DiasABP        1.0
FiO2           1.0
GCS            1.0
Gender         1.0
Glucose        1.0
HCO3           1.0
HCT            1.0
HR             0.0
Height         0.0
ICUType        1.0
K              1.0
Lactate        1.0
MAP            1.0
MechVent       1.0
Mg             1.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             1.0
PaCO2          1.0
PaO2           1.0
Platelets      1.0
RespRate       1.0
SaO2           1.0
SysABP         1.0
Temp           1.0
TroponinI      2.0
TroponinT      1.0
Urine          0.0
WBC            1.0
Weight         1.0
pH             1.0
dtype: float64

<h4>Classificacao low weight measurements</h4>

In [32]:
classificacao_baixo_peso_measurements = (classificacao_baixo_peso.count()/total_pacientes)
classificacao_baixo_peso_measurements = classificacao_baixo_peso_measurements.round(2)
classificacao_baixo_peso_measurements = classificacao_baixo_peso_measurements*100
classificacao_baixo_peso_measurements

RecordID       2.0
level_1        2.0
Time           2.0
ALP            0.0
ALT            0.0
AST            0.0
Age            1.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        1.0
FiO2           0.0
GCS            0.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             1.0
Height         1.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            1.0
MechVent       0.0
Mg             0.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             0.0
PaCO2          0.0
PaO2           0.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         1.0
Temp           1.0
TroponinI      0.0
TroponinT      0.0
Urine          1.0
WBC            0.0
Weight         1.0
pH             0.0
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [33]:
classificacao_normal_peso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Peso normal"]
#classificacao_normal_peso_ids = classificacao_normal_peso_ids[classificacao_normal_peso_ids["Time"] == 0.0]
classificacao_normal_peso_ids = classificacao_normal_peso_ids["RecordID"]
classificacao_normal_peso = train_X[train_X["RecordID"].isin(classificacao_normal_peso_ids)]
classificacao_normal_peso_missing = (classificacao_normal_peso.isna().sum()/total_pacientes)
classificacao_normal_peso_missing = classificacao_normal_peso_missing.round(2)
classificacao_normal_peso_missing = classificacao_normal_peso_missing*100
classificacao_normal_peso_missing

RecordID        0.0
level_1         0.0
Time            0.0
ALP            15.0
ALT            15.0
AST            15.0
Age             1.0
Albumin        15.0
BUN            14.0
Bilirubin      15.0
Cholesterol    15.0
Creatinine     14.0
DiasABP         5.0
FiO2           13.0
GCS            10.0
Gender         15.0
Glucose        14.0
HCO3           14.0
HCT            14.0
HR              1.0
Height          1.0
ICUType        15.0
K              14.0
Lactate        15.0
MAP             5.0
MechVent       13.0
Mg             14.0
NIDiasABP      10.0
NIMAP          10.0
NISysABP       10.0
Na             14.0
PaCO2          13.0
PaO2           13.0
Platelets      14.0
RespRate       13.0
SaO2           14.0
SysABP          5.0
Temp            8.0
TroponinI      15.0
TroponinT      15.0
Urine           4.0
WBC            14.0
Weight          8.0
pH             13.0
dtype: float64

<h4>Classification normal weight measurements</h4>

In [34]:
classificacao_normal_peso_measurements = (classificacao_normal_peso.count()/total_pacientes)
classificacao_normal_peso_measurements = classificacao_normal_peso_measurements.round(2)
classificacao_normal_peso_measurements = classificacao_normal_peso_measurements*100
classificacao_normal_peso_measurements

RecordID       15.0
level_1        15.0
Time           15.0
ALP             0.0
ALT             0.0
AST             0.0
Age            15.0
Albumin         0.0
BUN             1.0
Bilirubin       0.0
Cholesterol     0.0
Creatinine      1.0
DiasABP        10.0
FiO2            3.0
GCS             5.0
Gender          0.0
Glucose         1.0
HCO3            1.0
HCT             2.0
HR             14.0
Height         15.0
ICUType         0.0
K               1.0
Lactate         1.0
MAP            10.0
MechVent        3.0
Mg              1.0
NIDiasABP       5.0
NIMAP           5.0
NISysABP        5.0
Na              1.0
PaCO2           2.0
PaO2            2.0
Platelets       1.0
RespRate        2.0
SaO2            1.0
SysABP         10.0
Temp            7.0
TroponinI       0.0
TroponinT       0.0
Urine          11.0
WBC             1.0
Weight          8.0
pH              2.0
dtype: float64

<h4>Classification overweight missing rate</h4>

In [35]:
classificacao_sobrepeso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_ids = classificacao_sobrepeso_ids["RecordID"]
classificacao_sobrepeso = train_X[train_X["RecordID"].isin(classificacao_sobrepeso_ids)]
classificacao_sobrepeso_missing = (classificacao_sobrepeso.isna().sum()/total_pacientes)
classificacao_sobrepeso_missing = classificacao_sobrepeso_missing.round(2)
classificacao_sobrepeso_missing = classificacao_sobrepeso_missing*100
classificacao_sobrepeso_missing

RecordID        0.0
level_1         0.0
Time            0.0
ALP            18.0
ALT            18.0
AST            18.0
Age             1.0
Albumin        18.0
BUN            17.0
Bilirubin      18.0
Cholesterol    18.0
Creatinine     17.0
DiasABP         6.0
FiO2           15.0
GCS            13.0
Gender         18.0
Glucose        17.0
HCO3           17.0
HCT            16.0
HR              2.0
Height          1.0
ICUType        18.0
K              17.0
Lactate        17.0
MAP             6.0
MechVent       15.0
Mg             17.0
NIDiasABP      13.0
NIMAP          13.0
NISysABP       13.0
Na             17.0
PaCO2          15.0
PaO2           15.0
Platelets      17.0
RespRate       15.0
SaO2           17.0
SysABP          6.0
Temp           10.0
TroponinI      18.0
TroponinT      18.0
Urine           5.0
WBC            17.0
Weight          9.0
pH             15.0
dtype: float64

<h4>Classification overweight measurements</h4>

In [36]:
classificacao_sobrepeso_measurements = (classificacao_sobrepeso.count()/total_pacientes)
classificacao_sobrepeso_measurements = classificacao_sobrepeso_measurements.round(2)
classificacao_sobrepeso_measurements = classificacao_sobrepeso_measurements * 100
classificacao_sobrepeso_measurements

RecordID       18.0
level_1        18.0
Time           18.0
ALP             0.0
ALT             0.0
AST             0.0
Age            18.0
Albumin         0.0
BUN             1.0
Bilirubin       0.0
Cholesterol     0.0
Creatinine      1.0
DiasABP        12.0
FiO2            3.0
GCS             6.0
Gender          0.0
Glucose         1.0
HCO3            1.0
HCT             2.0
HR             17.0
Height         18.0
ICUType         0.0
K               1.0
Lactate         1.0
MAP            12.0
MechVent        3.0
Mg              1.0
NIDiasABP       6.0
NIMAP           6.0
NISysABP        6.0
Na              1.0
PaCO2           3.0
PaO2            3.0
Platelets       2.0
RespRate        3.0
SaO2            1.0
SysABP         12.0
Temp            9.0
TroponinI       0.0
TroponinT       0.0
Urine          14.0
WBC             1.0
Weight         10.0
pH              3.0
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [37]:
classificacao_obesidade_1_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_1_ids = classificacao_obesidade_1_ids["RecordID"]
classificacao_obesidade_1 = train_X[train_X["RecordID"].isin(classificacao_obesidade_1_ids)]
classificacao_obesidade_1_missing =(classificacao_obesidade_1.isna().sum()/total_pacientes)
classificacao_obesidade_1_missing = classificacao_obesidade_1_missing.round(2)
classificacao_obesidade_1_missing = classificacao_obesidade_1_missing*100
classificacao_obesidade_1_missing

RecordID        0.0
level_1         0.0
Time            0.0
ALP            10.0
ALT            10.0
AST            10.0
Age             0.0
Albumin        10.0
BUN             9.0
Bilirubin      10.0
Cholesterol    10.0
Creatinine      9.0
DiasABP         3.0
FiO2            8.0
GCS             7.0
Gender         10.0
Glucose         9.0
HCO3            9.0
HCT             9.0
HR              1.0
Height          0.0
ICUType        10.0
K               9.0
Lactate         9.0
MAP             3.0
MechVent        8.0
Mg              9.0
NIDiasABP       7.0
NIMAP           7.0
NISysABP        7.0
Na              9.0
PaCO2           8.0
PaO2            8.0
Platelets       9.0
RespRate        8.0
SaO2            9.0
SysABP          3.0
Temp            5.0
TroponinI      10.0
TroponinT      10.0
Urine           2.0
WBC             9.0
Weight          5.0
pH              8.0
dtype: float64

<h4>Grade 1 obesity measurements </h4>

In [38]:
classificacao_obesidade_1_measurements = (classificacao_obesidade_1.count()/total_pacientes)
classificacao_obesidade_1_measurements = classificacao_obesidade_1_measurements.round(2)
classificacao_obesidade_1_measurements = classificacao_obesidade_1_measurements*100
classificacao_obesidade_1_measurements

RecordID       10.0
level_1        10.0
Time           10.0
ALP             0.0
ALT             0.0
AST             0.0
Age             9.0
Albumin         0.0
BUN             1.0
Bilirubin       0.0
Cholesterol     0.0
Creatinine      1.0
DiasABP         7.0
FiO2            2.0
GCS             3.0
Gender          0.0
Glucose         1.0
HCO3            1.0
HCT             1.0
HR              9.0
Height          9.0
ICUType         0.0
K               1.0
Lactate         0.0
MAP             7.0
MechVent        2.0
Mg              1.0
NIDiasABP       3.0
NIMAP           3.0
NISysABP        3.0
Na              1.0
PaCO2           2.0
PaO2            2.0
Platelets       1.0
RespRate        1.0
SaO2            1.0
SysABP          7.0
Temp            5.0
TroponinI       0.0
TroponinT       0.0
Urine           8.0
WBC             1.0
Weight          5.0
pH              2.0
dtype: float64

<h4>Grade 2 Obesity missing rate</h4>

In [39]:
classificacao_obesidade_2_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_2_ids = classificacao_obesidade_2_ids["RecordID"]
classificacao_obesidade_2 = train_X[train_X["RecordID"].isin(classificacao_obesidade_2_ids)]
classificacao_obesidade_2_missing = (classificacao_obesidade_2.isna().sum()/total_pacientes)
classificacao_obesidade_2_missing = classificacao_obesidade_2_missing.round(2)
classificacao_obesidade_2_missing = classificacao_obesidade_2_missing*100
classificacao_obesidade_2_missing

RecordID       0.0
level_1        0.0
Time           0.0
ALP            4.0
ALT            4.0
AST            4.0
Age            0.0
Albumin        4.0
BUN            4.0
Bilirubin      4.0
Cholesterol    4.0
Creatinine     4.0
DiasABP        1.0
FiO2           3.0
GCS            3.0
Gender         4.0
Glucose        4.0
HCO3           4.0
HCT            4.0
HR             0.0
Height         0.0
ICUType        4.0
K              4.0
Lactate        4.0
MAP            1.0
MechVent       3.0
Mg             4.0
NIDiasABP      3.0
NIMAP          3.0
NISysABP       3.0
Na             4.0
PaCO2          3.0
PaO2           3.0
Platelets      4.0
RespRate       4.0
SaO2           4.0
SysABP         1.0
Temp           2.0
TroponinI      4.0
TroponinT      4.0
Urine          1.0
WBC            4.0
Weight         2.0
pH             3.0
dtype: float64

<h4>Grade 2 Obesity measurements</h4>

In [40]:
classificacao_obesidade_2_measurements = (classificacao_obesidade_2.count()/total_pacientes)
classificacao_obesidade_2_measurements = classificacao_obesidade_2_measurements.round(2)
classificacao_obesidade_2_measurements = classificacao_obesidade_2_measurements*100
classificacao_obesidade_2_measurements

RecordID       4.0
level_1        4.0
Time           4.0
ALP            0.0
ALT            0.0
AST            0.0
Age            4.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        3.0
FiO2           1.0
GCS            1.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             4.0
Height         4.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            3.0
MechVent       1.0
Mg             0.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             0.0
PaCO2          1.0
PaO2           1.0
Platelets      0.0
RespRate       1.0
SaO2           0.0
SysABP         3.0
Temp           2.0
TroponinI      0.0
TroponinT      0.0
Urine          3.0
WBC            0.0
Weight         2.0
pH             1.0
dtype: float64

<h4>Grade 3 Obesity missing rate</h4>

In [41]:
classificacao_obesidade_3_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_3_ids = classificacao_obesidade_3_ids["RecordID"]
classificacao_obesidade_3 = train_X[train_X["RecordID"].isin(classificacao_obesidade_3_ids)]
classificacao_obesidade_3_missing = (classificacao_obesidade_3.isna().sum()/total_pacientes)
classificacao_obesidade_3_missing = classificacao_obesidade_3_missing.round(2)
classificacao_obesidade_3_missing = classificacao_obesidade_3_missing*100
classificacao_obesidade_3_missing

RecordID       0.0
level_1        0.0
Time           0.0
ALP            3.0
ALT            3.0
AST            3.0
Age            0.0
Albumin        3.0
BUN            3.0
Bilirubin      3.0
Cholesterol    3.0
Creatinine     3.0
DiasABP        1.0
FiO2           3.0
GCS            2.0
Gender         3.0
Glucose        3.0
HCO3           3.0
HCT            3.0
HR             0.0
Height         0.0
ICUType        3.0
K              3.0
Lactate        3.0
MAP            1.0
MechVent       3.0
Mg             3.0
NIDiasABP      2.0
NIMAP          2.0
NISysABP       2.0
Na             3.0
PaCO2          3.0
PaO2           3.0
Platelets      3.0
RespRate       3.0
SaO2           3.0
SysABP         1.0
Temp           2.0
TroponinI      3.0
TroponinT      3.0
Urine          1.0
WBC            3.0
Weight         1.0
pH             3.0
dtype: float64

<h4>Grade 3 Obesity measurements</h4>

In [42]:
classificacao_obesidade_3_measurements = (classificacao_obesidade_3.count()/total_pacientes)
classificacao_obesidade_3_measurements = classificacao_obesidade_3_measurements.round(2)
classificacao_obesidade_3_measurements = classificacao_obesidade_3_measurements*100
classificacao_obesidade_3_measurements

RecordID       3.0
level_1        3.0
Time           3.0
ALP            0.0
ALT            0.0
AST            0.0
Age            3.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        2.0
FiO2           1.0
GCS            1.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             3.0
Height         3.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            2.0
MechVent       1.0
Mg             0.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             0.0
PaCO2          1.0
PaO2           1.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         2.0
Temp           2.0
TroponinI      0.0
TroponinT      0.0
Urine          3.0
WBC            0.0
Weight         2.0
pH             1.0
dtype: float64

<h4>Columns for tables</h4>

In [43]:
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

<h4>Building the missing rate table</h4>

In [44]:
df_missing = pd.DataFrame(columns=df_columns)
df_missing_transpose = df_missing.T
df_missing_transpose["Female"] = female_gender_missing_rate
df_missing_transpose["Male"] = male_gender_missing_rate
df_missing_transpose["Undefined gender"] = undefined_gender_missing_rate
df_missing_transpose["ICUType 1"] = ICUType_1_training_missing
df_missing_transpose["ICUType 2"] = ICUType_2_training_missing
df_missing_transpose["ICUType 3"] = ICUType_3_training_missing
df_missing_transpose["ICUType 4"] = ICUType_4_training_missing
df_missing_transpose["Age 65+"] = more_than_or_equal_to_65_train_missing
df_missing_transpose["Age 65-"] = less_than_65_train_missing
df_missing_transpose["Low Weight"] = classificacao_baixo_peso_missing
df_missing_transpose["Normal Weight"] = classificacao_normal_peso_missing
df_missing_transpose["Overweight"] = classificacao_sobrepeso_missing
df_missing_transpose["Obesity Grade 1"] = classificacao_obesidade_1_missing
df_missing_transpose["Obesity Grade 2"] = classificacao_obesidade_2_missing
df_missing_transpose["Obesity Grade 3"] = classificacao_obesidade_3_missing
df_missing_transpose["Undefined classification"] = classificacao_undefined_missing
df_missing_transpose = df_missing_transpose.drop("RecordID", axis=0)
df_missing_transpose = df_missing_transpose.drop("level_1", axis=0)
df_missing_transpose = df_missing_transpose.drop("Time", axis=0)
df_missing_transpose = df_missing_transpose.drop("Age", axis=0)
df_missing_transpose = df_missing_transpose.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographics - Train</h2>"))
df_missing_transpose

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,44.0,55.0,0.0,0.15,0.21,0.35,0.28,0.547815,0.435881,1.0,15.0,18.0,10.0,4.0,3.0,47.0
ALT,44.0,55.0,0.0,0.15,0.21,0.35,0.28,0.547655,0.435612,1.0,15.0,18.0,10.0,4.0,3.0,47.0
AST,44.0,55.0,0.0,0.15,0.21,0.35,0.28,0.547636,0.435631,1.0,15.0,18.0,10.0,4.0,3.0,47.0
Albumin,44.0,55.0,0.0,0.15,0.21,0.35,0.28,0.549043,0.438467,1.0,15.0,18.0,10.0,4.0,3.0,47.0
BUN,41.0,51.0,0.0,0.14,0.2,0.33,0.26,0.515836,0.411501,1.0,14.0,17.0,9.0,4.0,3.0,44.0
Bilirubin,44.0,55.0,0.0,0.15,0.21,0.35,0.28,0.547443,0.435561,1.0,15.0,18.0,10.0,4.0,3.0,47.0
Cholesterol,44.0,55.0,0.0,0.15,0.21,0.36,0.28,0.554195,0.444083,2.0,15.0,18.0,10.0,4.0,3.0,47.0
Creatinine,41.0,51.0,0.0,0.14,0.2,0.33,0.26,0.51566,0.411368,1.0,14.0,17.0,9.0,4.0,3.0,44.0
DiasABP,21.0,25.0,0.0,0.09,0.04,0.23,0.1,0.248949,0.210661,1.0,5.0,6.0,3.0,1.0,1.0,29.0
FiO2,37.0,47.0,0.0,0.13,0.17,0.31,0.23,0.46697,0.375255,1.0,13.0,15.0,8.0,3.0,3.0,41.0


<h4>Building the measurements table</h4>

In [45]:
df_measurements = pd.DataFrame(columns=df_columns)
df_measurements_transpose = df_measurements.T
df_measurements_transpose["Female"] = female_gender_measurements_training
df_measurements_transpose["Male"] = male_gender_measurements_training
df_measurements_transpose["Undefined gender"] = undefined_gender_measurements_training
df_measurements_transpose["ICUType 1"] = ICUType_1_measurements_training
df_measurements_transpose["ICUType 2"] = ICUType_2_measurements_training
df_measurements_transpose["ICUType 3"] = ICUType_3_measurements_training
df_measurements_transpose["ICUType 4"] = ICUType_4_measurements_training
df_measurements_transpose["Age 65+"] = age_65_and_above_measurements_training
df_measurements_transpose["Age 65-"] = age_under_65_measurements_training
df_measurements_transpose["Low Weight"] = classificacao_baixo_peso_measurements
df_measurements_transpose["Normal Weight"] = classificacao_normal_peso_measurements
df_measurements_transpose["Overweight"] = classificacao_sobrepeso_measurements
df_measurements_transpose["Obesity Grade 1"] = classificacao_obesidade_1_measurements
df_measurements_transpose["Obesity Grade 2"] = classificacao_obesidade_2_measurements
df_measurements_transpose["Obesity Grade 3"] = classificacao_obesidade_3_measurements
df_measurements_transpose["Undefined classification"] = classification_undefined_measurements
df_measurements_transpose = df_measurements_transpose.drop("RecordID", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("level_1", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("Time", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("Age", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_measurements_transpose

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,1.0,1.0,0.0,0.0,0.0,0.01,0.01,0.007393,0.008911,0.0,0.0,0.0,0.0,0.0,0.0,1.0
ALT,1.0,1.0,0.0,0.0,0.0,0.01,0.01,0.007553,0.00918,0.0,0.0,0.0,0.0,0.0,0.0,1.0
AST,1.0,1.0,0.0,0.0,0.0,0.01,0.01,0.007572,0.009161,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Albumin,1.0,1.0,0.0,0.0,0.0,0.01,0.0,0.006165,0.006325,0.0,0.0,0.0,0.0,0.0,0.0,1.0
BUN,3.0,4.0,0.0,0.01,0.01,0.03,0.02,0.039372,0.033291,0.0,1.0,1.0,1.0,0.0,0.0,3.0
Bilirubin,1.0,1.0,0.0,0.0,0.0,0.01,0.01,0.007765,0.009231,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Cholesterol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001013,0.000709,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Creatinine,3.0,4.0,0.0,0.01,0.01,0.03,0.02,0.039548,0.033424,0.0,1.0,1.0,1.0,0.0,0.0,3.0
DiasABP,23.0,31.0,0.0,0.06,0.17,0.13,0.19,0.306259,0.234131,1.0,10.0,12.0,7.0,3.0,2.0,19.0
FiO2,7.0,9.0,0.0,0.02,0.04,0.05,0.05,0.088238,0.069537,0.0,3.0,3.0,2.0,1.0,1.0,6.0


# Validation data

<h4>Loading training database</h4>

In [46]:
validation_X = physionet2012_dataset['val_X']

<h4>Total of the patients</h4>

In [47]:
total_patients_validation = validation_X.groupby("RecordID").first().reset_index()
total_patients_validation = total_patients_validation["RecordID"].count()
total_patients_validation = total_patients_validation*48
total_patients_validation

np.int64(92064)

<h4>female gender missing rate</h1>

In [48]:
female_gender_validation_ids = validation_X[validation_X["Gender"] == 0.0]
female_gender_validation_ids = female_gender_validation_ids["RecordID"]
female_gender_validation = validation_X[validation_X["RecordID"].isin(female_gender_validation_ids)]
female_gender_missing_rate_validation = round((female_gender_validation.isna().sum()/total_patients_validation)*100,2)
female_gender_missing_rate_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            41.96
ALT            41.94
AST            41.94
Age             2.41
Albumin        42.08
BUN            39.63
Bilirubin      41.94
Cholesterol    42.57
Creatinine     39.62
DiasABP        21.69
FiO2           36.48
GCS            28.60
Gender         41.76
Glucose        39.77
HCO3           39.67
HCT            38.88
HR              3.98
Height          2.41
ICUType        41.76
K              39.39
Lactate        41.14
MAP            21.82
MechVent       36.47
Mg             39.67
NIDiasABP      22.80
NIMAP          23.10
NISysABP       22.78
Na             39.61
PaCO2          38.42
PaO2           38.43
Platelets      39.70
RespRate       31.14
SaO2           41.16
SysABP         21.69
Temp           27.94
TroponinI      42.56
TroponinT      42.14
Urine          13.14
WBC            39.88
Weight         18.91
pH             38.27
dtype: float64

<h4>Famele gender measurements</h4>

In [49]:
female_gender_measurements_validation = round((female_gender_validation.count()/total_patients_validation)*100,2)
female_gender_measurements_validation

RecordID       42.65
level_1        42.65
Time           42.65
ALP             0.69
ALT             0.71
AST             0.71
Age            40.24
Albumin         0.57
BUN             3.02
Bilirubin       0.70
Cholesterol     0.08
Creatinine      3.03
DiasABP        20.96
FiO2            6.17
GCS            14.05
Gender          0.89
Glucose         2.88
HCO3            2.98
HCT             3.77
HR             38.67
Height         40.24
ICUType         0.89
K               3.26
Lactate         1.51
MAP            20.83
MechVent        6.18
Mg              2.98
NIDiasABP      19.85
NIMAP          19.55
NISysABP       19.87
Na              3.04
PaCO2           4.23
PaO2            4.22
Platelets       2.95
RespRate       11.51
SaO2            1.49
SysABP         20.96
Temp           14.71
TroponinI       0.08
TroponinT       0.51
Urine          29.51
WBC             2.77
Weight         23.74
pH              4.38
dtype: float64

<h4>Male gender missing rate</h4>

In [50]:
male_gender_validation_ids = validation_X[validation_X["Gender"] == 1.0]
male_gender_validation_ids = male_gender_validation_ids["RecordID"]
male_gender_validation = validation_X[validation_X["RecordID"].isin(male_gender_validation_ids)]
male_gender_missing_rate_validation = round((male_gender_validation.isna().sum()/total_patients_validation)*100,2)
male_gender_missing_rate_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            56.31
ALT            56.28
AST            56.28
Age             3.37
Albumin        56.58
BUN            53.16
Bilirubin      56.29
Cholesterol    57.20
Creatinine     53.13
DiasABP        24.87
FiO2           48.50
GCS            38.96
Gender         56.11
Glucose        53.45
HCO3           53.27
HCT            51.71
HR              5.71
Height          3.37
ICUType        56.11
K              53.00
Lactate        54.70
MAP            24.97
MechVent       48.83
Mg             53.26
NIDiasABP      34.10
NIMAP          34.37
NISysABP       34.08
Na             53.29
PaCO2          50.41
PaO2           50.43
Platelets      53.01
RespRate       43.77
SaO2           54.89
SysABP         24.86
Temp           35.46
TroponinI      57.21
TroponinT      56.66
Urine          17.78
WBC            53.44
Weight         27.36
pH             50.04
dtype: float64

<h4>Male gender measurements</h4>

In [51]:
male_gender_measurements_validation = round((male_gender_validation.count()/total_patients_validation)*100,2)
male_gender_measurements_validation

RecordID       57.30
level_1        57.30
Time           57.30
ALP             0.99
ALT             1.02
AST             1.02
Age            53.93
Albumin         0.72
BUN             4.14
Bilirubin       1.01
Cholesterol     0.10
Creatinine      4.16
DiasABP        32.43
FiO2            8.80
GCS            18.34
Gender          1.19
Glucose         3.85
HCO3            4.03
HCT             5.59
HR             51.59
Height         53.93
ICUType         1.19
K               4.30
Lactate         2.60
MAP            32.33
MechVent        8.47
Mg              4.04
NIDiasABP      23.20
NIMAP          22.93
NISysABP       23.22
Na              4.01
PaCO2           6.89
PaO2            6.87
Platelets       4.29
RespRate       13.53
SaO2            2.41
SysABP         32.44
Temp           21.84
TroponinI       0.09
TroponinT       0.64
Urine          39.52
WBC             3.86
Weight         29.94
pH              7.26
dtype: float64

<h4>Undefined gender missing rate</h4>

In [52]:
undefined_gender_ids_validation = validation_X[validation_X["Gender"] == -1.0]
undefined_gender_ids_validation = undefined_gender_ids_validation["RecordID"]
undefined_gender_validation = validation_X[validation_X["RecordID"].isin(undefined_gender_ids_validation)]
undefined_gender_missing_rate_validation = round((undefined_gender_validation.isna().sum()/total_patients_validation)*100,2)
undefined_gender_missing_rate_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.05
ALT            0.05
AST            0.05
Age            0.00
Albumin        0.05
BUN            0.05
Bilirubin      0.05
Cholesterol    0.05
Creatinine     0.05
DiasABP        0.00
FiO2           0.04
GCS            0.04
Gender         0.05
Glucose        0.05
HCO3           0.05
HCT            0.05
HR             0.00
Height         0.00
ICUType        0.05
K              0.05
Lactate        0.05
MAP            0.00
MechVent       0.04
Mg             0.05
NIDiasABP      0.04
NIMAP          0.04
NISysABP       0.04
Na             0.05
PaCO2          0.04
PaO2           0.04
Platelets      0.05
RespRate       0.05
SaO2           0.05
SysABP         0.00
Temp           0.03
TroponinI      0.05
TroponinT      0.05
Urine          0.01
WBC            0.05
Weight         0.01
pH             0.04
dtype: float64

<h4>Undefined gender measurements</h4>

In [53]:
undefined_gender_measurements_validation = round((undefined_gender_validation.count()/total_patients_validation)*100,2)
undefined_gender_measurements_validation

RecordID       0.05
level_1        0.05
Time           0.05
ALP            0.00
ALT            0.00
AST            0.00
Age            0.05
Albumin        0.00
BUN            0.00
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.00
DiasABP        0.05
FiO2           0.01
GCS            0.02
Gender         0.00
Glucose        0.00
HCO3           0.00
HCT            0.00
HR             0.05
Height         0.05
ICUType        0.00
K              0.00
Lactate        0.00
MAP            0.05
MechVent       0.01
Mg             0.00
NIDiasABP      0.01
NIMAP          0.01
NISysABP       0.01
Na             0.00
PaCO2          0.01
PaO2           0.01
Platelets      0.00
RespRate       0.00
SaO2           0.00
SysABP         0.05
Temp           0.02
TroponinI      0.00
TroponinT      0.00
Urine          0.04
WBC            0.00
Weight         0.04
pH             0.01
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [54]:
ICUType_1_validation_ids = validation_X[validation_X["ICUType"] == 1.0]
ICUType_1_validation_ids = ICUType_1_validation_ids[ICUType_1_validation_ids["Time"] == 0.0]
ICUType_1_validation_ids = ICUType_1_validation_ids["RecordID"]
ICUType_1_validation = validation_X[validation_X["RecordID"].isin(ICUType_1_validation_ids)]
ICUType_1_validation_missing = round((ICUType_1_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_1_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            15.15
ALT            15.14
AST            15.14
Age             1.13
Albumin        15.21
BUN            14.29
Bilirubin      15.14
Cholesterol    15.29
Creatinine     14.26
DiasABP         8.49
FiO2           13.73
GCS            11.38
Gender         15.06
Glucose        14.36
HCO3           14.34
HCT            14.02
HR              1.79
Height          1.13
ICUType        15.06
K              14.11
Lactate        15.02
MAP             8.51
MechVent       13.80
Mg             14.31
NIDiasABP       8.32
NIMAP           8.36
NISysABP        8.31
Na             14.36
PaCO2          14.10
PaO2           14.10
Platelets      14.27
RespRate        9.91
SaO2           14.61
SysABP          8.49
Temp           10.72
TroponinI      15.33
TroponinT      15.03
Urine           6.59
WBC            14.40
Weight          8.62
pH             14.06
dtype: float64

<h4>ICUType 1 measurements</h4>

In [55]:
ICUType_1_measurements_validation = round((ICUType_1_validation.count()/total_patients_validation)*100,2)
ICUType_1_measurements_validation

RecordID       15.38
level_1        15.38
Time           15.38
ALP             0.23
ALT             0.25
AST             0.24
Age            14.25
Albumin         0.17
BUN             1.09
Bilirubin       0.24
Cholesterol     0.10
Creatinine      1.12
DiasABP         6.89
FiO2            1.65
GCS             4.00
Gender          0.32
Glucose         1.02
HCO3            1.04
HCT             1.36
HR             13.59
Height         14.25
ICUType         0.32
K               1.27
Lactate         0.36
MAP             6.87
MechVent        1.58
Mg              1.07
NIDiasABP       7.06
NIMAP           7.02
NISysABP        7.07
Na              1.03
PaCO2           1.28
PaO2            1.28
Platelets       1.11
RespRate        5.47
SaO2            0.77
SysABP          6.89
Temp            4.66
TroponinI       0.05
TroponinT       0.35
Urine           8.79
WBC             0.99
Weight          6.76
pH              1.32
dtype: float64

<h4>ICUType 2 missing rate</h4>

In [56]:
ICUType_2_validation_ids = validation_X[validation_X["ICUType"] == 2.0]
ICUType_2_validation_ids = ICUType_2_validation_ids[ICUType_2_validation_ids["Time"] == 0.0]
ICUType_2_validation_ids = ICUType_2_validation_ids["RecordID"]
ICUType_2_validation = validation_X[validation_X["RecordID"].isin(ICUType_2_validation_ids)]
ICUType_2_validation_missing = round((ICUType_2_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_2_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            19.76
ALT            19.76
AST            19.76
Age             0.70
Albumin        19.82
BUN            18.63
Bilirubin      19.76
Cholesterol    19.91
Creatinine     18.63
DiasABP         4.37
FiO2           16.66
GCS            14.64
Gender         19.50
Glucose        19.01
HCO3           18.72
HCT            17.73
HR              1.70
Height          0.70
ICUType        19.50
K              18.87
Lactate        19.14
MAP             4.30
MechVent       16.66
Mg             18.69
NIDiasABP      15.71
NIMAP          15.77
NISysABP       15.70
Na             18.91
PaCO2          15.82
PaO2           15.83
Platelets      18.29
RespRate       19.43
SaO2           17.75
SysABP          4.37
Temp            8.08
TroponinI      19.89
TroponinT      19.86
Urine           3.07
WBC            18.56
Weight          9.34
pH             15.42
dtype: float64

<h4>ICUType 2 measurements</h4>

In [57]:
ICUType_2_measurements_validation = round((ICUType_2_validation.count()/total_patients_validation)*100,2)
ICUType_2_measurements_validation

RecordID       19.92
level_1        19.92
Time           19.92
ALP             0.15
ALT             0.16
AST             0.16
Age            19.22
Albumin         0.09
BUN             1.29
Bilirubin       0.16
Cholesterol     0.00
Creatinine      1.28
DiasABP        15.54
FiO2            3.26
GCS             5.27
Gender          0.41
Glucose         0.91
HCO3            1.19
HCT             2.18
HR             18.22
Height         19.22
ICUType         0.41
K               1.05
Lactate         0.78
MAP            15.62
MechVent        3.26
Mg              1.23
NIDiasABP       4.21
NIMAP           4.15
NISysABP        4.22
Na              1.00
PaCO2           4.10
PaO2            4.08
Platelets       1.62
RespRate        0.48
SaO2            2.16
SysABP         15.55
Temp           11.84
TroponinI       0.02
TroponinT       0.05
Urine          16.85
WBC             1.35
Weight         10.58
pH              4.50
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [58]:
ICUType_3_validation_ids = validation_X[validation_X["ICUType"] == 3.0]
ICUType_3_validation_ids = ICUType_3_validation_ids[ICUType_3_validation_ids["Time"] == 0.0]
ICUType_3_validation_ids = ICUType_3_validation_ids["RecordID"]
ICUType_3_validation = validation_X[validation_X["RecordID"].isin(ICUType_3_validation_ids)]
ICUType_3_validation_missing = round((ICUType_3_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_3_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            35.52
ALT            35.49
AST            35.49
Age             2.54
Albumin        35.72
BUN            33.63
Bilirubin      35.48
Cholesterol    36.30
Creatinine     33.62
DiasABP        24.11
FiO2           31.30
GCS            26.48
Gender         35.58
Glucose        33.63
HCO3           33.62
HCT            33.19
HR              3.73
Height          2.54
ICUType        35.58
K              33.39
Lactate        34.96
MAP            24.27
MechVent       31.47
Mg             33.74
NIDiasABP      15.02
NIMAP          15.36
NISysABP       15.00
Na             33.55
PaCO2          33.77
PaO2           33.77
Platelets      33.91
RespRate       25.02
SaO2           35.92
SysABP         24.10
Temp           26.04
TroponinI      36.27
TroponinT      35.83
Urine          14.15
WBC            34.02
Weight         12.77
pH             33.73
dtype: float64

<h4>ICUType 3 measurements</h4>

In [59]:
ICUType_3_measurements_validation = round((ICUType_3_validation.count()/total_patients_validation)*100,2)
ICUType_3_measurements_validation

RecordID       36.34
level_1        36.34
Time           36.34
ALP             0.82
ALT             0.85
AST             0.85
Age            33.80
Albumin         0.62
BUN             2.71
Bilirubin       0.86
Cholesterol     0.04
Creatinine      2.72
DiasABP        12.23
FiO2            5.04
GCS             9.86
Gender          0.76
Glucose         2.71
HCO3            2.72
HCT             3.15
HR             32.61
Height         33.80
ICUType         0.76
K               2.95
Lactate         1.38
MAP            12.07
MechVent        4.87
Mg              2.60
NIDiasABP      21.32
NIMAP          20.98
NISysABP       21.34
Na              2.79
PaCO2           2.57
PaO2            2.57
Platelets       2.43
RespRate       11.32
SaO2            0.42
SysABP         12.24
Temp           10.30
TroponinI       0.07
TroponinT       0.51
Urine          22.19
WBC             2.32
Weight         23.57
pH              2.61
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [60]:
ICUType_4_validation_ids = validation_X[validation_X["ICUType"] == 4.0]
ICUType_4_validation_ids = ICUType_4_validation_ids[ICUType_4_validation_ids["Time"] == 0.0]
ICUType_4_validation_ids = ICUType_4_validation_ids["RecordID"]
ICUType_4_validation = validation_X[validation_X["RecordID"].isin(ICUType_4_validation_ids)]
ICUType_4_validation_missing = round((ICUType_4_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_4_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            27.90
ALT            27.89
AST            27.89
Age             1.41
Albumin        27.96
BUN            26.28
Bilirubin      27.91
Cholesterol    28.32
Creatinine     26.28
DiasABP         9.59
FiO2           23.33
GCS            15.10
Gender         27.77
Glucose        26.26
HCO3           26.30
HCT            25.70
HR              2.47
Height          1.41
ICUType        27.77
K              26.07
Lactate        26.77
MAP             9.71
MechVent       23.42
Mg             26.24
NIDiasABP      17.89
NIMAP          18.01
NISysABP       17.88
Na             26.12
PaCO2          25.19
PaO2           25.20
Platelets      26.28
RespRate       20.59
SaO2           27.82
SysABP          9.59
Temp           18.58
TroponinI      28.33
TroponinT      28.12
Urine           7.12
WBC            26.39
Weight         15.56
pH             25.15
dtype: float64

<h4>ICUTYpe 4 measurements</h4>

In [61]:
ICUType_4_measurements_validation = round((ICUType_4_validation.count()/total_patients_validation)*100,2)
ICUType_4_measurements_validation

RecordID       28.36
level_1        28.36
Time           28.36
ALP             0.47
ALT             0.47
AST             0.48
Age            26.96
Albumin         0.40
BUN             2.08
Bilirubin       0.46
Cholesterol     0.04
Creatinine      2.08
DiasABP        18.78
FiO2            5.03
GCS            13.27
Gender          0.59
Glucose         2.10
HCO3            2.06
HCT             2.66
HR             25.90
Height         26.96
ICUType         0.59
K               2.29
Lactate         1.59
MAP            18.65
MechVent        4.94
Mg              2.12
NIDiasABP      10.47
NIMAP          10.35
NISysABP       10.48
Na              2.24
PaCO2           3.17
PaO2            3.16
Platelets       2.08
RespRate        7.77
SaO2            0.54
SysABP         18.78
Temp            9.78
TroponinI       0.03
TroponinT       0.24
Urine          21.24
WBC             1.97
Weight         12.80
pH              3.21
dtype: float64

<h4>+65 missing rate</h4>

In [62]:
more_than_or_equal_to_65_validation_ids = validation_X[validation_X["Age"] >= 65]
more_than_or_equal_to_65_validation_ids = more_than_or_equal_to_65_validation_ids[more_than_or_equal_to_65_validation_ids["Time"] == 0.0]
more_than_or_equal_to_65_validation_ids = more_than_or_equal_to_65_validation_ids["RecordID"]
more_than_or_equal_to_65_validation = validation_X[validation_X["RecordID"].isin(more_than_or_equal_to_65_validation_ids)]
more_than_or_equal_to_65_validation_missing = round((more_than_or_equal_to_65_validation.isna().sum()/total_patients_validation)*100,2)
more_than_or_equal_to_65_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            53.31
ALT            53.29
AST            53.29
Age             3.00
Albumin        53.44
BUN            50.28
Bilirubin      53.29
Cholesterol    53.91
Creatinine     50.26
DiasABP        24.87
FiO2           46.14
GCS            37.22
Gender         52.89
Glucose        50.56
HCO3           50.37
HCT            48.98
HR              5.09
Height          3.00
ICUType        52.89
K              50.09
Lactate        51.95
MAP            24.96
MechVent       46.39
Mg             50.36
NIDiasABP      30.70
NIMAP          30.92
NISysABP       30.68
Na             50.41
PaCO2          47.94
PaO2           47.96
Platelets      50.18
RespRate       40.82
SaO2           51.53
SysABP         24.86
Temp           33.78
TroponinI      53.89
TroponinT      53.22
Urine          15.97
WBC            50.51
Weight         24.66
pH             47.65
dtype: float64

<h4>+65 measurements</h4>

In [63]:
more_than_or_equal_to_65_validation_measurements = round((more_than_or_equal_to_65_validation.count()/total_patients_validation)*100,2)
more_than_or_equal_to_65_validation_measurements

RecordID       54.01
level_1        54.01
Time           54.01
ALP             0.71
ALT             0.73
AST             0.73
Age            51.02
Albumin         0.57
BUN             3.73
Bilirubin       0.73
Cholesterol     0.11
Creatinine      3.76
DiasABP        29.15
FiO2            7.88
GCS            16.79
Gender          1.13
Glucose         3.45
HCO3            3.65
HCT             5.04
HR             48.92
Height         51.02
ICUType         1.13
K               3.92
Lactate         2.06
MAP            29.06
MechVent        7.63
Mg              3.66
NIDiasABP      23.31
NIMAP          23.10
NISysABP       23.33
Na              3.61
PaCO2           6.07
PaO2            6.05
Platelets       3.83
RespRate       13.19
SaO2            2.48
SysABP         29.15
Temp           20.23
TroponinI       0.12
TroponinT       0.79
Urine          38.05
WBC             3.50
Weight         29.35
pH              6.37
dtype: float64

<h4>-65 missing rate</h4>

In [64]:
less_than_65_validation_ids = validation_X[validation_X["Age"] < 65]
less_than_65_validation_ids = less_than_65_validation_ids[less_than_65_validation_ids["Time"] == 0.0]
less_than_65_validation_ids = less_than_65_validation_ids["RecordID"]
less_than_65_validation = validation_X[validation_X["RecordID"].isin(less_than_65_validation_ids)]
less_than_65_validation_missing = round((less_than_65_validation.isna().sum()/total_patients_validation)*100,2)
less_than_65_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            45.02
ALT            44.99
AST            44.99
Age             2.78
Albumin        45.27
BUN            42.56
Bilirubin      44.99
Cholesterol    45.92
Creatinine     42.54
DiasABP        21.69
FiO2           38.88
GCS            30.37
Gender         45.03
Glucose        42.70
HCO3           42.62
HCT            41.66
HR              4.60
Height          2.78
ICUType        45.03
K              42.35
Lactate        43.94
MAP            21.83
MechVent       38.96
Mg             42.62
NIDiasABP      26.24
NIMAP          26.59
NISysABP       26.22
Na             42.54
PaCO2          40.93
PaO2           40.95
Platelets      42.57
RespRate       34.14
SaO2           44.57
SysABP         21.69
Temp           29.65
TroponinI      45.94
TroponinT      45.62
Urine          14.96
WBC            42.85
Weight         21.62
pH             40.71
dtype: float64

<h4>-65 measurements</h4>

In [65]:
less_than_65_validation_measurements = round((less_than_65_validation.count()/total_patients_validation)*100,2)
less_than_65_validation_measurements

RecordID       45.99
level_1        45.99
Time           45.99
ALP             0.97
ALT             1.00
AST             1.00
Age            43.20
Albumin         0.71
BUN             3.43
Bilirubin       0.99
Cholesterol     0.07
Creatinine      3.44
DiasABP        24.30
FiO2            7.11
GCS            15.61
Gender          0.96
Glucose         3.29
HCO3            3.36
HCT             4.32
HR             41.39
Height         43.20
ICUType         0.96
K               3.64
Lactate         2.04
MAP            24.15
MechVent        7.03
Mg              3.37
NIDiasABP      19.75
NIMAP          19.40
NISysABP       19.77
Na              3.45
PaCO2           5.06
PaO2            5.04
Platelets       3.42
RespRate       11.85
SaO2            1.42
SysABP         24.30
Temp           16.34
TroponinI       0.05
TroponinT       0.36
Urine          31.03
WBC             3.13
Weight         24.36
pH              5.28
dtype: float64

<h4>Filtering only patients who have height and weight</h4>

In [66]:
filtered_validation_X = validation_X[(validation_X['Height'] != -1) & (validation_X['Weight'] != -1) & (validation_X['Height'].notna()) & (validation_X['Weight'].notna())] 

<h4>Changing the height to meters</h4>

In [67]:
filtered_validation_X_metros = filtered_validation_X.copy()
filtered_validation_X_metros["Height"] = filtered_validation_X["Height"]/100
filtered_validation_X_metros["Height"]

576       1.575
577       1.575
578       1.575
579       1.575
580       1.575
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 25755, dtype: float64

<h4>Calculating BMI and classification</h4>

In [68]:
bmi_data_validation = filtered_validation_X_metros
bmi_data_validation["BMI"] = round(filtered_validation_X_metros["Weight"] / (filtered_validation_X_metros["Height"]**2), 1)
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
576,132568,0,0.0,,,,66.0,,,,...,,,,,220.0,,84.5,,34.1,Obesidade grau 1
577,132568,1,1.0,,,,66.0,,,,...,,,,,45.0,,84.5,,34.1,Obesidade grau 1
578,132568,2,2.0,,,,66.0,,,,...,,36.1,,,45.0,,84.5,,34.1,Obesidade grau 1
579,132568,3,3.0,,,,66.0,,18.0,,...,,,,,45.0,14.8,84.5,,34.1,Obesidade grau 1
580,132568,4,4.0,,,,66.0,,,,...,,,,,50.0,,84.5,,34.1,Obesidade grau 1


<h4>Taking only one moment from each patient</h4>

In [69]:
bmi_data_validation = bmi_data_validation.groupby("RecordID").first().reset_index()
bmi_data_validation

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132568,0,0.0,,,,66.0,,18.0,,...,,36.1,,,220.0,14.8,84.5,,34.1,Obesidade grau 1
1,132582,0,0.0,,,,84.0,2.6,31.0,,...,,36.3,,,200.0,5.3,82.5,,24.7,Peso normal
2,132585,0,0.0,,,,40.0,,,,...,90.5,,,,320.0,,84.7,,31.1,Obesidade grau 1
3,132622,0,0.0,,,,71.0,,64.0,,...,,37.4,19.0,,80.0,7.2,79.0,,30.9,Obesidade grau 1
4,132637,0,0.0,,,,78.0,,13.0,,...,99.0,37.0,,,90.0,14.2,56.0,7.39,19.3,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
990,162889,0,0.0,,,,64.0,,15.0,,...,125.0,37.0,,,160.0,9.3,89.2,7.42,26.7,Sobrepeso
991,162901,0,0.0,,,,45.0,,,,...,,,,,,,87.0,,26.8,Sobrepeso
992,162980,0,0.0,,,,88.0,,31.0,,...,133.0,38.0,,,45.0,17.6,76.5,7.38,29.9,Sobrepeso
993,163016,0,0.0,,27.0,120.0,65.0,,29.0,0.4,...,101.0,38.1,,,75.0,8.0,63.6,7.37,24.8,Peso normal


In [70]:
bmi_data_validation["Classificacao"].value_counts()

Classificacao
Sobrepeso           333
Peso normal         298
Obesidade grau 1    175
Obesidade grau 2     79
Obesidade grau 3     75
Baixo peso           35
Name: count, dtype: int64

<h4>Undefined classification missing rate</h4>

In [71]:
classificacao_undefined_ids_validation = bmi_data_validation["RecordID"]
classificacao_undefined_validation = validation_X[~validation_X["RecordID"].isin(classificacao_undefined_ids_validation)]
classificacao_undefined_missing_validation = round((classificacao_undefined_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_undefined_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            47.26
ALT            47.23
AST            47.23
Age             3.42
Albumin        47.42
BUN            44.78
Bilirubin      47.23
Cholesterol    48.04
Creatinine     44.76
DiasABP        28.42
FiO2           41.50
GCS            31.57
Gender         47.12
Glucose        44.79
HCO3           44.81
HCT            44.02
HR              5.10
Height          3.42
ICUType        47.12
K              44.46
Lactate        46.47
MAP            28.57
MechVent       41.77
Mg             44.81
NIDiasABP      22.42
NIMAP          22.80
NISysABP       22.41
Na             44.64
PaCO2          44.79
PaO2           44.81
Platelets      45.00
RespRate       32.36
SaO2           47.55
SysABP         28.42
Temp           34.96
TroponinI      48.05
TroponinT      47.50
Urine          17.12
WBC            45.12
Weight         22.38
pH             44.72
dtype: float64

<h4>Undefined classification measurements</h4>

In [72]:
classificacao_undefined_measurements_validation = round((classificacao_undefined_validation.count()/total_patients_validation)*100,2)
classificacao_undefined_measurements_validation

RecordID       48.12
level_1        48.12
Time           48.12
ALP             0.87
ALT             0.89
AST             0.89
Age            44.70
Albumin         0.70
BUN             3.34
Bilirubin       0.90
Cholesterol     0.08
Creatinine      3.37
DiasABP        19.70
FiO2            6.62
GCS            16.55
Gender          1.00
Glucose         3.33
HCO3            3.32
HCT             4.10
HR             43.02
Height         44.70
ICUType         1.00
K               3.67
Lactate         1.66
MAP            19.55
MechVent        6.35
Mg              3.31
NIDiasABP      25.70
NIMAP          25.32
NISysABP       25.72
Na              3.48
PaCO2           3.33
PaO2            3.32
Platelets       3.12
RespRate       15.76
SaO2            0.57
SysABP         19.71
Temp           13.16
TroponinI       0.07
TroponinT       0.63
Urine          31.01
WBC             3.01
Weight         25.74
pH              3.40
dtype: float64

<h4>Classification low weight missing rate</h4>

In [73]:
classificacao_baixo_peso_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_ids_validation = classificacao_baixo_peso_ids_validation["RecordID"]
classificacao_baixo_peso_validation = validation_X[validation_X["RecordID"].isin(classificacao_baixo_peso_ids_validation)]
classificacao_baixo_peso_missing_validation = round((classificacao_baixo_peso_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_baixo_peso_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            1.80
ALT            1.80
AST            1.80
Age            0.07
Albumin        1.81
BUN            1.69
Bilirubin      1.80
Cholesterol    1.82
Creatinine     1.69
DiasABP        0.68
FiO2           1.57
GCS            1.22
Gender         1.79
Glucose        1.70
HCO3           1.69
HCT            1.64
HR             0.14
Height         0.07
ICUType        1.79
K              1.68
Lactate        1.73
MAP            0.68
MechVent       1.56
Mg             1.68
NIDiasABP      1.19
NIMAP          1.20
NISysABP       1.19
Na             1.69
PaCO2          1.62
PaO2           1.62
Platelets      1.69
RespRate       1.30
SaO2           1.75
SysABP         0.68
Temp           1.06
TroponinI      1.82
TroponinT      1.81
Urine          0.53
WBC            1.70
Weight         0.94
pH             1.61
dtype: float64

<h4>Classification low weight measurements</h4>

In [74]:
classificacao_baixo_peso_measurements_validation = round((classificacao_baixo_peso_validation.count()/total_patients_validation)*100,2)
classificacao_baixo_peso_measurements_validation

RecordID       1.82
level_1        1.82
Time           1.82
ALP            0.03
ALT            0.03
AST            0.03
Age            1.75
Albumin        0.02
BUN            0.13
Bilirubin      0.03
Cholesterol    0.00
Creatinine     0.14
DiasABP        1.15
FiO2           0.26
GCS            0.61
Gender         0.04
Glucose        0.12
HCO3           0.14
HCT            0.18
HR             1.69
Height         1.75
ICUType        0.04
K              0.14
Lactate        0.09
MAP            1.15
MechVent       0.27
Mg             0.14
NIDiasABP      0.63
NIMAP          0.62
NISysABP       0.63
Na             0.14
PaCO2          0.21
PaO2           0.21
Platelets      0.13
RespRate       0.53
SaO2           0.08
SysABP         1.15
Temp           0.76
TroponinI      0.01
TroponinT      0.02
Urine          1.29
WBC            0.12
Weight         0.89
pH             0.22
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [75]:
classificacao_peso_normal_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Peso normal"]
classificacao_peso_normal_ids_validation = classificacao_peso_normal_ids_validation["RecordID"]
classificacao_peso_normal_validation = validation_X[validation_X["RecordID"].isin(classificacao_peso_normal_ids_validation)]
classificacao_peso_normal_missing_validation = round((classificacao_peso_normal_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_peso_normal_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            15.30
ALT            15.29
AST            15.29
Age             0.69
Albumin        15.37
BUN            14.42
Bilirubin      15.29
Cholesterol    15.51
Creatinine     14.43
DiasABP         5.89
FiO2           13.16
GCS            10.61
Gender         15.21
Glucose        14.54
HCO3           14.47
HCT            13.96
HR              1.37
Height          0.69
ICUType        15.21
K              14.39
Lactate        14.82
MAP             5.90
MechVent       13.09
Mg             14.44
NIDiasABP      10.01
NIMAP          10.03
NISysABP       10.00
Na             14.48
PaCO2          13.32
PaO2           13.33
Platelets      14.31
RespRate       12.29
SaO2           14.66
SysABP          5.89
Temp            8.75
TroponinI      15.50
TroponinT      15.40
Urine           4.25
WBC            14.44
Weight          7.39
pH             13.20
dtype: float64

<h4>Classification normal weight measurements</h4>

In [76]:
classificacao_peso_normal_measurements_validation = round((classificacao_peso_normal_validation.count()/total_patients_validation)*100,2)
classificacao_peso_normal_measurements_validation

RecordID       15.54
level_1        15.54
Time           15.54
ALP             0.24
ALT             0.25
AST             0.25
Age            14.85
Albumin         0.17
BUN             1.11
Bilirubin       0.25
Cholesterol     0.02
Creatinine      1.11
DiasABP         9.64
FiO2            2.38
GCS             4.93
Gender          0.32
Glucose         0.99
HCO3            1.07
HCT             1.57
HR             14.17
Height         14.85
ICUType         0.32
K               1.15
Lactate         0.72
MAP             9.64
MechVent        2.45
Mg              1.09
NIDiasABP       5.53
NIMAP           5.51
NISysABP        5.54
Na              1.06
PaCO2           2.22
PaO2            2.21
Platelets       1.23
RespRate        3.24
SaO2            0.87
SysABP          9.64
Temp            6.79
TroponinI       0.04
TroponinT       0.14
Urine          11.29
WBC             1.09
Weight          8.15
pH              2.34
dtype: float64

<h4>Classification overweight missing rate</h4>

In [77]:
classificacao_sobrepeso_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_ids_validation = classificacao_sobrepeso_ids_validation["RecordID"]
classificacao_sobrepeso_validation = validation_X[validation_X["RecordID"].isin(classificacao_sobrepeso_ids_validation)]
classificacao_sobrepeso_missing_validation = round((classificacao_sobrepeso_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_sobrepeso_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            17.10
ALT            17.09
AST            17.09
Age             0.83
Albumin        17.15
BUN            16.04
Bilirubin      17.09
Cholesterol    17.33
Creatinine     16.03
DiasABP         5.70
FiO2           14.47
GCS            12.10
Gender         17.00
Glucose        16.20
HCO3           16.08
HCT            15.58
HR              1.60
Height          0.83
ICUType        17.00
K              16.04
Lactate        16.55
MAP             5.75
MechVent       14.63
Mg             16.10
NIDiasABP      11.71
NIMAP          11.77
NISysABP       11.70
Na             16.14
PaCO2          14.63
PaO2           14.64
Platelets      15.92
RespRate       14.90
SaO2           16.16
SysABP          5.70
Temp            9.23
TroponinI      17.35
TroponinT      17.18
Urine           4.68
WBC            16.12
Weight          8.31
pH             14.45
dtype: float64

<h4>Classification overweight measurements</h4>

In [78]:
classificacao_sobrepeso_measurements_validation = round((classificacao_sobrepeso_validation.count()/total_patients_validation)*100,2)
classificacao_sobrepeso_measurements_validation

RecordID       17.36
level_1        17.36
Time           17.36
ALP             0.27
ALT             0.27
AST             0.27
Age            16.53
Albumin         0.21
BUN             1.32
Bilirubin       0.27
Cholesterol     0.03
Creatinine      1.34
DiasABP        11.66
FiO2            2.89
GCS             5.26
Gender          0.36
Glucose         1.16
HCO3            1.28
HCT             1.79
HR             15.77
Height         16.53
ICUType         0.36
K               1.32
Lactate         0.81
MAP            11.61
MechVent        2.73
Mg              1.26
NIDiasABP       5.65
NIMAP           5.60
NISysABP        5.66
Na              1.22
PaCO2           2.73
PaO2            2.72
Platelets       1.44
RespRate        2.47
SaO2            1.20
SysABP         11.66
Temp            8.13
TroponinI       0.02
TroponinT       0.18
Urine          12.68
WBC             1.24
Weight          9.05
pH              2.92
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [79]:
classificacao_obesidade_1_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_1_ids_validation = classificacao_obesidade_1_ids_validation["RecordID"]
classificacao_obesidade_1_validation = validation_X[validation_X["RecordID"].isin(classificacao_obesidade_1_ids_validation)]
classificacao_obesidade_1_missing_validation = round((classificacao_obesidade_1_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_obesidade_1_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            8.97
ALT            8.97
AST            8.97
Age            0.36
Albumin        9.02
BUN            8.45
Bilirubin      8.97
Cholesterol    9.11
Creatinine     8.45
DiasABP        2.93
FiO2           7.64
GCS            6.39
Gender         8.93
Glucose        8.53
HCO3           8.48
HCT            8.19
HR             0.77
Height         0.36
ICUType        8.93
K              8.43
Lactate        8.66
MAP            2.94
MechVent       7.63
Mg             8.49
NIDiasABP      6.27
NIMAP          6.30
NISysABP       6.26
Na             8.52
PaCO2          7.68
PaO2           7.68
Platelets      8.42
RespRate       7.84
SaO2           8.47
SysABP         2.93
Temp           4.94
TroponinI      9.11
TroponinT      9.04
Urine          2.17
WBC            8.51
Weight         3.90
pH             7.60
dtype: float64

<h4>Grade 1 obesity measurements</h4>

In [80]:
classificacao_obesidade_1_measurements_validation = round((classificacao_obesidade_1_validation.count()/total_patients_validation)*100,2)
classificacao_obesidade_1_measurements_validation

RecordID       9.12
level_1        9.12
Time           9.12
ALP            0.15
ALT            0.15
AST            0.15
Age            8.76
Albumin        0.10
BUN            0.67
Bilirubin      0.15
Cholesterol    0.02
Creatinine     0.68
DiasABP        6.19
FiO2           1.48
GCS            2.73
Gender         0.19
Glucose        0.59
HCO3           0.64
HCT            0.93
HR             8.36
Height         8.76
ICUType        0.19
K              0.69
Lactate        0.47
MAP            6.19
MechVent       1.50
Mg             0.63
NIDiasABP      2.86
NIMAP          2.83
NISysABP       2.86
Na             0.60
PaCO2          1.44
PaO2           1.44
Platelets      0.70
RespRate       1.28
SaO2           0.65
SysABP         6.19
Temp           4.18
TroponinI      0.01
TroponinT      0.08
Urine          6.96
WBC            0.61
Weight         5.22
pH             1.52
dtype: float64

<h4>Grade 2 obesity missing rate</h4>

In [81]:
classificacao_obesidade_2_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_2_ids_validation = classificacao_obesidade_2_ids_validation["RecordID"]
classificacao_obesidade_2_validation = validation_X[validation_X["RecordID"].isin(classificacao_obesidade_2_ids_validation)]
classificacao_obesidade_2_missing_validation = round((classificacao_obesidade_2_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_obesidade_2_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            4.04
ALT            4.04
AST            4.04
Age            0.19
Albumin        4.06
BUN            3.81
Bilirubin      4.04
Cholesterol    4.11
Creatinine     3.81
DiasABP        1.37
FiO2           3.45
GCS            2.89
Gender         4.03
Glucose        3.83
HCO3           3.80
HCT            3.69
HR             0.35
Height         0.19
ICUType        4.03
K              3.80
Lactate        3.91
MAP            1.37
MechVent       3.47
Mg             3.80
NIDiasABP      2.76
NIMAP          2.79
NISysABP       2.75
Na             3.81
PaCO2          3.45
PaO2           3.45
Platelets      3.78
RespRate       3.28
SaO2           3.79
SysABP         1.37
Temp           2.06
TroponinI      4.11
TroponinT      4.05
Urine          1.10
WBC            3.81
Weight         1.68
pH             3.42
dtype: float64

<h4>Grade 2 obesity measurements</h4>

In [82]:
classificacao_obesidade_2_measurements_validation = round((classificacao_obesidade_2_validation.count()/total_patients_validation)*100,2)
classificacao_obesidade_2_measurements_validation

RecordID       4.12
level_1        4.12
Time           4.12
ALP            0.08
ALT            0.08
AST            0.08
Age            3.93
Albumin        0.06
BUN            0.31
Bilirubin      0.08
Cholesterol    0.01
Creatinine     0.31
DiasABP        2.75
FiO2           0.67
GCS            1.23
Gender         0.09
Glucose        0.29
HCO3           0.31
HCT            0.43
HR             3.77
Height         3.93
ICUType        0.09
K              0.31
Lactate        0.21
MAP            2.75
MechVent       0.65
Mg             0.32
NIDiasABP      1.36
NIMAP          1.32
NISysABP       1.37
Na             0.31
PaCO2          0.67
PaO2           0.67
Platelets      0.34
RespRate       0.84
SaO2           0.33
SysABP         2.75
Temp           2.06
TroponinI      0.01
TroponinT      0.07
Urine          3.01
WBC            0.31
Weight         2.44
pH             0.70
dtype: float64

<h4>Grade 3 obesity missing rate</h4>

In [83]:
classificacao_obesidade_3_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_3_ids_validation = classificacao_obesidade_3_ids_validation["RecordID"]
classificacao_obesidade_3_validation = validation_X[validation_X["RecordID"].isin(classificacao_obesidade_3_ids_validation)]
classificacao_obesidade_3_missing_validation = round((classificacao_obesidade_3_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_obesidade_3_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.86
ALT            3.86
AST            3.86
Age            0.21
Albumin        3.88
BUN            3.65
Bilirubin      3.86
Cholesterol    3.90
Creatinine     3.65
DiasABP        1.57
FiO2           3.22
GCS            2.82
Gender         3.83
Glucose        3.67
HCO3           3.66
HCT            3.56
HR             0.37
Height         0.21
ICUType        3.83
K              3.63
Lactate        3.75
MAP            1.59
MechVent       3.20
Mg             3.64
NIDiasABP      2.59
NIMAP          2.62
NISysABP       2.59
Na             3.67
PaCO2          3.38
PaO2           3.38
Platelets      3.64
RespRate       2.99
SaO2           3.72
SysABP         1.57
Temp           2.43
TroponinI      3.90
TroponinT      3.86
Urine          1.08
WBC            3.66
Weight         1.69
pH             3.37
dtype: float64

<h4>Grade 3 obesity measurements</h4>

In [84]:
classificacao_obesidade_3_measurements_validation = round((classificacao_obesidade_3_validation.count()/total_patients_validation)*100,2)
classificacao_obesidade_3_measurements_validation

RecordID       3.91
level_1        3.91
Time           3.91
ALP            0.05
ALT            0.05
AST            0.05
Age            3.70
Albumin        0.03
BUN            0.26
Bilirubin      0.05
Cholesterol    0.01
Creatinine     0.26
DiasABP        2.34
FiO2           0.69
GCS            1.09
Gender         0.08
Glucose        0.24
HCO3           0.25
HCT            0.35
HR             3.54
Height         3.70
ICUType        0.08
K              0.28
Lactate        0.16
MAP            2.32
MechVent       0.71
Mg             0.27
NIDiasABP      1.32
NIMAP          1.29
NISysABP       1.33
Na             0.24
PaCO2          0.53
PaO2           0.53
Platelets      0.27
RespRate       0.92
SaO2           0.19
SysABP         2.34
Temp           1.48
TroponinI      0.01
TroponinT      0.05
Urine          2.83
WBC            0.25
Weight         2.22
pH             0.54
dtype: float64

<h4>Construction of the missing rate table</h4>

In [85]:
df_missing_validation = pd.DataFrame(columns=df_columns)
df_missing_transpose_validation = df_missing_validation.T
df_missing_transpose_validation["Female"] = female_gender_missing_rate_validation
df_missing_transpose_validation["Male"] = male_gender_missing_rate_validation
df_missing_transpose_validation["Undefined gender"] = undefined_gender_missing_rate_validation
df_missing_transpose_validation["ICUType 1"] = ICUType_1_validation_missing
df_missing_transpose_validation["ICUType 2"] = ICUType_2_validation_missing
df_missing_transpose_validation["ICUType 3"] = ICUType_3_validation_missing
df_missing_transpose_validation["ICUType 4"] = ICUType_4_validation_missing
df_missing_transpose_validation["Age 65+"] = more_than_or_equal_to_65_validation_missing
df_missing_transpose_validation["Age 65-"] = less_than_65_validation_missing
df_missing_transpose_validation["Low Weight"] = classificacao_baixo_peso_missing_validation
df_missing_transpose_validation["Normal Weight"] = classificacao_peso_normal_missing_validation
df_missing_transpose_validation["Overweight"] = classificacao_sobrepeso_missing_validation
df_missing_transpose_validation["Obesity Grade 1"] = classificacao_obesidade_1_missing_validation
df_missing_transpose_validation["Obesity Grade 2"] = classificacao_obesidade_2_missing_validation
df_missing_transpose_validation["Obesity Grade 3"] = classificacao_obesidade_3_missing_validation
df_missing_transpose_validation["Undefined classification"] = classificacao_undefined_missing_validation
df_missing_transpose_validation = df_missing_transpose_validation.drop("RecordID", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("level_1", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("Time", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("Age", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographcs - Validation</h2>"))
df_missing_transpose_validation

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,41.96,56.31,0.05,15.15,19.76,35.52,27.9,53.31,45.02,1.8,15.3,17.1,8.97,4.04,3.86,47.26
ALT,41.94,56.28,0.05,15.14,19.76,35.49,27.89,53.29,44.99,1.8,15.29,17.09,8.97,4.04,3.86,47.23
AST,41.94,56.28,0.05,15.14,19.76,35.49,27.89,53.29,44.99,1.8,15.29,17.09,8.97,4.04,3.86,47.23
Albumin,42.08,56.58,0.05,15.21,19.82,35.72,27.96,53.44,45.27,1.81,15.37,17.15,9.02,4.06,3.88,47.42
BUN,39.63,53.16,0.05,14.29,18.63,33.63,26.28,50.28,42.56,1.69,14.42,16.04,8.45,3.81,3.65,44.78
Bilirubin,41.94,56.29,0.05,15.14,19.76,35.48,27.91,53.29,44.99,1.8,15.29,17.09,8.97,4.04,3.86,47.23
Cholesterol,42.57,57.2,0.05,15.29,19.91,36.3,28.32,53.91,45.92,1.82,15.51,17.33,9.11,4.11,3.9,48.04
Creatinine,39.62,53.13,0.05,14.26,18.63,33.62,26.28,50.26,42.54,1.69,14.43,16.03,8.45,3.81,3.65,44.76
DiasABP,21.69,24.87,0.0,8.49,4.37,24.11,9.59,24.87,21.69,0.68,5.89,5.7,2.93,1.37,1.57,28.42
FiO2,36.48,48.5,0.04,13.73,16.66,31.3,23.33,46.14,38.88,1.57,13.16,14.47,7.64,3.45,3.22,41.5


<h4>Construction of the measurements table</h4>

In [86]:
df_measurements_validation = pd.DataFrame(columns=df_columns)
df_measurements_transpose_validation = df_measurements_validation.T
df_measurements_transpose_validation["Female"] = female_gender_measurements_validation
df_measurements_transpose_validation["Male"] = male_gender_measurements_validation
df_measurements_transpose_validation["Undefined gender"] = undefined_gender_measurements_validation
df_measurements_transpose_validation["ICUType 1"] = ICUType_1_measurements_validation
df_measurements_transpose_validation["ICUType 2"] = ICUType_2_measurements_validation
df_measurements_transpose_validation["ICUType 3"] = ICUType_3_measurements_validation
df_measurements_transpose_validation["ICUType 4"] = ICUType_4_measurements_validation
df_measurements_transpose_validation["Age 65+"] = more_than_or_equal_to_65_validation_measurements
df_measurements_transpose_validation["Age 65-"] = less_than_65_validation_measurements
df_measurements_transpose_validation["Low Weight"] = classificacao_baixo_peso_measurements_validation
df_measurements_transpose_validation["Normal Weight"] = classificacao_peso_normal_measurements_validation
df_measurements_transpose_validation["Overweight"] = classificacao_sobrepeso_measurements_validation
df_measurements_transpose_validation["Obesity Grade 1"] = classificacao_obesidade_1_measurements_validation
df_measurements_transpose_validation["Obesity Grade 2"] = classificacao_obesidade_2_measurements_validation
df_measurements_transpose_validation["Obesity Grade 3"] = classificacao_obesidade_3_measurements_validation
df_measurements_transpose_validation["Undefined classification"] = classificacao_undefined_measurements_validation
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("RecordID", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("level_1", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("Time", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("Age", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_measurements_transpose_validation

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,0.69,0.99,0.0,0.23,0.15,0.82,0.47,0.71,0.97,0.03,0.24,0.27,0.15,0.08,0.05,0.87
ALT,0.71,1.02,0.0,0.25,0.16,0.85,0.47,0.73,1.0,0.03,0.25,0.27,0.15,0.08,0.05,0.89
AST,0.71,1.02,0.0,0.24,0.16,0.85,0.48,0.73,1.0,0.03,0.25,0.27,0.15,0.08,0.05,0.89
Albumin,0.57,0.72,0.0,0.17,0.09,0.62,0.4,0.57,0.71,0.02,0.17,0.21,0.1,0.06,0.03,0.7
BUN,3.02,4.14,0.0,1.09,1.29,2.71,2.08,3.73,3.43,0.13,1.11,1.32,0.67,0.31,0.26,3.34
Bilirubin,0.7,1.01,0.0,0.24,0.16,0.86,0.46,0.73,0.99,0.03,0.25,0.27,0.15,0.08,0.05,0.9
Cholesterol,0.08,0.1,0.0,0.1,0.0,0.04,0.04,0.11,0.07,0.0,0.02,0.03,0.02,0.01,0.01,0.08
Creatinine,3.03,4.16,0.0,1.12,1.28,2.72,2.08,3.76,3.44,0.14,1.11,1.34,0.68,0.31,0.26,3.37
DiasABP,20.96,32.43,0.05,6.89,15.54,12.23,18.78,29.15,24.3,1.15,9.64,11.66,6.19,2.75,2.34,19.7
FiO2,6.17,8.8,0.01,1.65,3.26,5.04,5.03,7.88,7.11,0.26,2.38,2.89,1.48,0.67,0.69,6.62


# Test data

<h4>Loading test database</h4>

In [90]:
test_X = physionet2012_dataset['test_X']

In [119]:
teste  ="tese"

28328.24s - invalid syntax (<string>, line 1)
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_vars.py", line 629, in change_attr_expression
    value = eval(expression, frame.f_globals, frame.f_locals)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<string>", line 1
    ALP   ALT    AST  Albumin  BUN  ...  TroponinT  Urine  WBC  Weight  \96        NaN   NaN    NaN      NaN  NaN  ...        NaN  100.0  NaN    56.7   97      127.0  91.0  235.0      2.7  8.0  ...        NaN   60.0  4.2    56.7   98        NaN   NaN    NaN      NaN  NaN  ...        NaN   75.0  NaN    56.7   99        NaN   NaN    NaN      NaN  NaN  ...        NaN   30.0  NaN    56.7   100       NaN   NaN    NaN      NaN  NaN  ...        NaN  125.0  NaN    56.7   ...       ...   ...    ...      ...  ...  ...        ...    ...  ...     ...   575419    NaN   NaN    NaN      NaN  NaN  ...        NaN   60.0  NaN     NaN   57

<h4>Total measurements of each variable</h4>

In [4]:
total_pacientes_test = test_X["RecordID"].count()
total_pacientes_test

np.int64(115152)

#### Test_X with only time series

In [91]:
test_X_mts = test_X.copy()

In [6]:
print(test_X_mts.columns)

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')


In [92]:
test_X_mts = test_X_mts.drop(["Gender", "Age", "ICUType", "RecordID", "level_1", "Time", "Height"], axis=1)

In [93]:
print(test_X_mts.columns)

Index(['ALP', 'ALT', 'AST', 'Albumin', 'BUN', 'Bilirubin', 'Cholesterol',
       'Creatinine', 'DiasABP', 'FiO2', 'GCS', 'Glucose', 'HCO3', 'HCT', 'HR',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')


#### Total measurements test

In [13]:
total_medicoes = test_X_mts.count().sum() + test_X_mts.isna().sum().sum()

In [14]:
total_medicoes

np.int64(4260624)

In [95]:
len(test_X_mts) * 37 

4260624

<h4>Female gender missing rate</h4>

In [114]:
female_gender_test_ids = test_X[test_X['Gender'] == 0.0]
female_gender_test_ids = female_gender_test_ids["RecordID"]
female_gender_test = test_X[test_X["RecordID"].isin(female_gender_test_ids)]
total_gender_female = female_gender_test["RecordID"].count()
female_gender_missing_rate_test = round((female_gender_test.isna().sum()/total_pacientes_test)*100,2)
female_gender_missing_rate_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            42.55
ALT            42.52
AST            42.52
Age             2.54
Albumin        42.68
BUN            40.12
Bilirubin      42.51
Cholesterol    43.19
Creatinine     40.10
DiasABP        20.39
FiO2           36.39
GCS            29.09
Gender         42.37
Glucose        40.25
HCO3           40.16
HCT            39.21
HR              4.14
Height          2.54
ICUType        42.37
K              39.94
Lactate        41.53
MAP            20.54
MechVent       36.52
Mg             40.16
NIDiasABP      24.63
NIMAP          24.89
NISysABP       24.62
Na             40.13
PaCO2          38.56
PaO2           38.58
Platelets      40.11
RespRate       32.52
SaO2           41.82
SysABP         20.39
Temp           28.19
TroponinI      43.18
TroponinT      42.79
Urine          13.35
WBC            40.34
Weight         20.05
pH             38.38
dtype: float64

#### Removing variables that are not time series

In [16]:
female_gender_test = female_gender_test.drop(["Gender", "Age", "ICUType", "RecordID", "level_1", "Time", "Height"], axis=1)

#### Missing rate per subgroup

In [21]:
total_medicoes_female = female_gender_test.count().sum() + female_gender_test.isna().sum().sum()

In [22]:
total_medicoes_female

np.int64(1843488)

In [27]:
missing_rate_female = round((female_gender_test.isna().sum().sum()/total_medicoes)*100, 2)

In [28]:
missing_rate_female

np.float64(34.51)

<h4>Female gender measurements</h4>

In [19]:
female_gender_measurements_test = round((female_gender_test.count()/total_pacientes_test)*100,2)
female_gender_measurements_test

RecordID       43.27
level_1        43.27
Time           43.27
ALP             0.72
ALT             0.75
AST             0.75
Age            40.73
Albumin         0.59
BUN             3.15
Bilirubin       0.76
Cholesterol     0.08
Creatinine      3.16
DiasABP        22.88
FiO2            6.88
GCS            14.18
Gender          0.90
Glucose         3.02
HCO3            3.11
HCT             4.06
HR             39.12
Height         40.73
ICUType         0.90
K               3.33
Lactate         1.74
MAP            22.73
MechVent        6.75
Mg              3.11
NIDiasABP      18.63
NIMAP          18.38
NISysABP       18.64
Na              3.14
PaCO2           4.70
PaO2            4.69
Platelets       3.16
RespRate       10.75
SaO2            1.45
SysABP         22.88
Temp           15.08
TroponinI       0.09
TroponinT       0.47
Urine          29.91
WBC             2.93
Weight         23.22
pH              4.88
dtype: float64

In [29]:
round((female_gender_test.count().sum()/total_medicoes)*100, 2)

np.float64(8.75)

<h4>Male gender missing rate</h4>

In [30]:
male_gender_test_ids = test_X[test_X['Gender'] == 1.0]
male_gender_test_ids = male_gender_test_ids["RecordID"]
male_gender_test = test_X[test_X["RecordID"].isin(male_gender_test_ids)]
total_gender_male = male_gender_test["RecordID"].count()
male_gender_missing_rate_test = round((male_gender_test.isna().sum()/total_pacientes_test)*100,2)
male_gender_missing_rate_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            55.70
ALT            55.67
AST            55.67
Age             3.27
Albumin        55.93
BUN            52.53
Bilirubin      55.67
Cholesterol    56.51
Creatinine     52.52
DiasABP        24.31
FiO2           47.47
GCS            38.54
Gender         55.43
Glucose        52.80
HCO3           52.63
HCT            51.03
HR              5.70
Height          3.27
ICUType        55.43
K              52.36
Lactate        54.20
MAP            24.60
MechVent       48.00
Mg             52.58
NIDiasABP      34.10
NIMAP          34.38
NISysABP       34.06
Na             52.62
PaCO2          49.53
PaO2           49.53
Platelets      52.36
RespRate       45.49
SaO2           54.08
SysABP         24.31
Temp           34.64
TroponinI      56.49
TroponinT      55.99
Urine          17.36
WBC            52.79
Weight         27.50
pH             49.18
dtype: float64

#### Removing variables that are not time series

In [31]:
male_gender_test = male_gender_test.drop(["Gender", "Age", "ICUType", "RecordID", "level_1", "Time", "Height"], axis=1)

#### Missing rate per subgroup

In [36]:
total_medicoes_male = male_gender_test.count().sum() + male_gender_test.isna().sum().sum()

In [37]:
total_medicoes_male

np.int64(2411808)

In [44]:
missing_rate_male = round((male_gender_test.isna().sum().sum()/total_medicoes)*100, 2)

In [45]:
missing_rate_male

np.float64(45.1)

<h4>Male gender measurements</h4>

In [46]:
male_gender_measurements_test = round((male_gender_test.count()/total_pacientes_test)*100,2)
male_gender_measurements_test

ALP             0.91
ALT             0.94
AST             0.94
Albumin         0.68
BUN             4.08
Bilirubin       0.94
Cholesterol     0.10
Creatinine      4.09
DiasABP        32.30
FiO2            9.14
GCS            18.07
Glucose         3.81
HCO3            3.98
HCT             5.58
HR             50.91
K               4.24
Lactate         2.41
MAP            32.01
MechVent        8.60
Mg              4.02
NIDiasABP      22.51
NIMAP          22.23
NISysABP       22.54
Na              3.98
PaCO2           7.08
PaO2            7.07
Platelets       4.25
RespRate       11.12
SaO2            2.52
SysABP         32.30
Temp           21.97
TroponinI       0.12
TroponinT       0.62
Urine          39.25
WBC             3.82
Weight         29.11
pH              7.43
dtype: float64

In [47]:
round((male_gender_test.count().sum()/total_medicoes)*100, 2)

np.float64(11.5)

<h4>Undefined gender missing rate</h4>

In [96]:
undefined_gender_ids_test = test_X[test_X["Gender"] == -1.0]
undefined_gender_ids_test = undefined_gender_ids_test["RecordID"]
undefined_gender_test = test_X[test_X["RecordID"].isin(undefined_gender_ids_test)]
undefined_gender_missing_rate_test = round((undefined_gender_test.isna().sum()/total_pacientes_test)*100,2)
undefined_gender_missing_rate_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.12
ALT            0.12
AST            0.12
Age            0.04
Albumin        0.13
BUN            0.12
Bilirubin      0.12
Cholesterol    0.13
Creatinine     0.12
DiasABP        0.05
FiO2           0.13
GCS            0.09
Gender         0.12
Glucose        0.12
HCO3           0.12
HCT            0.12
HR             0.05
Height         0.04
ICUType        0.12
K              0.12
Lactate        0.12
MAP            0.05
MechVent       0.13
Mg             0.12
NIDiasABP      0.09
NIMAP          0.09
NISysABP       0.09
Na             0.12
PaCO2          0.12
PaO2           0.12
Platelets      0.12
RespRate       0.09
SaO2           0.12
SysABP         0.05
Temp           0.11
TroponinI      0.13
TroponinT      0.13
Urine          0.09
WBC            0.12
Weight         0.09
pH             0.12
dtype: float64

#### Removing variables that are not time series

In [97]:
undefined_gender_test = undefined_gender_test.drop(["Gender", "Age", "ICUType", "RecordID", "level_1", "Time", "Height"], axis=1)

#### Missing rate per subgroup

In [98]:
total_medicoes_undefined = undefined_gender_test.count().sum() + undefined_gender_test.isna().sum().sum()

In [54]:
total_medicoes_undefined

np.int64(5328)

In [99]:
round((total_medicoes_undefined/total_medicoes)*100, 2)

np.float64(0.13)

In [56]:
missing_rate_undefined = round((undefined_gender_test.isna().sum().sum()/total_medicoes)*100, 2)

In [57]:
missing_rate_undefined

np.float64(0.11)

<h4>Undefined gender measurements</h4>

In [58]:
undefined_gender_measurements_test = round((undefined_gender_test.count()/total_pacientes_test)*100,2)
undefined_gender_measurements_test

ALP            0.00
ALT            0.00
AST            0.00
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.07
FiO2           0.00
GCS            0.04
Glucose        0.01
HCO3           0.01
HCT            0.01
HR             0.08
K              0.01
Lactate        0.00
MAP            0.07
MechVent       0.00
Mg             0.01
NIDiasABP      0.03
NIMAP          0.03
NISysABP       0.03
Na             0.01
PaCO2          0.00
PaO2           0.00
Platelets      0.01
RespRate       0.04
SaO2           0.00
SysABP         0.07
Temp           0.02
TroponinI      0.00
TroponinT      0.00
Urine          0.03
WBC            0.01
Weight         0.03
pH             0.00
dtype: float64

In [59]:
round((undefined_gender_test.count().sum()/total_medicoes)*100, 2)

np.float64(0.02)

<h4>ICUType 1 missing rate</h4>

In [95]:
ICUType_1_test_ids = test_X[test_X["ICUType"] == 1.0]
ICUType_1_test_ids = ICUType_1_test_ids[ICUType_1_test_ids["Time"] == 0.0]
ICUType_1_test_ids = ICUType_1_test_ids["RecordID"]
ICUType_1_test = test_X[test_X["RecordID"].isin(ICUType_1_test_ids)]
ICUType_1_test_missing = round((ICUType_1_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_1_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            13.69
ALT            13.68
AST            13.68
Age             1.30
Albumin        13.75
BUN            12.88
Bilirubin      13.68
Cholesterol    13.79
Creatinine     12.87
DiasABP         8.28
FiO2           12.38
GCS            10.32
Gender         13.59
Glucose        12.91
HCO3           12.91
HCT            12.67
HR              1.92
Height          1.30
ICUType        13.59
K              12.74
Lactate        13.60
MAP             8.30
MechVent       12.44
Mg             12.89
NIDiasABP       7.13
NIMAP           7.17
NISysABP        7.13
Na             12.91
PaCO2          12.74
PaO2           12.74
Platelets      12.89
RespRate        9.24
SaO2           13.25
SysABP          8.28
Temp            9.77
TroponinI      13.84
TroponinT      13.57
Urine           6.14
WBC            12.99
Weight          8.06
pH             12.72
dtype: float64

<h4>ICUType 1 measurements</h4>

In [96]:
ICUType_1_measurements_test = round((ICUType_1_test.count()/total_pacientes_test)*100,2)
ICUType_1_measurements_test

RecordID       13.88
level_1        13.88
Time           13.88
ALP             0.19
ALT             0.20
AST             0.20
Age            12.58
Albumin         0.13
BUN             1.00
Bilirubin       0.20
Cholesterol     0.09
Creatinine      1.01
DiasABP         5.60
FiO2            1.50
GCS             3.56
Gender          0.29
Glucose         0.97
HCO3            0.97
HCT             1.21
HR             11.96
Height         12.58
ICUType         0.29
K               1.14
Lactate         0.28
MAP             5.58
MechVent        1.44
Mg              0.99
NIDiasABP       6.75
NIMAP           6.71
NISysABP        6.75
Na              0.97
PaCO2           1.14
PaO2            1.14
Platelets       0.99
RespRate        4.65
SaO2            0.63
SysABP          5.60
Temp            4.11
TroponinI       0.04
TroponinT       0.31
Urine           7.74
WBC             0.89
Weight          5.82
pH              1.16
dtype: float64

<h4>ICUType 2 missing rate</h4>

In [97]:
ICUType_2_test_ids = test_X[test_X["ICUType"] == 2.0]
ICUType_2_test_ids = ICUType_2_test_ids[ICUType_2_test_ids["Time"] == 0.0]
ICUType_2_test_ids = ICUType_2_test_ids["RecordID"]
ICUType_2_test = test_X[test_X["RecordID"].isin(ICUType_2_test_ids)]
ICUType_2_test_missing = round((ICUType_2_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_2_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            21.79
ALT            21.79
AST            21.79
Age             0.84
Albumin        21.85
BUN            20.52
Bilirubin      21.79
Cholesterol    21.96
Creatinine     20.52
DiasABP         4.92
FiO2           18.31
GCS            16.13
Gender         21.51
Glucose        20.92
HCO3           20.63
HCT            19.56
HR              1.94
Height          0.84
ICUType        21.51
K              20.78
Lactate        21.18
MAP             4.92
MechVent       18.46
Mg             20.53
NIDiasABP      17.24
NIMAP          17.27
NISysABP       17.23
Na             20.84
PaCO2          17.56
PaO2           17.57
Platelets      20.17
RespRate       21.18
SaO2           19.64
SysABP          4.92
Temp            9.42
TroponinI      21.93
TroponinT      21.91
Urine           3.82
WBC            20.46
Weight         10.56
pH             17.14
dtype: float64

<h4>ICUType 2 measurements</h4>

In [98]:
ICUType_2_measurements_test = round((ICUType_2_test.count()/total_pacientes_test)*100,2)
ICUType_2_measurements_test

RecordID       21.97
level_1        21.97
Time           21.97
ALP             0.17
ALT             0.18
AST             0.18
Age            21.12
Albumin         0.12
BUN             1.45
Bilirubin       0.18
Cholesterol     0.01
Creatinine      1.45
DiasABP        17.05
FiO2            3.66
GCS             5.84
Gender          0.46
Glucose         1.05
HCO3            1.34
HCT             2.40
HR             20.02
Height         21.12
ICUType         0.46
K               1.18
Lactate         0.78
MAP            17.05
MechVent        3.50
Mg              1.44
NIDiasABP       4.73
NIMAP           4.70
NISysABP        4.74
Na              1.13
PaCO2           4.41
PaO2            4.40
Platelets       1.80
RespRate        0.79
SaO2            2.32
SysABP         17.05
Temp           12.55
TroponinI       0.04
TroponinT       0.06
Urine          18.15
WBC             1.51
Weight         11.41
pH              4.83
dtype: float64

<h4>ICUType 3 missing rate</h4>

In [99]:
ICUType_3_test_ids = test_X[test_X["ICUType"] == 3.0]
ICUType_3_test_ids = ICUType_3_test_ids[ICUType_3_test_ids["Time"] == 0.0]
ICUType_3_test_ids = ICUType_3_test_ids["RecordID"]
ICUType_3_test = test_X[test_X["RecordID"].isin(ICUType_3_test_ids)]
ICUType_3_test_missing = round((ICUType_3_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_3_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            34.38
ALT            34.36
AST            34.36
Age             2.35
Albumin        34.58
BUN            32.55
Bilirubin      34.33
Cholesterol    35.14
Creatinine     32.54
DiasABP        22.24
FiO2           29.75
GCS            25.70
Gender         34.45
Glucose        32.54
HCO3           32.53
HCT            32.04
HR              3.55
Height          2.35
ICUType        34.45
K              32.30
Lactate        33.79
MAP            22.44
MechVent       30.16
Mg             32.67
NIDiasABP      15.22
NIMAP          15.53
NISysABP       15.20
Na             32.48
PaCO2          32.37
PaO2           32.36
Platelets      32.75
RespRate       25.39
SaO2           34.72
SysABP         22.23
Temp           25.21
TroponinI      35.11
TroponinT      34.68
Urine          13.76
WBC            32.86
Weight         12.25
pH             32.32
dtype: float64

<h4>ICUType 3 measurements</h4>

In [100]:
ICUType_3_measurements_test = round((ICUType_3_test.count()/total_pacientes_test)*100,2)
ICUType_3_measurements_test

RecordID       35.18
level_1        35.18
Time           35.18
ALP             0.80
ALT             0.83
AST             0.83
Age            32.83
Albumin         0.60
BUN             2.63
Bilirubin       0.85
Cholesterol     0.04
Creatinine      2.65
DiasABP        12.94
FiO2            5.43
GCS             9.48
Gender          0.73
Glucose         2.64
HCO3            2.65
HCT             3.14
HR             31.63
Height         32.83
ICUType         0.73
K               2.88
Lactate         1.39
MAP            12.74
MechVent        5.02
Mg              2.51
NIDiasABP      19.96
NIMAP          19.65
NISysABP       19.98
Na              2.71
PaCO2           2.82
PaO2            2.82
Platelets       2.43
RespRate        9.80
SaO2            0.46
SysABP         12.95
Temp            9.97
TroponinI       0.07
TroponinT       0.50
Urine          21.42
WBC             2.32
Weight         22.93
pH              2.86
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [101]:
ICUType_4_test_ids = test_X[test_X["ICUType"] == 4.0]
ICUType_4_test_ids = ICUType_4_test_ids[ICUType_4_test_ids["Time"] == 0.0]
ICUType_4_test_ids = ICUType_4_test_ids["RecordID"]
ICUType_4_test = test_X[test_X["RecordID"].isin(ICUType_4_test_ids)]
ICUType_4_test_missing = round((ICUType_4_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_4_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            28.50
ALT            28.49
AST            28.49
Age             1.37
Albumin        28.55
BUN            26.82
Bilirubin      28.50
Cholesterol    28.93
Creatinine     26.81
DiasABP         9.32
FiO2           23.55
GCS            15.57
Gender         28.37
Glucose        26.80
HCO3           26.83
HCT            26.09
HR              2.47
Height          1.37
ICUType        28.37
K              26.60
Lactate        27.29
MAP             9.54
MechVent       23.58
Mg             26.77
NIDiasABP      19.23
NIMAP          19.39
NISysABP       19.22
Na             26.65
PaCO2          25.55
PaO2           25.56
Platelets      26.78
RespRate       22.29
SaO2           28.42
SysABP          9.32
Temp           18.54
TroponinI      28.91
TroponinT      28.74
Urine           7.09
WBC            26.93
Weight         16.78
pH             25.49
dtype: float64

<h4>ICUType 4 measurements</h4>

In [102]:
ICUType_4_measurements_test = round((ICUType_4_test.count()/total_pacientes_test)*100,2)
ICUType_4_measurements_test

RecordID       28.97
level_1        28.97
Time           28.97
ALP             0.47
ALT             0.48
AST             0.48
Age            27.60
Albumin         0.42
BUN             2.15
Bilirubin       0.47
Cholesterol     0.04
Creatinine      2.16
DiasABP        19.65
FiO2            5.42
GCS            13.40
Gender          0.60
Glucose         2.17
HCO3            2.14
HCT             2.88
HR             26.50
Height         27.60
ICUType         0.60
K               2.37
Lactate         1.68
MAP            19.43
MechVent        5.39
Mg              2.20
NIDiasABP       9.74
NIMAP           9.58
NISysABP        9.75
Na              2.32
PaCO2           3.42
PaO2            3.41
Platelets       2.19
RespRate        6.68
SaO2            0.55
SysABP         19.65
Temp           10.43
TroponinI       0.06
TroponinT       0.23
Urine          21.88
WBC             2.04
Weight         12.20
pH              3.48
dtype: float64

<h4>+65 missing rate</h4>

In [61]:
more_than_or_equal_to_65_test_ids = test_X[test_X["Age"] >= 65]
more_than_or_equal_to_65_test_ids = more_than_or_equal_to_65_test_ids[more_than_or_equal_to_65_test_ids["Time"] == 0.0]
more_than_or_equal_to_65_test_ids = more_than_or_equal_to_65_test_ids["RecordID"]
more_than_or_equal_to_65_test = test_X[test_X["RecordID"].isin(more_than_or_equal_to_65_test_ids)]
total_age_more_65 = more_than_or_equal_to_65_test["RecordID"].count()
more_than_or_equal_to_65_test_missing = round((more_than_or_equal_to_65_test.isna().sum()/total_pacientes_test)*100,2)
more_than_or_equal_to_65_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            51.82
ALT            51.80
AST            51.80
Age             2.86
Albumin        51.92
BUN            48.81
Bilirubin      51.80
Cholesterol    52.42
Creatinine     48.79
DiasABP        23.93
FiO2           44.16
GCS            36.04
Gender         51.43
Glucose        49.07
HCO3           48.89
HCT            47.47
HR              4.83
Height          2.86
ICUType        51.43
K              48.66
Lactate        50.51
MAP            24.09
MechVent       44.73
Mg             48.86
NIDiasABP      30.06
NIMAP          30.33
NISysABP       30.04
Na             48.92
PaCO2          46.40
PaO2           46.41
Platelets      48.69
RespRate       40.38
SaO2           50.25
SysABP         23.93
Temp           32.20
TroponinI      52.38
TroponinT      51.80
Urine          15.22
WBC            49.04
Weight         24.27
pH             46.10
dtype: float64

#### Removing variables that are not time series

In [62]:
more_than_or_equal_to_65_test = more_than_or_equal_to_65_test.drop(["Gender", "Age", "ICUType", "RecordID", "level_1", "Time", "Height"], axis=1)

#### Missing rate per subgroup

In [63]:
total_medicoes_more_65 = more_than_or_equal_to_65_test.count().sum() + more_than_or_equal_to_65_test.isna().sum().sum()

In [69]:
total_medicoes_more_65

np.int64(2237760)

In [70]:
missing_rate_more_65 = round((more_than_or_equal_to_65_test.isna().sum().sum()/total_medicoes)*100, 2)

In [71]:
missing_rate_more_65

np.float64(41.81)

<h4>+65 measurements</h4>

In [72]:
more_than_or_equal_to_65_test_measurements = round((more_than_or_equal_to_65_test.count()/total_pacientes_test)*100,2)
more_than_or_equal_to_65_test_measurements

ALP             0.70
ALT             0.72
AST             0.72
Albumin         0.60
BUN             3.71
Bilirubin       0.72
Cholesterol     0.10
Creatinine      3.73
DiasABP        28.59
FiO2            8.36
GCS            16.48
Glucose         3.45
HCO3            3.63
HCT             5.05
HR             47.69
K               3.86
Lactate         2.01
MAP            28.44
MechVent        7.79
Mg              3.66
NIDiasABP      22.46
NIMAP          22.19
NISysABP       22.48
Na              3.60
PaCO2           6.12
PaO2            6.11
Platelets       3.83
RespRate       12.14
SaO2            2.27
SysABP         28.59
Temp           20.33
TroponinI       0.15
TroponinT       0.72
Urine          37.30
WBC             3.49
Weight         28.25
pH              6.42
dtype: float64

In [73]:
round((more_than_or_equal_to_65_test.count().sum()/total_medicoes)*100, 2)

np.float64(10.72)

<h4>-65 missing rate</h4>

In [75]:
less_than_65_test_ids = test_X[test_X["Age"] < 65]
less_than_65_test_ids = less_than_65_test_ids[less_than_65_test_ids["Time"] == 0.0]
less_than_65_test_ids = less_than_65_test_ids["RecordID"]
less_than_65_test = test_X[test_X["RecordID"].isin(less_than_65_test_ids)]
total_age_less_65 = less_than_65_test["RecordID"].count()
less_than_65_test_missing = round((less_than_65_test.isna().sum()/total_pacientes_test)*100,2)
less_than_65_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            46.55
ALT            46.51
AST            46.51
Age             3.00
Albumin        46.81
BUN            43.96
Bilirubin      46.51
Cholesterol    47.41
Creatinine     43.94
DiasABP        20.82
FiO2           39.82
GCS            31.67
Gender         46.49
Glucose        44.09
HCO3           44.01
HCT            42.89
HR              5.06
Height          3.00
ICUType        46.49
K              43.76
Lactate        45.35
MAP            21.11
MechVent       39.92
Mg             44.00
NIDiasABP      28.76
NIMAP          29.02
NISysABP       28.74
Na             43.95
PaCO2          41.82
PaO2           41.82
Platelets      43.90
RespRate       37.71
SaO2           45.78
SysABP         20.82
Temp           30.74
TroponinI      47.41
TroponinT      47.11
Urine          15.58
WBC            44.20
Weight         23.37
pH             41.58
dtype: float64

#### Removing variables that are not time series

In [76]:
less_than_65_test = less_than_65_test.drop(["Gender", "Age", "ICUType", "RecordID", "level_1", "Time", "Height"], axis=1)

#### Missing rate per subgroup

In [80]:
total_medicoes_less_65 = less_than_65_test.count().sum() + less_than_65_test.isna().sum().sum()

In [82]:
total_medicoes_less_65

np.int64(2022864)

In [83]:
missing_rate_less_65 = round((less_than_65_test.isna().sum().sum()/total_medicoes)*100, 2)

In [84]:
missing_rate_less_65

np.float64(37.92)

<h4>-65 measurements</h4>

In [85]:
less_than_65_test_measurements = round((less_than_65_test.count()/total_pacientes_test)*100,2)
less_than_65_test_measurements

ALP             0.93
ALT             0.97
AST             0.97
Albumin         0.66
BUN             3.52
Bilirubin       0.97
Cholesterol     0.07
Creatinine      3.53
DiasABP        26.66
FiO2            7.66
GCS            15.81
Glucose         3.38
HCO3            3.46
HCT             4.59
HR             42.42
K               3.72
Lactate         2.13
MAP            26.37
MechVent        7.56
Mg              3.48
NIDiasABP      18.72
NIMAP          18.45
NISysABP       18.74
Na              3.53
PaCO2           5.66
PaO2            5.66
Platelets       3.58
RespRate        9.77
SaO2            1.70
SysABP         26.66
Temp           16.74
TroponinI       0.06
TroponinT       0.37
Urine          31.90
WBC             3.27
Weight         24.11
pH              5.89
dtype: float64

In [86]:
round((less_than_65_test.count().sum()/total_medicoes)*100, 2)

np.float64(9.56)

<h4>Filtering only patients who have the same height and weight</h4>

In [36]:
filtered_test_X = test_X[(test_X['Height'] != -1) & (test_X['Weight'] != -1) & (test_X['Height'].notna()) & (test_X['Weight'].notna())] 

<h4>Converting height to meters</h4>

In [37]:
filtered_test_X_metros = filtered_test_X.copy()
filtered_test_X_metros["Height"] = filtered_test_X["Height"]/100
filtered_test_X_metros["Height"]

1584      1.626
1632      1.524
1633      1.524
1634      1.524
1635      1.524
          ...  
574841    1.651
574842    1.651
574843    1.651
574844    1.651
574845    1.651
Name: Height, Length: 31259, dtype: float64

<h4>Calculate BMI and classification</h4>

In [38]:
bmi_data_test = filtered_test_X_metros
bmi_data_test["BMI"] = round(filtered_test_X_metros["Weight"] / (filtered_test_X_metros["Height"]**2), 1)
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
1584,132614,0,0.0,,,,77.0,,,,...,,,,,,,59.0,,22.3,Peso normal
1632,132615,0,0.0,,,,46.0,,,,...,0.0,36.4,,,80.0,,88.6,,38.1,Obesidade
1633,132615,1,1.0,,,,46.0,,22.0,,...,137.0,,,,180.0,22.1,88.6,7.34,38.1,Obesidade
1634,132615,2,2.0,,,,46.0,,,,...,118.0,,,,110.0,,88.6,,38.1,Obesidade
1635,132615,3,3.0,,,,46.0,,,,...,132.0,,,,75.0,,88.6,,38.1,Obesidade


<h4>Taking the first occurrence of each patient</h4>

In [39]:
bmi_data_test = bmi_data_test.groupby("RecordID").first().reset_index()
bmi_data_test

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132614,0,0.0,,,,77.0,,,,...,,,,,,,59.0,,22.3,Peso normal
1,132615,0,0.0,81.0,32.0,28.0,46.0,2.6,22.0,0.3,...,0.0,36.4,,,80.0,22.1,88.6,7.34,38.1,Obesidade
2,132623,0,0.0,,,,24.0,,,,...,,,,,,,78.0,7.45,23.3,Peso normal
3,132639,0,0.0,,,,73.0,,18.0,,...,110.0,36.9,,,30.0,10.2,96.3,7.36,29.6,Sobrepeso
4,132658,0,0.0,71.0,9.0,42.0,81.0,,18.0,1.3,...,97.0,38.4,,,90.0,61.3,105.4,7.42,30.7,Obesidade
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1238,162926,0,0.0,,,,83.0,,18.0,,...,117.0,36.9,,,60.0,12.3,104.5,7.34,35.0,Obesidade
1239,162942,0,0.0,67.0,61.0,92.0,40.0,3.3,12.0,0.3,...,,36.3,,,600.0,20.6,120.7,7.38,37.1,Obesidade
1240,162971,0,0.0,,,,63.0,,,,...,130.5,35.8,,,0.0,,83.3,,37.1,Obesidade
1241,162995,0,0.0,60.0,21.0,20.0,84.0,,93.0,0.4,...,121.0,37.1,0.6,,60.0,17.1,96.5,7.31,28.8,Sobrepeso


In [40]:
bmi_data_test["Classificacao"].value_counts()

Classificacao
Sobrepeso      422
Obesidade      391
Peso normal    386
Baixo peso      44
Name: count, dtype: int64

<h4>Classification undefined missing rate</h4>

In [41]:
classificacao_undefined_ids_test = bmi_data_test["RecordID"]
classificacao_undefined_test = test_X[~test_X["RecordID"].isin(classificacao_undefined_ids_test)]
total_classification_undefined = classificacao_undefined_test["RecordID"].count()
classificacao_undefined_missing_test = round((classificacao_undefined_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_undefined_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            47.39
ALT            47.36
AST            47.36
Age             3.69
Albumin        47.53
BUN            44.78
Bilirubin      47.36
Cholesterol    48.11
Creatinine     44.76
DiasABP        28.25
FiO2           41.25
GCS            31.82
Gender         47.18
Glucose        44.77
HCO3           44.79
HCT            43.98
HR              5.48
Height          3.69
ICUType        47.18
K              44.44
Lactate        46.47
MAP            28.53
MechVent       41.68
Mg             44.88
NIDiasABP      23.01
NIMAP          23.36
NISysABP       22.98
Na             44.62
PaCO2          44.60
PaO2           44.60
Platelets      44.98
RespRate       32.87
SaO2           47.60
SysABP         28.25
Temp           35.17
TroponinI      48.12
TroponinT      47.59
Urine          17.59
WBC            45.15
Weight         22.97
pH             44.54
dtype: float64

In [42]:
classificacao_undefined_test.count().sum() + classificacao_undefined_test.isna().sum().sum()

np.int64(2441472)

In [43]:
round((classificacao_undefined_test.isna().sum().sum()/total_medicoes)*100, 2)

np.float64(35.02)

<h4>Classification undefined measurements</h4>

In [56]:
classificacao_undefined_measurements_test = round((classificacao_undefined_test.count()/total_pacientes_test)*100,2)
classificacao_undefined_measurements_test

RecordID       48.19
level_1        48.19
Time           48.19
ALP             0.79
ALT             0.82
AST             0.82
Age            44.50
Albumin         0.66
BUN             3.41
Bilirubin       0.83
Cholesterol     0.08
Creatinine      3.42
DiasABP        19.94
FiO2            6.93
GCS            16.37
Gender          1.00
Glucose         3.42
HCO3            3.39
HCT             4.21
HR             42.71
Height         44.50
ICUType         1.00
K               3.75
Lactate         1.72
MAP            19.65
MechVent        6.51
Mg              3.31
NIDiasABP      25.18
NIMAP          24.82
NISysABP       25.21
Na              3.56
PaCO2           3.59
PaO2            3.58
Platelets       3.21
RespRate       15.31
SaO2            0.59
SysABP         19.94
Temp           13.01
TroponinI       0.07
TroponinT       0.59
Urine          30.59
WBC             3.04
Weight         25.22
pH              3.65
dtype: float64

In [57]:
round((classificacao_undefined_test.count().sum()/total_medicoes)*100, 2)

np.float64(13.17)

<h4>Classification low weight</h4>

In [44]:
classificacao_baixo_peso_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_ids_test = classificacao_baixo_peso_ids_test["RecordID"]
classificacao_baixo_peso_test = test_X[test_X["RecordID"].isin(classificacao_baixo_peso_ids_test)]
total_classificacao_baixo_peso = classificacao_baixo_peso_test["RecordID"].count()
classificacao_baixo_peso_missing_test = round((classificacao_baixo_peso_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_baixo_peso_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            1.80
ALT            1.80
AST            1.80
Age            0.06
Albumin        1.80
BUN            1.69
Bilirubin      1.80
Cholesterol    1.83
Creatinine     1.69
DiasABP        0.68
FiO2           1.49
GCS            1.29
Gender         1.80
Glucose        1.70
HCO3           1.69
HCT            1.66
HR             0.13
Height         0.06
ICUType        1.80
K              1.68
Lactate        1.75
MAP            0.67
MechVent       1.51
Mg             1.69
NIDiasABP      1.20
NIMAP          1.20
NISysABP       1.20
Na             1.69
PaCO2          1.55
PaO2           1.55
Platelets      1.67
RespRate       1.61
SaO2           1.71
SysABP         0.68
Temp           0.99
TroponinI      1.83
TroponinT      1.82
Urine          0.47
WBC            1.70
Weight         0.86
pH             1.54
dtype: float64

In [45]:
classificacao_baixo_peso_test.count().sum() + classificacao_baixo_peso_test.isna().sum().sum()

np.int64(92928)

In [42]:
round((classificacao_baixo_peso_test.isna().sum().sum()/total_medicoes)*100, 2)

np.float64(1.3)

<h4>Classification low weight measurements</h4>

In [58]:
classificacao_baixo_peso_measurements_test = round((classificacao_baixo_peso_test.count()/total_pacientes_test)*100,2)
classificacao_baixo_peso_measurements_test

RecordID       1.83
level_1        1.83
Time           1.83
ALP            0.03
ALT            0.03
AST            0.03
Age            1.78
Albumin        0.03
BUN            0.15
Bilirubin      0.03
Cholesterol    0.00
Creatinine     0.15
DiasABP        1.15
FiO2           0.34
GCS            0.54
Gender         0.04
Glucose        0.14
HCO3           0.15
HCT            0.18
HR             1.70
Height         1.78
ICUType        0.04
K              0.15
Lactate        0.09
MAP            1.16
MechVent       0.32
Mg             0.15
NIDiasABP      0.63
NIMAP          0.63
NISysABP       0.63
Na             0.14
PaCO2          0.28
PaO2           0.29
Platelets      0.16
RespRate       0.22
SaO2           0.12
SysABP         1.15
Temp           0.85
TroponinI      0.00
TroponinT      0.01
Urine          1.37
WBC            0.13
Weight         0.97
pH             0.30
dtype: float64

In [56]:
round((classificacao_baixo_peso_test.count().sum()/total_medicoes)*100, 2)

np.float64(0.54)

<h4>Classification normal weight missing rate</h4>

In [46]:
classificacao_normal_peso_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Peso normal"]
classificacao_normal_peso_ids_test = classificacao_normal_peso_ids_test["RecordID"]
classificacao_normal_peso_test = test_X[test_X["RecordID"].isin(classificacao_normal_peso_ids_test)]
total_classificacao_normal_peso = classificacao_normal_peso_test["RecordID"].count()
classificacao_normal_peso_missing_test = round((classificacao_normal_peso_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_normal_peso_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            15.83
ALT            15.82
AST            15.82
Age             0.70
Albumin        15.90
BUN            14.91
Bilirubin      15.82
Cholesterol    16.06
Creatinine     14.90
DiasABP         5.30
FiO2           13.29
GCS            11.09
Gender         15.75
Glucose        15.03
HCO3           14.95
HCT            14.41
HR              1.39
Height          0.70
ICUType        15.75
K              14.93
Lactate        15.34
MAP             5.38
MechVent       13.34
Mg             14.93
NIDiasABP      11.04
NIMAP          11.09
NISysABP       11.03
Na             14.98
PaCO2          13.63
PaO2           13.64
Platelets      14.78
RespRate       14.16
SaO2           15.11
SysABP          5.30
Temp            9.02
TroponinI      16.05
TroponinT      15.95
Urine           4.27
WBC            14.93
Weight          7.66
pH             13.46
dtype: float64

In [47]:
classificacao_normal_peso_test.count().sum() + classificacao_normal_peso_test.isna().sum().sum()

np.int64(815232)

In [45]:
round((classificacao_normal_peso_test.isna().sum().sum()/total_medicoes)*100, 2)

np.float64(11.44)

<h4>Classification normal weight measurements</h4>

In [60]:
classificacao_normal_peso_measurements_test = round((classificacao_normal_peso_test.count()/total_pacientes_test)*100,2)
classificacao_normal_peso_measurements_test

RecordID       16.09
level_1        16.09
Time           16.09
ALP             0.26
ALT             0.27
AST             0.27
Age            15.39
Albumin         0.19
BUN             1.18
Bilirubin       0.27
Cholesterol     0.03
Creatinine      1.19
DiasABP        10.79
FiO2            2.80
GCS             5.00
Gender          0.34
Glucose         1.06
HCO3            1.14
HCT             1.68
HR             14.70
Height         15.39
ICUType         0.34
K               1.16
Lactate         0.75
MAP            10.71
MechVent        2.75
Mg              1.16
NIDiasABP       5.05
NIMAP           5.00
NISysABP        5.06
Na              1.11
PaCO2           2.46
PaO2            2.45
Platelets       1.31
RespRate        1.93
SaO2            0.98
SysABP         10.79
Temp            7.07
TroponinI       0.04
TroponinT       0.14
Urine          11.82
WBC             1.16
Weight          8.43
pH              2.63
dtype: float64

In [55]:
round((classificacao_normal_peso_test.count().sum()/total_medicoes)*100, 2)

np.float64(4.65)

<h4>Classification overweight missing rate</h4>

In [48]:
classificacao_sobrepeso_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_ids_test = classificacao_sobrepeso_ids_test["RecordID"]
classificacao_sobrepeso_test = test_X[test_X["RecordID"].isin(classificacao_sobrepeso_ids_test)]
total_classificacao_sobrepeso = classificacao_sobrepeso_test["RecordID"].count()
classificacao_sobrepeso_missing_test = round((classificacao_sobrepeso_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_sobrepeso_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            17.32
ALT            17.30
AST            17.31
Age             0.73
Albumin        17.40
BUN            16.30
Bilirubin      17.31
Cholesterol    17.56
Creatinine     16.29
DiasABP         5.19
FiO2           14.64
GCS            12.13
Gender         17.22
Glucose        16.46
HCO3           16.35
HCT            15.66
HR              1.48
Height          0.73
ICUType        17.22
K              16.32
Lactate        16.83
MAP             5.21
MechVent       14.67
Mg             16.28
NIDiasABP      12.39
NIMAP          12.47
NISysABP       12.38
Na             16.42
PaCO2          14.76
PaO2           14.76
Platelets      16.13
RespRate       15.20
SaO2           16.29
SysABP          5.19
Temp            8.98
TroponinI      17.54
TroponinT      17.43
Urine           4.38
WBC            16.30
Weight          8.28
pH             14.60
dtype: float64

In [49]:
classificacao_sobrepeso_test.count().sum() + classificacao_sobrepeso_test.isna().sum().sum()

np.int64(891264)

In [47]:
round((classificacao_sobrepeso_test.isna().sum().sum()/total_medicoes)*100, 2)

np.float64(12.44)

<h4>Classification overweight measurements</h4>

In [44]:
classificacao_sobrepeso_measurements_test = round((classificacao_sobrepeso_test.count()/total_pacientes_test)*100,2)
classificacao_sobrepeso_measurements_test

RecordID       17.59
level_1        17.59
Time           17.59
ALP             0.27
ALT             0.29
AST             0.28
Age            16.86
Albumin         0.19
BUN             1.29
Bilirubin       0.28
Cholesterol     0.03
Creatinine      1.30
DiasABP        12.40
FiO2            2.95
GCS             5.46
Gender          0.37
Glucose         1.14
HCO3            1.24
HCT             1.93
HR             16.11
Height         16.86
ICUType         0.37
K               1.27
Lactate         0.77
MAP            12.38
MechVent        2.92
Mg              1.32
NIDiasABP       5.20
NIMAP           5.12
NISysABP        5.21
Na              1.17
PaCO2           2.84
PaO2            2.83
Platelets       1.46
RespRate        2.39
SaO2            1.30
SysABP         12.40
Temp            8.61
TroponinI       0.06
TroponinT       0.16
Urine          13.21
WBC             1.29
Weight          9.32
pH              2.99
dtype: float64

In [53]:
round((classificacao_sobrepeso_test.count().sum()/total_medicoes)*100, 2)

np.float64(5.15)

<h4>obesity missing rate</h4>

In [50]:
classificacao_obesidade_1_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade"]
classificacao_obesidade_1_ids_test = classificacao_obesidade_1_ids_test["RecordID"]
classificacao_obesidade_1_test = test_X[test_X["RecordID"].isin(classificacao_obesidade_1_ids_test)]
total_classificacao_obesidade = classificacao_obesidade_1_test["RecordID"].count()
classificacao_obesidade_1_missing_test = round((classificacao_obesidade_1_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_obesidade_1_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            16.02
ALT            16.02
AST            16.02
Age             0.69
Albumin        16.11
BUN            15.10
Bilirubin      16.01
Cholesterol    16.26
Creatinine     15.10
DiasABP         5.33
FiO2           13.31
GCS            11.38
Gender         15.96
Glucose        15.21
HCO3           15.13
HCT            14.65
HR              1.41
Height          0.69
ICUType        15.96
K              15.06
Lactate        15.47
MAP             5.40
MechVent       13.45
Mg             15.09
NIDiasABP      11.18
NIMAP          11.24
NISysABP       11.18
Na             15.15
PaCO2          13.68
PaO2           13.68
Platelets      15.04
RespRate       14.24
SaO2           15.32
SysABP          5.33
Temp            8.78
TroponinI      16.26
TroponinT      16.12
Urine           4.08
WBC            15.16
Weight          7.87
pH             13.54
dtype: float64

In [51]:
classificacao_obesidade_1_test.count().sum() + classificacao_obesidade_1_test.isna().sum().sum()

np.int64(825792)

In [49]:
round((classificacao_obesidade_1_test.isna().sum().sum()/total_medicoes)*100, 2)

np.float64(11.56)

<h4>Grade 1 obesity measurements</h4>

In [50]:
classificacao_obesidade_1_measurements_test = round((classificacao_obesidade_1_test.count()/total_pacientes_test)*100,2)
classificacao_obesidade_1_measurements_test

RecordID       16.30
level_1        16.30
Time           16.30
ALP             0.28
ALT             0.28
AST             0.28
Age            15.61
Albumin         0.19
BUN             1.20
Bilirubin       0.29
Cholesterol     0.03
Creatinine      1.20
DiasABP        10.97
FiO2            2.99
GCS             4.92
Gender          0.34
Glucose         1.09
HCO3            1.17
HCT             1.65
HR             14.89
Height         15.61
ICUType         0.34
K               1.24
Lactate         0.83
MAP            10.90
MechVent        2.85
Mg              1.21
NIDiasABP       5.11
NIMAP           5.06
NISysABP        5.12
Na              1.14
PaCO2           2.62
PaO2            2.61
Platelets       1.26
RespRate        2.05
SaO2            0.98
SysABP         10.97
Temp            7.52
TroponinI       0.04
TroponinT       0.18
Urine          12.22
WBC             1.14
Weight          8.43
pH              2.75
dtype: float64

In [52]:
round((classificacao_obesidade_1_test.count().sum()/total_medicoes)*100, 2)

np.float64(4.74)

<h4>Grade 2 obesity missing rate</h4>

In [51]:
classificacao_obesidade_2_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_2_ids_test = classificacao_obesidade_2_ids_test["RecordID"]
classificacao_obesidade_2_test = test_X[test_X["RecordID"].isin(classificacao_obesidade_2_ids_test)]
classificacao_obesidade_2_missing_test = round((classificacao_obesidade_2_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_obesidade_2_missing_test

RecordID       0.0
level_1        0.0
Time           0.0
ALP            0.0
ALT            0.0
AST            0.0
Age            0.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        0.0
FiO2           0.0
GCS            0.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             0.0
Height         0.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            0.0
MechVent       0.0
Mg             0.0
NIDiasABP      0.0
NIMAP          0.0
NISysABP       0.0
Na             0.0
PaCO2          0.0
PaO2           0.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         0.0
Temp           0.0
TroponinI      0.0
TroponinT      0.0
Urine          0.0
WBC            0.0
Weight         0.0
pH             0.0
dtype: float64

<h4>Grade 2 obesity measurements</h4>

In [48]:
classificacao_obesidade_2_measurements_test = round((classificacao_obesidade_2_test.count()/total_pacientes_test)*100,2)
classificacao_obesidade_2_measurements_test

RecordID       3.42
level_1        3.42
Time           3.42
ALP            0.06
ALT            0.06
AST            0.06
Age            3.26
Albumin        0.04
BUN            0.25
Bilirubin      0.06
Cholesterol    0.01
Creatinine     0.26
DiasABP        2.20
FiO2           0.56
GCS            0.99
Gender         0.07
Glucose        0.22
HCO3           0.24
HCT            0.36
HR             3.10
Height         3.26
ICUType        0.07
K              0.25
Lactate        0.16
MAP            2.20
MechVent       0.55
Mg             0.24
NIDiasABP      1.14
NIMAP          1.13
NISysABP       1.14
Na             0.23
PaCO2          0.52
PaO2           0.52
Platelets      0.28
RespRate       0.36
SaO2           0.17
SysABP         2.20
Temp           1.56
TroponinI      0.01
TroponinT      0.03
Urine          2.48
WBC            0.25
Weight         1.87
pH             0.54
dtype: float64

<h4>Grade 3 obesity missing rate</h4>

In [49]:
classificacao_obesidade_3_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_3_ids_test = classificacao_obesidade_3_ids_test["RecordID"]
classificacao_obesidade_3_test = test_X[test_X["RecordID"].isin(classificacao_obesidade_3_ids_test)]
classificacao_obesidade_3_missing_test = round((classificacao_obesidade_3_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_obesidade_3_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.77
ALT            3.77
AST            3.77
Age            0.16
Albumin        3.79
BUN            3.54
Bilirubin      3.77
Cholesterol    3.83
Creatinine     3.54
DiasABP        1.22
FiO2           3.00
GCS            2.72
Gender         3.76
Glucose        3.57
HCO3           3.55
HCT            3.47
HR             0.31
Height         0.16
ICUType        3.76
K              3.53
Lactate        3.60
MAP            1.24
MechVent       3.07
Mg             3.55
NIDiasABP      2.66
NIMAP          2.67
NISysABP       2.66
Na             3.56
PaCO2          3.14
PaO2           3.14
Platelets      3.55
RespRate       3.50
SaO2           3.58
SysABP         1.22
Temp           2.13
TroponinI      3.83
TroponinT      3.79
Urine          0.97
WBC            3.58
Weight         1.81
pH             3.11
dtype: float64

<h4>Grade 3 obesity measurements</h4>

In [125]:
classificacao_obesidade_3_measurements_test = round((classificacao_obesidade_3_test.count()/total_pacientes_test)*100,2)
classificacao_obesidade_3_measurements_test

RecordID       3.83
level_1        3.83
Time           3.83
ALP            0.06
ALT            0.06
AST            0.06
Age            3.68
Albumin        0.05
BUN            0.29
Bilirubin      0.07
Cholesterol    0.01
Creatinine     0.29
DiasABP        2.61
FiO2           0.84
GCS            1.12
Gender         0.08
Glucose        0.26
HCO3           0.29
HCT            0.37
HR             3.53
Height         3.68
ICUType        0.08
K              0.30
Lactate        0.24
MAP            2.59
MechVent       0.77
Mg             0.29
NIDiasABP      1.17
NIMAP          1.17
NISysABP       1.17
Na             0.28
PaCO2          0.70
PaO2           0.70
Platelets      0.29
RespRate       0.34
SaO2           0.26
SysABP         2.61
Temp           1.70
TroponinI      0.01
TroponinT      0.04
Urine          2.87
WBC            0.26
Weight         2.03
pH             0.73
dtype: float64

<h4>Building of the missing rate table</h4>

In [126]:
df_missing_test = pd.DataFrame(columns=df_columns)
df_missing_transpose_test = df_missing_test.T
df_missing_transpose_test ["Female"] = female_gender_missing_rate_test
df_missing_transpose_test ["Male"] = male_gender_missing_rate_test
df_missing_transpose_test["Undefined gender"] = undefined_gender_missing_rate_test
df_missing_transpose_test["ICUType 1"] = ICUType_1_test_missing
df_missing_transpose_test ["ICUType 2"] = ICUType_2_test_missing
df_missing_transpose_test ["ICUType 3"] = ICUType_3_test_missing
df_missing_transpose_test ["ICUType 4"] = ICUType_4_test_missing
df_missing_transpose_test["Age 65+"] = more_than_or_equal_to_65_test_missing
df_missing_transpose_test["Age 65-"] = less_than_65_test_missing
df_missing_transpose_test ["Low Weight"] = classificacao_baixo_peso_missing_test
df_missing_transpose_test ["Normal Weight"] = classificacao_normal_peso_missing_test
df_missing_transpose_test ["Overweight"] = classificacao_sobrepeso_missing_test
df_missing_transpose_test ["Obesity Grade 1"] = classificacao_obesidade_1_missing_test
df_missing_transpose_test ["Obesity Grade 2"] = classificacao_obesidade_2_missing_test
df_missing_transpose_test ["Obesity Grade 3"] = classificacao_obesidade_3_missing_test
df_missing_transpose_test["Undefined classification"] = classificacao_undefined_missing_test
df_missing_transpose_test = df_missing_transpose_test.drop("RecordID", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("level_1", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("Time", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("Age", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>original Missing rate per Variable by demographics - Test</h2>"))
df_missing_transpose_test 

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,42.55,55.7,0.12,13.69,21.79,34.38,28.5,51.82,46.55,1.8,15.83,17.32,8.89,3.36,3.77,47.39
ALT,42.52,55.67,0.12,13.68,21.79,34.36,28.49,51.8,46.51,1.8,15.82,17.3,8.89,3.36,3.77,47.36
AST,42.52,55.67,0.12,13.68,21.79,34.36,28.49,51.8,46.51,1.8,15.82,17.31,8.89,3.36,3.77,47.36
Albumin,42.68,55.93,0.13,13.75,21.85,34.58,28.55,51.92,46.81,1.8,15.9,17.4,8.94,3.38,3.79,47.53
BUN,40.12,52.53,0.12,12.88,20.52,32.55,26.82,48.81,43.96,1.69,14.91,16.3,8.39,3.17,3.54,44.78
Bilirubin,42.51,55.67,0.12,13.68,21.79,34.33,28.5,51.8,46.51,1.8,15.82,17.31,8.88,3.36,3.77,47.36
Cholesterol,43.19,56.51,0.13,13.79,21.96,35.14,28.93,52.42,47.41,1.83,16.06,17.56,9.03,3.41,3.83,48.11
Creatinine,40.1,52.52,0.12,12.87,20.52,32.54,26.81,48.79,43.94,1.69,14.9,16.29,8.39,3.16,3.54,44.76
DiasABP,20.39,24.31,0.05,8.28,4.92,22.24,9.32,23.93,20.82,0.68,5.3,5.19,2.89,1.21,1.22,28.25
FiO2,36.39,47.47,0.13,12.38,18.31,29.75,23.55,44.16,39.82,1.49,13.29,14.64,7.46,2.86,3.0,41.25


<h4>Building of the measurements table</h4>

In [127]:
df_measurements_test = pd.DataFrame(columns=df_columns)
df_measurements_transpose_test = df_measurements_test.T
df_measurements_transpose_test ["Female"] = female_gender_measurements_test
df_measurements_transpose_test ["Male"] = male_gender_measurements_test
df_measurements_transpose_test["Undefined gender"] = undefined_gender_measurements_test
df_measurements_transpose_test["ICUType 1"] = ICUType_1_measurements_test
df_measurements_transpose_test ["ICUType 2"] = ICUType_2_measurements_test
df_measurements_transpose_test ["ICUType 3"] = ICUType_3_measurements_test
df_measurements_transpose_test ["ICUType 4"] = ICUType_4_measurements_test
df_measurements_transpose_test["Age 65+"] = more_than_or_equal_to_65_test_measurements
df_measurements_transpose_test["Age 65-"] = less_than_65_test_measurements
df_measurements_transpose_test ["Low Weight"] =classificacao_baixo_peso_measurements_test
df_measurements_transpose_test ["Normal Weight"] = classificacao_normal_peso_measurements_test
df_measurements_transpose_test ["Overweight"] = classificacao_sobrepeso_measurements_test
df_measurements_transpose_test ["Obesity Grade 1"] = classificacao_obesidade_1_measurements_test
df_measurements_transpose_test ["Obesity Grade 2"] = classificacao_obesidade_2_measurements_test
df_measurements_transpose_test ["Obesity Grade 3"] = classificacao_obesidade_3_measurements_test
df_measurements_transpose_test["Undefined classification"] = classificacao_undefined_measurements_test
df_measurements_transpose_test = df_measurements_transpose_test.drop("RecordID", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("level_1", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("Time", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("Age", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - test Set</h2>"))
df_measurements_transpose_test 

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,0.72,0.91,0.0,0.19,0.17,0.8,0.47,0.7,0.93,0.03,0.26,0.27,0.16,0.06,0.06,0.79
ALT,0.75,0.94,0.0,0.2,0.18,0.83,0.48,0.72,0.97,0.03,0.27,0.29,0.16,0.06,0.06,0.82
AST,0.75,0.94,0.0,0.2,0.18,0.83,0.48,0.72,0.97,0.03,0.27,0.28,0.16,0.06,0.06,0.82
Albumin,0.59,0.68,0.0,0.13,0.12,0.6,0.42,0.6,0.66,0.03,0.19,0.19,0.11,0.04,0.05,0.66
BUN,3.15,4.08,0.01,1.0,1.45,2.63,2.15,3.71,3.52,0.15,1.18,1.29,0.65,0.25,0.29,3.41
Bilirubin,0.76,0.94,0.0,0.2,0.18,0.85,0.47,0.72,0.97,0.03,0.27,0.28,0.16,0.06,0.07,0.83
Cholesterol,0.08,0.1,0.0,0.09,0.01,0.04,0.04,0.1,0.07,0.0,0.03,0.03,0.02,0.01,0.01,0.08
Creatinine,3.16,4.09,0.01,1.01,1.45,2.65,2.16,3.73,3.53,0.15,1.19,1.3,0.65,0.26,0.29,3.42
DiasABP,22.88,32.3,0.07,5.6,17.05,12.94,19.65,28.59,26.66,1.15,10.79,12.4,6.16,2.2,2.61,19.94
FiO2,6.88,9.14,0.0,1.5,3.66,5.43,5.42,8.36,7.66,0.34,2.8,2.95,1.59,0.56,0.84,6.93
