# Imports

In [5]:
import os
import sys
import pandas as pd
import math
from IPython.display import display, HTML
# Put the parent directory on the import path (only once) so that packages
# living one level above this notebook can be imported.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Loading dataset

In [6]:
# Load the PhysioNet 2012 dataset through the project-local preprocessing helper.
from pypotsModify.benchpots.datasets import preprocess_physionet2012
# subset="all" and rate=0.1 are passed straight to the helper.
# NOTE(review): `rate` presumably controls artificially introduced missingness — confirm
# against preprocess_physionet2012 before interpreting the missing rates below.
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)
# NOTE(review): `teste` is not referenced by any other cell visible here; looks like a leftover.
teste = "TEste"

2024-12-04 00:44:08 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-12-04 00:44:08 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-12-04 00:44:08 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-12-04 00:44:08 [INFO]: Loaded successfully!


# Rounding

In [7]:
import math

def custom_round(value):
    """
    Round a number to two decimal places based on its second decimal digit.

    If the second decimal digit is 5 or greater, the value is rounded up
    (ceiling) to two decimals; otherwise it is truncated (floored) to two
    decimals.

    The scaling by 100 is carried out in decimal arithmetic on the shortest
    repr of the float. Multiplying the binary float directly introduces noise
    (e.g. ``0.07 * 100 == 7.000000000000001``), which made the ceiling branch
    bump values that already had two decimals (0.07 -> 0.08, 0.28 -> 0.29).

    Args:
        value (float): The decimal number to process.

    Returns:
        float: The processed value.

    Raises:
        ValueError: If ``value`` is NaN or infinite.
    """
    # Local import keeps the cell self-contained without touching the imports cell.
    from decimal import Decimal

    number = float(value)
    if not math.isfinite(number):
        raise ValueError(f"cannot round non-finite value: {value!r}")

    # repr() of a float is its shortest round-tripping decimal text, so the
    # Decimal holds exactly the digits the user sees (0.07 -> Decimal('0.07')).
    scaled = Decimal(repr(number)) * 100

    # Units digit of the scaled value == second decimal digit of the input.
    second_decimal = math.floor(scaled) % 10

    if second_decimal >= 5:
        # Round up to the next hundredth (no-op when already on a hundredth).
        return math.ceil(scaled) / 100
    # Drop everything past the second decimal.
    return math.floor(scaled) / 100

# Examples
print(custom_round(0.66789))  # Output: 0.67
print(custom_round(0.2856))   # Output: 0.29 (second decimal is 8, so the value is rounded up)


0.67
0.29


In [3]:
# Spot check: the second decimal digit of 0.07178 is 7, so custom_round takes its round-up branch.
custom_round(0.07178)

NameError: name 'custom_round' is not defined

# Training data

<h4>Loading training dataset</h4>

In [4]:
# Training split of the loaded dataset. The cells below use it as a pandas DataFrame
# (groupby, isna, count) with at least the columns RecordID, Time, Gender, ICUType,
# Age, Height and Weight.
train_X = physionet2012_dataset['train_X']

<h4>Total number of time steps (48 hours × number of patients)</h4>

In [6]:
# One row per RecordID, so counting RecordID gives the number of patients.
one_row_per_patient = train_X.groupby("RecordID").first().reset_index()
# Every patient is assumed to contribute 48 time steps, hence the factor of 48.
total_pacientes = one_row_per_patient["RecordID"].count() * 48
total_pacientes

368208

<h4>Female gender missing rate</h4>

In [7]:
# RecordIDs that have at least one row with Gender == 0.0 (the "female" group of this section).
is_female_row = train_X["Gender"] == 0.0
female_gender_ids = train_X.loc[is_female_row, "RecordID"]

# All rows of those patients; NaNs per column are expressed as a percentage of
# total_pacientes (the whole training set), not of the group's own row count.
female_rows = train_X.loc[train_X["RecordID"].isin(female_gender_ids)]
female_gender_missing_rate = (
    female_rows.isna().sum()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
female_gender_missing_rate

RecordID        0.0
level_1         0.0
Time            0.0
ALP            44.0
ALT            44.0
AST            44.0
Age             3.0
Albumin        44.0
BUN            41.0
Bilirubin      44.0
Cholesterol    44.0
Creatinine     41.0
DiasABP        22.0
FiO2           38.0
GCS            30.0
Gender         44.0
Glucose        41.0
HCO3           41.0
HCT            40.0
HR              4.0
Height          3.0
ICUType        44.0
K              41.0
Lactate        43.0
MAP            22.0
MechVent       38.0
Mg             41.0
NIDiasABP      25.0
NIMAP          25.0
NISysABP       25.0
Na             41.0
PaCO2          40.0
PaO2           40.0
Platelets      41.0
RespRate       32.0
SaO2           43.0
SysABP         22.0
Temp           29.0
TroponinI      44.0
TroponinT      44.0
Urine          14.0
WBC            42.0
Weight         21.0
pH             40.0
dtype: float64

<h4>Female gender measurements</h4>

In [8]:
# Non-null values per column for the female group, as a percentage of total_pacientes.
female_rows_training = train_X.loc[train_X["RecordID"].isin(female_gender_ids)]
female_gender_measurements_training = (
    female_rows_training.count()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
female_gender_measurements_training

RecordID       45.0
level_1        45.0
Time           45.0
ALP             1.0
ALT             1.0
AST             1.0
Age            42.0
Albumin         1.0
BUN             3.0
Bilirubin       1.0
Cholesterol     0.0
Creatinine      3.0
DiasABP        23.0
FiO2            7.0
GCS            14.0
Gender          1.0
Glucose         3.0
HCO3            3.0
HCT             4.0
HR             40.0
Height         42.0
ICUType         1.0
K               3.0
Lactate         2.0
MAP            23.0
MechVent        7.0
Mg              3.0
NIDiasABP      20.0
NIMAP          19.0
NISysABP       20.0
Na              3.0
PaCO2           5.0
PaO2            5.0
Platelets       3.0
RespRate       12.0
SaO2            2.0
SysABP         23.0
Temp           16.0
TroponinI       0.0
TroponinT       0.0
Urine          31.0
WBC             3.0
Weight         24.0
pH              5.0
dtype: float64

<h4>Male gender missing rate</h4>

In [9]:
# RecordIDs that have at least one row with Gender == 1.0 (the "male" group of this section).
is_male_row = train_X["Gender"] == 1.0
male_gender_ids = train_X.loc[is_male_row, "RecordID"]

# All rows of those patients; NaNs per column are expressed as a percentage of
# total_pacientes (the whole training set), not of the group's own row count.
male_rows = train_X.loc[train_X["RecordID"].isin(male_gender_ids)]
male_gender_missing_rate = (
    male_rows.isna().sum()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
male_gender_missing_rate

RecordID        0.0
level_1         0.0
Time            0.0
ALP            54.0
ALT            54.0
AST            54.0
Age             3.0
Albumin        55.0
BUN            51.0
Bilirubin      54.0
Cholesterol    55.0
Creatinine     51.0
DiasABP        24.0
FiO2           46.0
GCS            38.0
Gender         54.0
Glucose        52.0
HCO3           51.0
HCT            50.0
HR              5.0
Height          3.0
ICUType        54.0
K              51.0
Lactate        53.0
MAP            24.0
MechVent       47.0
Mg             51.0
NIDiasABP      33.0
NIMAP          33.0
NISysABP       33.0
Na             51.0
PaCO2          49.0
PaO2           49.0
Platelets      51.0
RespRate       43.0
SaO2           53.0
SysABP         24.0
Temp           34.0
TroponinI      55.0
TroponinT      55.0
Urine          17.0
WBC            52.0
Weight         27.0
pH             48.0
dtype: float64

<h4>Male gender measurements</h4>

In [10]:
# Non-null values per column for the male group, as a percentage of total_pacientes.
male_rows_training = train_X.loc[train_X["RecordID"].isin(male_gender_ids)]
male_gender_measurements_training = (
    male_rows_training.count()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
male_gender_measurements_training

RecordID       55.0
level_1        55.0
Time           55.0
ALP             1.0
ALT             1.0
AST             1.0
Age            52.0
Albumin         1.0
BUN             4.0
Bilirubin       1.0
Cholesterol     0.0
Creatinine      4.0
DiasABP        31.0
FiO2            9.0
GCS            18.0
Gender          1.0
Glucose         4.0
HCO3            4.0
HCT             5.0
HR             50.0
Height         52.0
ICUType         1.0
K               4.0
Lactate         2.0
MAP            31.0
MechVent        8.0
Mg              4.0
NIDiasABP      22.0
NIMAP          22.0
NISysABP       22.0
Na              4.0
PaCO2           7.0
PaO2            7.0
Platelets       4.0
RespRate       12.0
SaO2            2.0
SysABP         31.0
Temp           21.0
TroponinI       0.0
TroponinT       1.0
Urine          38.0
WBC             4.0
Weight         29.0
pH              7.0
dtype: float64

<h4>Undefined gender missing rate</h4> 

In [11]:
# RecordIDs that have at least one row with Gender == -1.0 (the "undefined" group of this section).
is_undefined_gender_row = train_X["Gender"] == -1.0
undefined_gender_ids = train_X.loc[is_undefined_gender_row, "RecordID"]

# All rows of those patients; NaNs per column are expressed as a percentage of
# total_pacientes (the whole training set), not of the group's own row count.
undefined_gender_rows = train_X.loc[train_X["RecordID"].isin(undefined_gender_ids)]
undefined_gender_missing_rate = (
    undefined_gender_rows.isna().sum()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
undefined_gender_missing_rate

RecordID       0.0
level_1        0.0
Time           0.0
ALP            0.0
ALT            0.0
AST            0.0
Age            0.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        0.0
FiO2           0.0
GCS            0.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             0.0
Height         0.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            0.0
MechVent       0.0
Mg             0.0
NIDiasABP      0.0
NIMAP          0.0
NISysABP       0.0
Na             0.0
PaCO2          0.0
PaO2           0.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         0.0
Temp           0.0
TroponinI      0.0
TroponinT      0.0
Urine          0.0
WBC            0.0
Weight         0.0
pH             0.0
dtype: float64

<h4>Undefined gender measurements</h4>

In [12]:
# Non-null values per column for the undefined-gender group, as a percentage of total_pacientes.
undefined_gender_rows_training = train_X.loc[train_X["RecordID"].isin(undefined_gender_ids)]
undefined_gender_measurements_training = (
    undefined_gender_rows_training.count()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
undefined_gender_measurements_training


RecordID       0.0
level_1        0.0
Time           0.0
ALP            0.0
ALT            0.0
AST            0.0
Age            0.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        0.0
FiO2           0.0
GCS            0.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             0.0
Height         0.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            0.0
MechVent       0.0
Mg             0.0
NIDiasABP      0.0
NIMAP          0.0
NISysABP       0.0
Na             0.0
PaCO2          0.0
PaO2           0.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         0.0
Temp           0.0
TroponinI      0.0
TroponinT      0.0
Urine          0.0
WBC            0.0
Weight         0.0
pH             0.0
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [13]:
# Patients whose row at Time == 0.0 carries ICUType == 1.0.
icu1_first_step = (train_X["ICUType"] == 1.0) & (train_X["Time"] == 0.0)
ICUType_1_training_ids = train_X.loc[icu1_first_step, "RecordID"]

# Every row belonging to those patients (reused by the measurements cell).
ICUType_1_training = train_X.loc[train_X["RecordID"].isin(ICUType_1_training_ids)]

# NaNs per column as a fraction of total_pacientes; kept as a fraction (not multiplied by 100).
ICUType_1_training_missing = ICUType_1_training.isna().sum().div(total_pacientes).round(2)
ICUType_1_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.14
ALT            0.14
AST            0.14
Age            0.01
Albumin        0.14
BUN            0.13
Bilirubin      0.14
Cholesterol    0.14
Creatinine     0.13
DiasABP        0.08
FiO2           0.13
GCS            0.11
Gender         0.14
Glucose        0.13
HCO3           0.13
HCT            0.13
HR             0.02
Height         0.01
ICUType        0.14
K              0.13
Lactate        0.14
MAP            0.08
MechVent       0.13
Mg             0.13
NIDiasABP      0.08
NIMAP          0.08
NISysABP       0.08
Na             0.13
PaCO2          0.13
PaO2           0.13
Platelets      0.13
RespRate       0.09
SaO2           0.14
SysABP         0.08
Temp           0.10
TroponinI      0.14
TroponinT      0.14
Urine          0.06
WBC            0.13
Weight         0.08
pH             0.13
dtype: float64

<h4>ICUType 1 measurements</h4>

In [14]:
# Non-null values per column for ICUType 1 patients, as a fraction of total_pacientes
# (kept as a fraction, not multiplied by 100).
icu1_non_null_counts = ICUType_1_training.count()
ICUType_1_measurements_training = icu1_non_null_counts.div(total_pacientes).round(2)
ICUType_1_measurements_training

RecordID       0.14
level_1        0.14
Time           0.14
ALP            0.00
ALT            0.00
AST            0.00
Age            0.13
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.06
FiO2           0.02
GCS            0.04
Gender         0.00
Glucose        0.01
HCO3           0.01
HCT            0.01
HR             0.12
Height         0.13
ICUType        0.00
K              0.01
Lactate        0.00
MAP            0.06
MechVent       0.01
Mg             0.01
NIDiasABP      0.07
NIMAP          0.07
NISysABP       0.07
Na             0.01
PaCO2          0.01
PaO2           0.01
Platelets      0.01
RespRate       0.05
SaO2           0.01
SysABP         0.06
Temp           0.04
TroponinI      0.00
TroponinT      0.00
Urine          0.08
WBC            0.01
Weight         0.06
pH             0.01
dtype: float64

<h4>ICUType 2 missing rate</h4>

In [15]:
# Patients whose row at Time == 0.0 carries ICUType == 2.0.
icu2_first_step = (train_X["ICUType"] == 2.0) & (train_X["Time"] == 0.0)
ICUType_2_training_ids = train_X.loc[icu2_first_step, "RecordID"]

# Every row belonging to those patients (reused by the measurements cell).
ICUType_2_training = train_X.loc[train_X["RecordID"].isin(ICUType_2_training_ids)]

# NaNs per column as a fraction of total_pacientes; kept as a fraction (not multiplied by 100).
ICUType_2_training_missing = ICUType_2_training.isna().sum().div(total_pacientes).round(2)
ICUType_2_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.21
ALT            0.21
AST            0.21
Age            0.01
Albumin        0.21
BUN            0.20
Bilirubin      0.21
Cholesterol    0.21
Creatinine     0.20
DiasABP        0.04
FiO2           0.17
GCS            0.15
Gender         0.21
Glucose        0.20
HCO3           0.20
HCT            0.19
HR             0.02
Height         0.01
ICUType        0.21
K              0.20
Lactate        0.20
MAP            0.04
MechVent       0.17
Mg             0.20
NIDiasABP      0.17
NIMAP          0.17
NISysABP       0.17
Na             0.20
PaCO2          0.17
PaO2           0.17
Platelets      0.19
RespRate       0.20
SaO2           0.19
SysABP         0.04
Temp           0.09
TroponinI      0.21
TroponinT      0.21
Urine          0.03
WBC            0.20
Weight         0.10
pH             0.16
dtype: float64

<h4>ICUType 2 measurements</h4>

In [16]:
# Non-null values per column for ICUType 2 patients, as a fraction of total_pacientes
# (kept as a fraction, not multiplied by 100).
icu2_non_null_counts = ICUType_2_training.count()
ICUType_2_measurements_training = icu2_non_null_counts.div(total_pacientes).round(2)
ICUType_2_measurements_training

RecordID       0.21
level_1        0.21
Time           0.21
ALP            0.00
ALT            0.00
AST            0.00
Age            0.20
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.17
FiO2           0.04
GCS            0.06
Gender         0.00
Glucose        0.01
HCO3           0.01
HCT            0.02
HR             0.19
Height         0.20
ICUType        0.00
K              0.01
Lactate        0.01
MAP            0.17
MechVent       0.04
Mg             0.01
NIDiasABP      0.04
NIMAP          0.04
NISysABP       0.04
Na             0.01
PaCO2          0.04
PaO2           0.04
Platelets      0.02
RespRate       0.01
SaO2           0.02
SysABP         0.17
Temp           0.12
TroponinI      0.00
TroponinT      0.00
Urine          0.18
WBC            0.01
Weight         0.11
pH             0.05
dtype: float64

<h4>ICUType 3 missing rate</h4>

In [17]:
# Patients whose row at Time == 0.0 carries ICUType == 3.0.
icu3_first_step = (train_X["ICUType"] == 3.0) & (train_X["Time"] == 0.0)
ICUType_3_training_ids = train_X.loc[icu3_first_step, "RecordID"]

# Every row belonging to those patients (reused by the measurements cell).
ICUType_3_training = train_X.loc[train_X["RecordID"].isin(ICUType_3_training_ids)]

# NaNs per column as a fraction of total_pacientes; kept as a fraction (not multiplied by 100).
ICUType_3_training_missing = ICUType_3_training.isna().sum().div(total_pacientes).round(2)
ICUType_3_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.36
ALT            0.36
AST            0.36
Age            0.03
Albumin        0.36
BUN            0.34
Bilirubin      0.36
Cholesterol    0.36
Creatinine     0.34
DiasABP        0.23
FiO2           0.31
GCS            0.27
Gender         0.36
Glucose        0.34
HCO3           0.34
HCT            0.33
HR             0.04
Height         0.03
ICUType        0.36
K              0.34
Lactate        0.35
MAP            0.24
MechVent       0.32
Mg             0.34
NIDiasABP      0.16
NIMAP          0.16
NISysABP       0.16
Na             0.34
PaCO2          0.34
PaO2           0.34
Platelets      0.34
RespRate       0.25
SaO2           0.36
SysABP         0.23
Temp           0.26
TroponinI      0.36
TroponinT      0.36
Urine          0.14
WBC            0.34
Weight         0.13
pH             0.34
dtype: float64

<h4>ICUType 3 measurements</h4>

In [18]:
# Non-null values per column for ICUType 3 patients, as a fraction of total_pacientes
# (kept as a fraction, not multiplied by 100).
icu3_non_null_counts = ICUType_3_training.count()
ICUType_3_measurements_training = icu3_non_null_counts.div(total_pacientes).round(2)
ICUType_3_measurements_training

RecordID       0.37
level_1        0.37
Time           0.37
ALP            0.01
ALT            0.01
AST            0.01
Age            0.34
Albumin        0.01
BUN            0.03
Bilirubin      0.01
Cholesterol    0.00
Creatinine     0.03
DiasABP        0.13
FiO2           0.05
GCS            0.10
Gender         0.01
Glucose        0.03
HCO3           0.03
HCT            0.03
HR             0.33
Height         0.34
ICUType        0.01
K              0.03
Lactate        0.01
MAP            0.13
MechVent       0.05
Mg             0.03
NIDiasABP      0.21
NIMAP          0.20
NISysABP       0.21
Na             0.03
PaCO2          0.03
PaO2           0.03
Platelets      0.02
RespRate       0.11
SaO2           0.00
SysABP         0.13
Temp           0.10
TroponinI      0.00
TroponinT      0.00
Urine          0.22
WBC            0.02
Weight         0.23
pH             0.03
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [19]:
# Patients whose row at Time == 0.0 carries ICUType == 4.0.
icu4_first_step = (train_X["ICUType"] == 4.0) & (train_X["Time"] == 0.0)
ICUType_4_training_ids = train_X.loc[icu4_first_step, "RecordID"]

# Every row belonging to those patients (reused by the measurements cell).
ICUType_4_training = train_X.loc[train_X["RecordID"].isin(ICUType_4_training_ids)]

# NaNs per column as a fraction of total_pacientes; kept as a fraction (not multiplied by 100).
ICUType_4_training_missing = ICUType_4_training.isna().sum().div(total_pacientes).round(2)
ICUType_4_training_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.28
ALT            0.28
AST            0.28
Age            0.01
Albumin        0.28
BUN            0.26
Bilirubin      0.28
Cholesterol    0.28
Creatinine     0.26
DiasABP        0.10
FiO2           0.23
GCS            0.15
Gender         0.28
Glucose        0.26
HCO3           0.26
HCT            0.25
HR             0.02
Height         0.01
ICUType        0.28
K              0.26
Lactate        0.27
MAP            0.10
MechVent       0.23
Mg             0.26
NIDiasABP      0.18
NIMAP          0.18
NISysABP       0.18
Na             0.26
PaCO2          0.25
PaO2           0.25
Platelets      0.26
RespRate       0.21
SaO2           0.28
SysABP         0.10
Temp           0.18
TroponinI      0.28
TroponinT      0.28
Urine          0.07
WBC            0.26
Weight         0.16
pH             0.25
dtype: float64

<h4>ICUType 4 measurements</h4>

In [20]:
# Non-null values per column for ICUType 4 patients, as a fraction of total_pacientes
# (kept as a fraction, not multiplied by 100).
icu4_non_null_counts = ICUType_4_training.count()
ICUType_4_measurements_training = icu4_non_null_counts.div(total_pacientes).round(2)
ICUType_4_measurements_training

RecordID       0.28
level_1        0.28
Time           0.28
ALP            0.00
ALT            0.00
AST            0.00
Age            0.27
Albumin        0.00
BUN            0.02
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.02
DiasABP        0.19
FiO2           0.05
GCS            0.13
Gender         0.01
Glucose        0.02
HCO3           0.02
HCT            0.03
HR             0.26
Height         0.27
ICUType        0.01
K              0.02
Lactate        0.02
MAP            0.19
MechVent       0.05
Mg             0.02
NIDiasABP      0.10
NIMAP          0.10
NISysABP       0.10
Na             0.02
PaCO2          0.03
PaO2           0.03
Platelets      0.02
RespRate       0.07
SaO2           0.01
SysABP         0.19
Temp           0.10
TroponinI      0.00
TroponinT      0.00
Urine          0.21
WBC            0.02
Weight         0.12
pH             0.03
dtype: float64

<h4>+65 missing rate</h4>

In [85]:
# Patients whose row at Time == 0.0 has Age >= 65.
age_65_plus_first_step = (train_X["Age"] >= 65) & (train_X["Time"] == 0.0)
more_than_or_equal_to_65_train_ids = train_X.loc[age_65_plus_first_step, "RecordID"]

# Every row belonging to those patients (reused by the measurements cell).
more_than_or_equal_to_65_train = train_X.loc[
    train_X["RecordID"].isin(more_than_or_equal_to_65_train_ids)
]

# NaNs per column as a raw fraction of total_pacientes (no rounding, not multiplied by 100).
more_than_or_equal_to_65_train_missing = (
    more_than_or_equal_to_65_train.isna().sum().div(total_pacientes)
)
more_than_or_equal_to_65_train_missing

RecordID       0.000000
level_1        0.000000
Time           0.000000
ALP            0.536037
ALT            0.535836
AST            0.535819
Age            0.030274
Albumin        0.537172
BUN            0.504913
Bilirubin      0.535678
Cholesterol    0.542204
Creatinine     0.504704
DiasABP        0.243558
FiO2           0.458108
GCS            0.374253
Gender         0.531898
Glucose        0.507585
HCO3           0.505812
HCT            0.492290
HR             0.051004
Height         0.030274
ICUType        0.531898
K              0.503427
Lactate        0.521792
MAP            0.244989
MechVent       0.463833
Mg             0.505367
NIDiasABP      0.317481
NIMAP          0.320142
NISysABP       0.317269
Na             0.506119
PaCO2          0.479446
PaO2           0.479552
Platelets      0.504180
RespRate       0.406670
SaO2           0.517998
SysABP         0.243534
Temp           0.333412
TroponinI      0.541816
TroponinT      0.535800
Urine          0.157359
WBC            0

<h4>+65 measurements</h4>

In [86]:
# Non-null values per column for patients aged 65 or more, as a raw fraction of
# total_pacientes (no rounding, not multiplied by 100).
age_65_plus_non_null_counts = more_than_or_equal_to_65_train.count()
age_65_and_above_measurements_training = age_65_plus_non_null_counts.div(total_pacientes)
age_65_and_above_measurements_training

RecordID       0.543215
level_1        0.543215
Time           0.543215
ALP            0.007178
ALT            0.007379
AST            0.007395
Age            0.512941
Albumin        0.006043
BUN            0.038302
Bilirubin      0.007537
Cholesterol    0.001010
Creatinine     0.038511
DiasABP        0.299657
FiO2           0.085107
GCS            0.168962
Gender         0.011317
Glucose        0.035629
HCO3           0.037403
HCT            0.050925
HR             0.492211
Height         0.512941
ICUType        0.011317
K              0.039787
Lactate        0.021423
MAP            0.298225
MechVent       0.079382
Mg             0.037848
NIDiasABP      0.225734
NIMAP          0.223072
NISysABP       0.225946
Na             0.037096
PaCO2          0.063768
PaO2           0.063662
Platelets      0.039035
RespRate       0.136545
SaO2           0.025217
SysABP         0.299681
Temp           0.209803
TroponinI      0.001399
TroponinT      0.007414
Urine          0.385855
WBC            0

<h4>-65 missing rate</h4>

In [87]:
# Patients whose row at Time == 0.0 has Age < 65.
age_under_65_first_step = (train_X["Age"] < 65) & (train_X["Time"] == 0.0)
less_than_65_train_ids = train_X.loc[age_under_65_first_step, "RecordID"]

# Every row belonging to those patients (reused by the measurements cell).
less_than_65_train = train_X.loc[train_X["RecordID"].isin(less_than_65_train_ids)]

# NaNs per column as a raw fraction of total_pacientes (no rounding, not multiplied by 100).
less_than_65_train_missing = less_than_65_train.isna().sum().div(total_pacientes)
less_than_65_train_missing


RecordID       0.000000
level_1        0.000000
Time           0.000000
ALP            0.447570
ALT            0.447264
AST            0.447277
Age            0.028321
Albumin        0.450164
BUN            0.422661
Bilirubin      0.447269
Cholesterol    0.456093
Creatinine     0.422538
DiasABP        0.214186
FiO2           0.385070
GCS            0.306129
Gender         0.447269
Glucose        0.424192
HCO3           0.423361
HCT            0.412813
HR             0.047318
Height         0.028321
ICUType        0.447269
K              0.420895
Lactate        0.437068
MAP            0.215870
MechVent       0.386306
Mg             0.423747
NIDiasABP      0.264644
NIMAP          0.268093
NISysABP       0.264484
Na             0.422891
PaCO2          0.405111
PaO2           0.405181
Platelets      0.422248
RespRate       0.347478
SaO2           0.441454
SysABP         0.214162
Temp           0.295224
TroponinI      0.456166
TroponinT      0.453556
Urine          0.150385
WBC            0

<h4>-65 measurements</h4>

In [92]:
# Non-null values per column for patients under 65, as a raw fraction of
# total_pacientes (no rounding, not multiplied by 100).
age_under_65_non_null_counts = less_than_65_train.count()
age_under_65_measurements_training = age_under_65_non_null_counts.div(total_pacientes)
age_under_65_measurements_training

RecordID       0.456785
level_1        0.456785
Time           0.456785
ALP            0.009215
ALT            0.009522
AST            0.009508
Age            0.428464
Albumin        0.006621
BUN            0.034125
Bilirubin      0.009516
Cholesterol    0.000693
Creatinine     0.034247
DiasABP        0.242599
FiO2           0.071715
GCS            0.150657
Gender         0.009516
Glucose        0.032593
HCO3           0.033424
HCT            0.043972
HR             0.409467
Height         0.428464
ICUType        0.009516
K              0.035890
Lactate        0.019717
MAP            0.240915
MechVent       0.070479
Mg             0.033038
NIDiasABP      0.192141
NIMAP          0.188692
NISysABP       0.192302
Na             0.033894
PaCO2          0.051675
PaO2           0.051604
Platelets      0.034538
RespRate       0.109308
SaO2           0.015331
SysABP         0.242624
Temp           0.161561
TroponinI      0.000619
TroponinT      0.003229
Urine          0.306400
WBC            0

<h4>Filtering only rows that have a valid height and weight (neither -1 nor missing)</h4>

In [25]:
# Keep only the rows where both Height and Weight are present and different from -1.
height_is_valid = train_X["Height"].ne(-1) & train_X["Height"].notna()
weight_is_valid = train_X["Weight"].ne(-1) & train_X["Weight"].notna()
filtered_train_X = train_X[height_is_valid & weight_is_valid]

<h4>Classify BMI</h4>

In [26]:
def classify_BMI(BMI):
    """
    Map a body-mass-index value to its weight-class label.

    The classes are contiguous, so every real number receives a label:

        BMI <= 18.5        -> "Baixo peso"
        18.5 < BMI < 25    -> "Peso normal"
        25 <= BMI < 30     -> "Sobrepeso"
        30 <= BMI < 35     -> "Obesidade grau 1"
        35 <= BMI < 40     -> "Obesidade grau 2"
        BMI >= 40          -> "Obesidade grau 3"

    For values with a single decimal place this yields the same labels as the
    previous closed ranges (18.6-24.9, 25-29.9, ...); values that used to fall
    between two ranges (e.g. 24.95) are no longer left unclassified.

    Args:
        BMI (float): Body-mass index.

    Returns:
        str | None: The class label, or None when BMI is NaN (every
        comparison is false for NaN).
    """
    if BMI <= 18.5:
        return "Baixo peso"
    if BMI < 25:
        return "Peso normal"
    if BMI < 30:
        return "Sobrepeso"
    if BMI < 35:
        return "Obesidade grau 1"
    if BMI < 40:
        return "Obesidade grau 2"
    if BMI >= 40:
        return "Obesidade grau 3"
    # Only reachable when no comparison holds, i.e. BMI is NaN.
    return None

<h4>Set the height to meters</h4>

In [27]:
# Work on an independent copy so filtered_train_X itself is left untouched.
filtered_train_X_metros = filtered_train_X.copy()
# Height divided by 100 (presumably centimetres -> metres, per the section heading).
filtered_train_X_metros["Height"] = filtered_train_X_metros["Height"].div(100)
filtered_train_X_metros["Height"]

144       1.803
145       1.803
146       1.803
147       1.803
148       1.803
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 101196, dtype: float64

<h4>BMI Calculation and Classification</h4>

In [28]:
# Plain assignment: bmi_data_train is the very same object as filtered_train_X_metros,
# so the BMI and Classificacao columns are added to that frame as well.
bmi_data_train = filtered_train_X_metros
# BMI = Weight / Height^2, rounded to one decimal place.
weight_over_height_sq = bmi_data_train["Weight"] / bmi_data_train["Height"] ** 2
bmi_data_train["BMI"] = weight_over_height_sq.round(1)
# Label every row with its BMI class.
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
144,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,,11.5,84.6,,26.0,Sobrepeso
145,132543,1,1.0,,,,68.0,,,,...,,,,,,,84.6,,26.0,Sobrepeso
146,132543,2,2.0,,,,68.0,,,,...,,,,,,,84.6,,26.0,Sobrepeso
147,132543,3,3.0,,,,68.0,,,,...,,36.4,,,,,84.6,,26.0,Sobrepeso
148,132543,4,4.0,,,,68.0,,,,...,,,,,,,84.6,,26.0,Sobrepeso


<h4>Taking only the first occurrence of each patient</h4>

In [29]:
# Collapse to one row per RecordID. GroupBy.first() takes, for each column, the first
# non-null value within the group, so a row may combine values from different time steps.
first_values_per_patient = bmi_data_train.groupby("RecordID").first()
bmi_data_train = first_values_per_patient.reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,600.0,11.5,84.6,,26.0,Sobrepeso
1,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
2,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.0,3.5,,120.0,16.1,48.4,7.40,18.3,Baixo peso
3,132568,0,0.0,,,,66.0,,18.0,,...,,36.1,,,220.0,14.8,84.5,,34.1,Obesidade grau 1
4,132570,0,0.0,19.0,15.0,20.0,84.0,,83.0,0.1,...,,36.6,,,600.0,8.8,102.6,,35.4,Obesidade grau 2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,163008,0,0.0,,,,59.0,,24.0,,...,97.00,37.6,,,45.0,6.9,98.5,7.38,34.0,Obesidade grau 1
3978,163013,0,0.0,82.0,11.0,30.0,74.0,2.5,30.0,1.2,...,118.00,36.5,,0.03,40.0,9.6,68.6,7.35,29.5,Sobrepeso
3979,163021,0,0.0,,,,72.0,,9.0,,...,,,,,,8.6,62.0,,20.8,Peso normal
3980,163029,0,0.0,,,,61.0,,,,...,,,,,,,85.0,,28.5,Sobrepeso


In [30]:
# Count how many rows of bmi_data_train carry each BMI class label.
bmi_data_train["Classificacao"].value_counts()

Classificacao
Sobrepeso           1385
Peso normal         1153
Obesidade grau 1     722
Obesidade grau 2     323
Obesidade grau 3     275
Baixo peso           124
Name: count, dtype: int64

<h4>Classification Undefined missing rate</h4>

In [31]:
# RecordIDs that received a BMI class; every other patient counts as "undefined".
classificacao_undefined_ids = bmi_data_train["RecordID"]
has_bmi_class = train_X["RecordID"].isin(classificacao_undefined_ids)
classificacao_undefined = train_X.loc[~has_bmi_class]

# NaNs per column as a percentage of total_pacientes (the whole training set).
classificacao_undefined_missing = (
    classificacao_undefined.isna().sum()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
classificacao_undefined_missing

RecordID        0.0
level_1         0.0
Time            0.0
ALP            47.0
ALT            47.0
AST            47.0
Age             4.0
Albumin        47.0
BUN            45.0
Bilirubin      47.0
Cholesterol    48.0
Creatinine     45.0
DiasABP        29.0
FiO2           42.0
GCS            32.0
Gender         47.0
Glucose        45.0
HCO3           45.0
HCT            44.0
HR              5.0
Height          4.0
ICUType        47.0
K              44.0
Lactate        46.0
MAP            29.0
MechVent       42.0
Mg             45.0
NIDiasABP      23.0
NIMAP          23.0
NISysABP       23.0
Na             45.0
PaCO2          45.0
PaO2           45.0
Platelets      45.0
RespRate       32.0
SaO2           48.0
SysABP         29.0
Temp           35.0
TroponinI      48.0
TroponinT      48.0
Urine          17.0
WBC            45.0
Weight         23.0
pH             45.0
dtype: float64

<h4>Classification Undefined measurements</h4>

In [32]:
# Non-null values per column for patients without a BMI class, as a percentage of total_pacientes.
classification_undefined_measurements = (
    classificacao_undefined.count()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
classification_undefined_measurements

RecordID       48.0
level_1        48.0
Time           48.0
ALP             1.0
ALT             1.0
AST             1.0
Age            44.0
Albumin         1.0
BUN             3.0
Bilirubin       1.0
Cholesterol     0.0
Creatinine      3.0
DiasABP        19.0
FiO2            7.0
GCS            16.0
Gender          1.0
Glucose         3.0
HCO3            3.0
HCT             4.0
HR             43.0
Height         44.0
ICUType         1.0
K               4.0
Lactate         2.0
MAP            19.0
MechVent        6.0
Mg              3.0
NIDiasABP      25.0
NIMAP          25.0
NISysABP       26.0
Na              4.0
PaCO2           3.0
PaO2            3.0
Platelets       3.0
RespRate       17.0
SaO2            1.0
SysABP         19.0
Temp           13.0
TroponinI       0.0
TroponinT       1.0
Urine          31.0
WBC             3.0
Weight         25.0
pH              3.0
dtype: float64

<h4>Low weight classification missing rate</h4>

In [33]:
# RecordIDs of the patients labelled "Baixo peso".
is_baixo_peso = bmi_data_train["Classificacao"] == "Baixo peso"
classificacao_baixo_peso_ids = bmi_data_train.loc[is_baixo_peso, "RecordID"]

# Every training row belonging to those patients (reused by the measurements cell).
classificacao_baixo_peso = train_X.loc[train_X["RecordID"].isin(classificacao_baixo_peso_ids)]

# NaNs per column as a percentage of total_pacientes (the whole training set).
classificacao_baixo_peso_missing = (
    classificacao_baixo_peso.isna().sum()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
classificacao_baixo_peso_missing

RecordID       0.0
level_1        0.0
Time           0.0
ALP            2.0
ALT            2.0
AST            2.0
Age            0.0
Albumin        2.0
BUN            1.0
Bilirubin      2.0
Cholesterol    2.0
Creatinine     1.0
DiasABP        1.0
FiO2           1.0
GCS            1.0
Gender         2.0
Glucose        2.0
HCO3           2.0
HCT            1.0
HR             0.0
Height         0.0
ICUType        2.0
K              1.0
Lactate        2.0
MAP            1.0
MechVent       1.0
Mg             1.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             2.0
PaCO2          1.0
PaO2           1.0
Platelets      1.0
RespRate       1.0
SaO2           2.0
SysABP         1.0
Temp           1.0
TroponinI      2.0
TroponinT      2.0
Urine          0.0
WBC            2.0
Weight         1.0
pH             1.0
dtype: float64

<h4>Low weight classification measurements</h4>

In [34]:
# Non-null values per column for "Baixo peso" patients, as a percentage of total_pacientes.
classificacao_baixo_peso_measurements = (
    classificacao_baixo_peso.count()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
classificacao_baixo_peso_measurements

RecordID       2.0
level_1        2.0
Time           2.0
ALP            0.0
ALT            0.0
AST            0.0
Age            2.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        1.0
FiO2           0.0
GCS            0.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             1.0
Height         2.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            1.0
MechVent       0.0
Mg             0.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             0.0
PaCO2          0.0
PaO2           0.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         1.0
Temp           1.0
TroponinI      0.0
TroponinT      0.0
Urine          1.0
WBC            0.0
Weight         1.0
pH             0.0
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [35]:
# RecordIDs of the patients labelled "Peso normal" (no Time filter is applied here).
is_peso_normal = bmi_data_train["Classificacao"] == "Peso normal"
classificacao_normal_peso_ids = bmi_data_train.loc[is_peso_normal, "RecordID"]

# Every training row belonging to those patients (reused by the measurements cell).
classificacao_normal_peso = train_X.loc[train_X["RecordID"].isin(classificacao_normal_peso_ids)]

# NaNs per column as a percentage of total_pacientes (the whole training set).
classificacao_normal_peso_missing = (
    classificacao_normal_peso.isna().sum()
    .div(total_pacientes)
    .round(2)
    .mul(100)
)
classificacao_normal_peso_missing

RecordID        0.0
level_1         0.0
Time            0.0
ALP            15.0
ALT            15.0
AST            15.0
Age             1.0
Albumin        15.0
BUN            14.0
Bilirubin      15.0
Cholesterol    15.0
Creatinine     14.0
DiasABP         5.0
FiO2           12.0
GCS            10.0
Gender         15.0
Glucose        14.0
HCO3           14.0
HCT            14.0
HR              1.0
Height          1.0
ICUType        15.0
K              14.0
Lactate        14.0
MAP             5.0
MechVent       12.0
Mg             14.0
NIDiasABP      10.0
NIMAP          10.0
NISysABP       10.0
Na             14.0
PaCO2          13.0
PaO2           13.0
Platelets      14.0
RespRate       13.0
SaO2           14.0
SysABP          5.0
Temp            8.0
TroponinI      15.0
TroponinT      15.0
Urine           4.0
WBC            14.0
Weight          7.0
pH             13.0
dtype: float64

<h4>Classification normal weight measurements</h4>

In [36]:
# Non-null observations per column for the normal-weight cohort, as a
# whole-number percentage of total_pacientes (training patients x 48).
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_normal_peso_measurements = (
    classificacao_normal_peso.count() / total_pacientes * 100
).round()
classificacao_normal_peso_measurements

RecordID       15.0
level_1        15.0
Time           15.0
ALP             0.0
ALT             0.0
AST             0.0
Age            14.0
Albumin         0.0
BUN             1.0
Bilirubin       0.0
Cholesterol     0.0
Creatinine      1.0
DiasABP        10.0
FiO2            3.0
GCS             5.0
Gender          0.0
Glucose         1.0
HCO3            1.0
HCT             2.0
HR             14.0
Height         14.0
ICUType         0.0
K               1.0
Lactate         1.0
MAP            10.0
MechVent        3.0
Mg              1.0
NIDiasABP       5.0
NIMAP           5.0
NISysABP        5.0
Na              1.0
PaCO2           2.0
PaO2            2.0
Platelets       1.0
RespRate        2.0
SaO2            1.0
SysABP         10.0
Temp            7.0
TroponinI       0.0
TroponinT       0.0
Urine          11.0
WBC             1.0
Weight          8.0
pH              2.0
dtype: float64

<h4>Classification overweight missing rate</h4>

In [37]:
# RecordIDs that bmi_data_train labels "Sobrepeso" (overweight).
classificacao_sobrepeso_ids = bmi_data_train.loc[
    bmi_data_train["Classificacao"] == "Sobrepeso", "RecordID"
]
# Every train_X row belonging to one of those records.
classificacao_sobrepeso = train_X[train_X["RecordID"].isin(classificacao_sobrepeso_ids)]
# NaN cells per column as a whole-number percentage of total_pacientes
# (training patients x 48), i.e. relative to the whole training grid.
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_sobrepeso_missing = (
    classificacao_sobrepeso.isna().sum() / total_pacientes * 100
).round()
classificacao_sobrepeso_missing

RecordID        0.0
level_1         0.0
Time            0.0
ALP            18.0
ALT            18.0
AST            18.0
Age             1.0
Albumin        18.0
BUN            17.0
Bilirubin      18.0
Cholesterol    18.0
Creatinine     17.0
DiasABP         6.0
FiO2           15.0
GCS            13.0
Gender         18.0
Glucose        17.0
HCO3           17.0
HCT            16.0
HR              2.0
Height          1.0
ICUType        18.0
K              17.0
Lactate        17.0
MAP             6.0
MechVent       15.0
Mg             17.0
NIDiasABP      13.0
NIMAP          13.0
NISysABP       13.0
Na             17.0
PaCO2          15.0
PaO2           15.0
Platelets      17.0
RespRate       15.0
SaO2           17.0
SysABP          6.0
Temp            9.0
TroponinI      18.0
TroponinT      18.0
Urine           5.0
WBC            17.0
Weight          9.0
pH             15.0
dtype: float64

<h4>Classification overweight measurements</h4>

In [38]:
# Non-null observations per column for the overweight cohort, as a
# whole-number percentage of total_pacientes (training patients x 48).
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_sobrepeso_measurements = (
    classificacao_sobrepeso.count() / total_pacientes * 100
).round()
classificacao_sobrepeso_measurements

RecordID       18.0
level_1        18.0
Time           18.0
ALP             0.0
ALT             0.0
AST             0.0
Age            17.0
Albumin         0.0
BUN             1.0
Bilirubin       0.0
Cholesterol     0.0
Creatinine      1.0
DiasABP        12.0
FiO2            3.0
GCS             6.0
Gender          0.0
Glucose         1.0
HCO3            1.0
HCT             2.0
HR             16.0
Height         17.0
ICUType         0.0
K               1.0
Lactate         1.0
MAP            12.0
MechVent        3.0
Mg              1.0
NIDiasABP       6.0
NIMAP           5.0
NISysABP        6.0
Na              1.0
PaCO2           3.0
PaO2            3.0
Platelets       1.0
RespRate        3.0
SaO2            1.0
SysABP         12.0
Temp            9.0
TroponinI       0.0
TroponinT       0.0
Urine          13.0
WBC             1.0
Weight          9.0
pH              3.0
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [39]:
# RecordIDs that bmi_data_train labels "Obesidade grau 1" (grade 1 obesity).
classificacao_obesidade_1_ids = bmi_data_train.loc[
    bmi_data_train["Classificacao"] == "Obesidade grau 1", "RecordID"
]
# Every train_X row belonging to one of those records.
classificacao_obesidade_1 = train_X[train_X["RecordID"].isin(classificacao_obesidade_1_ids)]
# NaN cells per column as a whole-number percentage of total_pacientes
# (training patients x 48), i.e. relative to the whole training grid.
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_obesidade_1_missing = (
    classificacao_obesidade_1.isna().sum() / total_pacientes * 100
).round()
classificacao_obesidade_1_missing

RecordID       0.0
level_1        0.0
Time           0.0
ALP            9.0
ALT            9.0
AST            9.0
Age            0.0
Albumin        9.0
BUN            9.0
Bilirubin      9.0
Cholesterol    9.0
Creatinine     9.0
DiasABP        3.0
FiO2           8.0
GCS            7.0
Gender         9.0
Glucose        9.0
HCO3           9.0
HCT            8.0
HR             1.0
Height         0.0
ICUType        9.0
K              9.0
Lactate        9.0
MAP            3.0
MechVent       8.0
Mg             9.0
NIDiasABP      7.0
NIMAP          7.0
NISysABP       7.0
Na             9.0
PaCO2          8.0
PaO2           8.0
Platelets      9.0
RespRate       8.0
SaO2           9.0
SysABP         3.0
Temp           5.0
TroponinI      9.0
TroponinT      9.0
Urine          2.0
WBC            9.0
Weight         4.0
pH             8.0
dtype: float64

<h4>Grade 1 obesity measurements </h4>

In [40]:
# Non-null observations per column for the grade 1 obesity cohort, as a
# whole-number percentage of total_pacientes (training patients x 48).
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_obesidade_1_measurements = (
    classificacao_obesidade_1.count() / total_pacientes * 100
).round()
classificacao_obesidade_1_measurements

RecordID       9.0
level_1        9.0
Time           9.0
ALP            0.0
ALT            0.0
AST            0.0
Age            9.0
Albumin        0.0
BUN            1.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     1.0
DiasABP        7.0
FiO2           2.0
GCS            3.0
Gender         0.0
Glucose        1.0
HCO3           1.0
HCT            1.0
HR             9.0
Height         9.0
ICUType        0.0
K              1.0
Lactate        0.0
MAP            7.0
MechVent       2.0
Mg             1.0
NIDiasABP      3.0
NIMAP          3.0
NISysABP       3.0
Na             1.0
PaCO2          2.0
PaO2           2.0
Platelets      1.0
RespRate       1.0
SaO2           1.0
SysABP         7.0
Temp           5.0
TroponinI      0.0
TroponinT      0.0
Urine          7.0
WBC            1.0
Weight         5.0
pH             2.0
dtype: float64

<h4>Grade 2 Obesity missing rate</h4>

In [41]:
# RecordIDs that bmi_data_train labels "Obesidade grau 2" (grade 2 obesity).
classificacao_obesidade_2_ids = bmi_data_train.loc[
    bmi_data_train["Classificacao"] == "Obesidade grau 2", "RecordID"
]
# Every train_X row belonging to one of those records.
classificacao_obesidade_2 = train_X[train_X["RecordID"].isin(classificacao_obesidade_2_ids)]
# NaN cells per column as a whole-number percentage of total_pacientes
# (training patients x 48), i.e. relative to the whole training grid.
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_obesidade_2_missing = (
    classificacao_obesidade_2.isna().sum() / total_pacientes * 100
).round()
classificacao_obesidade_2_missing

RecordID       0.0
level_1        0.0
Time           0.0
ALP            4.0
ALT            4.0
AST            4.0
Age            0.0
Albumin        4.0
BUN            4.0
Bilirubin      4.0
Cholesterol    4.0
Creatinine     4.0
DiasABP        1.0
FiO2           3.0
GCS            3.0
Gender         4.0
Glucose        4.0
HCO3           4.0
HCT            4.0
HR             0.0
Height         0.0
ICUType        4.0
K              4.0
Lactate        4.0
MAP            1.0
MechVent       3.0
Mg             4.0
NIDiasABP      3.0
NIMAP          3.0
NISysABP       3.0
Na             4.0
PaCO2          4.0
PaO2           4.0
Platelets      4.0
RespRate       4.0
SaO2           4.0
SysABP         1.0
Temp           2.0
TroponinI      4.0
TroponinT      4.0
Urine          1.0
WBC            4.0
Weight         2.0
pH             3.0
dtype: float64

<h4>Grade 2 Obesity measurements</h4>

In [42]:
# Non-null observations per column for the grade 2 obesity cohort, as a
# whole-number percentage of total_pacientes (training patients x 48).
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_obesidade_2_measurements = (
    classificacao_obesidade_2.count() / total_pacientes * 100
).round()
classificacao_obesidade_2_measurements

RecordID       4.0
level_1        4.0
Time           4.0
ALP            0.0
ALT            0.0
AST            0.0
Age            4.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        3.0
FiO2           1.0
GCS            1.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             4.0
Height         4.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            3.0
MechVent       1.0
Mg             0.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             0.0
PaCO2          1.0
PaO2           1.0
Platelets      0.0
RespRate       1.0
SaO2           0.0
SysABP         3.0
Temp           2.0
TroponinI      0.0
TroponinT      0.0
Urine          3.0
WBC            0.0
Weight         2.0
pH             1.0
dtype: float64

<h4>Grade 3 Obesity missing rate</h4>

In [43]:
# RecordIDs that bmi_data_train labels "Obesidade grau 3" (grade 3 obesity).
classificacao_obesidade_3_ids = bmi_data_train.loc[
    bmi_data_train["Classificacao"] == "Obesidade grau 3", "RecordID"
]
# Every train_X row belonging to one of those records.
classificacao_obesidade_3 = train_X[train_X["RecordID"].isin(classificacao_obesidade_3_ids)]
# NaN cells per column as a whole-number percentage of total_pacientes
# (training patients x 48), i.e. relative to the whole training grid.
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_obesidade_3_missing = (
    classificacao_obesidade_3.isna().sum() / total_pacientes * 100
).round()
classificacao_obesidade_3_missing

RecordID       0.0
level_1        0.0
Time           0.0
ALP            4.0
ALT            4.0
AST            4.0
Age            0.0
Albumin        4.0
BUN            3.0
Bilirubin      4.0
Cholesterol    4.0
Creatinine     3.0
DiasABP        1.0
FiO2           3.0
GCS            3.0
Gender         4.0
Glucose        3.0
HCO3           3.0
HCT            3.0
HR             0.0
Height         0.0
ICUType        4.0
K              3.0
Lactate        3.0
MAP            1.0
MechVent       3.0
Mg             3.0
NIDiasABP      2.0
NIMAP          2.0
NISysABP       2.0
Na             3.0
PaCO2          3.0
PaO2           3.0
Platelets      3.0
RespRate       3.0
SaO2           3.0
SysABP         1.0
Temp           2.0
TroponinI      4.0
TroponinT      4.0
Urine          1.0
WBC            3.0
Weight         2.0
pH             3.0
dtype: float64

<h4>Grade 3 Obesity measurements</h4>

In [44]:
# Non-null observations per column for the grade 3 obesity cohort, as a
# whole-number percentage of total_pacientes (training patients x 48).
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 reintroduces float noise (0.07 * 100 == 7.000000000000001).
classificacao_obesidade_3_measurements = (
    classificacao_obesidade_3.count() / total_pacientes * 100
).round()
classificacao_obesidade_3_measurements

RecordID       4.0
level_1        4.0
Time           4.0
ALP            0.0
ALT            0.0
AST            0.0
Age            3.0
Albumin        0.0
BUN            0.0
Bilirubin      0.0
Cholesterol    0.0
Creatinine     0.0
DiasABP        2.0
FiO2           1.0
GCS            1.0
Gender         0.0
Glucose        0.0
HCO3           0.0
HCT            0.0
HR             3.0
Height         3.0
ICUType        0.0
K              0.0
Lactate        0.0
MAP            2.0
MechVent       1.0
Mg             0.0
NIDiasABP      1.0
NIMAP          1.0
NISysABP       1.0
Na             0.0
PaCO2          1.0
PaO2           1.0
Platelets      0.0
RespRate       0.0
SaO2           0.0
SysABP         2.0
Temp           2.0
TroponinI      0.0
TroponinT      0.0
Urine          3.0
WBC            0.0
Weight         2.0
pH             1.0
dtype: float64

<h4>Columns for tables</h4>

In [45]:
# Column labels of train_X; the table-building cells below pass these to
# pd.DataFrame(columns=...) and transpose, so they become the table's row index.
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

<h4>Building the missing rate table</h4>

In [83]:
# Empty frame with the train_X columns, transposed so that every variable is a
# row; each demographic group's missing-rate Series is then attached as a column.
df_missing = pd.DataFrame(columns=df_columns)
# NOTE(review): in the stored output the ICUType columns look like fractions
# (e.g. 0.14) while the other columns are percentages (e.g. 44.0) — confirm the
# ICUType_*_training_missing series are scaled by 100 where they are computed.
df_missing_transpose = df_missing.T.assign(
    **{
        "Female": female_gender_missing_rate,
        "Male": male_gender_missing_rate,
        "Undefined gender": undefined_gender_missing_rate,
        "ICUType 1": ICUType_1_training_missing,
        "ICUType 2": ICUType_2_training_missing,
        "ICUType 3": ICUType_3_training_missing,
        "ICUType 4": ICUType_4_training_missing,
        "Age 65+": more_than_or_equal_to_65_train_missing,
        "Age 65-": less_than_65_train_missing,
        "Low Weight": classificacao_baixo_peso_missing,
        "Normal Weight": classificacao_normal_peso_missing,
        "Overweight": classificacao_sobrepeso_missing,
        "Obesity Grade 1": classificacao_obesidade_1_missing,
        "Obesity Grade 2": classificacao_obesidade_2_missing,
        "Obesity Grade 3": classificacao_obesidade_3_missing,
        "Undefined classification": classificacao_undefined_missing,
    }
)
# Remove the identifier, time and demographic rows from the table.
df_missing_transpose = df_missing_transpose.drop(
    index=["RecordID", "level_1", "Time", "Age", "Gender"]
)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographics - Train</h2>"))
df_missing_transpose

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,44.0,54.0,0.0,0.14,0.21,0.36,0.28,54.0,45.0,2.0,15.0,18.0,9.0,4.0,4.0,47.0
ALT,44.0,54.0,0.0,0.14,0.21,0.36,0.28,54.0,45.0,2.0,15.0,18.0,9.0,4.0,4.0,47.0
AST,44.0,54.0,0.0,0.14,0.21,0.36,0.28,54.0,45.0,2.0,15.0,18.0,9.0,4.0,4.0,47.0
Albumin,44.0,55.0,0.0,0.14,0.21,0.36,0.28,54.0,45.0,2.0,15.0,18.0,9.0,4.0,4.0,47.0
BUN,41.0,51.0,0.0,0.13,0.2,0.34,0.26,50.0,42.0,1.0,14.0,17.0,9.0,4.0,3.0,45.0
Bilirubin,44.0,54.0,0.0,0.14,0.21,0.36,0.28,54.0,45.0,2.0,15.0,18.0,9.0,4.0,4.0,47.0
Cholesterol,44.0,55.0,0.0,0.14,0.21,0.36,0.28,54.0,46.0,2.0,15.0,18.0,9.0,4.0,4.0,48.0
Creatinine,41.0,51.0,0.0,0.13,0.2,0.34,0.26,50.0,42.0,1.0,14.0,17.0,9.0,4.0,3.0,45.0
DiasABP,22.0,24.0,0.0,0.08,0.04,0.23,0.1,24.0,21.0,1.0,5.0,6.0,3.0,1.0,1.0,29.0
FiO2,38.0,46.0,0.0,0.13,0.17,0.31,0.23,46.0,39.0,1.0,12.0,15.0,8.0,3.0,3.0,42.0


<h4>Building the measurements table</h4>

In [84]:
# Empty frame with the train_X columns, transposed so that every variable is a
# row; each demographic group's measurement Series is then attached as a column.
df_measurements = pd.DataFrame(columns=df_columns)
# NOTE(review): in the stored output the ICUType columns look like fractions
# (e.g. 0.06) while the other columns are percentages (e.g. 23.0) — confirm the
# ICUType_*_measurements_training series are scaled by 100 where they are computed.
df_measurements_transpose = df_measurements.T.assign(
    **{
        "Female": female_gender_measurements_training,
        "Male": male_gender_measurements_training,
        "Undefined gender": undefined_gender_measurements_training,
        "ICUType 1": ICUType_1_measurements_training,
        "ICUType 2": ICUType_2_measurements_training,
        "ICUType 3": ICUType_3_measurements_training,
        "ICUType 4": ICUType_4_measurements_training,
        "Age 65+": age_65_and_above_measurements_training,
        "Age 65-": age_under_65_measurements_training,
        "Low Weight": classificacao_baixo_peso_measurements,
        "Normal Weight": classificacao_normal_peso_measurements,
        "Overweight": classificacao_sobrepeso_measurements,
        "Obesity Grade 1": classificacao_obesidade_1_measurements,
        "Obesity Grade 2": classificacao_obesidade_2_measurements,
        "Obesity Grade 3": classificacao_obesidade_3_measurements,
        "Undefined classification": classification_undefined_measurements,
    }
)
# Remove the identifier, time and demographic rows from the table.
df_measurements_transpose = df_measurements_transpose.drop(
    index=["RecordID", "level_1", "Time", "Age", "Gender"]
)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_measurements_transpose

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,1.0,1.0,0.0,0.0,0.0,0.01,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
ALT,1.0,1.0,0.0,0.0,0.0,0.01,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
AST,1.0,1.0,0.0,0.0,0.0,0.01,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Albumin,1.0,1.0,0.0,0.0,0.0,0.01,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
BUN,3.0,4.0,0.0,0.01,0.01,0.03,0.02,4.0,3.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0
Bilirubin,1.0,1.0,0.0,0.0,0.0,0.01,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Cholesterol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Creatinine,3.0,4.0,0.0,0.01,0.01,0.03,0.02,4.0,3.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0
DiasABP,23.0,31.0,0.0,0.06,0.17,0.13,0.19,30.0,24.0,1.0,10.0,12.0,7.0,3.0,2.0,19.0
FiO2,7.0,9.0,0.0,0.02,0.04,0.05,0.05,9.0,7.0,0.0,3.0,3.0,2.0,1.0,1.0,7.0


# Validation data

<h4>Loading validation dataset</h4>

In [53]:
# Validation split: the 'val_X' entry of physionet2012_dataset (loaded at the top of the notebook).
validation_X = physionet2012_dataset['val_X']

<h4>Total patient-hours (number of patients × 48)</h4>

In [54]:
# Denominator for every validation-set rate below: distinct patients x 48
# (the training section describes the same quantity as "48 hours of all patients").
# nunique() counts the distinct RecordIDs directly instead of building a
# groupby().first() frame over every column only to count its rows.
total_patients_validation = validation_X["RecordID"].nunique() * 48
total_patients_validation

92064

<h4>Female gender missing rate</h4>

In [55]:
# RecordIDs of rows where Gender == 0.0 (the group this section labels "female").
female_gender_validation_ids = validation_X.loc[
    validation_X["Gender"] == 0.0, "RecordID"
]
# Every validation row belonging to one of those records.
female_gender_validation = validation_X.loc[
    validation_X["RecordID"].isin(female_gender_validation_ids)
]
# NaN cells per column as a percentage of total_patients_validation, 2 decimals.
female_gender_missing_rate_validation = (
    female_gender_validation.isna().sum() / total_patients_validation * 100
).round(2)
female_gender_missing_rate_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            43.39
ALT            43.37
AST            43.37
Age             2.66
Albumin        43.56
BUN            40.96
Bilirubin      43.36
Cholesterol    44.01
Creatinine     40.95
DiasABP        20.80
FiO2           37.45
GCS            30.00
Gender         43.19
Glucose        41.11
HCO3           41.03
HCT            40.00
HR              4.32
Height          2.66
ICUType        43.19
K              40.80
Lactate        42.35
MAP            20.92
MechVent       37.45
Mg             41.06
NIDiasABP      25.15
NIMAP          25.41
NISysABP       25.13
Na             41.00
PaCO2          39.17
PaO2           39.17
Platelets      40.92
RespRate       32.16
SaO2           42.39
SysABP         20.80
Temp           27.89
TroponinI      43.99
TroponinT      43.66
Urine          13.31
WBC            41.17
Weight         20.76
pH             38.98
dtype: float64

<h4>Female gender measurements</h4>

In [56]:
# Non-null cells per column for the female group, as a percentage of
# total_patients_validation, rounded to 2 decimals.
female_gender_measurements_validation = (
    female_gender_validation.count() / total_patients_validation * 100
).round(2)
female_gender_measurements_validation

RecordID       44.11
level_1        44.11
Time           44.11
ALP             0.71
ALT             0.74
AST             0.73
Age            41.45
Albumin         0.54
BUN             3.15
Bilirubin       0.75
Cholesterol     0.10
Creatinine      3.16
DiasABP        23.31
FiO2            6.66
GCS            14.11
Gender          0.92
Glucose         2.99
HCO3            3.08
HCT             4.10
HR             39.79
Height         41.45
ICUType         0.92
K               3.31
Lactate         1.76
MAP            23.19
MechVent        6.66
Mg              3.05
NIDiasABP      18.96
NIMAP          18.70
NISysABP       18.98
Na              3.11
PaCO2           4.94
PaO2            4.94
Platelets       3.19
RespRate       11.95
SaO2            1.72
SysABP         23.31
Temp           16.22
TroponinI       0.12
TroponinT       0.45
Urine          30.79
WBC             2.94
Weight         23.35
pH              5.13
dtype: float64

<h4>Male gender missing rate</h4>

In [57]:
# RecordIDs of rows where Gender == 1.0 (the group this section labels "male").
male_gender_validation_ids = validation_X.loc[
    validation_X["Gender"] == 1.0, "RecordID"
]
# Every validation row belonging to one of those records.
male_gender_validation = validation_X.loc[
    validation_X["RecordID"].isin(male_gender_validation_ids)
]
# NaN cells per column as a percentage of total_patients_validation, 2 decimals.
male_gender_missing_rate_validation = (
    male_gender_validation.isna().sum() / total_patients_validation * 100
).round(2)
male_gender_missing_rate_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            54.92
ALT            54.89
AST            54.89
Age             2.80
Albumin        55.17
BUN            51.78
Bilirubin      54.90
Cholesterol    55.74
Creatinine     51.76
DiasABP        24.25
FiO2           47.17
GCS            37.83
Gender         54.68
Glucose        52.03
HCO3           51.89
HCT            50.52
HR              4.96
Height          2.80
ICUType        54.68
K              51.63
Lactate        53.53
MAP            24.46
MechVent       47.49
Mg             51.87
NIDiasABP      33.00
NIMAP          33.33
NISysABP       32.98
Na             51.93
PaCO2          49.15
PaO2           49.15
Platelets      51.64
RespRate       43.57
SaO2           53.52
SysABP         24.25
Temp           34.43
TroponinI      55.75
TroponinT      55.22
Urine          16.84
WBC            52.06
Weight         25.96
pH             48.77
dtype: float64

<h4>Male gender measurements</h4>

In [58]:
# Non-null cells per column for the male group, as a percentage of
# total_patients_validation, rounded to 2 decimals.
male_gender_measurements_validation = (
    male_gender_validation.count() / total_patients_validation * 100
).round(2)
male_gender_measurements_validation

RecordID       55.84
level_1        55.84
Time           55.84
ALP             0.92
ALT             0.94
AST             0.95
Age            53.04
Albumin         0.67
BUN             4.06
Bilirubin       0.94
Cholesterol     0.10
Creatinine      4.08
DiasABP        31.59
FiO2            8.66
GCS            18.01
Gender          1.16
Glucose         3.81
HCO3            3.95
HCT             5.32
HR             50.88
Height         53.04
ICUType         1.16
K               4.21
Lactate         2.30
MAP            31.38
MechVent        8.35
Mg              3.97
NIDiasABP      22.84
NIMAP          22.51
NISysABP       22.86
Na              3.91
PaCO2           6.69
PaO2            6.68
Platelets       4.20
RespRate       12.27
SaO2            2.32
SysABP         31.59
Temp           21.41
TroponinI       0.09
TroponinT       0.62
Urine          39.00
WBC             3.78
Weight         29.88
pH              7.07
dtype: float64

<h4>Undefined gender missing rate</h4>

In [59]:
# RecordIDs of rows where Gender == -1.0 (the group this section labels "undefined").
undefined_gender_ids_validation = validation_X.loc[
    validation_X["Gender"] == -1.0, "RecordID"
]
# Every validation row belonging to one of those records.
undefined_gender_validation = validation_X.loc[
    validation_X["RecordID"].isin(undefined_gender_ids_validation)
]
# NaN cells per column as a percentage of total_patients_validation, 2 decimals.
undefined_gender_missing_rate_validation = (
    undefined_gender_validation.isna().sum() / total_patients_validation * 100
).round(2)
undefined_gender_missing_rate_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.05
ALT            0.05
AST            0.05
Age            0.05
Albumin        0.05
BUN            0.05
Bilirubin      0.05
Cholesterol    0.05
Creatinine     0.05
DiasABP        0.05
FiO2           0.05
GCS            0.05
Gender         0.05
Glucose        0.05
HCO3           0.05
HCT            0.05
HR             0.05
Height         0.05
ICUType        0.05
K              0.05
Lactate        0.05
MAP            0.05
MechVent       0.05
Mg             0.05
NIDiasABP      0.05
NIMAP          0.05
NISysABP       0.05
Na             0.05
PaCO2          0.05
PaO2           0.05
Platelets      0.05
RespRate       0.05
SaO2           0.05
SysABP         0.05
Temp           0.05
TroponinI      0.05
TroponinT      0.05
Urine          0.05
WBC            0.05
Weight         0.05
pH             0.05
dtype: float64

<h4>Undefined gender measurements</h4>

In [60]:
# Non-null cells per column for the undefined-gender group, as a percentage of
# total_patients_validation, rounded to 2 decimals.
undefined_gender_measurements_validation = (
    undefined_gender_validation.count() / total_patients_validation * 100
).round(2)
undefined_gender_measurements_validation

RecordID       0.05
level_1        0.05
Time           0.05
ALP            0.00
ALT            0.00
AST            0.00
Age            0.00
Albumin        0.00
BUN            0.00
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.00
DiasABP        0.00
FiO2           0.00
GCS            0.00
Gender         0.00
Glucose        0.00
HCO3           0.00
HCT            0.00
HR             0.00
Height         0.00
ICUType        0.00
K              0.00
Lactate        0.00
MAP            0.00
MechVent       0.00
Mg             0.00
NIDiasABP      0.00
NIMAP          0.00
NISysABP       0.00
Na             0.00
PaCO2          0.00
PaO2           0.00
Platelets      0.00
RespRate       0.00
SaO2           0.00
SysABP         0.00
Temp           0.00
TroponinI      0.00
TroponinT      0.00
Urine          0.00
WBC            0.00
Weight         0.00
pH             0.00
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [61]:
# RecordIDs of rows that report ICUType == 1.0 at Time == 0.0.
ICUType_1_validation_ids = validation_X.loc[
    (validation_X["ICUType"] == 1.0) & (validation_X["Time"] == 0.0),
    "RecordID",
]
# Every validation row belonging to one of those records.
ICUType_1_validation = validation_X.loc[
    validation_X["RecordID"].isin(ICUType_1_validation_ids)
]
# NaN cells per column as a percentage of total_patients_validation, 2 decimals.
ICUType_1_validation_missing = (
    ICUType_1_validation.isna().sum() / total_patients_validation * 100
).round(2)
ICUType_1_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            14.02
ALT            14.00
AST            14.00
Age             1.14
Albumin        14.07
BUN            13.20
Bilirubin      14.01
Cholesterol    14.12
Creatinine     13.19
DiasABP         8.52
FiO2           12.75
GCS            10.61
Gender         13.94
Glucose        13.25
HCO3           13.24
HCT            12.96
HR              1.72
Height          1.14
ICUType        13.94
K              13.03
Lactate        13.91
MAP             8.53
MechVent       12.81
Mg             13.21
NIDiasABP       7.09
NIMAP           7.16
NISysABP        7.08
Na             13.24
PaCO2          13.09
PaO2           13.09
Platelets      13.18
RespRate        8.84
SaO2           13.60
SysABP          8.52
Temp            9.81
TroponinI      14.20
TroponinT      13.92
Urine           5.95
WBC            13.30
Weight          7.87
pH             13.07
dtype: float64

<h4>ICUType 1 measurements</h4>

In [62]:
# Non-null cells per column for the ICUType 1 group, as a percentage of
# total_patients_validation, rounded to 2 decimals.
ICUType_1_measurements_validation = (
    ICUType_1_validation.count() / total_patients_validation * 100
).round(2)
ICUType_1_measurements_validation

RecordID       14.23
level_1        14.23
Time           14.23
ALP             0.22
ALT             0.23
AST             0.23
Age            13.09
Albumin         0.16
BUN             1.03
Bilirubin       0.23
Cholesterol     0.11
Creatinine      1.05
DiasABP         5.71
FiO2            1.49
GCS             3.62
Gender          0.30
Glucose         0.99
HCO3            1.00
HCT             1.27
HR             12.52
Height         13.09
ICUType         0.30
K               1.20
Lactate         0.33
MAP             5.71
MechVent        1.42
Mg              1.03
NIDiasABP       7.14
NIMAP           7.08
NISysABP        7.15
Na              0.99
PaCO2           1.14
PaO2            1.14
Platelets       1.05
RespRate        5.40
SaO2            0.63
SysABP          5.71
Temp            4.42
TroponinI       0.04
TroponinT       0.31
Urine           8.28
WBC             0.94
Weight          6.36
pH              1.16
dtype: float64

<h4>ICUType 2 missing rate</h4>

In [63]:
# RecordIDs of rows that report ICUType == 2.0 at Time == 0.0.
ICUType_2_validation_ids = validation_X.loc[
    (validation_X["ICUType"] == 2.0) & (validation_X["Time"] == 0.0),
    "RecordID",
]
# Every validation row belonging to one of those records.
ICUType_2_validation = validation_X.loc[
    validation_X["RecordID"].isin(ICUType_2_validation_ids)
]
# NaN cells per column as a percentage of total_patients_validation, 2 decimals.
ICUType_2_validation_missing = (
    ICUType_2_validation.isna().sum() / total_patients_validation * 100
).round(2)
ICUType_2_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            21.68
ALT            21.68
AST            21.67
Age             0.61
Albumin        21.71
BUN            20.41
Bilirubin      21.68
Cholesterol    21.79
Creatinine     20.41
DiasABP         4.63
FiO2           18.28
GCS            15.94
Gender         21.34
Glucose        20.79
HCO3           20.52
HCT            19.44
HR              1.62
Height          0.61
ICUType        21.34
K              20.67
Lactate        21.10
MAP             4.63
MechVent       18.20
Mg             20.49
NIDiasABP      17.16
NIMAP          17.18
NISysABP       17.14
Na             20.74
PaCO2          17.40
PaO2           17.42
Platelets      20.07
RespRate       20.78
SaO2           19.47
SysABP          4.63
Temp            9.17
TroponinI      21.76
TroponinT      21.73
Urine           3.33
WBC            20.37
Weight         10.10
pH             16.96
dtype: float64

<h4>ICUType 2 measurements</h4>

In [64]:
# Non-null cells per column for the ICUType 2 group, as a percentage of
# total_patients_validation, rounded to 2 decimals.
ICUType_2_measurements_validation = (
    ICUType_2_validation.count() / total_patients_validation * 100
).round(2)
ICUType_2_measurements_validation

RecordID       21.79
level_1        21.79
Time           21.79
ALP             0.12
ALT             0.12
AST             0.12
Age            21.19
Albumin         0.08
BUN             1.38
Bilirubin       0.12
Cholesterol     0.01
Creatinine      1.39
DiasABP        17.16
FiO2            3.52
GCS             5.85
Gender          0.45
Glucose         1.00
HCO3            1.27
HCT             2.35
HR             20.18
Height         21.19
ICUType         0.45
K               1.12
Lactate         0.69
MAP            17.16
MechVent        3.59
Mg              1.30
NIDiasABP       4.64
NIMAP           4.61
NISysABP        4.65
Na              1.05
PaCO2           4.39
PaO2            4.38
Platelets       1.72
RespRate        1.02
SaO2            2.32
SysABP         17.16
Temp           12.63
TroponinI       0.03
TroponinT       0.06
Urine          18.47
WBC             1.42
Weight         11.69
pH              4.83
dtype: float64

<h4>ICUType 3 missing rate</h4>

In [65]:
# RecordIDs of patients whose Time == 0.0 row reports ICUType 3.
ICUType_3_validation_ids = validation_X.loc[
    (validation_X["ICUType"] == 3.0) & (validation_X["Time"] == 0.0),
    "RecordID",
]
# Every row (all time steps) belonging to those patients.
ICUType_3_validation = validation_X[validation_X["RecordID"].isin(ICUType_3_validation_ids)]
# Null count per column as a percentage of total_patients_validation (defined earlier).
ICUType_3_validation_missing = (
    ICUType_3_validation.isna().sum() / total_patients_validation * 100
).round(2)
ICUType_3_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            34.33
ALT            34.30
AST            34.31
Age             2.41
Albumin        34.51
BUN            32.45
Bilirubin      34.28
Cholesterol    35.05
Creatinine     32.45
DiasABP        22.54
FiO2           30.04
GCS            25.72
Gender         34.36
Glucose        32.47
HCO3           32.45
HCT            32.03
HR              3.53
Height          2.41
ICUType        34.36
K              32.25
Lactate        33.72
MAP            22.73
MechVent       30.47
Mg             32.58
NIDiasABP      14.84
NIMAP          15.24
NISysABP       14.83
Na             32.40
PaCO2          32.38
PaO2           32.37
Platelets      32.71
RespRate       23.97
SaO2           34.62
SysABP         22.54
Temp           25.04
TroponinI      34.99
TroponinT      34.60
Urine          13.72
WBC            32.84
Weight         12.40
pH             32.34
dtype: float64

<h4>ICUType 3 measurements</h4>

In [66]:
# Non-null count per column for ICUType 3 patients, as a percentage of
# total_patients_validation (defined earlier).
ICUType_3_measurements_validation = (
    ICUType_3_validation.count() / total_patients_validation * 100
).round(2)
ICUType_3_measurements_validation

RecordID       35.09
level_1        35.09
Time           35.09
ALP             0.76
ALT             0.79
AST             0.78
Age            32.68
Albumin         0.57
BUN             2.64
Bilirubin       0.81
Cholesterol     0.04
Creatinine      2.64
DiasABP        12.55
FiO2            5.05
GCS             9.37
Gender          0.73
Glucose         2.62
HCO3            2.64
HCT             3.06
HR             31.56
Height         32.68
ICUType         0.73
K               2.84
Lactate         1.37
MAP            12.36
MechVent        4.62
Mg              2.51
NIDiasABP      20.25
NIMAP          19.84
NISysABP       20.26
Na              2.68
PaCO2           2.71
PaO2            2.71
Platelets       2.37
RespRate       11.12
SaO2            0.47
SysABP         12.55
Temp           10.05
TroponinI       0.10
TroponinT       0.49
Urine          21.37
WBC             2.25
Weight         22.69
pH              2.75
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [67]:
# RecordIDs of patients whose Time == 0.0 row reports ICUType 4.
ICUType_4_validation_ids = validation_X.loc[
    (validation_X["ICUType"] == 4.0) & (validation_X["Time"] == 0.0),
    "RecordID",
]
# Every row (all time steps) belonging to those patients.
ICUType_4_validation = validation_X[validation_X["RecordID"].isin(ICUType_4_validation_ids)]
# Null count per column as a percentage of total_patients_validation (defined earlier).
ICUType_4_validation_missing = (
    ICUType_4_validation.isna().sum() / total_patients_validation * 100
).round(2)
ICUType_4_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            28.35
ALT            28.34
AST            28.34
Age             1.35
Albumin        28.49
BUN            26.72
Bilirubin      28.35
Cholesterol    28.84
Creatinine     26.72
DiasABP         9.41
FiO2           23.61
GCS            15.61
Gender         28.28
Glucose        26.69
HCO3           26.75
HCT            26.14
HR              2.46
Height          1.35
ICUType        28.28
K              26.52
Lactate        27.20
MAP             9.55
MechVent       23.50
Mg             26.70
NIDiasABP      19.11
NIMAP          19.21
NISysABP       19.10
Na             26.59
PaCO2          25.48
PaO2           25.49
Platelets      26.65
RespRate       22.20
SaO2           28.27
SysABP          9.41
Temp           18.35
TroponinI      28.84
TroponinT      28.68
Urine           7.21
WBC            26.78
Weight         16.40
pH             25.43
dtype: float64

<h4>ICUType 4 measurements</h4>

In [68]:
# Non-null count per column for ICUType 4 patients, as a percentage of
# total_patients_validation (defined earlier).
ICUType_4_measurements_validation = (
    ICUType_4_validation.count() / total_patients_validation * 100
).round(2)
ICUType_4_measurements_validation

RecordID       28.88
level_1        28.88
Time           28.88
ALP             0.54
ALT             0.55
AST             0.55
Age            27.53
Albumin         0.40
BUN             2.16
Bilirubin       0.54
Cholesterol     0.04
Creatinine      2.16
DiasABP        19.47
FiO2            5.27
GCS            13.27
Gender          0.60
Glucose         2.19
HCO3            2.13
HCT             2.74
HR             26.42
Height         27.53
ICUType         0.60
K               2.37
Lactate         1.68
MAP            19.34
MechVent        5.38
Mg              2.19
NIDiasABP       9.77
NIMAP           9.67
NISysABP        9.79
Na              2.29
PaCO2           3.40
PaO2            3.39
Platelets       2.23
RespRate        6.68
SaO2            0.61
SysABP         19.47
Temp           10.53
TroponinI       0.04
TroponinT       0.21
Urine          21.67
WBC             2.10
Weight         12.49
pH              3.46
dtype: float64

<h4>Age 65 and over missing rate</h4>

In [69]:
# RecordIDs of patients whose Time == 0.0 row reports Age >= 65.
more_than_or_equal_to_65_validation_ids = validation_X.loc[
    (validation_X["Age"] >= 65) & (validation_X["Time"] == 0.0),
    "RecordID",
]
# Every row (all time steps) belonging to those patients.
more_than_or_equal_to_65_validation = validation_X[
    validation_X["RecordID"].isin(more_than_or_equal_to_65_validation_ids)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
more_than_or_equal_to_65_validation_missing = (
    more_than_or_equal_to_65_validation.isna().sum() / total_patients_validation * 100
).round(2)
more_than_or_equal_to_65_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            55.37
ALT            55.35
AST            55.35
Age             2.91
Albumin        55.50
BUN            52.14
Bilirubin      55.33
Cholesterol    55.99
Creatinine     52.12
DiasABP        25.21
FiO2           47.54
GCS            38.75
Gender         54.93
Glucose        52.41
HCO3           52.24
HCT            50.88
HR              4.91
Height          2.91
ICUType        54.93
K              52.00
Lactate        53.90
MAP            25.34
MechVent       47.89
Mg             52.26
NIDiasABP      32.01
NIMAP          32.28
NISysABP       31.99
Na             52.29
PaCO2          49.62
PaO2           49.63
Platelets      52.09
RespRate       42.24
SaO2           53.63
SysABP         25.21
Temp           34.43
TroponinI      55.94
TroponinT      55.33
Urine          15.88
WBC            52.44
Weight         24.90
pH             49.32
dtype: float64

<h4>Age 65 and over measurements</h4>

In [70]:
# Non-null count per column for patients aged 65 or more, as a percentage of
# total_patients_validation (defined earlier).
more_than_or_equal_to_65_validation_measurements = (
    more_than_or_equal_to_65_validation.count() / total_patients_validation * 100
).round(2)
more_than_or_equal_to_65_validation_measurements

RecordID       56.10
level_1        56.10
Time           56.10
ALP             0.73
ALT             0.75
AST             0.75
Age            53.19
Albumin         0.60
BUN             3.96
Bilirubin       0.77
Cholesterol     0.11
Creatinine      3.98
DiasABP        30.89
FiO2            8.56
GCS            17.35
Gender          1.17
Glucose         3.69
HCO3            3.86
HCT             5.22
HR             51.19
Height         53.19
ICUType         1.17
K               4.10
Lactate         2.20
MAP            30.76
MechVent        8.21
Mg              3.84
NIDiasABP      24.09
NIMAP          23.82
NISysABP       24.11
Na              3.81
PaCO2           6.48
PaO2            6.47
Platelets       4.01
RespRate       13.86
SaO2            2.47
SysABP         30.89
Temp           21.67
TroponinI       0.16
TroponinT       0.77
Urine          40.22
WBC             3.66
Weight         31.20
pH              6.78
dtype: float64

<h4>Age under 65 missing rate</h4>

In [71]:
# RecordIDs of patients whose Time == 0.0 row reports Age < 65.
less_than_65_validation_ids = validation_X.loc[
    (validation_X["Age"] < 65) & (validation_X["Time"] == 0.0),
    "RecordID",
]
# Every row (all time steps) belonging to those patients.
less_than_65_validation = validation_X[
    validation_X["RecordID"].isin(less_than_65_validation_ids)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
less_than_65_validation_missing = (
    less_than_65_validation.isna().sum() / total_patients_validation * 100
).round(2)
less_than_65_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            42.99
ALT            42.97
AST            42.97
Age             2.60
Albumin        43.28
BUN            40.65
Bilirubin      42.98
Cholesterol    43.82
Creatinine     40.64
DiasABP        19.90
FiO2           37.14
GCS            29.13
Gender         42.99
Glucose        40.79
HCO3           40.73
HCT            39.70
HR              4.42
Height          2.60
ICUType        42.99
K              40.48
Lactate        42.03
MAP            20.09
MechVent       37.10
Mg             40.72
NIDiasABP      26.19
NIMAP          26.52
NISysABP       26.17
Na             40.68
PaCO2          38.74
PaO2           38.74
Platelets      40.53
RespRate       33.54
SaO2           42.34
SysABP         19.90
Temp           27.93
TroponinI      43.85
TroponinT      43.60
Urine          14.33
WBC            40.84
Weight         21.87
pH             38.48
dtype: float64

<h4>Age under 65 measurements</h4>

In [72]:
# Non-null count per column for patients under 65, as a percentage of
# total_patients_validation (defined earlier).
less_than_65_validation_measurements = (
    less_than_65_validation.count() / total_patients_validation * 100
).round(2)
less_than_65_validation_measurements

RecordID       43.90
level_1        43.90
Time           43.90
ALP             0.91
ALT             0.93
AST             0.93
Age            41.30
Albumin         0.62
BUN             3.25
Bilirubin       0.92
Cholesterol     0.08
Creatinine      3.26
DiasABP        24.00
FiO2            6.76
GCS            14.77
Gender          0.91
Glucose         3.11
HCO3            3.17
HCT             4.20
HR             39.48
Height         41.30
ICUType         0.91
K               3.42
Lactate         1.87
MAP            23.81
MechVent        6.80
Mg              3.18
NIDiasABP      17.71
NIMAP          17.38
NISysABP       17.73
Na              3.22
PaCO2           5.16
PaO2            5.16
Platelets       3.37
RespRate       10.36
SaO2            1.56
SysABP         24.00
Temp           15.97
TroponinI       0.05
TroponinT       0.30
Urine          29.57
WBC             3.06
Weight         22.03
pH              5.42
dtype: float64

<h4>Filtering only patients who have height and weight</h4>

In [73]:
# Keep only rows where both Height and Weight are present and different from -1
# (-1 is presumably a "not recorded" sentinel in the raw data; TODO confirm).
filtered_validation_X = validation_X[
    validation_X["Height"].ne(-1)
    & validation_X["Weight"].ne(-1)
    & validation_X["Height"].notna()
    & validation_X["Weight"].notna()
]

<h4>Changing the height to meters</h4>

In [74]:
# New frame with Height divided by 100 (assumes Height is recorded in
# centimetres, so the result is in metres). filtered_validation_X is not modified.
filtered_validation_X_metros = filtered_validation_X.assign(
    Height=filtered_validation_X["Height"] / 100
)
filtered_validation_X_metros["Height"]

624       1.702
628       1.702
629       1.702
630       1.702
631       1.702
          ...  
573643    1.651
573645    1.651
573646    1.651
573647    1.651
573984    1.702
Name: Height, Length: 25635, dtype: float64

<h4>Calculating BMI and classification</h4>

In [75]:
# NOTE: this is an alias, not a copy, so the BMI and Classificacao columns
# added below also appear on filtered_validation_X_metros.
bmi_data_validation = filtered_validation_X_metros
# BMI = weight / height^2, rounded to one decimal place.
bmi_data_validation["BMI"] = (
    filtered_validation_X_metros["Weight"] / filtered_validation_X_metros["Height"] ** 2
).round(1)
# classify_BMI (defined earlier in the notebook) maps each BMI value to a category label.
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
624,132570,0,0.0,,,,84.0,,,,...,,,,,,,102.6,,35.4,Obesidade grau 2
628,132570,4,4.0,,,,84.0,,,,...,,36.6,,,,,102.6,,35.4,Obesidade grau 2
629,132570,5,5.0,19.0,15.0,20.0,84.0,,83.0,0.1,...,,,,,,8.8,102.6,,35.4,Obesidade grau 2
630,132570,6,6.0,,,,84.0,,,,...,,,,,,,102.6,,35.4,Obesidade grau 2
631,132570,7,7.0,,,,84.0,,,,...,,,,,600.0,,102.6,,35.4,Obesidade grau 2


<h4>Keeping one row per patient (first non-null value of each column)</h4>

In [76]:
# Collapse to one row per RecordID. GroupBy.first() takes the first non-null
# value of each column, so a row may combine values from different time steps.
bmi_data_validation = (
    bmi_data_validation
    .groupby("RecordID")
    .first()
    .reset_index()
)
bmi_data_validation

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132570,0,0.0,19.0,15.0,20.0,84.0,,83.0,0.1,...,,36.6,,,600.0,8.8,102.6,,35.4,Obesidade grau 2
1,132575,0,0.0,,,,78.0,,18.0,,...,122.000000,37.4,,,38.0,12.5,63.0,7.34,22.4,Peso normal
2,132599,0,0.0,,,,53.0,,,,...,,37.3,,,350.0,,73.5,,23.3,Peso normal
3,132601,0,0.0,,,,74.0,,,,...,,,,,,,75.9,7.39,24.0,Peso normal
4,132658,0,0.0,71.0,9.0,42.0,81.0,,18.0,1.3,...,97.000000,38.4,,,90.0,61.3,105.4,7.42,30.7,Obesidade grau 1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1015,162863,0,0.0,,,,75.0,,19.0,,...,113.000000,37.2,,,2.0,13.6,45.0,7.34,18.1,Baixo peso
1016,162883,0,0.0,,,,32.0,,,,...,,,,,,,75.0,,25.1,Sobrepeso
1017,162912,0,0.0,34.0,17.0,24.0,63.0,2.3,6.0,0.4,...,123.133333,35.0,,,370.0,5.4,80.0,7.03,25.3,Sobrepeso
1018,162929,0,0.0,,,,63.0,,26.0,,...,113.000000,37.0,,,55.0,13.0,100.0,7.41,36.7,Obesidade grau 2


In [77]:
# Number of patients in each BMI class.
bmi_data_validation.loc[:, "Classificacao"].value_counts()

Classificacao
Sobrepeso           344
Peso normal         305
Obesidade grau 1    197
Obesidade grau 2     76
Obesidade grau 3     67
Baixo peso           31
Name: count, dtype: int64

<h4>Undefined classification missing rate</h4>

In [78]:
# NOTE: despite its name, this holds the RecordIDs that DO have a BMI class;
# the "undefined" group is everyone not in it.
classificacao_undefined_ids_validation = bmi_data_validation["RecordID"]
has_bmi_classification = validation_X["RecordID"].isin(classificacao_undefined_ids_validation)
# Rows of patients without a BMI class.
classificacao_undefined_validation = validation_X.loc[~has_bmi_classification]
# Null count per column as a percentage of total_patients_validation (defined earlier).
classificacao_undefined_missing_validation = (
    classificacao_undefined_validation.isna().sum() / total_patients_validation * 100
).round(2)
classificacao_undefined_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            46.06
ALT            46.03
AST            46.04
Age             3.27
Albumin        46.20
BUN            43.56
Bilirubin      46.02
Cholesterol    46.74
Creatinine     43.56
DiasABP        27.64
FiO2           40.36
GCS            31.09
Gender         45.84
Glucose        43.58
HCO3           43.59
HCT            42.95
HR              4.84
Height          3.27
ICUType        45.84
K              43.28
Lactate        45.21
MAP            27.82
MechVent       40.90
Mg             43.63
NIDiasABP      21.67
NIMAP          22.07
NISysABP       21.65
Na             43.45
PaCO2          43.43
PaO2           43.43
Platelets      43.81
RespRate       30.98
SaO2           46.26
SysABP         27.64
Temp           34.14
TroponinI      46.73
TroponinT      46.25
Urine          16.87
WBC            43.96
Weight         21.43
pH             43.35
dtype: float64

<h4>Undefined classification measurements</h4>

In [79]:
# Non-null count per column for patients without a BMI class, as a percentage
# of total_patients_validation (defined earlier).
classificacao_undefined_measurements_validation = (
    classificacao_undefined_validation.count() / total_patients_validation * 100
).round(2)
classificacao_undefined_measurements_validation

RecordID       46.82
level_1        46.82
Time           46.82
ALP             0.76
ALT             0.79
AST             0.78
Age            43.55
Albumin         0.62
BUN             3.26
Bilirubin       0.80
Cholesterol     0.08
Creatinine      3.26
DiasABP        19.18
FiO2            6.46
GCS            15.72
Gender          0.98
Glucose         3.24
HCO3            3.23
HCT             3.87
HR             41.98
Height         43.55
ICUType         0.98
K               3.54
Lactate         1.61
MAP            19.00
MechVent        5.92
Mg              3.19
NIDiasABP      25.15
NIMAP          24.75
NISysABP       25.17
Na              3.37
PaCO2           3.39
PaO2            3.39
Platelets       3.01
RespRate       15.84
SaO2            0.56
SysABP         19.18
Temp           12.67
TroponinI       0.09
TroponinT       0.57
Urine          29.95
WBC             2.86
Weight         25.39
pH              3.47
dtype: float64

<h4>Classification low weight missing rate</h4>

In [80]:
# RecordIDs of patients classified as "Baixo peso" (underweight).
classificacao_baixo_peso_ids_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"] == "Baixo peso", "RecordID"
]
# Every row (all time steps) belonging to those patients.
classificacao_baixo_peso_validation = validation_X[
    validation_X["RecordID"].isin(classificacao_baixo_peso_ids_validation)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
classificacao_baixo_peso_missing_validation = (
    classificacao_baixo_peso_validation.isna().sum() / total_patients_validation * 100
).round(2)
classificacao_baixo_peso_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            1.60
ALT            1.60
AST            1.60
Age            0.07
Albumin        1.60
BUN            1.51
Bilirubin      1.60
Cholesterol    1.62
Creatinine     1.51
DiasABP        0.60
FiO2           1.38
GCS            1.12
Gender         1.58
Glucose        1.51
HCO3           1.51
HCT            1.48
HR             0.12
Height         0.07
ICUType        1.58
K              1.49
Lactate        1.56
MAP            0.60
MechVent       1.35
Mg             1.50
NIDiasABP      0.97
NIMAP          0.97
NISysABP       0.97
Na             1.50
PaCO2          1.44
PaO2           1.44
Platelets      1.51
RespRate       1.36
SaO2           1.55
SysABP         0.60
Temp           0.93
TroponinI      1.61
TroponinT      1.60
Urine          0.42
WBC            1.52
Weight         0.96
pH             1.43
dtype: float64

<h4>Classification low weight measurements</h4>

In [81]:
# Non-null count per column for underweight patients, as a percentage of
# total_patients_validation (defined earlier).
classificacao_baixo_peso_measurements_validation = (
    classificacao_baixo_peso_validation.count() / total_patients_validation * 100
).round(2)
classificacao_baixo_peso_measurements_validation

RecordID       1.62
level_1        1.62
Time           1.62
ALP            0.02
ALT            0.02
AST            0.02
Age            1.55
Albumin        0.02
BUN            0.11
Bilirubin      0.02
Cholesterol    0.00
Creatinine     0.11
DiasABP        1.02
FiO2           0.24
GCS            0.50
Gender         0.03
Glucose        0.11
HCO3           0.11
HCT            0.13
HR             1.50
Height         1.55
ICUType        0.03
K              0.13
Lactate        0.05
MAP            1.02
MechVent       0.26
Mg             0.11
NIDiasABP      0.65
NIMAP          0.64
NISysABP       0.65
Na             0.12
PaCO2          0.17
PaO2           0.17
Platelets      0.11
RespRate       0.26
SaO2           0.07
SysABP         1.02
Temp           0.69
TroponinI      0.01
TroponinT      0.02
Urine          1.19
WBC            0.10
Weight         0.65
pH             0.18
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [82]:
# RecordIDs of patients classified as "Peso normal" (normal weight).
classificacao_peso_normal_ids_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"] == "Peso normal", "RecordID"
]
# Every row (all time steps) belonging to those patients.
classificacao_peso_normal_validation = validation_X[
    validation_X["RecordID"].isin(classificacao_peso_normal_ids_validation)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
classificacao_peso_normal_missing_validation = (
    classificacao_peso_normal_validation.isna().sum() / total_patients_validation * 100
).round(2)
classificacao_peso_normal_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            15.62
ALT            15.62
AST            15.62
Age             0.61
Albumin        15.73
BUN            14.73
Bilirubin      15.62
Cholesterol    15.86
Creatinine     14.72
DiasABP         5.26
FiO2           13.32
GCS            10.81
Gender         15.57
Glucose        14.81
HCO3           14.77
HCT            14.21
HR              1.27
Height          0.61
ICUType        15.57
K              14.68
Lactate        15.16
MAP             5.31
MechVent       13.24
Mg             14.75
NIDiasABP      10.88
NIMAP          10.92
NISysABP       10.87
Na             14.80
PaCO2          13.58
PaO2           13.58
Platelets      14.60
RespRate       13.06
SaO2           14.95
SysABP          5.26
Temp            8.57
TroponinI      15.87
TroponinT      15.76
Urine           3.92
WBC            14.75
Weight          8.06
pH             13.45
dtype: float64

<h4>Classification normal weight measurements</h4>

In [83]:
# Non-null count per column for normal-weight patients, as a percentage of
# total_patients_validation (defined earlier).
classificacao_peso_normal_measurements_validation = (
    classificacao_peso_normal_validation.count() / total_patients_validation * 100
).round(2)
classificacao_peso_normal_measurements_validation

RecordID       15.90
level_1        15.90
Time           15.90
ALP             0.28
ALT             0.28
AST             0.28
Age            15.29
Albumin         0.17
BUN             1.17
Bilirubin       0.28
Cholesterol     0.04
Creatinine      1.18
DiasABP        10.64
FiO2            2.58
GCS             5.09
Gender          0.33
Glucose         1.09
HCO3            1.13
HCT             1.69
HR             14.63
Height         15.29
ICUType         0.33
K               1.22
Lactate         0.74
MAP            10.59
MechVent        2.66
Mg              1.15
NIDiasABP       5.02
NIMAP           4.98
NISysABP        5.03
Na              1.10
PaCO2           2.32
PaO2            2.32
Platelets       1.31
RespRate        2.85
SaO2            0.95
SysABP         10.64
Temp            7.33
TroponinI       0.03
TroponinT       0.14
Urine          11.98
WBC             1.15
Weight          7.84
pH              2.45
dtype: float64

<h4>Classification overweight missing rate</h4>

In [84]:
# RecordIDs of patients classified as "Sobrepeso" (overweight).
classificacao_sobrepeso_ids_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"] == "Sobrepeso", "RecordID"
]
# Every row (all time steps) belonging to those patients.
classificacao_sobrepeso_validation = validation_X[
    validation_X["RecordID"].isin(classificacao_sobrepeso_ids_validation)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
classificacao_sobrepeso_missing_validation = (
    classificacao_sobrepeso_validation.isna().sum() / total_patients_validation * 100
).round(2)
classificacao_sobrepeso_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            17.64
ALT            17.63
AST            17.63
Age             0.72
Albumin        17.72
BUN            16.59
Bilirubin      17.62
Cholesterol    17.90
Creatinine     16.58
DiasABP         5.85
FiO2           15.12
GCS            12.46
Gender         17.56
Glucose        16.74
HCO3           16.64
HCT            16.07
HR              1.50
Height          0.72
ICUType        17.56
K              16.62
Lactate        17.16
MAP             5.91
MechVent       15.08
Mg             16.65
NIDiasABP      12.26
NIMAP          12.32
NISysABP       12.25
Na             16.72
PaCO2          15.21
PaO2           15.22
Platelets      16.45
RespRate       15.33
SaO2           16.82
SysABP          5.85
Temp            9.34
TroponinI      17.89
TroponinT      17.78
Urine           4.67
WBC            16.63
Weight          8.40
pH             15.02
dtype: float64

<h4>Classification overweight measurements</h4>

In [85]:
# Non-null count per column for overweight patients, as a percentage of
# total_patients_validation (defined earlier).
classificacao_sobrepeso_measurements_validation = (
    classificacao_sobrepeso_validation.count() / total_patients_validation * 100
).round(2)
classificacao_sobrepeso_measurements_validation

RecordID       17.94
level_1        17.94
Time           17.94
ALP             0.30
ALT             0.31
AST             0.31
Age            17.21
Albumin         0.21
BUN             1.35
Bilirubin       0.31
Cholesterol     0.04
Creatinine      1.35
DiasABP        12.09
FiO2            2.81
GCS             5.48
Gender          0.37
Glucose         1.20
HCO3            1.29
HCT             1.86
HR             16.44
Height         17.21
ICUType         0.37
K               1.32
Lactate         0.78
MAP            12.03
MechVent        2.85
Mg              1.29
NIDiasABP       5.68
NIMAP           5.61
NISysABP        5.69
Na              1.22
PaCO2           2.72
PaO2            2.71
Platelets       1.49
RespRate        2.60
SaO2            1.12
SysABP         12.09
Temp            8.60
TroponinI       0.04
TroponinT       0.15
Urine          13.26
WBC             1.31
Weight          9.54
pH              2.92
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [86]:
# RecordIDs of patients classified as "Obesidade grau 1" (grade 1 obesity).
classificacao_obesidade_1_ids_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"] == "Obesidade grau 1", "RecordID"
]
# Every row (all time steps) belonging to those patients.
classificacao_obesidade_1_validation = validation_X[
    validation_X["RecordID"].isin(classificacao_obesidade_1_ids_validation)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
classificacao_obesidade_1_missing_validation = (
    classificacao_obesidade_1_validation.isna().sum() / total_patients_validation * 100
).round(2)
classificacao_obesidade_1_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            10.10
ALT            10.09
AST            10.09
Age             0.43
Albumin        10.15
BUN             9.51
Bilirubin      10.10
Cholesterol    10.25
Creatinine      9.50
DiasABP         3.03
FiO2            8.42
GCS             7.04
Gender         10.06
Glucose         9.60
HCO3            9.55
HCT             9.19
HR              0.86
Height          0.43
ICUType        10.06
K               9.50
Lactate         9.75
MAP             3.07
MechVent        8.28
Mg              9.53
NIDiasABP       7.36
NIMAP           7.38
NISysABP        7.35
Na              9.58
PaCO2           8.49
PaO2            8.49
Platelets       9.42
RespRate        8.89
SaO2            9.53
SysABP          3.03
Temp            5.26
TroponinI      10.24
TroponinT      10.17
Urine           2.32
WBC             9.52
Weight          4.81
pH              8.39
dtype: float64

<h4>Grade 1 obesity measurements</h4>

In [87]:
# Non-null count per column for grade 1 obesity patients, as a percentage of
# total_patients_validation (defined earlier).
classificacao_obesidade_1_measurements_validation = (
    classificacao_obesidade_1_validation.count() / total_patients_validation * 100
).round(2)
classificacao_obesidade_1_measurements_validation

RecordID       10.27
level_1        10.27
Time           10.27
ALP             0.17
ALT             0.18
AST             0.18
Age             9.84
Albumin         0.12
BUN             0.76
Bilirubin       0.17
Cholesterol     0.02
Creatinine      0.77
DiasABP         7.24
FiO2            1.85
GCS             3.23
Gender          0.21
Glucose         0.67
HCO3            0.72
HCT             1.08
HR              9.41
Height          9.84
ICUType         0.21
K               0.77
Lactate         0.52
MAP             7.20
MechVent        1.99
Mg              0.74
NIDiasABP       2.91
NIMAP           2.89
NISysABP        2.92
Na              0.70
PaCO2           1.78
PaO2            1.78
Platelets       0.85
RespRate        1.38
SaO2            0.75
SysABP          7.24
Temp            5.01
TroponinI       0.03
TroponinT       0.10
Urine           7.95
WBC             0.75
Weight          5.46
pH              1.88
dtype: float64

<h4>Grade 2 obesity missing rate</h4>

In [88]:
# RecordIDs of patients classified as "Obesidade grau 2" (grade 2 obesity).
classificacao_obesidade_2_ids_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"] == "Obesidade grau 2", "RecordID"
]
# Every row (all time steps) belonging to those patients.
classificacao_obesidade_2_validation = validation_X[
    validation_X["RecordID"].isin(classificacao_obesidade_2_ids_validation)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
classificacao_obesidade_2_missing_validation = (
    classificacao_obesidade_2_validation.isna().sum() / total_patients_validation * 100
).round(2)
classificacao_obesidade_2_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.90
ALT            3.90
AST            3.90
Age            0.22
Albumin        3.92
BUN            3.67
Bilirubin      3.90
Cholesterol    3.95
Creatinine     3.66
DiasABP        1.56
FiO2           3.30
GCS            2.79
Gender         3.88
Glucose        3.70
HCO3           3.67
HCT            3.53
HR             0.40
Height         0.22
ICUType        3.88
K              3.67
Lactate        3.79
MAP            1.57
MechVent       3.34
Mg             3.67
NIDiasABP      2.56
NIMAP          2.61
NISysABP       2.56
Na             3.69
PaCO2          3.37
PaO2           3.38
Platelets      3.61
RespRate       3.15
SaO2           3.68
SysABP         1.56
Temp           2.15
TroponinI      3.96
TroponinT      3.90
Urine          1.04
WBC            3.66
Weight         1.55
pH             3.35
dtype: float64

<h4>Grade 2 obesity measurements</h4>

In [89]:
# Non-null count per column for grade 2 obesity patients, as a percentage of
# total_patients_validation (defined earlier).
classificacao_obesidade_2_measurements_validation = (
    classificacao_obesidade_2_validation.count() / total_patients_validation * 100
).round(2)
classificacao_obesidade_2_measurements_validation

RecordID       3.96
level_1        3.96
Time           3.96
ALP            0.06
ALT            0.06
AST            0.06
Age            3.74
Albumin        0.04
BUN            0.30
Bilirubin      0.06
Cholesterol    0.01
Creatinine     0.30
DiasABP        2.40
FiO2           0.67
GCS            1.17
Gender         0.08
Glucose        0.26
HCO3           0.29
HCT            0.43
HR             3.56
Height         3.74
ICUType        0.08
K              0.29
Lactate        0.18
MAP            2.40
MechVent       0.63
Mg             0.29
NIDiasABP      1.40
NIMAP          1.36
NISysABP       1.41
Na             0.28
PaCO2          0.59
PaO2           0.59
Platelets      0.35
RespRate       0.82
SaO2           0.28
SysABP         2.40
Temp           1.82
TroponinI      0.00
TroponinT      0.06
Urine          2.92
WBC            0.30
Weight         2.41
pH             0.62
dtype: float64

<h4>Grade 3 obesity missing rate</h4>

In [90]:
# RecordIDs of patients classified as "Obesidade grau 3" (grade 3 obesity).
classificacao_obesidade_3_ids_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"] == "Obesidade grau 3", "RecordID"
]
# Every row (all time steps) belonging to those patients.
classificacao_obesidade_3_validation = validation_X[
    validation_X["RecordID"].isin(classificacao_obesidade_3_ids_validation)
]
# Null count per column as a percentage of total_patients_validation (defined earlier).
classificacao_obesidade_3_missing_validation = (
    classificacao_obesidade_3_validation.isna().sum() / total_patients_validation * 100
).round(2)
classificacao_obesidade_3_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.44
ALT            3.44
AST            3.44
Age            0.18
Albumin        3.46
BUN            3.23
Bilirubin      3.45
Cholesterol    3.48
Creatinine     3.23
DiasABP        1.17
FiO2           2.78
GCS            2.56
Gender         3.42
Glucose        3.26
HCO3           3.24
HCT            3.14
HR             0.34
Height         0.18
ICUType        3.42
K              3.23
Lactate        3.31
MAP            1.16
MechVent       2.80
Mg             3.24
NIDiasABP      2.51
NIMAP          2.52
NISysABP       2.51
Na             3.25
PaCO2          2.84
PaO2           2.84
Platelets      3.23
RespRate       3.03
SaO2           3.18
SysABP         1.17
Temp           1.98
TroponinI      3.49
TroponinT      3.46
Urine          0.97
WBC            3.25
Weight         1.55
pH             2.81
dtype: float64

<h4>Grade 3 obesity measurements</h4>

In [91]:
# Non-null count per column for grade 3 obesity patients, as a percentage of
# total_patients_validation (defined earlier).
classificacao_obesidade_3_measurements_validation = (
    classificacao_obesidade_3_validation.count() / total_patients_validation * 100
).round(2)
classificacao_obesidade_3_measurements_validation

RecordID       3.49
level_1        3.49
Time           3.49
ALP            0.05
ALT            0.05
AST            0.05
Age            3.31
Albumin        0.03
BUN            0.27
Bilirubin      0.05
Cholesterol    0.01
Creatinine     0.27
DiasABP        2.33
FiO2           0.72
GCS            0.93
Gender         0.07
Glucose        0.23
HCO3           0.25
HCT            0.35
HR             3.15
Height         3.31
ICUType        0.07
K              0.26
Lactate        0.18
MAP            2.34
MechVent       0.69
Mg             0.25
NIDiasABP      0.98
NIMAP          0.98
NISysABP       0.98
Na             0.25
PaCO2          0.65
PaO2           0.65
Platelets      0.27
RespRate       0.47
SaO2           0.31
SysABP         2.33
Temp           1.51
TroponinI      0.00
TroponinT      0.03
Urine          2.53
WBC            0.25
Weight         1.94
pH             0.68
dtype: float64

<h4>Construction of the missing rate table</h4>

In [92]:
# Build the validation missing-rate table: one row per variable (the labels in df_columns)
# and one column per demographic subgroup. Each subgroup Series was computed in an earlier
# cell and is aligned to the table on its index when assigned.
df_missing_validation = pd.DataFrame(columns=df_columns)
df_missing_transpose_validation = df_missing_validation.T

# Gender subgroups.
df_missing_transpose_validation["Female"] = female_gender_missing_rate_validation
df_missing_transpose_validation["Male"] = male_gender_missing_rate_validation
df_missing_transpose_validation["Undefined gender"] = undefined_gender_missing_rate_validation

# ICU type subgroups.
df_missing_transpose_validation["ICUType 1"] = ICUType_1_validation_missing
df_missing_transpose_validation["ICUType 2"] = ICUType_2_validation_missing
df_missing_transpose_validation["ICUType 3"] = ICUType_3_validation_missing
df_missing_transpose_validation["ICUType 4"] = ICUType_4_validation_missing

# Age subgroups.
df_missing_transpose_validation["Age 65+"] = more_than_or_equal_to_65_validation_missing
df_missing_transpose_validation["Age 65-"] = less_than_65_validation_missing

# BMI classification subgroups.
df_missing_transpose_validation["Low Weight"] = classificacao_baixo_peso_missing_validation
df_missing_transpose_validation["Normal Weight"] = classificacao_peso_normal_missing_validation
df_missing_transpose_validation["Overweight"] = classificacao_sobrepeso_missing_validation
df_missing_transpose_validation["Obesity Grade 1"] = classificacao_obesidade_1_missing_validation
df_missing_transpose_validation["Obesity Grade 2"] = classificacao_obesidade_2_missing_validation
df_missing_transpose_validation["Obesity Grade 3"] = classificacao_obesidade_3_missing_validation
df_missing_transpose_validation["Undefined classification"] = classificacao_undefined_missing_validation

# Remove the identifier/time rows and the Age and Gender rows in a single call instead of
# five chained ones.
df_missing_transpose_validation = df_missing_transpose_validation.drop(
    ["RecordID", "level_1", "Time", "Age", "Gender"], axis=0
)

# Title fixed: "demographcs" -> "Demographics", matching the measurements table heading.
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by Demographics - Validation</h2>"))
df_missing_transpose_validation

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,43.39,54.92,0.05,14.02,21.68,34.33,28.35,55.37,42.99,1.6,15.62,17.64,10.1,3.9,3.44,46.06
ALT,43.37,54.89,0.05,14.0,21.68,34.3,28.34,55.35,42.97,1.6,15.62,17.63,10.09,3.9,3.44,46.03
AST,43.37,54.89,0.05,14.0,21.67,34.31,28.34,55.35,42.97,1.6,15.62,17.63,10.09,3.9,3.44,46.04
Albumin,43.56,55.17,0.05,14.07,21.71,34.51,28.49,55.5,43.28,1.6,15.73,17.72,10.15,3.92,3.46,46.2
BUN,40.96,51.78,0.05,13.2,20.41,32.45,26.72,52.14,40.65,1.51,14.73,16.59,9.51,3.67,3.23,43.56
Bilirubin,43.36,54.9,0.05,14.01,21.68,34.28,28.35,55.33,42.98,1.6,15.62,17.62,10.1,3.9,3.45,46.02
Cholesterol,44.01,55.74,0.05,14.12,21.79,35.05,28.84,55.99,43.82,1.62,15.86,17.9,10.25,3.95,3.48,46.74
Creatinine,40.95,51.76,0.05,13.19,20.41,32.45,26.72,52.12,40.64,1.51,14.72,16.58,9.5,3.66,3.23,43.56
DiasABP,20.8,24.25,0.05,8.52,4.63,22.54,9.41,25.21,19.9,0.6,5.26,5.85,3.03,1.56,1.17,27.64
FiO2,37.45,47.17,0.05,12.75,18.28,30.04,23.61,47.54,37.14,1.38,13.32,15.12,8.42,3.3,2.78,40.36


<h4>Construction of the measurements table</h4>

In [93]:
# Build the validation measurements table: one row per variable (the labels in df_columns)
# and one column per demographic subgroup, in the order listed in the mapping below.
df_measurements_validation = pd.DataFrame(columns=df_columns)
df_measurements_transpose_validation = df_measurements_validation.T

# Column label -> subgroup Series computed in earlier cells.
measurements_by_subgroup_validation = {
    "Female": female_gender_measurements_validation,
    "Male": male_gender_measurements_validation,
    "Undefined gender": undefined_gender_measurements_validation,
    "ICUType 1": ICUType_1_measurements_validation,
    "ICUType 2": ICUType_2_measurements_validation,
    "ICUType 3": ICUType_3_measurements_validation,
    "ICUType 4": ICUType_4_measurements_validation,
    "Age 65+": more_than_or_equal_to_65_validation_measurements,
    "Age 65-": less_than_65_validation_measurements,
    "Low Weight": classificacao_baixo_peso_measurements_validation,
    "Normal Weight": classificacao_peso_normal_measurements_validation,
    "Overweight": classificacao_sobrepeso_measurements_validation,
    "Obesity Grade 1": classificacao_obesidade_1_measurements_validation,
    "Obesity Grade 2": classificacao_obesidade_2_measurements_validation,
    "Obesity Grade 3": classificacao_obesidade_3_measurements_validation,
    "Undefined classification": classificacao_undefined_measurements_validation,
}
for subgroup_label_validation, subgroup_measurements_validation in measurements_by_subgroup_validation.items():
    df_measurements_transpose_validation[subgroup_label_validation] = subgroup_measurements_validation

# Remove the identifier/time rows and the Age and Gender rows.
df_measurements_transpose_validation = df_measurements_transpose_validation.drop(
    ["RecordID", "level_1", "Time", "Age", "Gender"], axis=0
)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_measurements_transpose_validation

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,0.71,0.92,0.0,0.22,0.12,0.76,0.54,0.73,0.91,0.02,0.28,0.3,0.17,0.06,0.05,0.76
ALT,0.74,0.94,0.0,0.23,0.12,0.79,0.55,0.75,0.93,0.02,0.28,0.31,0.18,0.06,0.05,0.79
AST,0.73,0.95,0.0,0.23,0.12,0.78,0.55,0.75,0.93,0.02,0.28,0.31,0.18,0.06,0.05,0.78
Albumin,0.54,0.67,0.0,0.16,0.08,0.57,0.4,0.6,0.62,0.02,0.17,0.21,0.12,0.04,0.03,0.62
BUN,3.15,4.06,0.0,1.03,1.38,2.64,2.16,3.96,3.25,0.11,1.17,1.35,0.76,0.3,0.27,3.26
Bilirubin,0.75,0.94,0.0,0.23,0.12,0.81,0.54,0.77,0.92,0.02,0.28,0.31,0.17,0.06,0.05,0.8
Cholesterol,0.1,0.1,0.0,0.11,0.01,0.04,0.04,0.11,0.08,0.0,0.04,0.04,0.02,0.01,0.01,0.08
Creatinine,3.16,4.08,0.0,1.05,1.39,2.64,2.16,3.98,3.26,0.11,1.18,1.35,0.77,0.3,0.27,3.26
DiasABP,23.31,31.59,0.0,5.71,17.16,12.55,19.47,30.89,24.0,1.02,10.64,12.09,7.24,2.4,2.33,19.18
FiO2,6.66,8.66,0.0,1.49,3.52,5.05,5.27,8.56,6.76,0.24,2.58,2.81,1.85,0.67,0.72,6.46


# Test data

<h4>Loading test dataset</h4>

In [94]:
# Test split taken from the preprocessed PhysioNet 2012 dictionary loaded at the top of
# the notebook.
test_X = physionet2012_dataset['test_X']

<h4>Total number of rows (patient-hours) in the test set</h4>

In [95]:
# Number of non-null RecordID entries in test_X. Despite the name this is a row count,
# not a count of distinct patients; it is the denominator of every test-set percentage
# computed in the cells below.
total_pacientes_test = test_X["RecordID"].count()
total_pacientes_test

115152

<h4>Female gender missing rate</h4>

In [96]:
# RecordIDs taken from the rows where Gender == 0.0 (the female group in this notebook).
female_gender_test_ids = test_X.loc[test_X["Gender"] == 0.0, "RecordID"]
# Every test row that belongs to one of those patients.
female_gender_test = test_X[test_X["RecordID"].isin(female_gender_test_ids)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
female_gender_missing_rate_test = (
    female_gender_test.isna().sum() / total_pacientes_test * 100
).round(2)
female_gender_missing_rate_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            43.75
ALT            43.74
AST            43.74
Age             2.46
Albumin        43.91
BUN            41.27
Bilirubin      43.72
Cholesterol    44.40
Creatinine     41.25
DiasABP        21.75
FiO2           37.73
GCS            30.31
Gender         43.55
Glucose        41.43
HCO3           41.32
HCT            40.29
HR              4.16
Height          2.46
ICUType        43.55
K              41.07
Lactate        42.79
MAP            21.90
MechVent       37.84
Mg             41.29
NIDiasABP      24.68
NIMAP          24.95
NISysABP       24.66
Na             41.31
PaCO2          39.69
PaO2           39.69
Platelets      41.22
RespRate       32.51
SaO2           42.84
SysABP         21.75
Temp           28.41
TroponinI      44.37
TroponinT      43.99
Urine          13.70
WBC            41.47
Weight         20.22
pH             39.49
dtype: float64

<h4>Female gender measurements</h4>

In [97]:
# Per-column count of non-null values in the female subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
female_gender_measurements_test = (
    female_gender_test.count() / total_pacientes_test * 100
).round(2)
female_gender_measurements_test

RecordID       44.48
level_1        44.48
Time           44.48
ALP             0.73
ALT             0.74
AST             0.74
Age            42.02
Albumin         0.57
BUN             3.21
Bilirubin       0.76
Cholesterol     0.07
Creatinine      3.22
DiasABP        22.73
FiO2            6.75
GCS            14.16
Gender          0.93
Glucose         3.04
HCO3            3.16
HCT             4.18
HR             40.31
Height         42.02
ICUType         0.93
K               3.40
Lactate         1.68
MAP            22.58
MechVent        6.63
Mg              3.18
NIDiasABP      19.80
NIMAP          19.53
NISysABP       19.81
Na              3.17
PaCO2           4.79
PaO2            4.78
Platelets       3.26
RespRate       11.97
SaO2            1.63
SysABP         22.73
Temp           16.07
TroponinI       0.11
TroponinT       0.49
Urine          30.78
WBC             3.01
Weight         24.25
pH              4.98
dtype: float64

<h4>Male gender missing rate</h4>

In [98]:
# RecordIDs taken from the rows where Gender == 1.0 (the male group in this notebook).
male_gender_test_ids = test_X.loc[test_X["Gender"] == 1.0, "RecordID"]
# Every test row that belongs to one of those patients.
male_gender_test = test_X[test_X["RecordID"].isin(male_gender_test_ids)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
male_gender_missing_rate_test = (
    male_gender_test.isna().sum() / total_pacientes_test * 100
).round(2)
male_gender_missing_rate_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            54.52
ALT            54.50
AST            54.50
Age             3.58
Albumin        54.72
BUN            51.37
Bilirubin      54.49
Cholesterol    55.31
Creatinine     51.36
DiasABP        23.79
FiO2           46.22
GCS            37.79
Gender         54.24
Glucose        51.66
HCO3           51.49
HCT            49.91
HR              5.96
Height          3.58
ICUType        54.24
K              51.21
Lactate        52.94
MAP            23.91
MechVent       46.80
Mg             51.41
NIDiasABP      34.04
NIMAP          34.30
NISysABP       34.01
Na             51.46
PaCO2          48.41
PaO2           48.42
Platelets      51.18
RespRate       44.14
SaO2           52.99
SysABP         23.78
Temp           33.92
TroponinI      55.31
TroponinT      54.85
Urine          17.93
WBC            51.61
Weight         26.29
pH             48.04
dtype: float64

<h4>Male gender measurements</h4>

In [99]:
# Per-column count of non-null values in the male subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
male_gender_measurements_test = (
    male_gender_test.count() / total_pacientes_test * 100
).round(2)
male_gender_measurements_test

RecordID       55.40
level_1        55.40
Time           55.40
ALP             0.88
ALT             0.90
AST             0.90
Age            51.82
Albumin         0.68
BUN             4.02
Bilirubin       0.90
Cholesterol     0.09
Creatinine      4.04
DiasABP        31.61
FiO2            9.17
GCS            17.61
Gender          1.15
Glucose         3.74
HCO3            3.91
HCT             5.49
HR             49.44
Height         51.82
ICUType         1.15
K               4.19
Lactate         2.45
MAP            31.49
MechVent        8.60
Mg              3.99
NIDiasABP      21.36
NIMAP          21.10
NISysABP       21.39
Na              3.94
PaCO2           6.98
PaO2            6.98
Platelets       4.22
RespRate       11.26
SaO2            2.41
SysABP         31.61
Temp           21.48
TroponinI       0.09
TroponinT       0.55
Urine          37.47
WBC             3.79
Weight         29.11
pH              7.36
dtype: float64

<h4>Undefined gender missing rate</h4>

In [100]:
# RecordIDs taken from the rows where Gender == -1.0 (the undefined-gender group in this
# notebook).
undefined_gender_ids_test = test_X.loc[test_X["Gender"] == -1.0, "RecordID"]
# Every test row that belongs to one of those patients.
undefined_gender_test = test_X[test_X["RecordID"].isin(undefined_gender_ids_test)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
undefined_gender_missing_rate_test = (
    undefined_gender_test.isna().sum() / total_pacientes_test * 100
).round(2)
undefined_gender_missing_rate_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.12
ALT            0.12
AST            0.12
Age            0.04
Albumin        0.12
BUN            0.11
Bilirubin      0.12
Cholesterol    0.13
Creatinine     0.11
DiasABP        0.08
FiO2           0.11
GCS            0.11
Gender         0.12
Glucose        0.11
HCO3           0.11
HCT            0.11
HR             0.04
Height         0.04
ICUType        0.12
K              0.11
Lactate        0.10
MAP            0.08
MechVent       0.11
Mg             0.11
NIDiasABP      0.09
NIMAP          0.09
NISysABP       0.09
Na             0.11
PaCO2          0.10
PaO2           0.10
Platelets      0.11
RespRate       0.13
SaO2           0.12
SysABP         0.08
Temp           0.09
TroponinI      0.13
TroponinT      0.12
Urine          0.07
WBC            0.11
Weight         0.05
pH             0.10
dtype: float64

<h4>Undefined gender measurements</h4>

In [101]:
# Per-column count of non-null values in the undefined-gender subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
undefined_gender_measurements_test = (
    undefined_gender_test.count() / total_pacientes_test * 100
).round(2)
undefined_gender_measurements_test

RecordID       0.13
level_1        0.13
Time           0.13
ALP            0.00
ALT            0.00
AST            0.00
Age            0.08
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.04
FiO2           0.02
GCS            0.02
Gender         0.00
Glucose        0.01
HCO3           0.01
HCT            0.01
HR             0.08
Height         0.08
ICUType        0.00
K              0.01
Lactate        0.02
MAP            0.04
MechVent       0.02
Mg             0.01
NIDiasABP      0.03
NIMAP          0.03
NISysABP       0.03
Na             0.01
PaCO2          0.03
PaO2           0.03
Platelets      0.01
RespRate       0.00
SaO2           0.00
SysABP         0.04
Temp           0.03
TroponinI      0.00
TroponinT      0.00
Urine          0.05
WBC            0.01
Weight         0.08
pH             0.03
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [102]:
# RecordIDs of patients whose Time == 0.0 row reports ICUType == 1.0.
ICUType_1_test_ids = test_X.loc[
    (test_X["ICUType"] == 1.0) & (test_X["Time"] == 0.0), "RecordID"
]
# Every test row that belongs to one of those patients.
ICUType_1_test = test_X[test_X["RecordID"].isin(ICUType_1_test_ids)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
ICUType_1_test_missing = (
    ICUType_1_test.isna().sum() / total_pacientes_test * 100
).round(2)
ICUType_1_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            15.12
ALT            15.11
AST            15.11
Age             1.18
Albumin        15.17
BUN            14.28
Bilirubin      15.12
Cholesterol    15.25
Creatinine     14.26
DiasABP         8.67
FiO2           13.68
GCS            11.30
Gender         15.02
Glucose        14.33
HCO3           14.33
HCT            14.01
HR              1.91
Height          1.18
ICUType        15.02
K              14.09
Lactate        14.99
MAP             8.71
MechVent       13.81
Mg             14.27
NIDiasABP       8.32
NIMAP           8.36
NISysABP        8.32
Na             14.33
PaCO2          14.04
PaO2           14.03
Platelets      14.26
RespRate        9.82
SaO2           14.57
SysABP          8.67
Temp           10.52
TroponinI      15.29
TroponinT      15.05
Urine           6.70
WBC            14.38
Weight          7.97
pH             13.99
dtype: float64

<h4>ICUType 1 measurements</h4>

In [103]:
# Per-column count of non-null values in the ICUType 1 subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
ICUType_1_measurements_test = (
    ICUType_1_test.count() / total_pacientes_test * 100
).round(2)
ICUType_1_measurements_test

RecordID       15.34
level_1        15.34
Time           15.34
ALP             0.22
ALT             0.23
AST             0.23
Age            14.16
Albumin         0.17
BUN             1.06
Bilirubin       0.22
Cholesterol     0.09
Creatinine      1.08
DiasABP         6.67
FiO2            1.66
GCS             4.04
Gender          0.32
Glucose         1.01
HCO3            1.01
HCT             1.33
HR             13.43
Height         14.16
ICUType         0.32
K               1.25
Lactate         0.35
MAP             6.63
MechVent        1.53
Mg              1.07
NIDiasABP       7.02
NIMAP           6.98
NISysABP        7.02
Na              1.01
PaCO2           1.30
PaO2            1.31
Platelets       1.08
RespRate        5.52
SaO2            0.77
SysABP          6.67
Temp            4.82
TroponinI       0.05
TroponinT       0.29
Urine           8.64
WBC             0.96
Weight          7.37
pH              1.35
dtype: float64

<h4>ICUType 2 missing rate</h4>

In [104]:
# RecordIDs of patients whose Time == 0.0 row reports ICUType == 2.0.
ICUType_2_test_ids = test_X.loc[
    (test_X["ICUType"] == 2.0) & (test_X["Time"] == 0.0), "RecordID"
]
# Every test row that belongs to one of those patients.
ICUType_2_test = test_X[test_X["RecordID"].isin(ICUType_2_test_ids)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
ICUType_2_test_missing = (
    ICUType_2_test.isna().sum() / total_pacientes_test * 100
).round(2)
ICUType_2_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            21.50
ALT            21.49
AST            21.49
Age             0.79
Albumin        21.56
BUN            20.27
Bilirubin      21.50
Cholesterol    21.67
Creatinine     20.27
DiasABP         4.41
FiO2           17.98
GCS            15.92
Gender         21.22
Glucose        20.67
HCO3           20.37
HCT            19.26
HR              1.90
Height          0.79
ICUType        21.22
K              20.53
Lactate        20.92
MAP             4.35
MechVent       17.97
Mg             20.25
NIDiasABP      17.53
NIMAP          17.56
NISysABP       17.52
Na             20.57
PaCO2          17.26
PaO2           17.27
Platelets      19.87
RespRate       20.75
SaO2           19.32
SysABP          4.40
Temp            9.02
TroponinI      21.65
TroponinT      21.63
Urine           3.69
WBC            20.18
Weight         10.60
pH             16.84
dtype: float64

<h4>ICUType 2 measurements</h4>

In [105]:
# Per-column count of non-null values in the ICUType 2 subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
ICUType_2_measurements_test = (
    ICUType_2_test.count() / total_pacientes_test * 100
).round(2)
ICUType_2_measurements_test

RecordID       21.68
level_1        21.68
Time           21.68
ALP             0.18
ALT             0.18
AST             0.18
Age            20.88
Albumin         0.11
BUN             1.40
Bilirubin       0.18
Cholesterol     0.00
Creatinine      1.40
DiasABP        17.27
FiO2            3.69
GCS             5.76
Gender          0.45
Glucose         1.00
HCO3            1.31
HCT             2.41
HR             19.77
Height         20.88
ICUType         0.45
K               1.15
Lactate         0.76
MAP            17.32
MechVent        3.71
Mg              1.42
NIDiasABP       4.15
NIMAP           4.12
NISysABP        4.15
Na              1.10
PaCO2           4.42
PaO2            4.40
Platelets       1.80
RespRate        0.93
SaO2            2.36
SysABP         17.27
Temp           12.66
TroponinI       0.03
TroponinT       0.04
Urine          17.99
WBC             1.50
Weight         11.08
pH              4.84
dtype: float64

<h4>ICUType 3 missing rate</h4>

In [106]:
# RecordIDs of patients whose Time == 0.0 row reports ICUType == 3.0.
ICUType_3_test_ids = test_X.loc[
    (test_X["ICUType"] == 3.0) & (test_X["Time"] == 0.0), "RecordID"
]
# Every test row that belongs to one of those patients.
ICUType_3_test = test_X[test_X["RecordID"].isin(ICUType_3_test_ids)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
ICUType_3_test_missing = (
    ICUType_3_test.isna().sum() / total_pacientes_test * 100
).round(2)
ICUType_3_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            34.42
ALT            34.41
AST            34.41
Age             2.70
Albumin        34.60
BUN            32.47
Bilirubin      34.37
Cholesterol    35.15
Creatinine     32.47
DiasABP        22.48
FiO2           29.78
GCS            25.92
Gender         34.45
Glucose        32.49
HCO3           32.48
HCT            31.98
HR              3.95
Height          2.70
ICUType        34.45
K              32.27
Lactate        33.69
MAP            22.62
MechVent       30.23
Mg             32.65
NIDiasABP      15.59
NIMAP          15.90
NISysABP       15.57
Na             32.38
PaCO2          32.33
PaO2           32.33
Platelets      32.70
RespRate       25.22
SaO2           34.78
SysABP         22.48
Temp           25.06
TroponinI      35.11
TroponinT      34.72
Urine          14.20
WBC            32.81
Weight         12.52
pH             32.29
dtype: float64

<h4>ICUType 3 measurements</h4>

In [107]:
# Per-column count of non-null values in the ICUType 3 subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
ICUType_3_measurements_test = (
    ICUType_3_test.count() / total_pacientes_test * 100
).round(2)
ICUType_3_measurements_test

RecordID       35.18
level_1        35.18
Time           35.18
ALP             0.76
ALT             0.77
AST             0.78
Age            32.48
Albumin         0.58
BUN             2.71
Bilirubin       0.81
Cholesterol     0.03
Creatinine      2.72
DiasABP        12.70
FiO2            5.41
GCS             9.26
Gender          0.73
Glucose         2.69
HCO3            2.70
HCT             3.20
HR             31.23
Height         32.48
ICUType         0.73
K               2.91
Lactate         1.49
MAP            12.56
MechVent        4.95
Mg              2.54
NIDiasABP      19.59
NIMAP          19.28
NISysABP       19.61
Na              2.80
PaCO2           2.85
PaO2            2.85
Platelets       2.49
RespRate        9.96
SaO2            0.40
SysABP         12.71
Temp           10.12
TroponinI       0.07
TroponinT       0.46
Urine          20.98
WBC             2.37
Weight         22.66
pH              2.89
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [108]:
# RecordIDs of patients whose Time == 0.0 row reports ICUType == 4.0.
ICUType_4_test_ids = test_X.loc[
    (test_X["ICUType"] == 4.0) & (test_X["Time"] == 0.0), "RecordID"
]
# Every test row that belongs to one of those patients.
ICUType_4_test = test_X[test_X["RecordID"].isin(ICUType_4_test_ids)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
ICUType_4_test_missing = (
    ICUType_4_test.isna().sum() / total_pacientes_test * 100
).round(2)
ICUType_4_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            27.35
ALT            27.35
AST            27.35
Age             1.40
Albumin        27.42
BUN            25.73
Bilirubin      27.35
Cholesterol    27.76
Creatinine     25.73
DiasABP        10.06
FiO2           22.62
GCS            15.07
Gender         27.22
Glucose        25.71
HCO3           25.75
HCT            25.06
HR              2.40
Height          1.40
ICUType        27.22
K              25.50
Lactate        26.25
MAP            10.21
MechVent       22.74
Mg             25.64
NIDiasABP      17.37
NIMAP          17.51
NISysABP       17.35
Na             25.59
PaCO2          24.57
PaO2           24.58
Platelets      25.68
RespRate       20.99
SaO2           27.29
SysABP         10.06
Temp           17.82
TroponinI      27.76
TroponinT      27.56
Urine           7.10
WBC            25.83
Weight         15.47
pH             24.51
dtype: float64

<h4>ICUType 4 measurements</h4>

In [109]:
# Per-column count of non-null values in the ICUType 4 subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
ICUType_4_measurements_test = (
    ICUType_4_test.count() / total_pacientes_test * 100
).round(2)
ICUType_4_measurements_test

RecordID       27.80
level_1        27.80
Time           27.80
ALP             0.45
ALT             0.46
AST             0.46
Age            26.41
Albumin         0.39
BUN             2.08
Bilirubin       0.45
Cholesterol     0.04
Creatinine      2.08
DiasABP        17.74
FiO2            5.19
GCS            12.73
Gender          0.58
Glucose         2.09
HCO3            2.05
HCT             2.75
HR             25.41
Height         26.41
ICUType         0.58
K               2.30
Lactate         1.55
MAP            17.59
MechVent        5.07
Mg              2.16
NIDiasABP      10.43
NIMAP          10.29
NISysABP       10.45
Na              2.21
PaCO2           3.23
PaO2            3.22
Platelets       2.12
RespRate        6.82
SaO2            0.51
SysABP         17.74
Temp            9.99
TroponinI       0.05
TroponinT       0.24
Urine          20.70
WBC             1.97
Weight         12.34
pH              3.29
dtype: float64

<h4>+65 missing rate</h4>

In [110]:
# RecordIDs of patients whose Time == 0.0 row reports Age >= 65.
more_than_or_equal_to_65_test_ids = test_X.loc[
    (test_X["Age"] >= 65) & (test_X["Time"] == 0.0), "RecordID"
]
# Every test row that belongs to one of those patients.
more_than_or_equal_to_65_test = test_X[
    test_X["RecordID"].isin(more_than_or_equal_to_65_test_ids)
]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
more_than_or_equal_to_65_test_missing = (
    more_than_or_equal_to_65_test.isna().sum() / total_pacientes_test * 100
).round(2)
more_than_or_equal_to_65_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            54.52
ALT            54.51
AST            54.50
Age             3.15
Albumin        54.63
BUN            51.37
Bilirubin      54.49
Cholesterol    55.17
Creatinine     51.35
DiasABP        24.76
FiO2           46.51
GCS            38.17
Gender         54.12
Glucose        51.66
HCO3           51.46
HCT            49.93
HR              5.27
Height          3.15
ICUType        54.12
K              51.22
Lactate        53.07
MAP            24.86
MechVent       47.03
Mg             51.37
NIDiasABP      32.52
NIMAP          32.82
NISysABP       32.49
Na             51.49
PaCO2          48.69
PaO2           48.70
Platelets      51.20
RespRate       41.27
SaO2           52.79
SysABP         24.76
Temp           33.77
TroponinI      55.13
TroponinT      54.51
Urine          16.69
WBC            51.59
Weight         25.72
pH             48.36
dtype: float64

<h4>+65 measurements</h4>

In [111]:
# Per-column count of non-null values in the Age >= 65 subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
more_than_or_equal_to_65_test_measurements = (
    more_than_or_equal_to_65_test.count() / total_pacientes_test * 100
).round(2)
more_than_or_equal_to_65_test_measurements

RecordID       55.27
level_1        55.27
Time           55.27
ALP             0.75
ALT             0.76
AST             0.77
Age            52.12
Albumin         0.64
BUN             3.91
Bilirubin       0.78
Cholesterol     0.10
Creatinine      3.93
DiasABP        30.51
FiO2            8.77
GCS            17.10
Gender          1.15
Glucose         3.61
HCO3            3.81
HCT             5.34
HR             50.00
Height         52.12
ICUType         1.15
K               4.06
Lactate         2.21
MAP            30.41
MechVent        8.25
Mg              3.91
NIDiasABP      22.76
NIMAP          22.45
NISysABP       22.78
Na              3.78
PaCO2           6.58
PaO2            6.57
Platelets       4.07
RespRate       14.00
SaO2            2.49
SysABP         30.51
Temp           21.51
TroponinI       0.14
TroponinT       0.76
Urine          38.58
WBC             3.69
Weight         29.55
pH              6.91
dtype: float64

<h4>-65 missing rate</h4>

In [112]:
# RecordIDs of patients whose Time == 0.0 row reports Age < 65.
less_than_65_test_ids = test_X.loc[
    (test_X["Age"] < 65) & (test_X["Time"] == 0.0), "RecordID"
]
# Every test row that belongs to one of those patients.
less_than_65_test = test_X[test_X["RecordID"].isin(less_than_65_test_ids)]
# Per-column NaN count of the subgroup as a percentage of total_pacientes_test (the row
# count of the whole test set, not of the subgroup), rounded to two decimals.
less_than_65_test_missing = (
    less_than_65_test.isna().sum() / total_pacientes_test * 100
).round(2)
less_than_65_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            43.87
ALT            43.85
AST            43.85
Age             2.93
Albumin        44.11
BUN            41.39
Bilirubin      43.84
Cholesterol    44.66
Creatinine     41.38
DiasABP        20.86
FiO2           37.55
GCS            30.04
Gender         43.80
Glucose        41.55
HCO3           41.46
HCT            40.39
HR              4.89
Height          2.93
ICUType        43.80
K              41.18
Lactate        42.78
MAP            21.02
MechVent       37.72
Mg             41.45
NIDiasABP      26.30
NIMAP          26.51
NISysABP       26.27
Na             41.39
PaCO2          39.50
PaO2           39.51
Platelets      41.31
RespRate       35.50
SaO2           43.17
SysABP         20.86
Temp           28.65
TroponinI      44.67
TroponinT      44.44
Urine          15.01
WBC            41.61
Weight         20.84
pH             39.27
dtype: float64

<h4>-65 measurements</h4>

In [113]:
# Per-column count of non-null values in the Age < 65 subgroup as a percentage of
# total_pacientes_test, rounded to two decimals.
less_than_65_test_measurements = (
    less_than_65_test.count() / total_pacientes_test * 100
).round(2)
less_than_65_test_measurements

RecordID       44.73
level_1        44.73
Time           44.73
ALP             0.86
ALT             0.88
AST             0.87
Age            41.80
Albumin         0.61
BUN             3.34
Bilirubin       0.88
Cholesterol     0.06
Creatinine      3.35
DiasABP        23.87
FiO2            7.17
GCS            14.69
Gender          0.93
Glucose         3.18
HCO3            3.26
HCT             4.34
HR             39.84
Height         41.80
ICUType         0.93
K               3.55
Lactate         1.95
MAP            23.70
MechVent        7.01
Mg              3.27
NIDiasABP      18.43
NIMAP          18.21
NISysABP       18.45
Na              3.34
PaCO2           5.22
PaO2            5.21
Platelets       3.41
RespRate        9.23
SaO2            1.56
SysABP         23.87
Temp           16.08
TroponinI       0.05
TroponinT       0.29
Urine          29.72
WBC             3.12
Weight         23.89
pH              5.46
dtype: float64

<h4>Filtering only rows that have both height and weight recorded</h4>

In [114]:
# Keep only the rows where Height and Weight are both present, i.e. neither NaN nor the
# -1 placeholder value.
filtered_test_X = test_X[
    test_X["Height"].notna()
    & test_X["Weight"].notna()
    & test_X["Height"].ne(-1)
    & test_X["Weight"].ne(-1)
]

<h4>Converting height to meters</h4>

In [115]:
# Copy of filtered_test_X with Height divided by 100 (centimetres to metres, per the
# section heading); filtered_test_X itself is left unchanged.
filtered_test_X_metros = filtered_test_X.assign(Height=filtered_test_X["Height"] / 100)
filtered_test_X_metros["Height"]

432       1.753
433       1.753
434       1.753
435       1.753
436       1.753
          ...  
574653    1.829
574654    1.829
574655    1.829
574704    1.829
575088    1.727
Name: Height, Length: 32141, dtype: float64

<h4>Calculate BMI and classification</h4>

In [116]:
# NOTE(review): bmi_data_test is an alias of filtered_test_X_metros, not a copy, so the
# two columns added below also appear on filtered_test_X_metros. Confirm this is intended.
bmi_data_test = filtered_test_X_metros

# BMI = Weight / Height^2 with Height already in metres, rounded to one decimal.
bmi_data_test["BMI"] = (
    filtered_test_X_metros["Weight"] / filtered_test_X_metros["Height"].pow(2)
).round(1)

# classify_BMI is defined in an earlier cell and is applied to each BMI value to obtain
# the class label.
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)

bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
432,132555,0,0.0,,,,74.0,,,,...,98.0,34.8,,,35.0,,66.1,7.39,21.5,Peso normal
433,132555,1,1.0,,,,74.0,,19.0,,...,112.0,35.3,,,130.0,9.0,66.1,7.41,21.5,Peso normal
434,132555,2,2.0,,,,74.0,,,,...,104.0,36.05,,,210.0,,66.1,,21.5,Peso normal
435,132555,3,3.0,,,,74.0,,,,...,114.0,36.2,,,120.0,,66.1,,21.5,Peso normal
436,132555,4,4.0,,,,74.0,,,,...,111.0,36.1,,,185.0,,66.1,7.29,21.5,Peso normal


<h4>Collapsing to one row per patient (first non-null value of each column)</h4>

In [117]:
# One row per RecordID. GroupBy.first() keeps the first non-null value of every column, so
# a row may combine values from different time steps of the same patient.
bmi_data_test = bmi_data_test.groupby("RecordID", as_index=False).first()
bmi_data_test

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132555,0,0.0,,,,74.0,,19.0,,...,98.0,34.8,,,35.0,9.0,66.1,7.39,21.5,Peso normal
1,132567,0,0.0,,,,71.0,,9.0,,...,111.5,35.6,,,15.0,9.0,56.0,7.44,22.6,Peso normal
2,132597,0,0.0,,,,66.0,,27.0,,...,,36.5,1.2,,,18.6,82.0,,43.6,Obesidade grau 3
3,132602,0,0.0,,,,80.0,,,,...,,37.3,,,150.0,,70.0,,21.5,Peso normal
4,132617,0,0.0,,,,77.0,,110.0,,...,,36.4,,,100.0,7.9,75.0,7.55,25.9,Sobrepeso
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1246,162941,0,0.0,54.0,16.0,28.0,74.0,3.9,18.0,1.4,...,139.5,36.9,,,40.0,11.6,84.0,7.44,25.1,Sobrepeso
1247,162987,0,0.0,,,,57.0,,,,...,92.0,36.4,,,380.0,,83.0,7.34,22.3,Peso normal
1248,162995,0,0.0,60.0,21.0,20.0,84.0,,93.0,0.4,...,121.0,37.1,0.6,,60.0,17.1,96.5,7.31,28.8,Sobrepeso
1249,162999,0,0.0,,,,70.0,,30.0,,...,0.0,36.3,,,,2.5,68.1,,20.4,Peso normal


In [118]:
# Number of patients in each BMI class (one row per patient at this point).
bmi_data_test.loc[:, "Classificacao"].value_counts()

Classificacao
Sobrepeso           453
Peso normal         385
Obesidade grau 1    195
Obesidade grau 2     95
Obesidade grau 3     79
Baixo peso           44
Name: count, dtype: int64

<h4>Classification undefined missing rate</h4>

In [119]:
# Despite its name, this Series lists the RecordIDs that DO have a BMI class;
# the negated isin() below selects every patient that is NOT in it.
classificacao_undefined_ids_test = bmi_data_test.loc[:, "RecordID"]
classificacao_undefined_test = test_X.loc[
    lambda frame: ~frame["RecordID"].isin(classificacao_undefined_ids_test)
]

# NaN count per column as a percentage of total_pacientes_test (defined in an
# earlier cell; presumably the total row count of the test set -- confirm).
classificacao_undefined_missing_test = (
    classificacao_undefined_test.isna().sum()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_undefined_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            47.08
ALT            47.06
AST            47.05
Age             3.89
Albumin        47.22
BUN            44.44
Bilirubin      47.05
Cholesterol    47.76
Creatinine     44.41
DiasABP        28.71
FiO2           41.11
GCS            32.04
Gender         46.86
Glucose        44.46
HCO3           44.47
HCT            43.64
HR              5.68
Height          3.89
ICUType        46.86
K              44.09
Lactate        46.17
MAP            28.94
MechVent       41.63
Mg             44.53
NIDiasABP      22.68
NIMAP          23.05
NISysABP       22.66
Na             44.31
PaCO2          44.30
PaO2           44.31
Platelets      44.64
RespRate       32.17
SaO2           47.31
SysABP         28.71
Temp           34.96
TroponinI      47.80
TroponinT      47.30
Urine          18.20
WBC            44.81
Weight         22.32
pH             44.22
dtype: float64

<h4>Classification undefined measurements</h4>

In [120]:
# Non-null entries per column for patients without a BMI class, expressed as
# a percentage of total_pacientes_test.
classificacao_undefined_measurements_test = (
    classificacao_undefined_test.count()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_undefined_measurements_test

RecordID       47.85
level_1        47.85
Time           47.85
ALP             0.77
ALT             0.80
AST             0.80
Age            43.96
Albumin         0.63
BUN             3.42
Bilirubin       0.80
Cholesterol     0.09
Creatinine      3.44
DiasABP        19.14
FiO2            6.74
GCS            15.81
Gender          1.00
Glucose         3.40
HCO3            3.38
HCT             4.22
HR             42.17
Height         43.96
ICUType         1.00
K               3.76
Lactate         1.68
MAP            18.91
MechVent        6.22
Mg              3.32
NIDiasABP      25.17
NIMAP          24.81
NISysABP       25.20
Na              3.55
PaCO2           3.55
PaO2            3.55
Platelets       3.21
RespRate       15.68
SaO2            0.54
SysABP         19.15
Temp           12.89
TroponinI       0.06
TroponinT       0.56
Urine          29.65
WBC             3.04
Weight         25.53
pH              3.63
dtype: float64

<h4>Classification low weight missing rate</h4>

In [121]:
# RecordIDs of the patients labelled "Baixo peso".
classificacao_baixo_peso_ids_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Baixo peso"), "RecordID"
]

# All test rows that belong to those patients.
classificacao_baixo_peso_test = test_X.loc[
    test_X["RecordID"].isin(classificacao_baixo_peso_ids_test)
]

# NaN count per column as a percentage of total_pacientes_test.
classificacao_baixo_peso_missing_test = (
    classificacao_baixo_peso_test.isna().sum()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_baixo_peso_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            1.80
ALT            1.80
AST            1.80
Age            0.07
Albumin        1.81
BUN            1.69
Bilirubin      1.80
Cholesterol    1.83
Creatinine     1.69
DiasABP        0.66
FiO2           1.54
GCS            1.25
Gender         1.80
Glucose        1.69
HCO3           1.69
HCT            1.65
HR             0.14
Height         0.07
ICUType        1.80
K              1.68
Lactate        1.74
MAP            0.65
MechVent       1.51
Mg             1.68
NIDiasABP      1.19
NIMAP          1.21
NISysABP       1.19
Na             1.69
PaCO2          1.58
PaO2           1.57
Platelets      1.68
RespRate       1.41
SaO2           1.73
SysABP         0.66
Temp           1.08
TroponinI      1.83
TroponinT      1.82
Urine          0.50
WBC            1.70
Weight         0.91
pH             1.56
dtype: float64

<h4>Classification low weight measurements</h4>

In [122]:
# Non-null entries per column for the "Baixo peso" patients, expressed as a
# percentage of total_pacientes_test.
classificacao_baixo_peso_measurements_test = (
    classificacao_baixo_peso_test.count()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_baixo_peso_measurements_test

RecordID       1.83
level_1        1.83
Time           1.83
ALP            0.03
ALT            0.03
AST            0.03
Age            1.76
Albumin        0.03
BUN            0.14
Bilirubin      0.03
Cholesterol    0.00
Creatinine     0.14
DiasABP        1.18
FiO2           0.30
GCS            0.58
Gender         0.04
Glucose        0.14
HCO3           0.14
HCT            0.19
HR             1.70
Height         1.76
ICUType        0.04
K              0.15
Lactate        0.10
MAP            1.19
MechVent       0.33
Mg             0.15
NIDiasABP      0.64
NIMAP          0.63
NISysABP       0.65
Na             0.14
PaCO2          0.26
PaO2           0.26
Platelets      0.16
RespRate       0.43
SaO2           0.11
SysABP         1.18
Temp           0.75
TroponinI      0.01
TroponinT      0.01
Urine          1.33
WBC            0.14
Weight         0.92
pH             0.27
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [123]:
# RecordIDs of the patients labelled "Peso normal".
classificacao_normal_peso_ids_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Peso normal"), "RecordID"
]

# All test rows that belong to those patients.
classificacao_normal_peso_test = test_X.loc[
    test_X["RecordID"].isin(classificacao_normal_peso_ids_test)
]

# NaN count per column as a percentage of total_pacientes_test.
classificacao_normal_peso_missing_test = (
    classificacao_normal_peso_test.isna().sum()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_normal_peso_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            15.80
ALT            15.80
AST            15.80
Age             0.62
Albumin        15.87
BUN            14.87
Bilirubin      15.79
Cholesterol    16.03
Creatinine     14.87
DiasABP         5.61
FiO2           13.40
GCS            10.88
Gender         15.71
Glucose        14.98
HCO3           14.91
HCT            14.36
HR              1.26
Height          0.62
ICUType        15.71
K              14.84
Lactate        15.27
MAP             5.62
MechVent       13.38
Mg             14.85
NIDiasABP      10.58
NIMAP          10.63
NISysABP       10.57
Na             14.93
PaCO2          13.73
PaO2           13.73
Platelets      14.73
RespRate       13.41
SaO2           15.17
SysABP          5.61
Temp            8.62
TroponinI      16.01
TroponinT      15.91
Urine           4.36
WBC            14.88
Weight          7.81
pH             13.57
dtype: float64

<h4>Classification normal weight measurements</h4>

In [124]:
# Non-null entries per column for the "Peso normal" patients, expressed as a
# percentage of total_pacientes_test.
classificacao_normal_peso_measurements_test = (
    classificacao_normal_peso_test.count()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_normal_peso_measurements_test

RecordID       16.05
level_1        16.05
Time           16.05
ALP             0.25
ALT             0.25
AST             0.25
Age            15.43
Albumin         0.18
BUN             1.18
Bilirubin       0.26
Cholesterol     0.02
Creatinine      1.18
DiasABP        10.44
FiO2            2.65
GCS             5.17
Gender          0.33
Glucose         1.07
HCO3            1.14
HCT             1.69
HR             14.79
Height         15.43
ICUType         0.33
K               1.21
Lactate         0.78
MAP            10.43
MechVent        2.67
Mg              1.20
NIDiasABP       5.47
NIMAP           5.42
NISysABP        5.48
Na              1.12
PaCO2           2.32
PaO2            2.32
Platelets       1.32
RespRate        2.64
SaO2            0.88
SysABP         10.44
Temp            7.43
TroponinI       0.04
TroponinT       0.14
Urine          11.69
WBC             1.17
Weight          8.24
pH              2.47
dtype: float64

<h4>Classification overweight missing rate</h4>

In [125]:
# RecordIDs of the patients labelled "Sobrepeso".
classificacao_sobrepeso_ids_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Sobrepeso"), "RecordID"
]

# All test rows that belong to those patients.
classificacao_sobrepeso_test = test_X.loc[
    test_X["RecordID"].isin(classificacao_sobrepeso_ids_test)
]

# NaN count per column as a percentage of total_pacientes_test.
classificacao_sobrepeso_missing_test = (
    classificacao_sobrepeso_test.isna().sum()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_sobrepeso_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            18.57
ALT            18.57
AST            18.57
Age             0.82
Albumin        18.66
BUN            17.52
Bilirubin      18.57
Cholesterol    18.85
Creatinine     17.52
DiasABP         5.86
FiO2           15.57
GCS            13.29
Gender         18.49
Glucose        17.70
HCO3           17.58
HCT            16.86
HR              1.69
Height          0.82
ICUType        18.49
K              17.55
Lactate        18.03
MAP             5.87
MechVent       15.68
Mg             17.50
NIDiasABP      13.28
NIMAP          13.35
NISysABP       13.28
Na             17.64
PaCO2          15.81
PaO2           15.82
Platelets      17.33
RespRate       16.15
SaO2           17.43
SysABP          5.86
Temp            9.85
TroponinI      18.84
TroponinT      18.71
Urine           4.83
WBC            17.52
Weight          8.60
pH             15.61
dtype: float64

<h4>Classification overweight measurements</h4>

In [126]:
# Non-null entries per column for the "Sobrepeso" patients, expressed as a
# percentage of total_pacientes_test.
classificacao_sobrepeso_measurements_test = (
    classificacao_sobrepeso_test.count()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_sobrepeso_measurements_test

RecordID       18.88
level_1        18.88
Time           18.88
ALP             0.31
ALT             0.32
AST             0.32
Age            18.07
Albumin         0.22
BUN             1.36
Bilirubin       0.32
Cholesterol     0.03
Creatinine      1.37
DiasABP        13.02
FiO2            3.31
GCS             5.59
Gender          0.39
Glucose         1.18
HCO3            1.31
HCT             2.02
HR             17.19
Height         18.07
ICUType         0.39
K               1.33
Lactate         0.85
MAP            13.01
MechVent        3.21
Mg              1.38
NIDiasABP       5.60
NIMAP           5.53
NISysABP        5.61
Na              1.24
PaCO2           3.08
PaO2            3.07
Platelets       1.56
RespRate        2.73
SaO2            1.45
SysABP         13.02
Temp            9.03
TroponinI       0.05
TroponinT       0.18
Urine          14.05
WBC             1.37
Weight         10.29
pH              3.27
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [127]:
# RecordIDs of the patients labelled "Obesidade grau 1".
classificacao_obesidade_1_ids_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Obesidade grau 1"), "RecordID"
]

# All test rows that belong to those patients.
classificacao_obesidade_1_test = test_X.loc[
    test_X["RecordID"].isin(classificacao_obesidade_1_ids_test)
]

# NaN count per column as a percentage of total_pacientes_test.
classificacao_obesidade_1_missing_test = (
    classificacao_obesidade_1_test.isna().sum()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_obesidade_1_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            8.00
ALT            8.00
AST            8.00
Age            0.32
Albumin        8.03
BUN            7.52
Bilirubin      8.00
Cholesterol    8.12
Creatinine     7.52
DiasABP        2.23
FiO2           6.57
GCS            5.56
Gender         7.96
Glucose        7.60
HCO3           7.55
HCT            7.28
HR             0.71
Height         0.32
ICUType        7.96
K              7.51
Lactate        7.73
MAP            2.24
MechVent       6.64
Mg             7.52
NIDiasABP      6.04
NIMAP          6.05
NISysABP       6.04
Na             7.56
PaCO2          6.71
PaO2           6.72
Platelets      7.46
RespRate       7.48
SaO2           7.53
SysABP         2.23
Temp           4.02
TroponinI      8.10
TroponinT      8.05
Urine          1.83
WBC            7.56
Weight         3.69
pH             6.64
dtype: float64

<h4>Grade 1 obesity measurements</h4>

In [128]:
# Non-null entries per column for the "Obesidade grau 1" patients, expressed
# as a percentage of total_pacientes_test.
classificacao_obesidade_1_measurements_test = (
    classificacao_obesidade_1_test.count()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_obesidade_1_measurements_test

RecordID       8.13
level_1        8.13
Time           8.13
ALP            0.13
ALT            0.13
AST            0.13
Age            7.80
Albumin        0.09
BUN            0.61
Bilirubin      0.13
Cholesterol    0.01
Creatinine     0.61
DiasABP        5.90
FiO2           1.56
GCS            2.57
Gender         0.17
Glucose        0.52
HCO3           0.58
HCT            0.85
HR             7.42
Height         7.80
ICUType        0.17
K              0.62
Lactate        0.40
MAP            5.89
MechVent       1.49
Mg             0.61
NIDiasABP      2.09
NIMAP          2.07
NISysABP       2.09
Na             0.56
PaCO2          1.41
PaO2           1.41
Platelets      0.66
RespRate       0.65
SaO2           0.59
SysABP         5.90
Temp           4.11
TroponinI      0.03
TroponinT      0.08
Urine          6.30
WBC            0.57
Weight         4.44
pH             1.49
dtype: float64

<h4>Grade 2 obesity missing rate</h4>

In [129]:
# RecordIDs of the patients labelled "Obesidade grau 2".
classificacao_obesidade_2_ids_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Obesidade grau 2"), "RecordID"
]

# All test rows that belong to those patients.
classificacao_obesidade_2_test = test_X.loc[
    test_X["RecordID"].isin(classificacao_obesidade_2_ids_test)
]

# NaN count per column as a percentage of total_pacientes_test.
classificacao_obesidade_2_missing_test = (
    classificacao_obesidade_2_test.isna().sum()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_obesidade_2_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.89
ALT            3.89
AST            3.89
Age            0.21
Albumin        3.91
BUN            3.65
Bilirubin      3.89
Cholesterol    3.96
Creatinine     3.65
DiasABP        1.28
FiO2           3.24
GCS            2.86
Gender         3.88
Glucose        3.68
HCO3           3.65
HCT            3.54
HR             0.41
Height         0.21
ICUType        3.88
K              3.66
Lactate        3.75
MAP            1.29
MechVent       3.26
Mg             3.66
NIDiasABP      2.86
NIMAP          2.86
NISysABP       2.85
Na             3.66
PaCO2          3.28
PaO2           3.28
Platelets      3.61
RespRate       3.35
SaO2           3.67
SysABP         1.28
Temp           2.01
TroponinI      3.96
TroponinT      3.92
Urine          1.12
WBC            3.65
Weight         1.88
pH             3.26
dtype: float64

<h4>Grade 2 obesity measurements</h4>

In [130]:
# Non-null entries per column for the "Obesidade grau 2" patients, expressed
# as a percentage of total_pacientes_test.
classificacao_obesidade_2_measurements_test = (
    classificacao_obesidade_2_test.count()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_obesidade_2_measurements_test

RecordID       3.96
level_1        3.96
Time           3.96
ALP            0.07
ALT            0.07
AST            0.07
Age            3.75
Albumin        0.05
BUN            0.31
Bilirubin      0.07
Cholesterol    0.00
Creatinine     0.31
DiasABP        2.68
FiO2           0.72
GCS            1.10
Gender         0.08
Glucose        0.28
HCO3           0.31
HCT            0.42
HR             3.55
Height         3.75
ICUType        0.08
K              0.30
Lactate        0.21
MAP            2.67
MechVent       0.70
Mg             0.30
NIDiasABP      1.10
NIMAP          1.10
NISysABP       1.11
Na             0.30
PaCO2          0.68
PaO2           0.68
Platelets      0.35
RespRate       0.61
SaO2           0.29
SysABP         2.68
Temp           1.95
TroponinI      0.00
TroponinT      0.04
Urine          2.84
WBC            0.31
Weight         2.08
pH             0.70
dtype: float64

<h4>Grade 3 obesity missing rate</h4>

In [131]:
# RecordIDs of the patients labelled "Obesidade grau 3".
classificacao_obesidade_3_ids_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Obesidade grau 3"), "RecordID"
]

# All test rows that belong to those patients.
classificacao_obesidade_3_test = test_X.loc[
    test_X["RecordID"].isin(classificacao_obesidade_3_ids_test)
]

# NaN count per column as a percentage of total_pacientes_test.
classificacao_obesidade_3_missing_test = (
    classificacao_obesidade_3_test.isna().sum()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_obesidade_3_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.25
ALT            3.25
AST            3.25
Age            0.14
Albumin        3.25
BUN            3.06
Bilirubin      3.25
Cholesterol    3.29
Creatinine     3.06
DiasABP        1.28
FiO2           2.64
GCS            2.32
Gender         3.22
Glucose        3.09
HCO3           3.07
HCT            3.00
HR             0.29
Height         0.14
ICUType        3.22
K              3.07
Lactate        3.16
MAP            1.28
MechVent       2.66
Mg             3.08
NIDiasABP      2.18
NIMAP          2.18
NISysABP       2.18
Na             3.08
PaCO2          2.79
PaO2           2.78
Platelets      3.07
RespRate       2.80
SaO2           3.11
SysABP         1.28
Temp           1.87
TroponinI      3.28
TroponinT      3.25
Urine          0.86
WBC            3.09
Weight         1.35
pH             2.77
dtype: float64

<h4>Grade 3 obesity measurements</h4>

In [132]:
# Non-null entries per column for the "Obesidade grau 3" patients, expressed
# as a percentage of total_pacientes_test.
classificacao_obesidade_3_measurements_test = (
    classificacao_obesidade_3_test.count()
    .div(total_pacientes_test)
    .mul(100)
    .round(2)
)
classificacao_obesidade_3_measurements_test

RecordID       3.29
level_1        3.29
Time           3.29
ALP            0.04
ALT            0.05
AST            0.05
Age            3.15
Albumin        0.04
BUN            0.23
Bilirubin      0.05
Cholesterol    0.01
Creatinine     0.23
DiasABP        2.01
FiO2           0.65
GCS            0.97
Gender         0.07
Glucose        0.20
HCO3           0.22
HCT            0.30
HR             3.01
Height         3.15
ICUType        0.07
K              0.23
Lactate        0.14
MAP            2.01
MechVent       0.63
Mg             0.22
NIDiasABP      1.11
NIMAP          1.11
NISysABP       1.11
Na             0.21
PaCO2          0.51
PaO2           0.51
Platelets      0.23
RespRate       0.49
SaO2           0.19
SysABP         2.01
Temp           1.42
TroponinI      0.01
TroponinT      0.04
Urine          2.43
WBC            0.21
Weight         1.94
pH             0.53
dtype: float64

<h4>Building of the missing rate table</h4>

In [133]:
# Empty frame whose columns are the variable names; transposing it gives an
# empty table indexed by variable, onto which each group's Series is aligned.
df_missing_test = pd.DataFrame(columns=df_columns)

df_missing_transpose_test = (
    df_missing_test.T
    # One column per demographic group, added in display order.
    .assign(**{
        "Female": female_gender_missing_rate_test,
        "Male": male_gender_missing_rate_test,
        "Undefined gender": undefined_gender_missing_rate_test,
        "ICUType 1": ICUType_1_test_missing,
        "ICUType 2": ICUType_2_test_missing,
        "ICUType 3": ICUType_3_test_missing,
        "ICUType 4": ICUType_4_test_missing,
        "Age 65+": more_than_or_equal_to_65_test_missing,
        "Age 65-": less_than_65_test_missing,
        "Low Weight": classificacao_baixo_peso_missing_test,
        "Normal Weight": classificacao_normal_peso_missing_test,
        "Overweight": classificacao_sobrepeso_missing_test,
        "Obesity Grade 1": classificacao_obesidade_1_missing_test,
        "Obesity Grade 2": classificacao_obesidade_2_missing_test,
        "Obesity Grade 3": classificacao_obesidade_3_missing_test,
        "Undefined classification": classificacao_undefined_missing_test,
    })
    # Remove the identifier / bookkeeping rows and the two variables that are
    # themselves used as grouping keys.
    .drop(index=["RecordID", "level_1", "Time", "Age", "Gender"])
)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>original Missing rate per Variable by demographics - Test</h2>"))
df_missing_transpose_test

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,43.75,54.52,0.12,15.12,21.5,34.42,27.35,54.52,43.87,1.8,15.8,18.57,8.0,3.89,3.25,47.08
ALT,43.74,54.5,0.12,15.11,21.49,34.41,27.35,54.51,43.85,1.8,15.8,18.57,8.0,3.89,3.25,47.06
AST,43.74,54.5,0.12,15.11,21.49,34.41,27.35,54.5,43.85,1.8,15.8,18.57,8.0,3.89,3.25,47.05
Albumin,43.91,54.72,0.12,15.17,21.56,34.6,27.42,54.63,44.11,1.81,15.87,18.66,8.03,3.91,3.25,47.22
BUN,41.27,51.37,0.11,14.28,20.27,32.47,25.73,51.37,41.39,1.69,14.87,17.52,7.52,3.65,3.06,44.44
Bilirubin,43.72,54.49,0.12,15.12,21.5,34.37,27.35,54.49,43.84,1.8,15.79,18.57,8.0,3.89,3.25,47.05
Cholesterol,44.4,55.31,0.13,15.25,21.67,35.15,27.76,55.17,44.66,1.83,16.03,18.85,8.12,3.96,3.29,47.76
Creatinine,41.25,51.36,0.11,14.26,20.27,32.47,25.73,51.35,41.38,1.69,14.87,17.52,7.52,3.65,3.06,44.41
DiasABP,21.75,23.79,0.08,8.67,4.41,22.48,10.06,24.76,20.86,0.66,5.61,5.86,2.23,1.28,1.28,28.71
FiO2,37.73,46.22,0.11,13.68,17.98,29.78,22.62,46.51,37.55,1.54,13.4,15.57,6.57,3.24,2.64,41.11


<h4>Building of the measurements table</h4>

In [134]:
# Empty frame whose columns are the variable names; transposing it gives an
# empty table indexed by variable, onto which each group's Series is aligned.
df_measurements_test = pd.DataFrame(columns=df_columns)

df_measurements_transpose_test = (
    df_measurements_test.T
    # One column per demographic group, added in display order.
    .assign(**{
        "Female": female_gender_measurements_test,
        "Male": male_gender_measurements_test,
        "Undefined gender": undefined_gender_measurements_test,
        "ICUType 1": ICUType_1_measurements_test,
        "ICUType 2": ICUType_2_measurements_test,
        "ICUType 3": ICUType_3_measurements_test,
        "ICUType 4": ICUType_4_measurements_test,
        "Age 65+": more_than_or_equal_to_65_test_measurements,
        "Age 65-": less_than_65_test_measurements,
        "Low Weight": classificacao_baixo_peso_measurements_test,
        "Normal Weight": classificacao_normal_peso_measurements_test,
        "Overweight": classificacao_sobrepeso_measurements_test,
        "Obesity Grade 1": classificacao_obesidade_1_measurements_test,
        "Obesity Grade 2": classificacao_obesidade_2_measurements_test,
        "Obesity Grade 3": classificacao_obesidade_3_measurements_test,
        "Undefined classification": classificacao_undefined_measurements_test,
    })
    # Remove the identifier / bookkeeping rows and the two variables that are
    # themselves used as grouping keys.
    .drop(index=["RecordID", "level_1", "Time", "Age", "Gender"])
)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - test Set</h2>"))
df_measurements_transpose_test

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,0.73,0.88,0.0,0.22,0.18,0.76,0.45,0.75,0.86,0.03,0.25,0.31,0.13,0.07,0.04,0.77
ALT,0.74,0.9,0.0,0.23,0.18,0.77,0.46,0.76,0.88,0.03,0.25,0.32,0.13,0.07,0.05,0.8
AST,0.74,0.9,0.0,0.23,0.18,0.78,0.46,0.77,0.87,0.03,0.25,0.32,0.13,0.07,0.05,0.8
Albumin,0.57,0.68,0.0,0.17,0.11,0.58,0.39,0.64,0.61,0.03,0.18,0.22,0.09,0.05,0.04,0.63
BUN,3.21,4.02,0.01,1.06,1.4,2.71,2.08,3.91,3.34,0.14,1.18,1.36,0.61,0.31,0.23,3.42
Bilirubin,0.76,0.9,0.0,0.22,0.18,0.81,0.45,0.78,0.88,0.03,0.26,0.32,0.13,0.07,0.05,0.8
Cholesterol,0.07,0.09,0.0,0.09,0.0,0.03,0.04,0.1,0.06,0.0,0.02,0.03,0.01,0.0,0.01,0.09
Creatinine,3.22,4.04,0.01,1.08,1.4,2.72,2.08,3.93,3.35,0.14,1.18,1.37,0.61,0.31,0.23,3.44
DiasABP,22.73,31.61,0.04,6.67,17.27,12.7,17.74,30.51,23.87,1.18,10.44,13.02,5.9,2.68,2.01,19.14
FiO2,6.75,9.17,0.02,1.66,3.69,5.41,5.19,8.77,7.17,0.3,2.65,3.31,1.56,0.72,0.65,6.74
