# Imports

In [3]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Loading dataset

In [4]:
from pypotsModify.benchpots.datasets import preprocess_physionet2012
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-29 11:55:50 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-29 11:55:50 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-29 11:55:50 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-29 11:55:50 [INFO]: Loaded successfully!


# Training data

<h4>Loading training dataset</h4>

In [143]:
train_X = physionet2012_dataset['train_X']

<h4>Sum of 48 hours of all patients</h4>

In [56]:
total_pacientes = train_X.groupby("RecordID").first().reset_index()
total_pacientes = total_pacientes["RecordID"].count()
total_pacientes = total_pacientes*48
total_pacientes

368208

<h4>Female gender missing rate</h4>

In [57]:
female_gender_ids = train_X[train_X["Gender"] == 0.0]
female_gender_ids = female_gender_ids["RecordID"]
female_gender_missing_rate = train_X[train_X["RecordID"].isin(female_gender_ids)]
female_gender_missing_rate = round((female_gender_missing_rate.isna().sum()/total_pacientes) * 100, 2)
female_gender_missing_rate

RecordID        0.00
level_1         0.00
Time            0.00
ALP            43.01
ALT            42.99
AST            42.99
Age             2.62
Albumin        43.17
BUN            40.58
Bilirubin      42.98
Cholesterol    43.66
Creatinine     40.57
DiasABP        21.14
FiO2           36.97
GCS            29.56
Gender         42.82
Glucose        40.72
HCO3           40.64
HCT            39.71
HR              4.24
Height          2.62
ICUType        42.82
K              40.41
Lactate        41.98
MAP            21.28
MechVent       37.16
Mg             40.64
NIDiasABP      24.46
NIMAP          24.76
NISysABP       24.44
Na             40.60
PaCO2          39.06
PaO2           39.07
Platelets      40.61
RespRate       32.25
SaO2           42.15
SysABP         21.14
Temp           28.24
TroponinI      43.64
TroponinT      43.27
Urine          13.33
WBC            40.83
Weight         20.25
pH             38.90
dtype: float64

<h4>Female gender measurements</h4>

In [58]:
female_gender_measurements_training = train_X[train_X["RecordID"].isin(female_gender_ids)]
female_gender_measurements_training = round((female_gender_measurements_training.count()/total_pacientes)*100, 2)
female_gender_measurements_training

RecordID       43.74
level_1        43.74
Time           43.74
ALP             0.72
ALT             0.74
AST             0.74
Age            41.12
Albumin         0.56
BUN             3.15
Bilirubin       0.76
Cholesterol     0.07
Creatinine      3.16
DiasABP        22.60
FiO2            6.77
GCS            14.17
Gender          0.91
Glucose         3.02
HCO3            3.10
HCT             4.03
HR             39.50
Height         41.12
ICUType         0.91
K               3.33
Lactate         1.75
MAP            22.46
MechVent        6.58
Mg              3.09
NIDiasABP      19.27
NIMAP          18.98
NISysABP       19.29
Na              3.14
PaCO2           4.67
PaO2            4.66
Platelets       3.13
RespRate       11.48
SaO2            1.59
SysABP         22.60
Temp           15.49
TroponinI       0.09
TroponinT       0.47
Urine          30.41
WBC             2.91
Weight         23.48
pH              4.84
dtype: float64

<h4>Male gender missing rate</h4>

In [59]:
male_gender_ids = train_X[train_X["Gender"] == 1.0]
male_gender_ids = male_gender_ids["RecordID"]
male_gender_missing_rate = train_X[train_X["RecordID"].isin(male_gender_ids)]
male_gender_missing_rate = round((male_gender_missing_rate.isna().sum()/total_pacientes)*100, 2)
male_gender_missing_rate

RecordID        0.00
level_1         0.00
Time            0.00
ALP            55.25
ALT            55.22
AST            55.22
Age             3.31
Albumin        55.48
BUN            52.09
Bilirubin      55.21
Cholesterol    56.08
Creatinine     52.07
DiasABP        24.11
FiO2           47.24
GCS            38.35
Gender         55.02
Glucose        52.36
HCO3           52.19
HCT            50.68
HR              5.64
Height          3.31
ICUType        55.02
K              51.90
Lactate        53.77
MAP            24.31
MechVent       47.66
Mg             52.16
NIDiasABP      33.87
NIMAP          34.15
NISysABP       33.85
Na             52.20
PaCO2          49.26
PaO2           49.27
Platelets      51.90
RespRate       43.65
SaO2           53.75
SysABP         24.11
Temp           34.59
TroponinI      56.08
TroponinT      55.55
Urine          17.56
WBC            52.32
Weight         27.01
pH             48.92
dtype: float64

<h4>Male gender measurements</h4>

In [60]:
male_gender_measurements_training = train_X[train_X["RecordID"].isin(male_gender_ids)]
male_gender_measurements_training  = round((male_gender_measurements_training.count()/total_pacientes)*100, 2)
male_gender_measurements_training 

RecordID       56.19
level_1        56.19
Time           56.19
ALP             0.94
ALT             0.96
AST             0.96
Age            52.88
Albumin         0.70
BUN             4.10
Bilirubin       0.97
Cholesterol     0.10
Creatinine      4.12
DiasABP        32.07
FiO2            8.94
GCS            17.84
Gender          1.17
Glucose         3.82
HCO3            3.99
HCT             5.51
HR             50.55
Height         52.88
ICUType         1.17
K               4.29
Lactate         2.42
MAP            31.88
MechVent        8.53
Mg              4.02
NIDiasABP      22.31
NIMAP          22.04
NISysABP       22.33
Na              3.98
PaCO2           6.93
PaO2            6.91
Platelets       4.29
RespRate       12.54
SaO2            2.44
SysABP         32.08
Temp           21.59
TroponinI       0.11
TroponinT       0.63
Urine          38.63
WBC             3.86
Weight         29.18
pH              7.26
dtype: float64

<h4>Undefined gender missing rate</h4> 

In [61]:
undefined_gender_ids = train_X[train_X["Gender"] == -1.0]
undefined_gender_ids = undefined_gender_ids["RecordID"]
undefined_gender_missing_rate = train_X[train_X["RecordID"].isin(undefined_gender_ids)]
undefined_gender_missing_rate = round((undefined_gender_missing_rate.isna().sum()/total_pacientes)*100, 2)
undefined_gender_missing_rate

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.08
ALT            0.08
AST            0.08
Age            0.03
Albumin        0.08
BUN            0.07
Bilirubin      0.08
Cholesterol    0.08
Creatinine     0.07
DiasABP        0.04
FiO2           0.07
GCS            0.06
Gender         0.08
Glucose        0.07
HCO3           0.07
HCT            0.07
HR             0.03
Height         0.03
ICUType        0.08
K              0.07
Lactate        0.08
MAP            0.04
MechVent       0.07
Mg             0.07
NIDiasABP      0.05
NIMAP          0.05
NISysABP       0.05
Na             0.07
PaCO2          0.08
PaO2           0.08
Platelets      0.07
RespRate       0.07
SaO2           0.08
SysABP         0.04
Temp           0.06
TroponinI      0.08
TroponinT      0.08
Urine          0.04
WBC            0.07
Weight         0.06
pH             0.07
dtype: float64

<h4>Undefined gender measurements</h4>

In [62]:
undefined_gender_measurements_training = train_X[train_X["RecordID"].isin(undefined_gender_ids)]
undefined_gender_measurements_training = round((undefined_gender_measurements_training.count()/total_pacientes)*100, 2)
undefined_gender_measurements_training

RecordID       0.08
level_1        0.08
Time           0.08
ALP            0.00
ALT            0.00
AST            0.00
Age            0.05
Albumin        0.00
BUN            0.00
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.00
DiasABP        0.03
FiO2           0.00
GCS            0.02
Gender         0.00
Glucose        0.00
HCO3           0.00
HCT            0.00
HR             0.05
Height         0.05
ICUType        0.00
K              0.00
Lactate        0.00
MAP            0.03
MechVent       0.00
Mg             0.00
NIDiasABP      0.02
NIMAP          0.02
NISysABP       0.02
Na             0.00
PaCO2          0.00
PaO2           0.00
Platelets      0.00
RespRate       0.01
SaO2           0.00
SysABP         0.03
Temp           0.02
TroponinI      0.00
TroponinT      0.00
Urine          0.04
WBC            0.00
Weight         0.02
pH             0.00
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [63]:
ICUType_1_training_ids = train_X[train_X['ICUType'] == 1.0]
ICUType_1_training_ids = ICUType_1_training_ids[ICUType_1_training_ids["Time"] == 0.0]
ICUType_1_training_ids = ICUType_1_training_ids["RecordID"]
ICUType_1_training = train_X[train_X["RecordID"].isin(ICUType_1_training_ids)]
ICUType_1_training_missing = round((ICUType_1_training.isna().sum()/total_pacientes)*100, 2)
ICUType_1_training_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            14.51
ALT            14.49
AST            14.49
Age             1.17
Albumin        14.57
BUN            13.68
Bilirubin      14.50
Cholesterol    14.63
Creatinine     13.66
DiasABP         8.45
FiO2           13.12
GCS            10.93
Gender         14.42
Glucose        13.73
HCO3           13.72
HCT            13.44
HR              1.81
Height          1.17
ICUType        14.42
K              13.50
Lactate        14.40
MAP             8.46
MechVent       13.22
Mg             13.67
NIDiasABP       7.78
NIMAP           7.81
NISysABP        7.78
Na             13.73
PaCO2          13.50
PaO2           13.50
Platelets      13.67
RespRate        9.56
SaO2           13.99
SysABP          8.45
Temp           10.32
TroponinI      14.69
TroponinT      14.41
Urine           6.34
WBC            13.77
Weight          8.14
pH             13.48
dtype: float64

<h4>ICUType 1 mesurements</h4>

In [64]:
ICUType_1_measurements_training = round((ICUType_1_training.count()/total_pacientes)*100, 2)
ICUType_1_measurements_training

RecordID       14.73
level_1        14.73
Time           14.73
ALP             0.22
ALT             0.24
AST             0.24
Age            13.56
Albumin         0.16
BUN             1.05
Bilirubin       0.23
Cholesterol     0.10
Creatinine      1.07
DiasABP         6.28
FiO2            1.61
GCS             3.80
Gender          0.31
Glucose         1.01
HCO3            1.01
HCT             1.29
HR             12.92
Height         13.56
ICUType         0.31
K               1.23
Lactate         0.33
MAP             6.27
MechVent        1.51
Mg              1.06
NIDiasABP       6.95
NIMAP           6.92
NISysABP        6.95
Na              1.00
PaCO2           1.23
PaO2            1.23
Platelets       1.06
RespRate        5.17
SaO2            0.74
SysABP          6.28
Temp            4.41
TroponinI       0.04
TroponinT       0.33
Urine           8.39
WBC             0.96
Weight          6.59
pH              1.25
dtype: float64

<h4>ICUType missing rate</h4>

In [65]:
ICUType_2_training_ids = train_X[train_X['ICUType'] == 2.0]
ICUType_2_training_ids = ICUType_2_training_ids[ICUType_2_training_ids["Time"] == 0.0]
ICUType_2_training_ids = ICUType_2_training_ids["RecordID"]
ICUType_2_training = train_X[train_X["RecordID"].isin(ICUType_2_training_ids)]
ICUType_2_training_missing = round((ICUType_2_training.isna().sum()/total_pacientes)*100,2)
ICUType_2_training_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            20.85
ALT            20.84
AST            20.84
Age             0.77
Albumin        20.90
BUN            19.64
Bilirubin      20.84
Cholesterol    21.00
Creatinine     19.64
DiasABP         4.45
FiO2           17.46
GCS            15.46
Gender         20.56
Glucose        20.01
HCO3           19.73
HCT            18.71
HR              1.80
Height          0.77
ICUType        20.56
K              19.88
Lactate        20.24
MAP             4.44
MechVent       17.48
Mg             19.68
NIDiasABP      16.51
NIMAP          16.55
NISysABP       16.50
Na             19.94
PaCO2          16.71
PaO2           16.72
Platelets      19.30
RespRate       20.17
SaO2           18.72
SysABP          4.45
Temp            8.74
TroponinI      20.97
TroponinT      20.94
Urine           3.42
WBC            19.59
Weight         10.11
pH             16.32
dtype: float64

<h4>ICUType 2 measurements</h4>

In [66]:
ICUType_2_measurements_training = round((ICUType_2_training.count()/total_pacientes)*100, 2)
ICUType_2_measurements_training

RecordID       21.00
level_1        21.00
Time           21.00
ALP             0.15
ALT             0.16
AST             0.16
Age            20.23
Albumin         0.10
BUN             1.36
Bilirubin       0.16
Cholesterol     0.01
Creatinine      1.37
DiasABP        16.56
FiO2            3.54
GCS             5.54
Gender          0.44
Glucose         0.99
HCO3            1.27
HCT             2.29
HR             19.20
Height         20.23
ICUType         0.44
K               1.12
Lactate         0.76
MAP            16.56
MechVent        3.52
Mg              1.32
NIDiasABP       4.49
NIMAP           4.45
NISysABP        4.50
Na              1.06
PaCO2           4.29
PaO2            4.28
Platelets       1.70
RespRate        0.83
SaO2            2.28
SysABP         16.56
Temp           12.26
TroponinI       0.03
TroponinT       0.06
Urine          17.58
WBC             1.42
Weight         10.89
pH              4.68
dtype: float64

<h4>ICUType 3 missing rate</h4>

In [67]:
ICUType_3_training_ids = train_X[train_X['ICUType'] == 3.0]
ICUType_3_training_ids = ICUType_3_training_ids[ICUType_3_training_ids["Time"] == 0.0]
ICUType_3_training_ids = ICUType_3_training_ids["RecordID"]
ICUType_3_training = train_X[train_X["RecordID"].isin(ICUType_3_training_ids)]
ICUType_3_training_missing = round((ICUType_3_training.isna().sum()/total_pacientes)*100, 2)
ICUType_3_training_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            34.82
ALT            34.80
AST            34.80
Age             2.68
Albumin        35.00
BUN            32.91
Bilirubin      34.76
Cholesterol    35.55
Creatinine     32.90
DiasABP        22.81
FiO2           30.40
GCS            26.12
Gender         34.85
Glucose        32.92
HCO3           32.90
HCT            32.43
HR              3.91
Height          2.68
ICUType        34.85
K              32.68
Lactate        34.20
MAP            22.99
MechVent       30.82
Mg             33.06
NIDiasABP      15.59
NIMAP          15.95
NISysABP       15.58
Na             32.83
PaCO2          32.89
PaO2           32.89
Platelets      33.13
RespRate       24.31
SaO2           35.17
SysABP         22.80
Temp           25.40
TroponinI      35.50
TroponinT      35.09
Urine          14.22
WBC            33.24
Weight         12.75
pH             32.86
dtype: float64

<h4>ICUType 3 measurements</h4>

In [68]:
ICUType_3_measurements_training = round((ICUType_3_training.count()/total_pacientes)*100, 2)
ICUType_3_measurements_training

RecordID       35.59
level_1        35.59
Time           35.59
ALP             0.77
ALT             0.79
AST             0.79
Age            32.91
Albumin         0.59
BUN             2.68
Bilirubin       0.82
Cholesterol     0.03
Creatinine      2.69
DiasABP        12.78
FiO2            5.19
GCS             9.47
Gender          0.74
Glucose         2.67
HCO3            2.69
HCT             3.16
HR             31.68
Height         32.91
ICUType         0.74
K               2.90
Lactate         1.39
MAP            12.60
MechVent        4.76
Mg              2.53
NIDiasABP      20.00
NIMAP          19.64
NISysABP       20.01
Na              2.76
PaCO2           2.69
PaO2            2.69
Platelets       2.46
RespRate       11.27
SaO2            0.42
SysABP         12.79
Temp           10.19
TroponinI       0.09
TroponinT       0.49
Urine          21.37
WBC             2.35
Weight         22.84
pH              2.73
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [69]:
ICUType_4_training_ids = train_X[train_X['ICUType'] == 4.0]
ICUType_4_training_ids = ICUType_4_training_ids[ICUType_4_training_ids["Time"] == 0.0]
ICUType_4_training_ids = ICUType_4_training_ids["RecordID"]
ICUType_4_training = train_X[train_X["RecordID"].isin(ICUType_4_training_ids)]
ICUType_4_training_missing = round((ICUType_4_training.isna().sum()/total_pacientes)*100, 2)
ICUType_4_training_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            28.17
ALT            28.16
AST            28.16
Age             1.33
Albumin        28.27
BUN            26.52
Bilirubin      28.17
Cholesterol    28.64
Creatinine     26.52
DiasABP         9.60
FiO2           23.30
GCS            15.47
Gender         28.08
Glucose        26.51
HCO3           26.55
HCT            25.89
HR              2.38
Height          1.33
ICUType        28.08
K              26.31
Lactate        26.99
MAP             9.74
MechVent       23.36
Mg             26.47
NIDiasABP      18.51
NIMAP          18.65
NISysABP       18.49
Na             26.38
PaCO2          25.29
PaO2           25.30
Platelets      26.49
RespRate       21.93
SaO2           28.10
SysABP          9.60
Temp           18.43
TroponinI      28.63
TroponinT      28.46
Urine           6.95
WBC            26.63
Weight         16.32
pH             25.24
dtype: float64

<h4>ICUType 4 measurements</h4>

In [70]:
ICUType_4_measurements_training = round((ICUType_4_training.count()/total_pacientes)*100,2)
ICUType_4_measurements_training 

RecordID       28.68
level_1        28.68
Time           28.68
ALP             0.51
ALT             0.52
AST             0.52
Age            27.35
Albumin         0.41
BUN             2.16
Bilirubin       0.51
Cholesterol     0.04
Creatinine      2.16
DiasABP        19.08
FiO2            5.38
GCS            13.21
Gender          0.60
Glucose         2.17
HCO3            2.13
HCT             2.79
HR             26.30
Height         27.35
ICUType         0.60
K               2.36
Lactate         1.69
MAP            18.94
MechVent        5.32
Mg              2.21
NIDiasABP      10.17
NIMAP          10.03
NISysABP       10.19
Na              2.30
PaCO2           3.39
PaO2            3.38
Platelets       2.19
RespRate        6.75
SaO2            0.57
SysABP         19.08
Temp           10.25
TroponinI       0.05
TroponinT       0.22
Urine          21.73
WBC             2.05
Weight         12.36
pH              3.44
dtype: float64

<h4>+65 missing rate</h4>

In [71]:
more_than_or_equal_to_65_train_ids = train_X[train_X["Age"] >= 65]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids[more_than_or_equal_to_65_train_ids["Time"] == 0.0]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids["RecordID"]
more_than_or_equal_to_65_train = train_X[train_X["RecordID"].isin(more_than_or_equal_to_65_train_ids)]
more_than_or_equal_to_65_train_missing = round((more_than_or_equal_to_65_train.isna().sum()/total_pacientes)*100,2)
more_than_or_equal_to_65_train_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            53.67
ALT            53.65
AST            53.65
Age             3.11
Albumin        53.79
BUN            50.55
Bilirubin      53.64
Cholesterol    54.30
Creatinine     50.53
DiasABP        24.41
FiO2           45.85
GCS            37.50
Gender         53.27
Glucose        50.82
HCO3           50.64
HCT            49.26
HR              5.15
Height          3.11
ICUType        53.27
K              50.38
Lactate        52.27
MAP            24.57
MechVent       46.37
Mg             50.61
NIDiasABP      31.65
NIMAP          31.89
NISysABP       31.63
Na             50.68
PaCO2          48.02
PaO2           48.03
Platelets      50.47
RespRate       41.11
SaO2           51.93
SysABP         24.41
Temp           33.36
TroponinI      54.26
TroponinT      53.63
Urine          15.70
WBC            50.82
Weight         25.28
pH             47.74
dtype: float64

<h4>+65 measurements</h4>

In [72]:
age_65_and_above_measurements_training = round((more_than_or_equal_to_65_train.count()/total_pacientes)*100,2)
age_65_and_above_measurements_training

RecordID       54.40
level_1        54.40
Time           54.40
ALP             0.73
ALT             0.75
AST             0.75
Age            51.29
Albumin         0.61
BUN             3.85
Bilirubin       0.76
Cholesterol     0.10
Creatinine      3.87
DiasABP        29.99
FiO2            8.55
GCS            16.90
Gender          1.13
Glucose         3.58
HCO3            3.76
HCT             5.14
HR             49.25
Height         51.29
ICUType         1.13
K               4.02
Lactate         2.13
MAP            29.83
MechVent        8.03
Mg              3.79
NIDiasABP      22.75
NIMAP          22.51
NISysABP       22.77
Na              3.72
PaCO2           6.38
PaO2            6.36
Platelets       3.93
RespRate       13.29
SaO2            2.47
SysABP         29.99
Temp           21.04
TroponinI       0.14
TroponinT       0.77
Urine          38.70
WBC             3.58
Weight         29.12
pH              6.66
dtype: float64

<h4>-65 missing rate</h4>

In [73]:
less_than_65_train_ids = train_X[train_X["Age"] < 65]
less_than_65_train_ids = less_than_65_train_ids[less_than_65_train_ids["Time"] == 0.0]
less_than_65_train_ids = less_than_65_train_ids["RecordID"]
less_than_65_train = train_X[train_X["RecordID"].isin(less_than_65_train_ids)]
less_than_65_train_missing = round((less_than_65_train.isna().sum()/total_pacientes)*100,2)
less_than_65_train_missing


RecordID        0.00
level_1         0.00
Time            0.00
ALP            44.67
ALT            44.64
AST            44.64
Age             2.83
Albumin        44.95
BUN            42.19
Bilirubin      44.64
Cholesterol    45.53
Creatinine     42.18
DiasABP        20.88
FiO2           38.43
GCS            30.47
Gender         44.65
Glucose        42.34
HCO3           42.26
HCT            41.20
HR              4.75
Height          2.83
ICUType        44.65
K              42.00
Lactate        43.56
MAP            21.06
MechVent       38.52
Mg             42.27
NIDiasABP      26.74
NIMAP          27.07
NISysABP       26.72
Na             42.20
PaCO2          40.37
PaO2           40.39
Platelets      42.11
RespRate       34.86
SaO2           44.05
SysABP         20.88
Temp           29.54
TroponinI      45.54
TroponinT      45.27
Urine          15.23
WBC            42.41
Weight         22.04
pH             40.16
dtype: float64

<h4>-65 measurements</h4>

In [74]:
age_under_65_measurements_training  = round((less_than_65_train.count()/total_pacientes)*100,2)
age_under_65_measurements_training 

RecordID       45.60
level_1        45.60
Time           45.60
ALP             0.93
ALT             0.96
AST             0.96
Age            42.77
Albumin         0.65
BUN             3.41
Bilirubin       0.96
Cholesterol     0.07
Creatinine      3.42
DiasABP        24.72
FiO2            7.17
GCS            15.13
Gender          0.95
Glucose         3.26
HCO3            3.34
HCT             4.40
HR             40.85
Height         42.77
ICUType         0.95
K               3.60
Lactate         2.04
MAP            24.54
MechVent        7.08
Mg              3.33
NIDiasABP      18.86
NIMAP          18.53
NISysABP       18.88
Na              3.40
PaCO2           5.23
PaO2            5.22
Platelets       3.49
RespRate       10.74
SaO2            1.55
SysABP         24.72
Temp           16.06
TroponinI       0.06
TroponinT       0.33
Urine          30.37
WBC             3.19
Weight         23.56
pH              5.44
dtype: float64

<h4>Filtering only patients who have the same height and weight</h4>

In [75]:
filtered_train_X = train_X[(train_X['Height'] != -1) & (train_X['Weight'] != -1) & (train_X['Height'].notna()) & (train_X['Weight'].notna())] 

<h4>Classify BMI</h4>

In [76]:
def classify_BMI(BMI):
    if BMI <= 18.5:
        return "Baixo peso"
    elif BMI >= 18.6 and BMI <= 24.9:
        return "Peso normal"
    elif BMI >= 25 and BMI <= 29.9:
        return "Sobrepeso"
    elif BMI >= 30 and BMI <= 34.9:
        return "Obesidade grau 1"
    elif BMI >= 35 and BMI <= 39.9:
        return "Obesidade grau 2"
    elif BMI >= 40:
        return "Obesidade grau 3"

<h4>Set the height to meters</h4>

In [77]:
filtered_train_X_metros = filtered_train_X.copy()
filtered_train_X_metros["Height"] = filtered_train_X["Height"]/100
filtered_train_X_metros["Height"]

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
575035    1.600
575037    1.600
575038    1.600
575039    1.600
575088    1.727
Name: Height, Length: 101565, dtype: float64

<h4>BMI Calculation and Classification</h4>

In [78]:
bmi_data_train = filtered_train_X_metros
bmi_data_train["BMI"] = round(filtered_train_X_metros["Weight"] / (filtered_train_X_metros["Height"]**2), 1)
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.7,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.2,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.2,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.2,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.2,Sobrepeso


<h4>Taking only the first occurrence of each patient</h4>

In [79]:
bmi_data_train = bmi_data_train.groupby("RecordID").first().reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132540,0,0.0,,,,76.0,,21.0,,...,122.00,37.50,,,50.0,13.3,76.0,7.45,24.7,Peso normal
1,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.30,,,600.0,11.5,84.6,,26.0,Sobrepeso
2,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
3,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.00,3.5,,120.0,16.1,48.4,7.40,18.3,Baixo peso
4,132567,0,0.0,,,,71.0,,9.0,,...,111.50,35.60,,,15.0,9.0,56.0,7.44,22.6,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3983,163007,0,0.0,42.0,30.0,40.0,19.0,2.8,16.0,0.6,...,0.00,40.55,,1.00,150.0,14.1,114.3,7.36,34.2,Obesidade grau 1
3984,163008,0,0.0,,,,59.0,,24.0,,...,97.00,37.60,,,45.0,6.9,98.5,7.38,34.0,Obesidade grau 1
3985,163013,0,0.0,82.0,11.0,30.0,74.0,2.5,30.0,1.2,...,118.00,36.50,,0.03,40.0,9.6,68.6,7.35,29.5,Sobrepeso
3986,163016,0,0.0,,27.0,120.0,65.0,,29.0,0.4,...,101.00,38.10,,,75.0,8.0,63.6,7.37,24.8,Peso normal


In [80]:
bmi_data_train["Classificacao"].value_counts()

Classificacao
Sobrepeso           1353
Peso normal         1216
Obesidade grau 1     717
Obesidade grau 2     297
Obesidade grau 3     277
Baixo peso           128
Name: count, dtype: int64

<h4>Classification Undefined missing rate</h4>

In [81]:
classificacao_undefined_ids = bmi_data_train["RecordID"]
classificacao_undefined = train_X[~train_X["RecordID"].isin(classificacao_undefined_ids)]
classificacao_undefined_missing = round((classificacao_undefined.isna().sum()/total_pacientes)*100,2)
classificacao_undefined_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            47.21
ALT            47.18
AST            47.18
Age             3.76
Albumin        47.36
BUN            44.62
Bilirubin      47.17
Cholesterol    47.94
Creatinine     44.60
DiasABP        28.54
FiO2           41.39
GCS            32.00
Gender         47.01
Glucose        44.63
HCO3           44.64
HCT            43.88
HR              5.51
Height          3.76
ICUType        47.01
K              44.29
Lactate        46.35
MAP            28.76
MechVent       41.88
Mg             44.71
NIDiasABP      22.85
NIMAP          23.25
NISysABP       22.83
Na             44.48
PaCO2          44.57
PaO2           44.58
Platelets      44.84
RespRate       31.71
SaO2           47.48
SysABP         28.54
Temp           35.06
TroponinI      47.93
TroponinT      47.42
Urine          17.63
WBC            44.99
Weight         22.91
pH             44.51
dtype: float64

<h4>Classification Undefined measurements</h4>

In [82]:
classification_undefined_measurements = round((classificacao_undefined.count()/total_pacientes)*100,2)
classification_undefined_measurements

RecordID       48.01
level_1        48.01
Time           48.01
ALP             0.80
ALT             0.83
AST             0.83
Age            44.25
Albumin         0.65
BUN             3.39
Bilirubin       0.84
Cholesterol     0.08
Creatinine      3.41
DiasABP        19.47
FiO2            6.62
GCS            16.01
Gender          1.00
Glucose         3.38
HCO3            3.37
HCT             4.13
HR             42.50
Height         44.25
ICUType         1.00
K               3.73
Lactate         1.66
MAP            19.25
MechVent        6.13
Mg              3.30
NIDiasABP      25.16
NIMAP          24.77
NISysABP       25.18
Na              3.53
PaCO2           3.44
PaO2            3.43
Platelets       3.17
RespRate       16.30
SaO2            0.53
SysABP         19.47
Temp           12.96
TroponinI       0.08
TroponinT       0.59
Urine          30.38
WBC             3.03
Weight         25.10
pH              3.50
dtype: float64

<h4>Low weight classification missing rate</h4>

In [83]:
classificacao_baixo_peso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_ids = classificacao_baixo_peso_ids["RecordID"]
classificacao_baixo_peso = train_X[train_X["RecordID"].isin(classificacao_baixo_peso_ids)]
classificacao_baixo_peso_missing = round((classificacao_baixo_peso.isna().sum()/total_pacientes)*100,2)
classificacao_baixo_peso_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            1.64
ALT            1.64
AST            1.64
Age            0.07
Albumin        1.65
BUN            1.55
Bilirubin      1.64
Cholesterol    1.67
Creatinine     1.55
DiasABP        0.65
FiO2           1.39
GCS            1.15
Gender         1.63
Glucose        1.55
HCO3           1.55
HCT            1.51
HR             0.13
Height         0.07
ICUType        1.63
K              1.54
Lactate        1.58
MAP            0.64
MechVent       1.40
Mg             1.54
NIDiasABP      1.04
NIMAP          1.06
NISysABP       1.04
Na             1.55
PaCO2          1.46
PaO2           1.46
Platelets      1.54
RespRate       1.34
SaO2           1.59
SysABP         0.64
Temp           1.00
TroponinI      1.66
TroponinT      1.65
Urine          0.49
WBC            1.56
Weight         0.79
pH             1.45
dtype: float64

<h4>Classificacao low weight measurements</h4>

In [84]:
classificacao_baixo_peso_measurements = round((classificacao_baixo_peso.count()/total_pacientes)*100,2)
classificacao_baixo_peso_measurements

RecordID       1.67
level_1        1.67
Time           1.67
ALP            0.03
ALT            0.03
AST            0.03
Age            1.59
Albumin        0.02
BUN            0.12
Bilirubin      0.03
Cholesterol    0.00
Creatinine     0.12
DiasABP        1.02
FiO2           0.28
GCS            0.52
Gender         0.03
Glucose        0.12
HCO3           0.12
HCT            0.16
HR             1.54
Height         1.59
ICUType        0.03
K              0.13
Lactate        0.09
MAP            1.03
MechVent       0.27
Mg             0.12
NIDiasABP      0.62
NIMAP          0.61
NISysABP       0.63
Na             0.12
PaCO2          0.21
PaO2           0.21
Platelets      0.12
RespRate       0.33
SaO2           0.08
SysABP         1.02
Temp           0.67
TroponinI      0.01
TroponinT      0.02
Urine          1.18
WBC            0.11
Weight         0.88
pH             0.22
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [85]:
classificacao_normal_peso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Peso normal"]
#classificacao_normal_peso_ids = classificacao_normal_peso_ids[classificacao_normal_peso_ids["Time"] == 0.0]
classificacao_normal_peso_ids = classificacao_normal_peso_ids["RecordID"]
classificacao_normal_peso = train_X[train_X["RecordID"].isin(classificacao_normal_peso_ids)]
classificacao_normal_peso_missing = round((classificacao_normal_peso.isna().sum()/total_pacientes)*100,2)
classificacao_normal_peso_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            15.60
ALT            15.60
AST            15.60
Age             0.65
Albumin        15.67
BUN            14.69
Bilirubin      15.60
Cholesterol    15.82
Creatinine     14.68
DiasABP         5.30
FiO2           13.18
GCS            10.80
Gender         15.52
Glucose        14.80
HCO3           14.73
HCT            14.22
HR              1.31
Height          0.65
ICUType        15.52
K              14.66
Lactate        15.11
MAP             5.34
MechVent       13.17
Mg             14.69
NIDiasABP      10.68
NIMAP          10.73
NISysABP       10.67
Na             14.75
PaCO2          13.49
PaO2           13.50
Platelets      14.57
RespRate       13.35
SaO2           14.88
SysABP          5.30
Temp            8.66
TroponinI      15.81
TroponinT      15.71
Urine           4.10
WBC            14.72
Weight          7.70
pH             13.35
dtype: float64

<h4>Classification normal weight measurements</h4>

In [86]:
classificacao_normal_peso_measurements = round((classificacao_normal_peso.count()/total_pacientes)*100,2)
classificacao_normal_peso_measurements

RecordID       15.85
level_1        15.85
Time           15.85
ALP             0.25
ALT             0.25
AST             0.25
Age            15.20
Albumin         0.19
BUN             1.16
Bilirubin       0.26
Cholesterol     0.03
Creatinine      1.17
DiasABP        10.56
FiO2            2.67
GCS             5.05
Gender          0.33
Glucose         1.06
HCO3            1.13
HCT             1.63
HR             14.54
Height         15.20
ICUType         0.33
K               1.19
Lactate         0.75
MAP            10.51
MechVent        2.68
Mg              1.16
NIDiasABP       5.17
NIMAP           5.12
NISysABP        5.18
Na              1.10
PaCO2           2.36
PaO2            2.35
Platelets       1.29
RespRate        2.51
SaO2            0.97
SysABP         10.56
Temp            7.19
TroponinI       0.04
TroponinT       0.15
Urine          11.75
WBC             1.13
Weight          8.15
pH              2.50
dtype: float64

<h4>Classification overweight missing rate</h4>

In [87]:
classificacao_sobrepeso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_ids = classificacao_sobrepeso_ids["RecordID"]
classificacao_sobrepeso = train_X[train_X["RecordID"].isin(classificacao_sobrepeso_ids)]
classificacao_sobrepeso_missing = round((classificacao_sobrepeso.isna().sum()/total_pacientes)*100,2)
classificacao_sobrepeso_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            17.35
ALT            17.35
AST            17.35
Age             0.76
Albumin        17.44
BUN            16.33
Bilirubin      17.35
Cholesterol    17.60
Creatinine     16.32
DiasABP         5.54
FiO2           14.66
GCS            12.22
Gender         17.27
Glucose        16.48
HCO3           16.38
HCT            15.78
HR              1.54
Height          0.76
ICUType        17.27
K              16.35
Lactate        16.84
MAP             5.57
MechVent       14.70
Mg             16.36
NIDiasABP      12.07
NIMAP          12.12
NISysABP       12.07
Na             16.45
PaCO2          14.87
PaO2           14.88
Platelets      16.17
RespRate       15.04
SaO2           16.42
SysABP          5.54
Temp            9.31
TroponinI      17.60
TroponinT      17.46
Urine           4.59
WBC            16.35
Weight          8.44
pH             14.71
dtype: float64

<h4>Classification overweight measurements</h4>

In [88]:
classificacao_sobrepeso_measurements = round((classificacao_sobrepeso.count()/total_pacientes)*100,2)
classificacao_baixo_peso_measurements

RecordID       1.67
level_1        1.67
Time           1.67
ALP            0.03
ALT            0.03
AST            0.03
Age            1.59
Albumin        0.02
BUN            0.12
Bilirubin      0.03
Cholesterol    0.00
Creatinine     0.12
DiasABP        1.02
FiO2           0.28
GCS            0.52
Gender         0.03
Glucose        0.12
HCO3           0.12
HCT            0.16
HR             1.54
Height         1.59
ICUType        0.03
K              0.13
Lactate        0.09
MAP            1.03
MechVent       0.27
Mg             0.12
NIDiasABP      0.62
NIMAP          0.61
NISysABP       0.63
Na             0.12
PaCO2          0.21
PaO2           0.21
Platelets      0.12
RespRate       0.33
SaO2           0.08
SysABP         1.02
Temp           0.67
TroponinI      0.01
TroponinT      0.02
Urine          1.18
WBC            0.11
Weight         0.88
pH             0.22
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [89]:
classificacao_obesidade_1_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_1_ids = classificacao_obesidade_1_ids["RecordID"]
classificacao_obesidade_1 = train_X[train_X["RecordID"].isin(classificacao_obesidade_1_ids)]
classificacao_obesidade_1_missing = round((classificacao_obesidade_1.isna().sum()/total_pacientes)*100,2)
classificacao_obesidade_1_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            9.18
ALT            9.18
AST            9.18
Age            0.38
Albumin        9.23
BUN            8.65
Bilirubin      9.17
Cholesterol    9.33
Creatinine     8.64
DiasABP        2.86
FiO2           7.63
GCS            6.52
Gender         9.15
Glucose        8.72
HCO3           8.67
HCT            8.36
HR             0.79
Height         0.38
ICUType        9.15
K              8.64
Lactate        8.87
MAP            2.90
MechVent       7.66
Mg             8.66
NIDiasABP      6.51
NIMAP          6.55
NISysABP       6.51
Na             8.69
PaCO2          7.79
PaO2           7.79
Platelets      8.58
RespRate       8.08
SaO2           8.69
SysABP         2.86
Temp           4.89
TroponinI      9.33
TroponinT      9.25
Urine          2.21
WBC            8.67
Weight         4.28
pH             7.71
dtype: float64

<h4>Grade 1 obesity measurements </h4>

In [90]:
classificacao_obesidade_1_measurements = round((classificacao_obesidade_1.count()/total_pacientes)*100,2)
classificacao_obesidade_1_measurements

RecordID       9.35
level_1        9.35
Time           9.35
ALP            0.17
ALT            0.17
AST            0.17
Age            8.97
Albumin        0.12
BUN            0.70
Bilirubin      0.17
Cholesterol    0.02
Creatinine     0.70
DiasABP        6.48
FiO2           1.72
GCS            2.83
Gender         0.19
Glucose        0.63
HCO3           0.68
HCT            0.99
HR             8.56
Height         8.97
ICUType        0.19
K              0.71
Lactate        0.48
MAP            6.45
MechVent       1.69
Mg             0.69
NIDiasABP      2.84
NIMAP          2.80
NISysABP       2.84
Na             0.65
PaCO2          1.56
PaO2           1.56
Platelets      0.77
RespRate       1.27
SaO2           0.66
SysABP         6.48
Temp           4.46
TroponinI      0.02
TroponinT      0.10
Urine          7.14
WBC            0.67
Weight         5.07
pH             1.64
dtype: float64

<h4>Grade 2 Obesity missing rate</h4>

In [91]:
classificacao_obesidade_2_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_2_ids = classificacao_obesidade_2_ids["RecordID"]
classificacao_obesidade_2 = train_X[train_X["RecordID"].isin(classificacao_obesidade_2_ids)]
classificacao_obesidade_2_missing = round((classificacao_obesidade_2.isna().sum()/total_pacientes)*100,2)
classificacao_obesidade_2_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.80
ALT            3.80
AST            3.80
Age            0.16
Albumin        3.83
BUN            3.57
Bilirubin      3.80
Cholesterol    3.86
Creatinine     3.57
DiasABP        1.21
FiO2           3.15
GCS            2.69
Gender         3.79
Glucose        3.61
HCO3           3.58
HCT            3.46
HR             0.33
Height         0.16
ICUType        3.79
K              3.58
Lactate        3.67
MAP            1.22
MechVent       3.18
Mg             3.58
NIDiasABP      2.66
NIMAP          2.67
NISysABP       2.65
Na             3.59
PaCO2          3.22
PaO2           3.22
Platelets      3.55
RespRate       3.31
SaO2           3.57
SysABP         1.21
Temp           1.98
TroponinI      3.86
TroponinT      3.83
Urine          0.98
WBC            3.58
Weight         1.68
pH             3.19
dtype: float64

<h4>Grade 2 Obesity measurements</h4>

In [92]:
classificacao_obesidade_2_measurements = round((classificacao_obesidade_2.count()/total_pacientes)*100,2)
classificacao_obesidade_2_measurements

RecordID       3.87
level_1        3.87
Time           3.87
ALP            0.07
ALT            0.07
AST            0.07
Age            3.71
Albumin        0.05
BUN            0.30
Bilirubin      0.07
Cholesterol    0.01
Creatinine     0.30
DiasABP        2.66
FiO2           0.72
GCS            1.18
Gender         0.08
Glucose        0.26
HCO3           0.29
HCT            0.42
HR             3.54
Height         3.71
ICUType        0.08
K              0.29
Lactate        0.21
MAP            2.65
MechVent       0.69
Mg             0.29
NIDiasABP      1.22
NIMAP          1.20
NISysABP       1.22
Na             0.28
PaCO2          0.66
PaO2           0.66
Platelets      0.33
RespRate       0.56
SaO2           0.30
SysABP         2.66
Temp           1.89
TroponinI      0.01
TroponinT      0.04
Urine          2.89
WBC            0.29
Weight         2.19
pH             0.68
dtype: float64

<h4>Grade 3 Obesity missing rate</h4>

In [93]:
classificacao_obesidade_3_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_3_ids = classificacao_obesidade_3_ids["RecordID"]
classificacao_obesidade_3 = train_X[train_X["RecordID"].isin(classificacao_obesidade_3_ids)]
classificacao_obesidade_3_missing = round((classificacao_obesidade_3.isna().sum()/total_pacientes)*100,2)
classificacao_obesidade_3_missing

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.55
ALT            3.55
AST            3.55
Age            0.16
Albumin        3.57
BUN            3.34
Bilirubin      3.55
Cholesterol    3.60
Creatinine     3.34
DiasABP        1.19
FiO2           2.88
GCS            2.59
Gender         3.54
Glucose        3.37
HCO3           3.35
HCT            3.26
HR             0.30
Height         0.16
ICUType        3.54
K              3.34
Lactate        3.42
MAP            1.20
MechVent       2.89
Mg             3.35
NIDiasABP      2.58
NIMAP          2.59
NISysABP       2.57
Na             3.36
PaCO2          3.00
PaO2           3.00
Platelets      3.34
RespRate       3.13
SaO2           3.35
SysABP         1.19
Temp           2.00
TroponinI      3.61
TroponinT      3.57
Urine          0.92
WBC            3.36
Weight         1.51
pH             2.98
dtype: float64

<h4>Grade 3 Obesity measurements</h4>

In [94]:
classificacao_obesidade_3_measurements = round((classificacao_obesidade_3.count()/total_pacientes)*100,2)
classificacao_obesidade_3_measurements

RecordID       3.61
level_1        3.61
Time           3.61
ALP            0.06
ALT            0.06
AST            0.06
Age            3.45
Albumin        0.04
BUN            0.27
Bilirubin      0.06
Cholesterol    0.01
Creatinine     0.27
DiasABP        2.42
FiO2           0.73
GCS            1.02
Gender         0.08
Glucose        0.24
HCO3           0.26
HCT            0.35
HR             3.32
Height         3.45
ICUType        0.08
K              0.27
Lactate        0.19
MAP            2.41
MechVent       0.72
Mg             0.27
NIDiasABP      1.04
NIMAP          1.02
NISysABP       1.04
Na             0.25
PaCO2          0.61
PaO2           0.61
Platelets      0.27
RespRate       0.48
SaO2           0.26
SysABP         2.42
Temp           1.61
TroponinI      0.01
TroponinT      0.04
Urine          2.69
WBC            0.25
Weight         2.10
pH             0.63
dtype: float64

<h4>Columns for tables</h4>

In [95]:
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

<h4>Building the missing rate table</h4>

In [96]:
df_missing = pd.DataFrame(columns=df_columns)
df_missing_transpose = df_missing.T
df_missing_transpose["Female"] = female_gender_missing_rate
df_missing_transpose["Male"] = male_gender_missing_rate
df_missing_transpose["Undefined gender"] = undefined_gender_missing_rate
df_missing_transpose["ICUType 1"] = ICUType_1_training_missing
df_missing_transpose["ICUType 2"] = ICUType_2_training_missing
df_missing_transpose["ICUType 3"] = ICUType_3_training_missing
df_missing_transpose["ICUType 4"] = ICUType_4_training_missing
df_missing_transpose["Age 65+"] = more_than_or_equal_to_65_train_missing
df_missing_transpose["Age 65-"] = less_than_65_train_missing
df_missing_transpose["Low Weight"] = classificacao_baixo_peso_missing
df_missing_transpose["Normal Weight"] = classificacao_normal_peso_missing
df_missing_transpose["Overweight"] = classificacao_sobrepeso_missing
df_missing_transpose["Obesity Grade 1"] = classificacao_obesidade_1_missing
df_missing_transpose["Obesity Grade 2"] = classificacao_obesidade_2_missing
df_missing_transpose["Obesity Grade 3"] = classificacao_obesidade_3_missing
df_missing_transpose["Undefined classification"] = classificacao_undefined_missing
df_missing_transpose = df_missing_transpose.drop("RecordID", axis=0)
df_missing_transpose = df_missing_transpose.drop("level_1", axis=0)
df_missing_transpose = df_missing_transpose.drop("Time", axis=0)
df_missing_transpose = df_missing_transpose.drop("Age", axis=0)
df_missing_transpose = df_missing_transpose.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographics - Train</h2>"))
df_missing_transpose

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,43.01,55.25,0.08,14.51,20.85,34.82,28.17,53.67,44.67,1.64,15.6,17.35,9.18,3.8,3.55,47.21
ALT,42.99,55.22,0.08,14.49,20.84,34.8,28.16,53.65,44.64,1.64,15.6,17.35,9.18,3.8,3.55,47.18
AST,42.99,55.22,0.08,14.49,20.84,34.8,28.16,53.65,44.64,1.64,15.6,17.35,9.18,3.8,3.55,47.18
Albumin,43.17,55.48,0.08,14.57,20.9,35.0,28.27,53.79,44.95,1.65,15.67,17.44,9.23,3.83,3.57,47.36
BUN,40.58,52.09,0.07,13.68,19.64,32.91,26.52,50.55,42.19,1.55,14.69,16.33,8.65,3.57,3.34,44.62
Bilirubin,42.98,55.21,0.08,14.5,20.84,34.76,28.17,53.64,44.64,1.64,15.6,17.35,9.17,3.8,3.55,47.17
Cholesterol,43.66,56.08,0.08,14.63,21.0,35.55,28.64,54.3,45.53,1.67,15.82,17.6,9.33,3.86,3.6,47.94
Creatinine,40.57,52.07,0.07,13.66,19.64,32.9,26.52,50.53,42.18,1.55,14.68,16.32,8.64,3.57,3.34,44.6
DiasABP,21.14,24.11,0.04,8.45,4.45,22.81,9.6,24.41,20.88,0.65,5.3,5.54,2.86,1.21,1.19,28.54
FiO2,36.97,47.24,0.07,13.12,17.46,30.4,23.3,45.85,38.43,1.39,13.18,14.66,7.63,3.15,2.88,41.39


<h4>Building the measurements table</h4>

In [97]:
df_measurements = pd.DataFrame(columns=df_columns)
df_measurements_transpose = df_measurements.T
df_measurements_transpose["Female"] = female_gender_measurements_training
df_measurements_transpose["Male"] = male_gender_measurements_training
df_measurements_transpose["Undefined gender"] = undefined_gender_measurements_training
df_measurements_transpose["ICUType 1"] = ICUType_1_measurements_training
df_measurements_transpose["ICUType 2"] = ICUType_2_measurements_training
df_measurements_transpose["ICUType 3"] = ICUType_3_measurements_training
df_measurements_transpose["ICUType 4"] = ICUType_4_measurements_training
df_measurements_transpose["Age 65+"] = age_65_and_above_measurements_training
df_measurements_transpose["Age 65-"] = age_under_65_measurements_training
df_measurements_transpose["Low Weight"] = classificacao_baixo_peso_measurements
df_measurements_transpose["Normal Weight"] = classificacao_normal_peso_measurements
df_measurements_transpose["Overweight"] = classificacao_sobrepeso_measurements
df_measurements_transpose["Obesity Grade 1"] = classificacao_obesidade_1_measurements
df_measurements_transpose["Obesity Grade 2"] = classificacao_obesidade_2_measurements
df_measurements_transpose["Obesity Grade 3"] = classificacao_obesidade_3_measurements
df_measurements_transpose["Undefined classification"] = classification_undefined_measurements
df_measurements_transpose = df_measurements_transpose.drop("RecordID", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("level_1", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("Time", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("Age", axis=0)
df_measurements_transpose = df_measurements_transpose.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_measurements_transpose

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,0.72,0.94,0.0,0.22,0.15,0.77,0.51,0.73,0.93,0.03,0.25,0.29,0.17,0.07,0.06,0.8
ALT,0.74,0.96,0.0,0.24,0.16,0.79,0.52,0.75,0.96,0.03,0.25,0.29,0.17,0.07,0.06,0.83
AST,0.74,0.96,0.0,0.24,0.16,0.79,0.52,0.75,0.96,0.03,0.25,0.29,0.17,0.07,0.06,0.83
Albumin,0.56,0.7,0.0,0.16,0.1,0.59,0.41,0.61,0.65,0.02,0.19,0.2,0.12,0.05,0.04,0.65
BUN,3.15,4.1,0.0,1.05,1.36,2.68,2.16,3.85,3.41,0.12,1.16,1.31,0.7,0.3,0.27,3.39
Bilirubin,0.76,0.97,0.0,0.23,0.16,0.82,0.51,0.76,0.96,0.03,0.26,0.29,0.17,0.07,0.06,0.84
Cholesterol,0.07,0.1,0.0,0.1,0.01,0.03,0.04,0.1,0.07,0.0,0.03,0.03,0.02,0.01,0.01,0.08
Creatinine,3.16,4.12,0.0,1.07,1.37,2.69,2.16,3.87,3.42,0.12,1.17,1.32,0.7,0.3,0.27,3.41
DiasABP,22.6,32.07,0.03,6.28,16.56,12.78,19.08,29.99,24.72,1.02,10.56,12.09,6.48,2.66,2.42,19.47
FiO2,6.77,8.94,0.0,1.61,3.54,5.19,5.38,8.55,7.17,0.28,2.67,2.97,1.72,0.72,0.73,6.62


# Validation data

<h4>Loading training database</h4>

In [5]:
validation_X = physionet2012_dataset['val_X']

<h4>Total of the patients</h4>

In [9]:
total_patients_validation = validation_X.groupby("RecordID").first().reset_index()
total_patients_validation = total_patients_validation["RecordID"].count()
total_patients_validation = total_patients_validation*48
total_patients_validation

92064

<h4>female gender missing rate</h1>

In [10]:
female_gender_validation_ids = validation_X[validation_X["Gender"] == 0.0]
female_gender_validation_ids = female_gender_validation_ids["RecordID"]
female_gender_validation = validation_X[validation_X["RecordID"].isin(female_gender_validation_ids)]
female_gender_missing_rate_validation = round((female_gender_validation.isna().sum()/total_patients_validation)*100,2)
female_gender_missing_rate_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            43.87
ALT            43.85
AST            43.85
Age             2.75
Albumin        43.98
BUN            41.42
Bilirubin      43.85
Cholesterol    44.46
Creatinine     41.40
DiasABP        22.14
FiO2           37.86
GCS            30.59
Gender         43.60
Glucose        41.57
HCO3           41.46
HCT            40.57
HR              4.47
Height          2.75
ICUType        43.60
K              41.23
Lactate        42.87
MAP            22.21
MechVent       38.00
Mg             41.47
NIDiasABP      24.67
NIMAP          25.02
NISysABP       24.66
Na             41.45
PaCO2          39.90
PaO2           39.90
Platelets      41.43
RespRate       32.28
SaO2           42.90
SysABP         22.14
Temp           28.96
TroponinI      44.43
TroponinT      44.08
Urine          13.44
WBC            41.66
Weight         21.18
pH             39.70
dtype: float64

<h4>Famele gender measurements</h4>

In [11]:
female_gender_measurements_validation = round((female_gender_validation.count()/total_patients_validation)*100,2)
female_gender_measurements_validation

RecordID       44.53
level_1        44.53
Time           44.53
ALP             0.66
ALT             0.68
AST             0.68
Age            41.78
Albumin         0.55
BUN             3.10
Bilirubin       0.68
Cholesterol     0.07
Creatinine      3.12
DiasABP        22.38
FiO2            6.67
GCS            13.93
Gender          0.93
Glucose         2.96
HCO3            3.07
HCT             3.95
HR             40.06
Height         41.78
ICUType         0.93
K               3.30
Lactate         1.65
MAP            22.32
MechVent        6.53
Mg              3.06
NIDiasABP      19.86
NIMAP          19.51
NISysABP       19.87
Na              3.07
PaCO2           4.63
PaO2            4.63
Platelets       3.10
RespRate       12.25
SaO2            1.62
SysABP         22.38
Temp           15.56
TroponinI       0.10
TroponinT       0.45
Urine          31.09
WBC             2.87
Weight         23.34
pH              4.82
dtype: float64

<h4>Male gender missing rate</h4>

In [16]:
male_gender_validation_ids = validation_X[validation_X["Gender"] == 1.0]
male_gender_validation_ids = male_gender_validation_ids["RecordID"]
male_gender_validation = validation_X[validation_X["RecordID"].isin(male_gender_validation_ids)]
male_gender_missing_rate_validation = round((male_gender_validation.isna().sum()/total_patients_validation)*100,2)
male_gender_missing_rate_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            54.43
ALT            54.41
AST            54.40
Age             3.26
Albumin        54.62
BUN            51.39
Bilirubin      54.39
Cholesterol    55.22
Creatinine     51.38
DiasABP        25.64
FiO2           46.57
GCS            37.54
Gender         54.17
Glucose        51.65
HCO3           51.48
HCT            50.00
HR              5.55
Height          3.26
ICUType        54.17
K              51.25
Lactate        52.97
MAP            25.76
MechVent       47.11
Mg             51.52
NIDiasABP      31.98
NIMAP          32.37
NISysABP       31.96
Na             51.50
PaCO2          48.86
PaO2           48.87
Platelets      51.24
RespRate       42.56
SaO2           53.11
SysABP         25.64
Temp           34.39
TroponinI      55.22
TroponinT      54.73
Urine          17.01
WBC            51.64
Weight         26.43
pH             48.49
dtype: float64

<h4>Male gender measurements</h4>

In [17]:
male_gender_measurements_validation = round((male_gender_validation.count()/total_patients_validation)*100,2)
male_gender_measurements_validation

RecordID       55.32
level_1        55.32
Time           55.32
ALP             0.89
ALT             0.91
AST             0.91
Age            52.06
Albumin         0.69
BUN             3.93
Bilirubin       0.93
Cholesterol     0.10
Creatinine      3.94
DiasABP        29.68
FiO2            8.75
GCS            17.78
Gender          1.15
Glucose         3.67
HCO3            3.84
HCT             5.31
HR             49.77
Height         52.06
ICUType         1.15
K               4.06
Lactate         2.35
MAP            29.56
MechVent        8.21
Mg              3.80
NIDiasABP      23.34
NIMAP          22.95
NISysABP       23.36
Na              3.82
PaCO2           6.46
PaO2            6.45
Platelets       4.08
RespRate       12.76
SaO2            2.20
SysABP         29.68
Temp           20.92
TroponinI       0.10
TroponinT       0.58
Urine          38.31
WBC             3.68
Weight         28.88
pH              6.83
dtype: float64

<h4>Undefined gender missing rate</h4>

In [19]:
undefined_gender_ids_validation = validation_X[validation_X["Gender"] == -1.0]
undefined_gender_ids_validation = undefined_gender_ids_validation["RecordID"]
undefined_gender_validation = validation_X[validation_X["RecordID"].isin(undefined_gender_ids_validation)]
undefined_gender_missing_rate_validation = round((undefined_gender_validation.isna().sum()/total_patients_validation)*100,2)
undefined_gender_missing_rate_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.16
ALT            0.15
AST            0.15
Age            0.00
Albumin        0.16
BUN            0.15
Bilirubin      0.16
Cholesterol    0.16
Creatinine     0.15
DiasABP        0.08
FiO2           0.15
GCS            0.12
Gender         0.15
Glucose        0.15
HCO3           0.15
HCT            0.15
HR             0.00
Height         0.00
ICUType        0.15
K              0.15
Lactate        0.16
MAP            0.08
MechVent       0.16
Mg             0.15
NIDiasABP      0.07
NIMAP          0.07
NISysABP       0.07
Na             0.15
PaCO2          0.15
PaO2           0.15
Platelets      0.15
RespRate       0.06
SaO2           0.16
SysABP         0.08
Temp           0.12
TroponinI      0.16
TroponinT      0.15
Urine          0.08
WBC            0.15
Weight         0.02
pH             0.15
dtype: float64

<h4>Undefined gender measurements</h4>

In [25]:
undefined_gender_measurements_validation = round((undefined_gender_validation.count()/total_patients_validation)*100,2)
undefined_gender_measurements_validation

RecordID       0.16
level_1        0.16
Time           0.16
ALP            0.00
ALT            0.00
AST            0.00
Age            0.16
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.08
FiO2           0.00
GCS            0.04
Gender         0.00
Glucose        0.01
HCO3           0.01
HCT            0.01
HR             0.15
Height         0.16
ICUType        0.00
K              0.01
Lactate        0.00
MAP            0.08
MechVent       0.00
Mg             0.01
NIDiasABP      0.08
NIMAP          0.08
NISysABP       0.08
Na             0.01
PaCO2          0.01
PaO2           0.01
Platelets      0.01
RespRate       0.10
SaO2           0.00
SysABP         0.08
Temp           0.03
TroponinI      0.00
TroponinT      0.00
Urine          0.08
WBC            0.01
Weight         0.13
pH             0.01
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [28]:
ICUType_1_validation_ids = validation_X[validation_X["ICUType"] == 1.0]
ICUType_1_validation_ids = ICUType_1_validation_ids[ICUType_1_validation_ids["Time"] == 0.0]
ICUType_1_validation_ids = ICUType_1_validation_ids["RecordID"]
ICUType_1_validation = validation_X[validation_X["RecordID"].isin(ICUType_1_validation_ids)]
ICUType_1_validation_missing = round((ICUType_1_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_1_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            14.51
ALT            14.50
AST            14.50
Age             1.37
Albumin        14.53
BUN            13.67
Bilirubin      14.50
Cholesterol    14.61
Creatinine     13.65
DiasABP         9.14
FiO2           13.03
GCS            10.98
Gender         14.40
Glucose        13.70
HCO3           13.70
HCT            13.45
HR              2.07
Height          1.37
ICUType        14.40
K              13.50
Lactate        14.38
MAP             9.18
MechVent       13.26
Mg             13.69
NIDiasABP       7.31
NIMAP           7.39
NISysABP        7.30
Na             13.69
PaCO2          13.55
PaO2           13.56
Platelets      13.66
RespRate        9.44
SaO2           14.02
SysABP          9.14
Temp           10.30
TroponinI      14.67
TroponinT      14.40
Urine           6.14
WBC            13.77
Weight          8.34
pH             13.54
dtype: float64

<h4>ICUType 1 measurements</h4>

In [30]:
ICUType_1_measurements_validation = round((ICUType_1_validation.count()/total_patients_validation)*100,2)
ICUType_1_measurements_validation

RecordID       14.70
level_1        14.70
Time           14.70
ALP             0.20
ALT             0.21
AST             0.21
Age            13.33
Albumin         0.17
BUN             1.04
Bilirubin       0.21
Cholesterol     0.09
Creatinine      1.05
DiasABP         5.56
FiO2            1.67
GCS             3.72
Gender          0.31
Glucose         1.00
HCO3            1.01
HCT             1.25
HR             12.63
Height         13.33
ICUType         0.31
K               1.20
Lactate         0.32
MAP             5.52
MechVent        1.44
Mg              1.01
NIDiasABP       7.39
NIMAP           7.31
NISysABP        7.40
Na              1.01
PaCO2           1.15
PaO2            1.15
Platelets       1.05
RespRate        5.26
SaO2            0.68
SysABP          5.56
Temp            4.41
TroponinI       0.03
TroponinT       0.30
Urine           8.56
WBC             0.94
Weight          6.37
pH              1.17
dtype: float64

<h4>ICUType 2 missing rate</h4>

In [32]:
ICUType_2_validation_ids = validation_X[validation_X["ICUType"] == 2.0]
ICUType_2_validation_ids = ICUType_2_validation_ids[ICUType_2_validation_ids["Time"] == 0.0]
ICUType_2_validation_ids = ICUType_2_validation_ids["RecordID"]
ICUType_2_validation = validation_X[validation_X["RecordID"].isin(ICUType_2_validation_ids)]
ICUType_2_validation_missing = round((ICUType_2_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_2_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            21.03
ALT            21.03
AST            21.03
Age             0.72
Albumin        21.07
BUN            19.82
Bilirubin      21.04
Cholesterol    21.16
Creatinine     19.81
DiasABP         4.76
FiO2           17.80
GCS            15.46
Gender         20.73
Glucose        20.21
HCO3           19.90
HCT            18.89
HR              1.74
Height          0.72
ICUType        20.73
K              20.06
Lactate        20.47
MAP             4.69
MechVent       17.68
Mg             19.84
NIDiasABP      16.50
NIMAP          16.54
NISysABP       16.50
Na             20.12
PaCO2          17.10
PaO2           17.11
Platelets      19.46
RespRate       20.04
SaO2           19.01
SysABP          4.76
Temp            9.12
TroponinI      21.14
TroponinT      21.14
Urine           3.34
WBC            19.75
Weight         10.35
pH             16.64
dtype: float64

<h4>ICUType 2 measurements</h4>

In [34]:
ICUType_2_measurements_validation = round((ICUType_2_validation.count()/total_patients_validation)*100,2)
ICUType_2_measurements_validation

RecordID       21.17
level_1        21.17
Time           21.17
ALP             0.14
ALT             0.14
AST             0.14
Age            20.45
Albumin         0.09
BUN             1.35
Bilirubin       0.13
Cholesterol     0.01
Creatinine      1.36
DiasABP        16.41
FiO2            3.37
GCS             5.70
Gender          0.44
Glucose         0.96
HCO3            1.26
HCT             2.28
HR             19.43
Height         20.45
ICUType         0.44
K               1.11
Lactate         0.69
MAP            16.48
MechVent        3.49
Mg              1.33
NIDiasABP       4.66
NIMAP           4.63
NISysABP        4.67
Na              1.05
PaCO2           4.07
PaO2            4.06
Platelets       1.71
RespRate        1.13
SaO2            2.16
SysABP         16.41
Temp           12.04
TroponinI       0.03
TroponinT       0.03
Urine          17.83
WBC             1.42
Weight         10.82
pH              4.52
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [36]:
ICUType_3_validation_ids = validation_X[validation_X["ICUType"] == 3.0]
ICUType_3_validation_ids = ICUType_3_validation_ids[ICUType_3_validation_ids["Time"] == 0.0]
ICUType_3_validation_ids = ICUType_3_validation_ids["RecordID"]
ICUType_3_validation = validation_X[validation_X["RecordID"].isin(ICUType_3_validation_ids)]
ICUType_3_validation_missing = round((ICUType_3_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_3_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            35.80
ALT            35.78
AST            35.78
Age             2.58
Albumin        35.97
BUN            33.91
Bilirubin      35.75
Cholesterol    36.52
Creatinine     33.90
DiasABP        24.41
FiO2           31.12
GCS            26.73
Gender         35.79
Glucose        33.90
HCO3           33.90
HCT            33.40
HR              3.81
Height          2.58
ICUType        35.79
K              33.69
Lactate        35.16
MAP            24.57
MechVent       31.52
Mg             34.06
NIDiasABP      15.14
NIMAP          15.65
NISysABP       15.13
Na             33.84
PaCO2          33.83
PaO2           33.83
Platelets      34.15
RespRate       25.68
SaO2           36.10
SysABP         24.41
Temp           26.22
TroponinI      36.46
TroponinT      36.07
Urine          14.10
WBC            34.25
Weight         12.83
pH             33.79
dtype: float64

<h4>ICUType 3 measurements</h4>

In [39]:
ICUType_3_measurements_validation = round((ICUType_3_validation.count()/total_patients_validation)*100,2)
ICUType_3_measurements_validation

RecordID       36.55
level_1        36.55
Time           36.55
ALP             0.74
ALT             0.76
AST             0.76
Age            33.97
Albumin         0.58
BUN             2.64
Bilirubin       0.80
Cholesterol     0.03
Creatinine      2.65
DiasABP        12.14
FiO2            5.43
GCS             9.82
Gender          0.76
Glucose         2.65
HCO3            2.65
HCT             3.15
HR             32.74
Height         33.97
ICUType         0.76
K               2.86
Lactate         1.39
MAP            11.98
MechVent        5.03
Mg              2.49
NIDiasABP      21.41
NIMAP          20.90
NISysABP       21.42
Na              2.70
PaCO2           2.72
PaO2            2.72
Platelets       2.40
RespRate       10.87
SaO2            0.44
SysABP         12.14
Temp           10.33
TroponinI       0.09
TroponinT       0.48
Urine          22.45
WBC             2.30
Weight         23.72
pH              2.76
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [41]:
ICUType_4_validation_ids = validation_X[validation_X["ICUType"] == 4.0]
ICUType_4_validation_ids = ICUType_4_validation_ids[ICUType_4_validation_ids["Time"] == 0.0]
ICUType_4_validation_ids = ICUType_4_validation_ids["RecordID"]
ICUType_4_validation = validation_X[validation_X["RecordID"].isin(ICUType_4_validation_ids)]
ICUType_4_validation_missing = round((ICUType_4_validation.isna().sum()/total_patients_validation)*100,2)
ICUType_4_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            27.11
ALT            27.10
AST            27.10
Age             1.35
Albumin        27.19
BUN            25.57
Bilirubin      27.11
Cholesterol    27.54
Creatinine     25.56
DiasABP         9.55
FiO2           22.64
GCS            15.08
Gender         27.01
Glucose        25.55
HCO3           25.58
HCT            24.98
HR              2.39
Height          1.35
ICUType        27.01
K              25.39
Lactate        25.99
MAP             9.61
MechVent       22.80
Mg             25.55
NIDiasABP      17.77
NIMAP          17.89
NISysABP       17.76
Na             25.44
PaCO2          24.43
PaO2           24.44
Platelets      25.55
RespRate       19.74
SaO2           27.04
SysABP          9.55
Temp           17.84
TroponinI      27.53
TroponinT      27.35
Urine           6.95
WBC            25.68
Weight         16.13
pH             24.38
dtype: float64

<h4>ICUTYpe 4 measurements</h4>

In [43]:
ICUType_4_measurements_validation = round((ICUType_4_validation.count()/total_patients_validation)*100,2)
ICUType_4_measurements_validation

RecordID       27.58
level_1        27.58
Time           27.58
ALP             0.47
ALT             0.48
AST             0.48
Age            26.23
Albumin         0.39
BUN             2.01
Bilirubin       0.47
Cholesterol     0.04
Creatinine      2.02
DiasABP        18.03
FiO2            4.94
GCS            12.51
Gender          0.57
Glucose         2.03
HCO3            2.00
HCT             2.60
HR             25.19
Height         26.23
ICUType         0.57
K               2.19
Lactate         1.59
MAP            17.97
MechVent        4.78
Mg              2.03
NIDiasABP       9.81
NIMAP           9.70
NISysABP        9.82
Na              2.14
PaCO2           3.15
PaO2            3.15
Platelets       2.03
RespRate        7.84
SaO2            0.55
SysABP         18.03
Temp            9.74
TroponinI       0.05
TroponinT       0.23
Urine          20.63
WBC             1.90
Weight         11.45
pH              3.21
dtype: float64

<h4>+65 missing rate</h4>

In [45]:
more_than_or_equal_to_65_validation_ids = validation_X[validation_X["Age"] >= 65]
more_than_or_equal_to_65_validation_ids = more_than_or_equal_to_65_validation_ids[more_than_or_equal_to_65_validation_ids["Time"] == 0.0]
more_than_or_equal_to_65_validation_ids = more_than_or_equal_to_65_validation_ids["RecordID"]
more_than_or_equal_to_65_validation = validation_X[validation_X["RecordID"].isin(more_than_or_equal_to_65_validation_ids)]
more_than_or_equal_to_65_validation_missing = round((more_than_or_equal_to_65_validation.isna().sum()/total_patients_validation)*100,2)
more_than_or_equal_to_65_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            55.66
ALT            55.64
AST            55.64
Age             2.84
Albumin        55.74
BUN            52.44
Bilirubin      55.62
Cholesterol    56.26
Creatinine     52.42
DiasABP        26.31
FiO2           47.79
GCS            38.85
Gender         55.19
Glucose        52.70
HCO3           52.51
HCT            51.14
HR              4.98
Height          2.84
ICUType        55.19
K              52.27
Lactate        54.21
MAP            26.42
MechVent       48.23
Mg             52.51
NIDiasABP      31.78
NIMAP          32.20
NISysABP       31.76
Na             52.55
PaCO2          50.06
PaO2           50.07
Platelets      52.34
RespRate       41.60
SaO2           53.93
SysABP         26.31
Temp           35.07
TroponinI      56.21
TroponinT      55.61
Urine          16.53
WBC            52.70
Weight         26.23
pH             49.73
dtype: float64

<h4>+65 measurements</h4>

In [47]:
more_than_or_equal_to_65_validation_measurements = round((more_than_or_equal_to_65_validation.count()/total_patients_validation)*100,2)
more_than_or_equal_to_65_validation_measurements

RecordID       56.36
level_1        56.36
Time           56.36
ALP             0.70
ALT             0.72
AST             0.72
Age            53.52
Albumin         0.62
BUN             3.92
Bilirubin       0.74
Cholesterol     0.10
Creatinine      3.94
DiasABP        30.05
FiO2            8.57
GCS            17.51
Gender          1.17
Glucose         3.66
HCO3            3.85
HCT             5.22
HR             51.38
Height         53.52
ICUType         1.17
K               4.09
Lactate         2.15
MAP            29.94
MechVent        8.14
Mg              3.85
NIDiasABP      24.58
NIMAP          24.16
NISysABP       24.60
Na              3.81
PaCO2           6.30
PaO2            6.29
Platelets       4.02
RespRate       14.76
SaO2            2.43
SysABP         30.05
Temp           21.29
TroponinI       0.15
TroponinT       0.75
Urine          39.83
WBC             3.66
Weight         30.13
pH              6.63
dtype: float64

<h4>-65 missing rate</h4>

In [49]:
less_than_65_validation_ids = validation_X[validation_X["Age"] < 65]
less_than_65_validation_ids = less_than_65_validation_ids[less_than_65_validation_ids["Time"] == 0.0]
less_than_65_validation_ids = less_than_65_validation_ids["RecordID"]
less_than_65_validation = validation_X[validation_X["RecordID"].isin(less_than_65_validation_ids)]
less_than_65_validation_missing = round((less_than_65_validation.isna().sum()/total_patients_validation)*100,2)
less_than_65_validation_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            42.79
ALT            42.76
AST            42.77
Age             3.17
Albumin        43.02
BUN            40.52
Bilirubin      42.77
Cholesterol    43.58
Creatinine     40.51
DiasABP        21.55
FiO2           36.79
GCS            29.40
Gender         42.73
Glucose        40.66
HCO3           40.57
HCT            39.59
HR              5.03
Height          3.17
ICUType        42.73
K              40.36
Lactate        41.79
MAP            21.63
MechVent       37.04
Mg             40.62
NIDiasABP      24.94
NIMAP          25.27
NISysABP       24.93
Na             40.55
PaCO2          38.85
PaO2           38.85
Platelets      40.47
RespRate       33.29
SaO2           42.24
SysABP         21.55
Temp           28.42
TroponinI      43.60
TroponinT      43.36
Urine          13.99
WBC            40.74
Weight         21.41
pH             38.61
dtype: float64

<h4>-65 measurements</h4>

In [51]:
less_than_65_validation_measurements = round((less_than_65_validation.count()/total_patients_validation)*100,2)
less_than_65_validation_measurements

RecordID       43.64
level_1        43.64
Time           43.64
ALP             0.85
ALT             0.88
AST             0.87
Age            40.46
Albumin         0.62
BUN             3.12
Bilirubin       0.87
Cholesterol     0.06
Creatinine      3.13
DiasABP        22.09
FiO2            6.85
GCS            14.23
Gender          0.91
Glucose         2.98
HCO3            3.07
HCT             4.05
HR             38.60
Height         40.46
ICUType         0.91
K               3.28
Lactate         1.85
MAP            22.00
MechVent        6.60
Mg              3.02
NIDiasABP      18.70
NIMAP          18.37
NISysABP       18.71
Na              3.09
PaCO2           4.79
PaO2            4.79
Platelets       3.17
RespRate       10.35
SaO2            1.40
SysABP         22.09
Temp           15.22
TroponinI       0.04
TroponinT       0.28
Urine          29.65
WBC             2.89
Weight         22.23
pH              5.02
dtype: float64

<h4>Filtering only patients who have height and weight</h4>

In [52]:
filtered_validation_X = validation_X[(validation_X['Height'] != -1) & (validation_X['Weight'] != -1) & (validation_X['Height'].notna()) & (validation_X['Weight'].notna())] 

<h4>Changing the height to meters</h4>

In [53]:
filtered_validation_X_metros = filtered_validation_X.copy()
filtered_validation_X_metros["Height"] = filtered_validation_X["Height"]/100
filtered_validation_X_metros["Height"]

432       1.753
433       1.753
434       1.753
435       1.753
436       1.753
          ...  
574459    1.930
574460    1.930
574461    1.930
574462    1.930
574463    1.930
Name: Height, Length: 24630, dtype: float64

<h4>Calculating BMI and classification</h4>

In [98]:
bmi_data_validation = filtered_validation_X_metros
bmi_data_validation["BMI"] = round(filtered_validation_X_metros["Weight"] / (filtered_validation_X_metros["Height"]**2), 1)
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
432,132555,0,0.0,,,,74.0,,,,...,98.0,34.8,,,35.0,,66.1,7.39,21.5,Peso normal
433,132555,1,1.0,,,,74.0,,19.0,,...,112.0,35.3,,,130.0,9.0,66.1,7.41,21.5,Peso normal
434,132555,2,2.0,,,,74.0,,,,...,104.0,36.05,,,210.0,,66.1,,21.5,Peso normal
435,132555,3,3.0,,,,74.0,,,,...,114.0,36.2,,,120.0,,66.1,,21.5,Peso normal
436,132555,4,4.0,,,,74.0,,,,...,111.0,36.1,,,185.0,,66.1,7.29,21.5,Peso normal


<h4>Taking only one moment from each patient</h4>

In [99]:
bmi_data_validation = bmi_data_validation.groupby("RecordID").first().reset_index()
bmi_data_validation

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132555,0,0.0,,,,74.0,,19.0,,...,98.000000,34.8,,,35.0,9.0,66.1,7.39,21.5,Peso normal
1,132573,0,0.0,,,,77.0,,,,...,,36.9,,,120.0,,90.1,,34.1,Obesidade grau 1
2,132575,0,0.0,,,,78.0,,18.0,,...,122.000000,37.4,,,38.0,12.5,63.0,7.34,22.4,Peso normal
3,132599,0,0.0,,,,53.0,,,,...,,37.3,,,350.0,,73.5,,23.3,Peso normal
4,132602,0,0.0,,,,80.0,,,,...,,37.3,,,150.0,,70.0,,21.5,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
997,162855,0,0.0,,,,90.0,,12.0,,...,111.500000,36.9,,,70.0,6.0,59.1,7.30,20.4,Peso normal
998,162883,0,0.0,,,,32.0,,,,...,,,,,,,75.0,,25.1,Sobrepeso
999,162907,0,0.0,,,,78.0,,17.0,,...,79.000000,39.3,,0.12,20.0,13.7,87.0,7.37,28.3,Sobrepeso
1000,162912,0,0.0,34.0,17.0,24.0,63.0,2.3,6.0,0.4,...,123.133333,35.0,,,370.0,5.4,80.0,7.03,25.3,Sobrepeso


In [100]:
bmi_data_validation["Classificacao"].value_counts()

Classificacao
Sobrepeso           372
Peso normal         282
Obesidade grau 1    174
Obesidade grau 2     75
Obesidade grau 3     68
Baixo peso           31
Name: count, dtype: int64

<h4>Undefined classification missing rate</h4>

In [103]:
classificacao_undefined_ids_validation = bmi_data_validation["RecordID"]
classificacao_undefined_validation = validation_X[~validation_X["RecordID"].isin(classificacao_undefined_ids_validation)]
classificacao_undefined_missing_validation = round((classificacao_undefined_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_undefined_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            47.00
ALT            46.98
AST            46.97
Age             3.65
Albumin        47.13
BUN            44.49
Bilirubin      46.96
Cholesterol    47.67
Creatinine     44.47
DiasABP        29.60
FiO2           41.26
GCS            32.08
Gender         46.76
Glucose        44.49
HCO3           44.50
HCT            43.81
HR              5.40
Height          3.65
ICUType        46.76
K              44.18
Lactate        46.16
MAP            29.74
MechVent       41.75
Mg             44.59
NIDiasABP      21.56
NIMAP          22.04
NISysABP       21.54
Na             44.37
PaCO2          44.54
PaO2           44.54
Platelets      44.71
RespRate       31.16
SaO2           47.21
SysABP         29.60
Temp           35.06
TroponinI      47.67
TroponinT      47.22
Urine          17.53
WBC            44.84
Weight         22.15
pH             44.47
dtype: float64

<h4>Undefined classification measurements</h4>

In [104]:
classificacao_undefined_measurements_validation = round((classificacao_undefined_validation.count()/total_patients_validation)*100,2)
classificacao_undefined_measurements_validation

RecordID       47.76
level_1        47.76
Time           47.76
ALP             0.76
ALT             0.78
AST             0.79
Age            44.11
Albumin         0.63
BUN             3.27
Bilirubin       0.80
Cholesterol     0.09
Creatinine      3.29
DiasABP        18.16
FiO2            6.50
GCS            15.68
Gender          0.99
Glucose         3.27
HCO3            3.26
HCT             3.95
HR             42.36
Height         44.11
ICUType         0.99
K               3.58
Lactate         1.59
MAP            18.02
MechVent        6.01
Mg              3.17
NIDiasABP      26.20
NIMAP          25.72
NISysABP       26.22
Na              3.38
PaCO2           3.22
PaO2            3.21
Platelets       3.04
RespRate       16.60
SaO2            0.54
SysABP         18.16
Temp           12.70
TroponinI       0.09
TroponinT       0.54
Urine          30.23
WBC             2.92
Weight         25.61
pH              3.28
dtype: float64

<h4>Classification low weight missing rate</h4>

In [106]:
classificacao_baixo_peso_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_ids_validation = classificacao_baixo_peso_ids_validation["RecordID"]
classificacao_baixo_peso_validation = validation_X[validation_X["RecordID"].isin(classificacao_baixo_peso_ids_validation)]
classificacao_baixo_peso_missing_validation = round((classificacao_baixo_peso_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_baixo_peso_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            1.60
ALT            1.60
AST            1.60
Age            0.04
Albumin        1.60
BUN            1.49
Bilirubin      1.60
Cholesterol    1.61
Creatinine     1.49
DiasABP        0.42
FiO2           1.31
GCS            1.10
Gender         1.58
Glucose        1.50
HCO3           1.50
HCT            1.46
HR             0.11
Height         0.04
ICUType        1.58
K              1.49
Lactate        1.52
MAP            0.42
MechVent       1.26
Mg             1.49
NIDiasABP      1.14
NIMAP          1.15
NISysABP       1.14
Na             1.50
PaCO2          1.35
PaO2           1.35
Platelets      1.49
RespRate       1.52
SaO2           1.46
SysABP         0.42
Temp           0.95
TroponinI      1.61
TroponinT      1.60
Urine          0.41
WBC            1.50
Weight         0.91
pH             1.34
dtype: float64

<h4>Classification low weight measurements</h4>

In [108]:
classificacao_baixo_peso_measurements_validation = round((classificacao_baixo_peso_validation.count()/total_patients_validation)*100,2)
classificacao_baixo_peso_measurements_validation

RecordID       1.62
level_1        1.62
Time           1.62
ALP            0.02
ALT            0.02
AST            0.02
Age            1.57
Albumin        0.02
BUN            0.13
Bilirubin      0.02
Cholesterol    0.00
Creatinine     0.13
DiasABP        1.20
FiO2           0.30
GCS            0.51
Gender         0.03
Glucose        0.11
HCO3           0.12
HCT            0.16
HR             1.51
Height         1.57
ICUType        0.03
K              0.12
Lactate        0.10
MAP            1.19
MechVent       0.36
Mg             0.12
NIDiasABP      0.47
NIMAP          0.47
NISysABP       0.47
Na             0.12
PaCO2          0.26
PaO2           0.26
Platelets      0.12
RespRate       0.10
SaO2           0.15
SysABP         1.20
Temp           0.67
TroponinI      0.01
TroponinT      0.02
Urine          1.20
WBC            0.12
Weight         0.70
pH             0.28
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [110]:
classificacao_peso_normal_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Peso normal"]
classificacao_peso_normal_ids_validation = classificacao_peso_normal_ids_validation["RecordID"]
classificacao_peso_normal_validation = validation_X[validation_X["RecordID"].isin(classificacao_peso_normal_ids_validation)]
classificacao_peso_normal_missing_validation = round((classificacao_peso_normal_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_peso_normal_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            14.50
ALT            14.49
AST            14.49
Age             0.62
Albumin        14.54
BUN            13.69
Bilirubin      14.50
Cholesterol    14.69
Creatinine     13.68
DiasABP         5.31
FiO2           12.28
GCS            10.05
Gender         14.40
Glucose        13.77
HCO3           13.71
HCT            13.25
HR              1.23
Height          0.62
ICUType        14.40
K              13.65
Lactate        14.13
MAP             5.35
MechVent       12.36
Mg             13.71
NIDiasABP       9.64
NIMAP           9.67
NISysABP        9.64
Na             13.73
PaCO2          12.70
PaO2           12.71
Platelets      13.59
RespRate       12.17
SaO2           13.84
SysABP          5.31
Temp            8.32
TroponinI      14.68
TroponinT      14.56
Urine           3.89
WBC            13.71
Weight          7.23
pH             12.57
dtype: float64

<h4>Classification normal weight measurements</h4>

In [112]:
classificacao_peso_normal_measurements_validation = round((classificacao_peso_normal_validation.count()/total_patients_validation)*100,2)
classificacao_peso_normal_measurements_validation

RecordID       14.70
level_1        14.70
Time           14.70
ALP             0.20
ALT             0.21
AST             0.21
Age            14.09
Albumin         0.16
BUN             1.02
Bilirubin       0.21
Cholesterol     0.01
Creatinine      1.02
DiasABP         9.39
FiO2            2.43
GCS             4.65
Gender          0.31
Glucose         0.93
HCO3            0.99
HCT             1.45
HR             13.47
Height         14.09
ICUType         0.31
K               1.05
Lactate         0.57
MAP             9.35
MechVent        2.34
Mg              1.00
NIDiasABP       5.06
NIMAP           5.03
NISysABP        5.06
Na              0.97
PaCO2           2.00
PaO2            2.00
Platelets       1.11
RespRate        2.53
SaO2            0.86
SysABP          9.39
Temp            6.38
TroponinI       0.03
TroponinT       0.14
Urine          10.82
WBC             0.99
Weight          7.48
pH              2.13
dtype: float64

<h4>Classification overweight missing rate</h4>

In [114]:
classificacao_sobrepeso_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_ids_validation = classificacao_sobrepeso_ids_validation["RecordID"]
classificacao_sobrepeso_validation = validation_X[validation_X["RecordID"].isin(classificacao_sobrepeso_ids_validation)]
classificacao_sobrepeso_missing_validation = round((classificacao_sobrepeso_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_sobrepeso_missing_validation

RecordID        0.00
level_1         0.00
Time            0.00
ALP            19.06
ALT            19.05
AST            19.06
Age             0.95
Albumin        19.15
BUN            17.99
Bilirubin      19.06
Cholesterol    19.36
Creatinine     17.98
DiasABP         7.02
FiO2           16.16
GCS            13.49
Gender         18.99
Glucose        18.15
HCO3           18.03
HCT            17.32
HR              1.81
Height          0.95
ICUType        18.99
K              17.99
Lactate        18.58
MAP             7.05
MechVent       16.20
Mg             17.96
NIDiasABP      13.02
NIMAP          13.13
NISysABP       13.01
Na             18.09
PaCO2          16.47
PaO2           16.48
Platelets      17.76
RespRate       16.24
SaO2           18.15
SysABP          7.02
Temp           10.30
TroponinI      19.35
TroponinT      19.22
Urine           4.73
WBC            18.00
Weight          9.68
pH             16.26
dtype: float64

<h4>Classification overweight measurements</h4>

In [116]:
classificacao_sobrepeso_measurements_validation = round((classificacao_sobrepeso_validation.count()/total_patients_validation)*100,2)
classificacao_sobrepeso_measurements_validation

RecordID       19.40
level_1        19.40
Time           19.40
ALP             0.33
ALT             0.34
AST             0.34
Age            18.45
Albumin         0.24
BUN             1.41
Bilirubin       0.34
Cholesterol     0.04
Creatinine      1.42
DiasABP        12.38
FiO2            3.23
GCS             5.91
Gender          0.40
Glucose         1.25
HCO3            1.36
HCT             2.08
HR             17.59
Height         18.45
ICUType         0.40
K               1.41
Lactate         0.82
MAP            12.34
MechVent        3.20
Mg              1.44
NIDiasABP       6.38
NIMAP           6.27
NISysABP        6.39
Na              1.31
PaCO2           2.93
PaO2            2.92
Platelets       1.64
RespRate        3.16
SaO2            1.24
SysABP         12.38
Temp            9.09
TroponinI       0.04
TroponinT       0.17
Urine          14.66
WBC             1.39
Weight          9.71
pH              3.14
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [118]:
classificacao_obesidade_1_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_1_ids_validation = classificacao_obesidade_1_ids_validation["RecordID"]
classificacao_obesidade_1_validation = validation_X[validation_X["RecordID"].isin(classificacao_obesidade_1_ids_validation)]
classificacao_obesidade_1_missing_validation = round((classificacao_obesidade_1_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_obesidade_1_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            8.94
ALT            8.94
AST            8.94
Age            0.32
Albumin        8.96
BUN            8.40
Bilirubin      8.93
Cholesterol    9.05
Creatinine     8.40
DiasABP        2.94
FiO2           7.51
GCS            6.24
Gender         8.88
Glucose        8.48
HCO3           8.42
HCT            8.15
HR             0.67
Height         0.32
ICUType        8.88
K              8.39
Lactate        8.61
MAP            2.95
MechVent       7.55
Mg             8.43
NIDiasABP      6.11
NIMAP          6.13
NISysABP       6.10
Na             8.45
PaCO2          7.63
PaO2           7.63
Platelets      8.36
RespRate       7.69
SaO2           8.50
SysABP         2.94
Temp           4.71
TroponinI      9.05
TroponinT      8.98
Urine          1.99
WBC            8.45
Weight         4.33
pH             7.54
dtype: float64

<h4>Grade 1 obesity measurements</h4>

In [120]:
classificacao_obesidade_1_measurements_validation = round((classificacao_obesidade_1_validation.count()/total_patients_validation)*100,2)
classificacao_obesidade_1_measurements_validation

RecordID       9.07
level_1        9.07
Time           9.07
ALP            0.13
ALT            0.14
AST            0.14
Age            8.75
Albumin        0.11
BUN            0.67
Bilirubin      0.14
Cholesterol    0.02
Creatinine     0.67
DiasABP        6.14
FiO2           1.56
GCS            2.83
Gender         0.19
Glucose        0.59
HCO3           0.65
HCT            0.92
HR             8.40
Height         8.75
ICUType        0.19
K              0.68
Lactate        0.47
MAP            6.12
MechVent       1.52
Mg             0.64
NIDiasABP      2.97
NIMAP          2.94
NISysABP       2.97
Na             0.62
PaCO2          1.44
PaO2           1.44
Platelets      0.71
RespRate       1.39
SaO2           0.57
SysABP         6.14
Temp           4.36
TroponinI      0.02
TroponinT      0.09
Urine          7.08
WBC            0.63
Weight         4.74
pH             1.53
dtype: float64

<h4>Grade 2 obesity missing rate</h4>

In [122]:
classificacao_obesidade_2_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_2_ids_validation = classificacao_obesidade_2_ids_validation["RecordID"]
classificacao_obesidade_2_validation = validation_X[validation_X["RecordID"].isin(classificacao_obesidade_2_ids_validation)]
classificacao_obesidade_2_missing_validation = round((classificacao_obesidade_2_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_obesidade_2_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.85
ALT            3.86
AST            3.86
Age            0.25
Albumin        3.88
BUN            3.62
Bilirubin      3.86
Cholesterol    3.91
Creatinine     3.62
DiasABP        1.37
FiO2           3.29
GCS            2.80
Gender         3.83
Glucose        3.66
HCO3           3.63
HCT            3.51
HR             0.44
Height         0.25
ICUType        3.83
K              3.64
Lactate        3.70
MAP            1.35
MechVent       3.34
Mg             3.65
NIDiasABP      2.73
NIMAP          2.80
NISysABP       2.73
Na             3.65
PaCO2          3.30
PaO2           3.30
Platelets      3.59
RespRate       3.06
SaO2           3.68
SysABP         1.37
Temp           2.10
TroponinI      3.91
TroponinT      3.87
Urine          1.09
WBC            3.63
Weight         1.73
pH             3.27
dtype: float64

<h4>Grade 2 obesity measurements</h4>

In [124]:
classificacao_obesidade_2_measurements_validation = round((classificacao_obesidade_2_validation.count()/total_patients_validation)*100,2)
classificacao_obesidade_2_measurements_validation

RecordID       3.91
level_1        3.91
Time           3.91
ALP            0.06
ALT            0.05
AST            0.05
Age            3.66
Albumin        0.03
BUN            0.29
Bilirubin      0.05
Cholesterol    0.00
Creatinine     0.29
DiasABP        2.54
FiO2           0.62
GCS            1.11
Gender         0.08
Glucose        0.26
HCO3           0.28
HCT            0.40
HR             3.47
Height         3.66
ICUType        0.08
K              0.27
Lactate        0.21
MAP            2.56
MechVent       0.57
Mg             0.26
NIDiasABP      1.18
NIMAP          1.11
NISysABP       1.18
Na             0.26
PaCO2          0.61
PaO2           0.61
Platelets      0.32
RespRate       0.85
SaO2           0.23
SysABP         2.54
Temp           1.81
TroponinI      0.00
TroponinT      0.04
Urine          2.82
WBC            0.28
Weight         2.18
pH             0.64
dtype: float64

<h4>Grade 3 obesity missing rate</h4>

In [126]:
classificacao_obesidade_3_ids_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_3_ids_validation = classificacao_obesidade_3_ids_validation["RecordID"]
classificacao_obesidade_3_validation = validation_X[validation_X["RecordID"].isin(classificacao_obesidade_3_ids_validation)]
classificacao_obesidade_3_missing_validation = round((classificacao_obesidade_3_validation.isna().sum()/total_patients_validation)*100,2)
classificacao_obesidade_3_missing_validation

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.50
ALT            3.50
AST            3.50
Age            0.18
Albumin        3.50
BUN            3.28
Bilirubin      3.50
Cholesterol    3.54
Creatinine     3.28
DiasABP        1.21
FiO2           2.78
GCS            2.48
Gender         3.47
Glucose        3.32
HCO3           3.29
HCT            3.23
HR             0.37
Height         0.18
ICUType        3.47
K              3.29
Lactate        3.30
MAP            1.18
MechVent       2.81
Mg             3.30
NIDiasABP      2.53
NIMAP          2.55
NISysABP       2.53
Na             3.31
PaCO2          2.92
PaO2           2.91
Platelets      3.30
RespRate       3.05
SaO2           3.33
SysABP         1.21
Temp           2.04
TroponinI      3.54
TroponinT      3.52
Urine          0.88
WBC            3.31
Weight         1.61
pH             2.89
dtype: float64

<h4>Grade 3 obesity measurements</h4>

In [129]:
classificacao_obesidade_3_measurements_validation = round((classificacao_obesidade_3_validation.count()/total_patients_validation)*100,2)
classificacao_obesidade_3_measurements_validation

RecordID       3.55
level_1        3.55
Time           3.55
ALP            0.05
ALT            0.05
AST            0.05
Age            3.36
Albumin        0.04
BUN            0.26
Bilirubin      0.05
Cholesterol    0.00
Creatinine     0.26
DiasABP        2.34
FiO2           0.77
GCS            1.06
Gender         0.07
Glucose        0.23
HCO3           0.25
HCT            0.31
HR             3.18
Height         3.36
ICUType        0.07
K              0.25
Lactate        0.24
MAP            2.37
MechVent       0.74
Mg             0.25
NIDiasABP      1.02
NIMAP          1.00
NISysABP       1.02
Na             0.24
PaCO2          0.63
PaO2           0.63
Platelets      0.25
RespRate       0.49
SaO2           0.22
SysABP         2.34
Temp           1.50
TroponinI      0.00
TroponinT      0.03
Urine          2.67
WBC            0.23
Weight         1.93
pH             0.66
dtype: float64

<h4>Construction of the missing rate table</h4>

In [134]:
df_missing_validation = pd.DataFrame(columns=df_columns)
df_missing_transpose_validation = df_missing_validation.T
df_missing_transpose_validation["Female"] = female_gender_missing_rate_validation
df_missing_transpose_validation["Male"] = male_gender_missing_rate_validation
df_missing_transpose_validation["Undefined gender"] = undefined_gender_missing_rate_validation
df_missing_transpose_validation["ICUType 1"] = ICUType_1_validation_missing
df_missing_transpose_validation["ICUType 2"] = ICUType_2_validation_missing
df_missing_transpose_validation["ICUType 3"] = ICUType_3_validation_missing
df_missing_transpose_validation["ICUType 4"] = ICUType_4_validation_missing
df_missing_transpose_validation["Age 65+"] = more_than_or_equal_to_65_validation_missing
df_missing_transpose_validation["Age 65-"] = less_than_65_validation_missing
df_missing_transpose_validation["Low Weight"] = classificacao_baixo_peso_missing_validation
df_missing_transpose_validation["Normal Weight"] = classificacao_peso_normal_missing_validation
df_missing_transpose_validation["Overweight"] = classificacao_sobrepeso_missing_validation
df_missing_transpose_validation["Obesity Grade 1"] = classificacao_obesidade_1_missing_validation
df_missing_transpose_validation["Obesity Grade 2"] = classificacao_obesidade_2_missing_validation
df_missing_transpose_validation["Obesity Grade 3"] = classificacao_obesidade_3_missing_validation
df_missing_transpose_validation["Undefined classification"] = classificacao_undefined_missing_validation
df_missing_transpose_validation = df_missing_transpose_validation.drop("RecordID", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("level_1", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("Time", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("Age", axis=0)
df_missing_transpose_validation = df_missing_transpose_validation.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographcs - Validation</h2>"))
df_missing_transpose_validation

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,43.87,54.43,0.16,14.51,21.03,35.8,27.11,55.66,42.79,1.6,14.5,19.06,8.94,3.85,3.5,47.0
ALT,43.85,54.41,0.15,14.5,21.03,35.78,27.1,55.64,42.76,1.6,14.49,19.05,8.94,3.86,3.5,46.98
AST,43.85,54.4,0.15,14.5,21.03,35.78,27.1,55.64,42.77,1.6,14.49,19.06,8.94,3.86,3.5,46.97
Albumin,43.98,54.62,0.16,14.53,21.07,35.97,27.19,55.74,43.02,1.6,14.54,19.15,8.96,3.88,3.5,47.13
BUN,41.42,51.39,0.15,13.67,19.82,33.91,25.57,52.44,40.52,1.49,13.69,17.99,8.4,3.62,3.28,44.49
Bilirubin,43.85,54.39,0.16,14.5,21.04,35.75,27.11,55.62,42.77,1.6,14.5,19.06,8.93,3.86,3.5,46.96
Cholesterol,44.46,55.22,0.16,14.61,21.16,36.52,27.54,56.26,43.58,1.61,14.69,19.36,9.05,3.91,3.54,47.67
Creatinine,41.4,51.38,0.15,13.65,19.81,33.9,25.56,52.42,40.51,1.49,13.68,17.98,8.4,3.62,3.28,44.47
DiasABP,22.14,25.64,0.08,9.14,4.76,24.41,9.55,26.31,21.55,0.42,5.31,7.02,2.94,1.37,1.21,29.6
FiO2,37.86,46.57,0.15,13.03,17.8,31.12,22.64,47.79,36.79,1.31,12.28,16.16,7.51,3.29,2.78,41.26


<h4>Construction of the measurements table</h4>

In [133]:
df_measurements_validation = pd.DataFrame(columns=df_columns)
df_measurements_transpose_validation = df_measurements_validation.T
df_measurements_transpose_validation["Female"] = female_gender_measurements_validation
df_measurements_transpose_validation["Male"] = male_gender_measurements_validation
df_measurements_transpose_validation["Undefined gender"] = undefined_gender_measurements_validation
df_measurements_transpose_validation["ICUType 1"] = ICUType_1_measurements_validation
df_measurements_transpose_validation["ICUType 2"] = ICUType_2_measurements_validation
df_measurements_transpose_validation["ICUType 3"] = ICUType_3_measurements_validation
df_measurements_transpose_validation["ICUType 4"] = ICUType_4_measurements_validation
df_measurements_transpose_validation["Age 65+"] = more_than_or_equal_to_65_validation_measurements
df_measurements_transpose_validation["Age 65-"] = less_than_65_validation_measurements
df_measurements_transpose_validation["Low Weight"] = classificacao_baixo_peso_measurements_validation
df_measurements_transpose_validation["Normal Weight"] = classificacao_peso_normal_measurements_validation
df_measurements_transpose_validation["Overweight"] = classificacao_sobrepeso_measurements_validation
df_measurements_transpose_validation["Obesity Grade 1"] = classificacao_obesidade_1_measurements_validation
df_measurements_transpose_validation["Obesity Grade 2"] = classificacao_obesidade_2_measurements_validation
df_measurements_transpose_validation["Obesity Grade 3"] = classificacao_obesidade_3_measurements_validation
df_measurements_transpose_validation["Undefined classification"] = classificacao_undefined_measurements_validation
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("RecordID", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("level_1", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("Time", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("Age", axis=0)
df_measurements_transpose_validation = df_measurements_transpose_validation.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_measurements_transpose_validation

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,0.66,0.89,0.0,0.2,0.14,0.74,0.47,0.7,0.85,0.02,0.2,0.33,0.13,0.06,0.05,0.76
ALT,0.68,0.91,0.0,0.21,0.14,0.76,0.48,0.72,0.88,0.02,0.21,0.34,0.14,0.05,0.05,0.78
AST,0.68,0.91,0.0,0.21,0.14,0.76,0.48,0.72,0.87,0.02,0.21,0.34,0.14,0.05,0.05,0.79
Albumin,0.55,0.69,0.0,0.17,0.09,0.58,0.39,0.62,0.62,0.02,0.16,0.24,0.11,0.03,0.04,0.63
BUN,3.1,3.93,0.01,1.04,1.35,2.64,2.01,3.92,3.12,0.13,1.02,1.41,0.67,0.29,0.26,3.27
Bilirubin,0.68,0.93,0.0,0.21,0.13,0.8,0.47,0.74,0.87,0.02,0.21,0.34,0.14,0.05,0.05,0.8
Cholesterol,0.07,0.1,0.0,0.09,0.01,0.03,0.04,0.1,0.06,0.0,0.01,0.04,0.02,0.0,0.0,0.09
Creatinine,3.12,3.94,0.01,1.05,1.36,2.65,2.02,3.94,3.13,0.13,1.02,1.42,0.67,0.29,0.26,3.29
DiasABP,22.38,29.68,0.08,5.56,16.41,12.14,18.03,30.05,22.09,1.2,9.39,12.38,6.14,2.54,2.34,18.16
FiO2,6.67,8.75,0.0,1.67,3.37,5.43,4.94,8.57,6.85,0.3,2.43,3.23,1.56,0.62,0.77,6.5


# Test data

<h4>Loading test database</h4>

In [141]:
test_X = physionet2012_dataset['test_X']

<h4>Total number of patients tested</h4>

In [149]:
total_pacientes_test = test_X["RecordID"].count()
total_pacientes_test

115152

<h4>Female gender missing rate</h4>

In [150]:
female_gender_test_ids = test_X[test_X['Gender'] == 0.0]
female_gender_test_ids = female_gender_test_ids["RecordID"]
female_gender_test = test_X[test_X["RecordID"].isin(female_gender_test_ids)]
female_gender_missing_rate_test = round((female_gender_test.isna().sum()/total_pacientes_test)*100,2)
female_gender_missing_rate_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            43.04
ALT            43.02
AST            43.02
Age             2.21
Albumin        43.21
BUN            40.51
Bilirubin      43.02
Cholesterol    43.70
Creatinine     40.50
DiasABP        20.62
FiO2           36.92
GCS            29.43
Gender         42.86
Glucose        40.68
HCO3           40.57
HCT            39.65
HR              3.80
Height          2.21
ICUType        42.86
K              40.34
Lactate        41.99
MAP            20.80
MechVent       36.98
Mg             40.59
NIDiasABP      24.69
NIMAP          24.91
NISysABP       24.67
Na             40.55
PaCO2          38.91
PaO2           38.93
Platelets      40.58
RespRate       32.01
SaO2           42.01
SysABP         20.62
Temp           27.74
TroponinI      43.66
TroponinT      43.29
Urine          12.96
WBC            40.79
Weight         19.70
pH             38.73
dtype: float64

<h4>Female gender measurements</h4>

In [152]:
female_gender_measurements_test = round((female_gender_test.count()/total_pacientes_test)*100,2)
female_gender_measurements_test

RecordID       43.77
level_1        43.77
Time           43.77
ALP             0.72
ALT             0.75
AST             0.75
Age            41.56
Albumin         0.55
BUN             3.26
Bilirubin       0.75
Cholesterol     0.07
Creatinine      3.27
DiasABP        23.15
FiO2            6.85
GCS            14.34
Gender          0.91
Glucose         3.09
HCO3            3.20
HCT             4.12
HR             39.96
Height         41.56
ICUType         0.91
K               3.43
Lactate         1.78
MAP            22.97
MechVent        6.79
Mg              3.18
NIDiasABP      19.08
NIMAP          18.86
NISysABP       19.10
Na              3.22
PaCO2           4.86
PaO2            4.84
Platelets       3.19
RespRate       11.76
SaO2            1.75
SysABP         23.15
Temp           16.02
TroponinI       0.11
TroponinT       0.47
Urine          30.81
WBC             2.98
Weight         24.07
pH              5.04
dtype: float64

<h4>Male gender missing rate</h4>

In [154]:
male_gender_test_ids = test_X[test_X['Gender'] == 1.0]
male_gender_test_ids = male_gender_test_ids["RecordID"]
male_gender_test = test_X[test_X["RecordID"].isin(male_gender_test_ids)]
male_gender_missing_rate_test = round((male_gender_test.isna().sum()/total_pacientes_test)*100,2)
male_gender_missing_rate_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            55.20
ALT            55.16
AST            55.16
Age             3.16
Albumin        55.41
BUN            52.02
Bilirubin      55.17
Cholesterol    56.01
Creatinine     52.00
DiasABP        25.16
FiO2           47.12
GCS            38.11
Gender         54.94
Glucose        52.30
HCO3           52.13
HCT            50.67
HR              5.46
Height          3.16
ICUType        54.94
K              51.89
Lactate        53.86
MAP            25.31
MechVent       47.72
Mg             52.09
NIDiasABP      32.64
NIMAP          32.89
NISysABP       32.62
Na             52.15
PaCO2          49.29
PaO2           49.29
Platelets      51.93
RespRate       44.56
SaO2           53.71
SysABP         25.16
Temp           34.50
TroponinI      55.99
TroponinT      55.51
Urine          17.27
WBC            52.35
Weight         26.52
pH             48.90
dtype: float64

<h4>Male gender measurements</h4>

In [156]:
male_gender_measurements_test = round((male_gender_test.count()/total_pacientes_test)*100,2)
male_gender_measurements_test

RecordID       56.11
level_1        56.11
Time           56.11
ALP             0.91
ALT             0.94
AST             0.94
Age            52.95
Albumin         0.69
BUN             4.09
Bilirubin       0.94
Cholesterol     0.10
Creatinine      4.11
DiasABP        30.94
FiO2            8.99
GCS            18.00
Gender          1.17
Glucose         3.81
HCO3            3.98
HCT             5.44
HR             50.64
Height         52.95
ICUType         1.17
K               4.22
Lactate         2.24
MAP            30.79
MechVent        8.38
Mg              4.02
NIDiasABP      23.47
NIMAP          23.22
NISysABP       23.49
Na              3.96
PaCO2           6.81
PaO2            6.81
Platelets       4.17
RespRate       11.55
SaO2            2.40
SysABP         30.95
Temp           21.61
TroponinI       0.11
TroponinT       0.60
Urine          38.84
WBC             3.75
Weight         29.58
pH              7.21
dtype: float64

<h4>Undefined gender missing rate</h4>

In [158]:
undefined_gender_ids_test = test_X[test_X["Gender"] == -1.0]
undefined_gender_ids_test = undefined_gender_ids_test["RecordID"]
undefined_gender_test = test_X[test_X["RecordID"].isin(undefined_gender_ids_test)]
undefined_gender_missing_rate_test = round((undefined_gender_test.isna().sum()/total_pacientes_test)*100,2)
undefined_gender_missing_rate_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            0.12
ALT            0.12
AST            0.12
Age            0.04
Albumin        0.12
BUN            0.11
Bilirubin      0.12
Cholesterol    0.13
Creatinine     0.11
DiasABP        0.05
FiO2           0.10
GCS            0.10
Gender         0.12
Glucose        0.11
HCO3           0.11
HCT            0.12
HR             0.04
Height         0.04
ICUType        0.12
K              0.11
Lactate        0.10
MAP            0.05
MechVent       0.09
Mg             0.11
NIDiasABP      0.11
NIMAP          0.11
NISysABP       0.11
Na             0.11
PaCO2          0.09
PaO2           0.09
Platelets      0.11
RespRate       0.13
SaO2           0.12
SysABP         0.05
Temp           0.09
TroponinI      0.13
TroponinT      0.12
Urine          0.07
WBC            0.11
Weight         0.04
pH             0.09
dtype: float64

<h4>Undefined gender measurements</h4>

In [160]:
undefined_gender_measurements_test = round((undefined_gender_test.count()/total_pacientes_test)*100,2)
undefined_gender_measurements_test

RecordID       0.13
level_1        0.13
Time           0.13
ALP            0.00
ALT            0.00
AST            0.00
Age            0.08
Albumin        0.00
BUN            0.01
Bilirubin      0.00
Cholesterol    0.00
Creatinine     0.01
DiasABP        0.08
FiO2           0.03
GCS            0.03
Gender         0.00
Glucose        0.01
HCO3           0.01
HCT            0.01
HR             0.08
Height         0.08
ICUType        0.00
K              0.01
Lactate        0.02
MAP            0.08
MechVent       0.03
Mg             0.01
NIDiasABP      0.01
NIMAP          0.01
NISysABP       0.01
Na             0.01
PaCO2          0.03
PaO2           0.03
Platelets      0.01
RespRate       0.00
SaO2           0.00
SysABP         0.08
Temp           0.04
TroponinI      0.00
TroponinT      0.00
Urine          0.06
WBC            0.01
Weight         0.08
pH             0.03
dtype: float64

<h4>ICUType 1 missing rate</h4>

In [162]:
ICUType_1_test_ids = test_X[test_X["ICUType"] == 1.0]
ICUType_1_test_ids = ICUType_1_test_ids[ICUType_1_test_ids["Time"] == 0.0]
ICUType_1_test_ids = ICUType_1_test_ids["RecordID"]
ICUType_1_test = test_X[test_X["RecordID"].isin(ICUType_1_test_ids)]
ICUType_1_test_missing = round((ICUType_1_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_1_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            14.49
ALT            14.48
AST            14.48
Age             1.31
Albumin        14.54
BUN            13.66
Bilirubin      14.49
Cholesterol    14.63
Creatinine     13.64
DiasABP         8.85
FiO2           13.17
GCS            10.86
Gender         14.41
Glucose        13.70
HCO3           13.69
HCT            13.41
HR              1.95
Height          1.31
ICUType        14.41
K              13.49
Lactate        14.37
MAP             8.90
MechVent       13.25
Mg             13.68
NIDiasABP       7.49
NIMAP           7.51
NISysABP        7.49
Na             13.68
PaCO2          13.48
PaO2           13.48
Platelets      13.67
RespRate        9.52
SaO2           14.00
SysABP          8.85
Temp           10.25
TroponinI      14.66
TroponinT      14.40
Urine           6.58
WBC            13.78
Weight          7.91
pH             13.44
dtype: float64

<h4>ICUType 1 measurements</h4>

In [164]:
ICUType_1_measurements_test = round((ICUType_1_test.count()/total_pacientes_test)*100,2)
ICUType_1_measurements_test

RecordID       14.71
level_1        14.71
Time           14.71
ALP             0.22
ALT             0.24
AST             0.23
Age            13.40
Albumin         0.17
BUN             1.06
Bilirubin       0.23
Cholesterol     0.08
Creatinine      1.08
DiasABP         5.86
FiO2            1.54
GCS             3.85
Gender          0.31
Glucose         1.01
HCO3            1.03
HCT             1.30
HR             12.77
Height         13.40
ICUType         0.31
K               1.22
Lactate         0.35
MAP             5.82
MechVent        1.47
Mg              1.03
NIDiasABP       7.22
NIMAP           7.20
NISysABP        7.23
Na              1.04
PaCO2           1.23
PaO2            1.24
Platelets       1.05
RespRate        5.20
SaO2            0.71
SysABP          5.87
Temp            4.46
TroponinI       0.06
TroponinT       0.32
Urine           8.14
WBC             0.94
Weight          6.80
pH              1.27
dtype: float64

<h4>ICUType 2 missing rate</h4>

In [166]:
ICUType_2_test_ids = test_X[test_X["ICUType"] == 2.0]
ICUType_2_test_ids = ICUType_2_test_ids[ICUType_2_test_ids["Time"] == 0.0]
ICUType_2_test_ids = ICUType_2_test_ids["RecordID"]
ICUType_2_test = test_X[test_X["RecordID"].isin(ICUType_2_test_ids)]
ICUType_2_test_missing = round((ICUType_2_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_2_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            21.16
ALT            21.15
AST            21.15
Age             0.54
Albumin        21.21
BUN            19.91
Bilirubin      21.16
Cholesterol    21.30
Creatinine     19.90
DiasABP         4.69
FiO2           17.45
GCS            15.67
Gender         20.86
Glucose        20.32
HCO3           20.03
HCT            18.95
HR              1.55
Height          0.54
ICUType        20.86
K              20.19
Lactate        20.58
MAP             4.62
MechVent       17.83
Mg             19.94
NIDiasABP      16.48
NIMAP          16.52
NISysABP       16.47
Na             20.23
PaCO2          16.93
PaO2           16.94
Platelets      19.57
RespRate       20.40
SaO2           18.90
SysABP          4.68
Temp            8.79
TroponinI      21.27
TroponinT      21.26
Urine           3.12
WBC            19.86
Weight         10.15
pH             16.49
dtype: float64

<h4>ICUType 2 measurements</h4>

In [168]:
ICUType_2_measurements_test = round((ICUType_2_test.count()/total_pacientes_test)*100,2)
ICUType_2_measurements_test

RecordID       21.30
level_1        21.30
Time           21.30
ALP             0.14
ALT             0.15
AST             0.15
Age            20.76
Albumin         0.09
BUN             1.39
Bilirubin       0.14
Cholesterol     0.00
Creatinine      1.40
DiasABP        16.61
FiO2            3.85
GCS             5.63
Gender          0.44
Glucose         0.98
HCO3            1.27
HCT             2.35
HR             19.75
Height         20.76
ICUType         0.44
K               1.11
Lactate         0.72
MAP            16.68
MechVent        3.47
Mg              1.36
NIDiasABP       4.82
NIMAP           4.78
NISysABP        4.83
Na              1.07
PaCO2           4.37
PaO2            4.36
Platelets       1.73
RespRate        0.90
SaO2            2.40
SysABP         16.62
Temp           12.51
TroponinI       0.03
TroponinT       0.04
Urine          18.18
WBC             1.44
Weight         11.15
pH              4.81
dtype: float64

<h4>ICUType 3 missing rate</h4>

In [170]:
ICUType_3_test_ids = test_X[test_X["ICUType"] == 3.0]
ICUType_3_test_ids = ICUType_3_test_ids[ICUType_3_test_ids["Time"] == 0.0]
ICUType_3_test_ids = ICUType_3_test_ids["RecordID"]
ICUType_3_test = test_X[test_X["RecordID"].isin(ICUType_3_test_ids)]
ICUType_3_test_missing = round((ICUType_3_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_3_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            34.88
ALT            34.85
AST            34.85
Age             2.35
Albumin        35.06
BUN            32.89
Bilirubin      34.82
Cholesterol    35.64
Creatinine     32.88
DiasABP        23.01
FiO2           30.35
GCS            26.22
Gender         34.94
Glucose        32.90
HCO3           32.89
HCT            32.48
HR              3.59
Height          2.35
ICUType        34.94
K              32.67
Lactate        34.27
MAP            23.15
MechVent       30.59
Mg             33.05
NIDiasABP      15.26
NIMAP          15.55
NISysABP       15.25
Na             32.84
PaCO2          32.86
PaO2           32.86
Platelets      33.21
RespRate       25.35
SaO2           35.24
SysABP         23.01
Temp           25.32
TroponinI      35.59
TroponinT      35.19
Urine          13.70
WBC            33.31
Weight         11.67
pH             32.81
dtype: float64

<h4>ICUType 3 measurements</h4>

In [172]:
ICUType_3_measurements_test = round((ICUType_3_test.count()/total_pacientes_test)*100,2)
ICUType_3_measurements_test

RecordID       35.68
level_1        35.68
Time           35.68
ALP             0.80
ALT             0.83
AST             0.83
Age            33.33
Albumin         0.62
BUN             2.79
Bilirubin       0.86
Cholesterol     0.04
Creatinine      2.80
DiasABP        12.67
FiO2            5.33
GCS             9.46
Gender          0.74
Glucose         2.79
HCO3            2.79
HCT             3.21
HR             32.09
Height         33.33
ICUType         0.74
K               3.01
Lactate         1.41
MAP            12.53
MechVent        5.09
Mg              2.63
NIDiasABP      20.42
NIMAP          20.13
NISysABP       20.43
Na              2.84
PaCO2           2.82
PaO2            2.82
Platelets       2.47
RespRate       10.33
SaO2            0.44
SysABP         12.67
Temp           10.36
TroponinI       0.09
TroponinT       0.49
Urine          21.98
WBC             2.37
Weight         24.01
pH              2.87
dtype: float64

<h4>ICUType 4 missing rate</h4>

In [174]:
ICUType_4_test_ids = test_X[test_X["ICUType"] == 4.0]
ICUType_4_test_ids = ICUType_4_test_ids[ICUType_4_test_ids["Time"] == 0.0]
ICUType_4_test_ids = ICUType_4_test_ids["RecordID"]
ICUType_4_test = test_X[test_X["RecordID"].isin(ICUType_4_test_ids)]
ICUType_4_test_missing = round((ICUType_4_test.isna().sum()/total_pacientes_test)*100,2)
ICUType_4_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            27.84
ALT            27.82
AST            27.82
Age             1.21
Albumin        27.94
BUN            26.19
Bilirubin      27.84
Cholesterol    28.26
Creatinine     26.19
DiasABP         9.29
FiO2           23.16
GCS            14.88
Gender         27.71
Glucose        26.17
HCO3           26.21
HCT            25.59
HR              2.22
Height          1.21
ICUType        27.71
K              25.98
Lactate        26.73
MAP             9.48
MechVent       23.13
Mg             26.12
NIDiasABP      18.21
NIMAP          18.33
NISysABP       18.20
Na             26.05
PaCO2          25.03
PaO2           25.03
Platelets      26.18
RespRate       21.43
SaO2           27.70
SysABP          9.28
Temp           17.96
TroponinI      28.26
TroponinT      28.08
Urine           6.90
WBC            26.31
Weight         16.53
pH             24.97
dtype: float64

<h4>ICUType 4 measurements</h4>

In [176]:
ICUType_4_measurements_test = round((ICUType_4_test.count()/total_pacientes_test)*100,2)
ICUType_4_measurements_test

RecordID       28.30
level_1        28.30
Time           28.30
ALP             0.47
ALT             0.48
AST             0.48
Age            27.09
Albumin         0.37
BUN             2.11
Bilirubin       0.46
Cholesterol     0.04
Creatinine      2.11
DiasABP        19.02
FiO2            5.14
GCS            13.43
Gender          0.59
Glucose         2.13
HCO3            2.09
HCT             2.71
HR             26.08
Height         27.09
ICUType         0.59
K               2.32
Lactate         1.57
MAP            18.82
MechVent        5.17
Mg              2.19
NIDiasABP      10.10
NIMAP           9.97
NISysABP       10.11
Na              2.25
PaCO2           3.28
PaO2            3.27
Platelets       2.13
RespRate        6.87
SaO2            0.60
SysABP         19.02
Temp           10.34
TroponinI       0.04
TroponinT       0.22
Urine          21.40
WBC             2.00
Weight         11.77
pH              3.34
dtype: float64

<h4>+64 missing rate</h4>

In [178]:
more_than_or_equal_to_65_test_ids = test_X[test_X["Age"] >= 65]
more_than_or_equal_to_65_test_ids = more_than_or_equal_to_65_test_ids[more_than_or_equal_to_65_test_ids["Time"] == 0.0]
more_than_or_equal_to_65_test_ids = more_than_or_equal_to_65_test_ids["RecordID"]
more_than_or_equal_to_65_test = test_X[test_X["RecordID"].isin(more_than_or_equal_to_65_test_ids)]
more_than_or_equal_to_65_test_missing = round((more_than_or_equal_to_65_test.isna().sum()/total_pacientes_test)*100,2)
more_than_or_equal_to_65_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            53.49
ALT            53.47
AST            53.47
Age             2.87
Albumin        53.65
BUN            50.39
Bilirubin      53.47
Cholesterol    54.12
Creatinine     50.37
DiasABP        24.31
FiO2           45.54
GCS            37.22
Gender         53.10
Glucose        50.69
HCO3           50.49
HCT            49.07
HR              4.99
Height          2.87
ICUType        53.10
K              50.27
Lactate        52.08
MAP            24.45
MechVent       46.11
Mg             50.43
NIDiasABP      31.39
NIMAP          31.60
NISysABP       31.37
Na             50.53
PaCO2          47.74
PaO2           47.75
Platelets      50.28
RespRate       41.47
SaO2           51.64
SysABP         24.31
Temp           33.11
TroponinI      54.10
TroponinT      53.52
Urine          15.85
WBC            50.62
Weight         25.56
pH             47.41
dtype: float64

<h4>+65 measurements</h4>

In [180]:
more_than_or_equal_to_65_test_measurements = round((more_than_or_equal_to_65_test.count()/total_pacientes_test)*100,2)
more_than_or_equal_to_65_test_measurements

RecordID       54.23
level_1        54.23
Time           54.23
ALP             0.74
ALT             0.76
AST             0.76
Age            51.36
Albumin         0.58
BUN             3.84
Bilirubin       0.76
Cholesterol     0.11
Creatinine      3.86
DiasABP        29.92
FiO2            8.69
GCS            17.01
Gender          1.13
Glucose         3.54
HCO3            3.74
HCT             5.16
HR             49.25
Height         51.36
ICUType         1.13
K               3.96
Lactate         2.15
MAP            29.78
MechVent        8.13
Mg              3.80
NIDiasABP      22.84
NIMAP          22.63
NISysABP       22.86
Na              3.70
PaCO2           6.49
PaO2            6.48
Platelets       3.96
RespRate       12.76
SaO2            2.59
SysABP         29.92
Temp           21.13
TroponinI       0.13
TroponinT       0.71
Urine          38.38
WBC             3.61
Weight         28.67
pH              6.82
dtype: float64

<h4>-65 missing rate</h4>

In [182]:
less_than_65_test_ids = test_X[test_X["Age"] < 65]
less_than_65_test_ids = less_than_65_test_ids[less_than_65_test_ids["Time"] == 0.0]
less_than_65_test_ids = less_than_65_test_ids["RecordID"]
less_than_65_test = test_X[test_X["RecordID"].isin(less_than_65_test_ids)]
less_than_65_test_missing = round((less_than_65_test.isna().sum()/total_pacientes_test)*100,2)
less_than_65_test_missing

RecordID        0.00
level_1         0.00
Time            0.00
ALP            44.87
ALT            44.83
AST            44.84
Age             2.55
Albumin        45.09
BUN            42.25
Bilirubin      44.84
Cholesterol    45.71
Creatinine     42.24
DiasABP        21.52
FiO2           38.60
GCS            30.41
Gender         44.82
Glucose        42.40
HCO3           42.33
HCT            41.36
HR              4.33
Height          2.55
ICUType        44.82
K              42.07
Lactate        43.87
MAP            21.71
MechVent       38.69
Mg             42.36
NIDiasABP      26.05
NIMAP          26.31
NISysABP       26.03
Na             42.28
PaCO2          40.56
PaO2           40.56
Platelets      42.36
RespRate       35.22
SaO2           44.21
SysABP         21.52
Temp           29.22
TroponinI      45.68
TroponinT      45.41
Urine          14.45
WBC            42.63
Weight         20.70
pH             40.30
dtype: float64

<h4>-65 measurements</h4>

In [184]:
less_than_65_test_measurements = round((less_than_65_test.count()/total_pacientes_test)*100,2)
less_than_65_test_measurements

RecordID       45.77
level_1        45.77
Time           45.77
ALP             0.90
ALT             0.93
AST             0.93
Age            43.22
Albumin         0.68
BUN             3.51
Bilirubin       0.93
Cholesterol     0.06
Creatinine      3.53
DiasABP        24.25
FiO2            7.17
GCS            15.36
Gender          0.95
Glucose         3.37
HCO3            3.44
HCT             4.41
HR             41.44
Height         43.22
ICUType         0.95
K               3.70
Lactate         1.90
MAP            24.06
MechVent        7.08
Mg              3.41
NIDiasABP      19.72
NIMAP          19.46
NISysABP       19.74
Na              3.49
PaCO2           5.21
PaO2            5.21
Platelets       3.41
RespRate       10.55
SaO2            1.56
SysABP         24.25
Temp           16.55
TroponinI       0.09
TroponinT       0.36
Urine          31.32
WBC             3.14
Weight         25.06
pH              5.46
dtype: float64

<h4>Filtering only patients who have the same height and weight</h4>

In [185]:
filtered_test_X = test_X[(test_X['Height'] != -1) & (test_X['Weight'] != -1) & (test_X['Height'].notna()) & (test_X['Weight'].notna())] 

<h4>Converting height to meters</h4>

In [186]:
filtered_test_X_metros = filtered_test_X.copy()
filtered_test_X_metros["Height"] = filtered_test_X["Height"]/100
filtered_test_X_metros["Height"]

288       1.626
289       1.626
290       1.626
291       1.626
293       1.626
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 32604, dtype: float64

<h4>Calculate BMI and classification</h4>

In [187]:
bmi_data_test = filtered_test_X_metros
bmi_data_test["BMI"] = round(filtered_test_X_metros["Weight"] / (filtered_test_X_metros["Height"]**2), 1)
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
288,132548,0,0.0,,,,68.0,,,,...,,36.3,,,,,87.0,,32.9,Obesidade grau 1
289,132548,1,1.0,,,,68.0,,,,...,205.0,35.8,,,120.0,,87.0,,32.9,Obesidade grau 1
290,132548,2,2.0,,,,68.0,,32.0,,...,212.142857,,,,60.0,6.2,87.0,,32.9,Obesidade grau 1
291,132548,3,3.0,,,,68.0,,,,...,201.25,,,,140.0,,87.0,,32.9,Obesidade grau 1
293,132548,5,5.0,,,,68.0,,,,...,175.0,36.6,,,190.0,,87.0,,32.9,Obesidade grau 1


<h4>Taking the first occurrence of each patient</h4>

In [188]:
bmi_data_test = bmi_data_test.groupby("RecordID").first().reset_index()
bmi_data_test

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132548,0,0.0,,,,68.0,,32.0,,...,205.00,36.3,0.7,,120.0,6.2,87.0,,32.9,Obesidade grau 1
1,132585,0,0.0,,,,40.0,,,,...,90.50,,,,320.0,,84.7,,31.1,Obesidade grau 1
2,132590,0,0.0,,,,58.0,,,,...,119.00,36.8,,,70.0,,98.0,,27.7,Sobrepeso
3,132597,0,0.0,,,,66.0,,27.0,,...,,36.5,1.2,,,18.6,82.0,,43.6,Obesidade grau 3
4,132601,0,0.0,,,,74.0,,,,...,,,,,,,75.9,7.39,24.0,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1273,162971,0,0.0,,,,63.0,,,,...,130.50,35.8,,,0.0,,83.3,,37.1,Obesidade grau 2
1274,162991,0,0.0,,,,56.0,,,,...,155.25,,,,,,96.2,,33.7,Obesidade grau 1
1275,162999,0,0.0,,,,70.0,,30.0,,...,0.00,36.3,,,,2.5,68.1,,20.4,Peso normal
1276,163029,0,0.0,,,,61.0,,,,...,,,,,,,85.0,,28.5,Sobrepeso


In [189]:
bmi_data_test["Classificacao"].value_counts()

Classificacao
Sobrepeso           435
Peso normal         364
Obesidade grau 1    248
Obesidade grau 2    107
Obesidade grau 3     88
Baixo peso           36
Name: count, dtype: int64

<h4>Classification undefined missing rate</h4>

In [191]:
classificacao_undefined_ids_test = bmi_data_test["RecordID"]
classificacao_undefined_test = test_X[~test_X["RecordID"].isin(classificacao_undefined_ids_test)]
classificacao_undefined_missing_test = round((classificacao_undefined_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_undefined_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            45.95
ALT            45.93
AST            45.93
Age             3.41
Albumin        46.11
BUN            43.42
Bilirubin      45.92
Cholesterol    46.65
Creatinine     43.41
DiasABP        28.43
FiO2           40.16
GCS            30.91
Gender         45.75
Glucose        43.42
HCO3           43.44
HCT            42.76
HR              5.05
Height          3.41
ICUType        45.75
K              43.10
Lactate        45.23
MAP            28.64
MechVent       40.63
Mg             43.52
NIDiasABP      21.33
NIMAP          21.65
NISysABP       21.31
Na             43.29
PaCO2          43.44
PaO2           43.44
Platelets      43.72
RespRate       31.28
SaO2           46.15
SysABP         28.42
Temp           34.11
TroponinI      46.67
TroponinT      46.14
Urine          16.68
WBC            43.82
Weight         21.31
pH             43.37
dtype: float64

<h4>Classification undefined measurements</h4>

In [194]:
classificacao_undefined_measurements_test = round((classificacao_undefined_test.count()/total_pacientes_test)*100,2)
classificacao_undefined_measurements_test

RecordID       46.73
level_1        46.73
Time           46.73
ALP             0.78
ALT             0.80
AST             0.80
Age            43.32
Albumin         0.62
BUN             3.31
Bilirubin       0.81
Cholesterol     0.08
Creatinine      3.32
DiasABP        18.30
FiO2            6.57
GCS            15.82
Gender          0.97
Glucose         3.31
HCO3            3.29
HCT             3.97
HR             41.68
Height         43.32
ICUType         0.97
K               3.63
Lactate         1.50
MAP            18.08
MechVent        6.09
Mg              3.21
NIDiasABP      25.40
NIMAP          25.07
NISysABP       25.41
Na              3.44
PaCO2           3.29
PaO2            3.29
Platelets       3.01
RespRate       15.45
SaO2            0.57
SysABP         18.31
Temp           12.62
TroponinI       0.06
TroponinT       0.59
Urine          30.05
WBC             2.90
Weight         25.42
pH              3.36
dtype: float64

<h4>Classification low weight</h4>

In [196]:
classificacao_baixo_peso_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_ids_test = classificacao_baixo_peso_ids_test["RecordID"]
classificacao_baixo_peso_test = test_X[test_X["RecordID"].isin(classificacao_baixo_peso_ids_test)]
classificacao_baixo_peso_missing_test = round((classificacao_baixo_peso_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_baixo_peso_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            1.48
ALT            1.48
AST            1.48
Age            0.05
Albumin        1.48
BUN            1.39
Bilirubin      1.48
Cholesterol    1.50
Creatinine     1.39
DiasABP        0.55
FiO2           1.29
GCS            1.04
Gender         1.47
Glucose        1.40
HCO3           1.39
HCT            1.34
HR             0.11
Height         0.05
ICUType        1.47
K              1.38
Lactate        1.43
MAP            0.53
MechVent       1.26
Mg             1.39
NIDiasABP      0.99
NIMAP          1.00
NISysABP       0.99
Na             1.39
PaCO2          1.30
PaO2           1.30
Platelets      1.38
RespRate       1.23
SaO2           1.43
SysABP         0.55
Temp           0.79
TroponinI      1.50
TroponinT      1.48
Urine          0.39
WBC            1.39
Weight         0.70
pH             1.29
dtype: float64

<h4>Classification low weight measurements</h4>

In [198]:
classificacao_baixo_peso_measurements_test = round((classificacao_baixo_peso_test.count()/total_pacientes_test)*100,2)
classificacao_baixo_peso_measurements_test

RecordID       1.50
level_1        1.50
Time           1.50
ALP            0.02
ALT            0.02
AST            0.02
Age            1.45
Albumin        0.02
BUN            0.11
Bilirubin      0.02
Cholesterol    0.00
Creatinine     0.11
DiasABP        0.95
FiO2           0.22
GCS            0.46
Gender         0.03
Glucose        0.10
HCO3           0.11
HCT            0.16
HR             1.39
Height         1.45
ICUType        0.03
K              0.12
Lactate        0.07
MAP            0.97
MechVent       0.24
Mg             0.11
NIDiasABP      0.51
NIMAP          0.50
NISysABP       0.51
Na             0.11
PaCO2          0.20
PaO2           0.20
Platelets      0.12
RespRate       0.27
SaO2           0.07
SysABP         0.95
Temp           0.71
TroponinI      0.00
TroponinT      0.02
Urine          1.11
WBC            0.11
Weight         0.80
pH             0.22
dtype: float64

<h4>Classification normal weight missing rate</h4>

In [201]:
classificacao_normal_peso_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Peso normal"]
classificacao_normal_peso_ids_test = classificacao_normal_peso_ids_test["RecordID"]
classificacao_normal_peso_test = test_X[test_X["RecordID"].isin(classificacao_normal_peso_ids_test)]
classificacao_normal_peso_missing_test = round((classificacao_normal_peso_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_normal_peso_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            14.92
ALT            14.91
AST            14.91
Age             0.60
Albumin        14.97
BUN            14.03
Bilirubin      14.92
Cholesterol    15.14
Creatinine     14.02
DiasABP         5.39
FiO2           12.64
GCS            10.15
Gender         14.86
Glucose        14.13
HCO3           14.07
HCT            13.59
HR              1.23
Height          0.60
ICUType        14.86
K              14.00
Lactate        14.42
MAP             5.42
MechVent       12.66
Mg             14.04
NIDiasABP       9.67
NIMAP           9.72
NISysABP        9.67
Na             14.10
PaCO2          12.98
PaO2           12.99
Platelets      13.93
RespRate       12.70
SaO2           14.32
SysABP          5.39
Temp            8.34
TroponinI      15.13
TroponinT      15.04
Urine           4.28
WBC            14.08
Weight          7.32
pH             12.84
dtype: float64

<h4>Classification normal weight measurements</h4>

In [203]:
classificacao_normal_peso_measurements_test = round((classificacao_normal_peso_test.count()/total_pacientes_test)*100,2)
classificacao_normal_peso_measurements_test

RecordID       15.17
level_1        15.17
Time           15.17
ALP             0.25
ALT             0.27
AST             0.27
Age            14.57
Albumin         0.20
BUN             1.15
Bilirubin       0.26
Cholesterol     0.03
Creatinine      1.15
DiasABP         9.79
FiO2            2.53
GCS             5.02
Gender          0.32
Glucose         1.04
HCO3            1.11
HCT             1.59
HR             13.95
Height         14.57
ICUType         0.32
K               1.17
Lactate         0.76
MAP             9.75
MechVent        2.51
Mg              1.14
NIDiasABP       5.50
NIMAP           5.46
NISysABP        5.50
Na              1.07
PaCO2           2.19
PaO2            2.18
Platelets       1.24
RespRate        2.47
SaO2            0.86
SysABP          9.79
Temp            6.83
TroponinI       0.04
TroponinT       0.13
Urine          10.89
WBC             1.10
Weight          7.85
pH              2.34
dtype: float64

<h4>Classification overweight missing rate</h4>

In [206]:
classificacao_sobrepeso_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_ids_test = classificacao_sobrepeso_ids_test["RecordID"]
classificacao_sobrepeso_test = test_X[test_X["RecordID"].isin(classificacao_sobrepeso_ids_test)]
classificacao_sobrepeso_missing_test = round((classificacao_sobrepeso_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_sobrepeso_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            17.84
ALT            17.82
AST            17.82
Age             0.73
Albumin        17.93
BUN            16.75
Bilirubin      17.82
Cholesterol    18.10
Creatinine     16.74
DiasABP         5.31
FiO2           14.92
GCS            12.59
Gender         17.75
Glucose        16.93
HCO3           16.80
HCT            16.19
HR              1.57
Height          0.73
ICUType        17.75
K              16.78
Lactate        17.29
MAP             5.33
MechVent       15.03
Mg             16.76
NIDiasABP      12.84
NIMAP          12.90
NISysABP       12.83
Na             16.86
PaCO2          15.12
PaO2           15.13
Platelets      16.63
RespRate       15.48
SaO2           16.72
SysABP          5.31
Temp            9.23
TroponinI      18.08
TroponinT      18.00
Urine           4.51
WBC            16.81
Weight          8.38
pH             14.92
dtype: float64

<h4>Classification overweight measurements</h4>

In [208]:
classificacao_sobrepeso_measurements_test = round((classificacao_sobrepeso_test.count()/total_pacientes_test)*100,2)
classificacao_sobrepeso_measurements_test

RecordID       18.13
level_1        18.13
Time           18.13
ALP             0.30
ALT             0.31
AST             0.31
Age            17.40
Albumin         0.21
BUN             1.38
Bilirubin       0.31
Cholesterol     0.03
Creatinine      1.39
DiasABP        12.82
FiO2            3.21
GCS             5.55
Gender          0.38
Glucose         1.21
HCO3            1.34
HCT             1.95
HR             16.57
Height         17.40
ICUType         0.38
K               1.35
Lactate         0.84
MAP            12.80
MechVent        3.10
Mg              1.37
NIDiasABP       5.29
NIMAP           5.24
NISysABP        5.30
Na              1.28
PaCO2           3.01
PaO2            3.00
Platelets       1.50
RespRate        2.65
SaO2            1.42
SysABP         12.83
Temp            8.90
TroponinI       0.05
TroponinT       0.14
Urine          13.62
WBC             1.32
Weight          9.75
pH              3.21
dtype: float64

<h4>Grade 1 obesity missing rate</h4>

In [210]:
classificacao_obesidade_1_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_1_ids_test = classificacao_obesidade_1_ids_test["RecordID"]
classificacao_obesidade_1_test = test_X[test_X["RecordID"].isin(classificacao_obesidade_1_ids_test)]
classificacao_obesidade_1_missing_test = round((classificacao_obesidade_1_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_obesidade_1_missing_test

RecordID        0.00
level_1         0.00
Time            0.00
ALP            10.19
ALT            10.19
AST            10.19
Age             0.37
Albumin        10.23
BUN             9.56
Bilirubin      10.18
Cholesterol    10.32
Creatinine      9.56
DiasABP         3.27
FiO2            8.51
GCS             7.13
Gender         10.12
Glucose         9.65
HCO3            9.61
HCT             9.26
HR              0.80
Height          0.37
ICUType        10.12
K               9.58
Lactate         9.88
MAP             3.32
MechVent        8.55
Mg              9.58
NIDiasABP       7.21
NIMAP           7.23
NISysABP        7.21
Na              9.63
PaCO2           8.68
PaO2            8.68
Platelets       9.52
RespRate        8.97
SaO2            9.64
SysABP          3.27
Temp            5.31
TroponinI      10.29
TroponinT      10.25
Urine           2.40
WBC             9.60
Weight          5.05
pH              8.57
dtype: float64

<h4>Grade 1 obesity measurements</h4>

In [212]:
classificacao_obesidade_1_measurements_test = round((classificacao_obesidade_1_test.count()/total_pacientes_test)*100,2)
classificacao_obesidade_1_measurements_test

RecordID       10.34
level_1        10.34
Time           10.34
ALP             0.15
ALT             0.15
AST             0.15
Age             9.97
Albumin         0.10
BUN             0.77
Bilirubin       0.15
Cholesterol     0.01
Creatinine      0.77
DiasABP         7.07
FiO2            1.83
GCS             3.21
Gender          0.22
Glucose         0.68
HCO3            0.73
HCT             1.08
HR              9.54
Height          9.97
ICUType         0.22
K               0.76
Lactate         0.46
MAP             7.02
MechVent        1.79
Mg              0.76
NIDiasABP       3.13
NIMAP           3.11
NISysABP        3.13
Na              0.71
PaCO2           1.66
PaO2            1.66
Platelets       0.82
RespRate        1.37
SaO2            0.70
SysABP          7.07
Temp            5.02
TroponinI       0.05
TroponinT       0.09
Urine           7.93
WBC             0.73
Weight          5.29
pH              1.77
dtype: float64

<h4>Grade 2 obesity missing rate</h4>

In [214]:
classificacao_obesidade_2_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_2_ids_test = classificacao_obesidade_2_ids_test["RecordID"]
classificacao_obesidade_2_test = test_X[test_X["RecordID"].isin(classificacao_obesidade_2_ids_test)]
classificacao_obesidade_2_missing_test = round((classificacao_obesidade_2_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_obesidade_2_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            4.39
ALT            4.39
AST            4.39
Age            0.16
Albumin        4.41
BUN            4.11
Bilirubin      4.39
Cholesterol    4.46
Creatinine     4.11
DiasABP        1.37
FiO2           3.62
GCS            3.23
Gender         4.37
Glucose        4.16
HCO3           4.13
HCT            3.99
HR             0.33
Height         0.16
ICUType        4.37
K              4.12
Lactate        4.24
MAP            1.37
MechVent       3.65
Mg             4.12
NIDiasABP      3.27
NIMAP          3.28
NISysABP       3.27
Na             4.15
PaCO2          3.68
PaO2           3.68
Platelets      4.08
RespRate       3.89
SaO2           4.13
SysABP         1.37
Temp           2.27
TroponinI      4.45
TroponinT      4.39
Urine          1.09
WBC            4.13
Weight         1.92
pH             3.65
dtype: float64

<h4>Grade 2 obesity measurements</h4>

In [216]:
classificacao_obesidade_2_measurements_test = round((classificacao_obesidade_2_test.count()/total_pacientes_test)*100,2)
classificacao_obesidade_2_measurements_test

RecordID       4.46
level_1        4.46
Time           4.46
ALP            0.07
ALT            0.07
AST            0.07
Age            4.30
Albumin        0.05
BUN            0.35
Bilirubin      0.07
Cholesterol    0.00
Creatinine     0.35
DiasABP        3.09
FiO2           0.84
GCS            1.23
Gender         0.09
Glucose        0.30
HCO3           0.33
HCT            0.47
HR             4.13
Height         4.30
ICUType        0.09
K              0.34
Lactate        0.22
MAP            3.09
MechVent       0.81
Mg             0.34
NIDiasABP      1.19
NIMAP          1.18
NISysABP       1.19
Na             0.31
PaCO2          0.78
PaO2           0.78
Platelets      0.38
RespRate       0.57
SaO2           0.33
SysABP         3.09
Temp           2.19
TroponinI      0.01
TroponinT      0.07
Urine          3.37
WBC            0.33
Weight         2.54
pH             0.81
dtype: float64

<h4>Grade 3 obesity missing rate</h4>

In [218]:
classificacao_obesidade_3_ids_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_3_ids_test = classificacao_obesidade_3_ids_test["RecordID"]
classificacao_obesidade_3_test = test_X[test_X["RecordID"].isin(classificacao_obesidade_3_ids_test)]
classificacao_obesidade_3_missing_test = round((classificacao_obesidade_3_test.isna().sum()/total_pacientes_test)*100,2)
classificacao_obesidade_3_missing_test

RecordID       0.00
level_1        0.00
Time           0.00
ALP            3.60
ALT            3.59
AST            3.59
Age            0.10
Albumin        3.62
BUN            3.38
Bilirubin      3.59
Cholesterol    3.66
Creatinine     3.38
DiasABP        1.53
FiO2           3.00
GCS            2.59
Gender         3.59
Glucose        3.40
HCO3           3.39
HCT            3.31
HR             0.23
Height         0.10
ICUType        3.59
K              3.38
Lactate        3.46
MAP            1.54
MechVent       3.01
Mg             3.38
NIDiasABP      2.12
NIMAP          2.14
NISysABP       2.12
Na             3.40
PaCO2          3.10
PaO2           3.10
Platelets      3.38
RespRate       3.15
SaO2           3.46
SysABP         1.53
Temp           2.27
TroponinI      3.66
TroponinT      3.63
Urine          0.94
WBC            3.42
Weight         1.59
pH             3.08
dtype: float64

<h4>Grade 3 obesity measurements</h4>

In [220]:
classificacao_obesidade_3_measurements_test = round((classificacao_obesidade_3_test.count()/total_pacientes_test)*100,2)
classificacao_obesidade_3_measurements_test

RecordID       3.67
level_1        3.67
Time           3.67
ALP            0.07
ALT            0.07
AST            0.07
Age            3.57
Albumin        0.05
BUN            0.29
Bilirubin      0.08
Cholesterol    0.01
Creatinine     0.29
DiasABP        2.14
FiO2           0.67
GCS            1.08
Gender         0.08
Glucose        0.27
HCO3           0.28
HCT            0.36
HR             3.44
Height         3.57
ICUType        0.08
K              0.29
Lactate        0.21
MAP            2.13
MechVent       0.65
Mg             0.29
NIDiasABP      1.54
NIMAP          1.53
NISysABP       1.55
Na             0.27
PaCO2          0.57
PaO2           0.57
Platelets      0.29
RespRate       0.52
SaO2           0.21
SysABP         2.14
Temp           1.40
TroponinI      0.01
TroponinT      0.04
Urine          2.73
WBC            0.25
Weight         2.08
pH             0.59
dtype: float64

<h4>Building of the missing rate table</h4>

In [221]:
df_missing_test = pd.DataFrame(columns=df_columns)
df_missing_transpose_test = df_missing_test.T
df_missing_transpose_test ["Female"] = female_gender_missing_rate_test
df_missing_transpose_test ["Male"] = male_gender_missing_rate_test
df_missing_transpose_test["Undefined gender"] = undefined_gender_missing_rate_test
df_missing_transpose_test["ICUType 1"] = ICUType_1_test_missing
df_missing_transpose_test ["ICUType 2"] = ICUType_2_test_missing
df_missing_transpose_test ["ICUType 3"] = ICUType_3_test_missing
df_missing_transpose_test ["ICUType 4"] = ICUType_4_test_missing
df_missing_transpose_test["Age 65+"] = more_than_or_equal_to_65_test_missing
df_missing_transpose_test["Age 65-"] = less_than_65_test_missing
df_missing_transpose_test ["Low Weight"] = classificacao_baixo_peso_missing_test
df_missing_transpose_test ["Normal Weight"] = classificacao_normal_peso_missing_test
df_missing_transpose_test ["Overweight"] = classificacao_sobrepeso_missing_test
df_missing_transpose_test ["Obesity Grade 1"] = classificacao_obesidade_1_missing_test
df_missing_transpose_test ["Obesity Grade 2"] = classificacao_obesidade_2_missing_test
df_missing_transpose_test ["Obesity Grade 3"] = classificacao_obesidade_3_missing_test
df_missing_transpose_test["Undefined classification"] = classificacao_undefined_missing_test
df_missing_transpose_test = df_missing_transpose_test.drop("RecordID", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("level_1", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("Time", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("Age", axis=0)
df_missing_transpose_test = df_missing_transpose_test.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>original Missing rate per Variable by demographics - Test</h2>"))
df_missing_transpose_test 

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,43.04,55.2,0.12,14.49,21.16,34.88,27.84,53.49,44.87,1.48,14.92,17.84,10.19,4.39,3.6,45.95
ALT,43.02,55.16,0.12,14.48,21.15,34.85,27.82,53.47,44.83,1.48,14.91,17.82,10.19,4.39,3.59,45.93
AST,43.02,55.16,0.12,14.48,21.15,34.85,27.82,53.47,44.84,1.48,14.91,17.82,10.19,4.39,3.59,45.93
Albumin,43.21,55.41,0.12,14.54,21.21,35.06,27.94,53.65,45.09,1.48,14.97,17.93,10.23,4.41,3.62,46.11
BUN,40.51,52.02,0.11,13.66,19.91,32.89,26.19,50.39,42.25,1.39,14.03,16.75,9.56,4.11,3.38,43.42
Bilirubin,43.02,55.17,0.12,14.49,21.16,34.82,27.84,53.47,44.84,1.48,14.92,17.82,10.18,4.39,3.59,45.92
Cholesterol,43.7,56.01,0.13,14.63,21.3,35.64,28.26,54.12,45.71,1.5,15.14,18.1,10.32,4.46,3.66,46.65
Creatinine,40.5,52.0,0.11,13.64,19.9,32.88,26.19,50.37,42.24,1.39,14.02,16.74,9.56,4.11,3.38,43.41
DiasABP,20.62,25.16,0.05,8.85,4.69,23.01,9.29,24.31,21.52,0.55,5.39,5.31,3.27,1.37,1.53,28.43
FiO2,36.92,47.12,0.1,13.17,17.45,30.35,23.16,45.54,38.6,1.29,12.64,14.92,8.51,3.62,3.0,40.16


<h4>Building of the measurements table</h4>

In [222]:
df_measurements_test = pd.DataFrame(columns=df_columns)
df_measurements_transpose_test = df_measurements_test.T
df_measurements_transpose_test ["Female"] = female_gender_measurements_test
df_measurements_transpose_test ["Male"] = male_gender_measurements_test
df_measurements_transpose_test["Undefined gender"] = undefined_gender_measurements_test
df_measurements_transpose_test["ICUType 1"] = ICUType_1_measurements_test
df_measurements_transpose_test ["ICUType 2"] = ICUType_2_measurements_test
df_measurements_transpose_test ["ICUType 3"] = ICUType_3_measurements_test
df_measurements_transpose_test ["ICUType 4"] = ICUType_4_measurements_test
df_measurements_transpose_test["Age 65+"] = more_than_or_equal_to_65_test_measurements
df_measurements_transpose_test["Age 65-"] = less_than_65_test_measurements
df_measurements_transpose_test ["Low Weight"] =classificacao_baixo_peso_measurements_test
df_measurements_transpose_test ["Normal Weight"] = classificacao_normal_peso_measurements_test
df_measurements_transpose_test ["Overweight"] = classificacao_sobrepeso_measurements_test
df_measurements_transpose_test ["Obesity Grade 1"] = classificacao_obesidade_1_measurements_test
df_measurements_transpose_test ["Obesity Grade 2"] = classificacao_obesidade_2_measurements_test
df_measurements_transpose_test ["Obesity Grade 3"] = classificacao_obesidade_3_measurements_test
df_measurements_transpose_test["Undefined classification"] = classificacao_undefined_measurements_test
df_measurements_transpose_test = df_measurements_transpose_test.drop("RecordID", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("level_1", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("Time", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("Age", axis=0)
df_measurements_transpose_test = df_measurements_transpose_test.drop("Gender", axis=0)
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - test Set</h2>"))
df_measurements_transpose_test 

Unnamed: 0,Female,Male,Undefined gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3,Undefined classification
ALP,0.72,0.91,0.0,0.22,0.14,0.8,0.47,0.74,0.9,0.02,0.25,0.3,0.15,0.07,0.07,0.78
ALT,0.75,0.94,0.0,0.24,0.15,0.83,0.48,0.76,0.93,0.02,0.27,0.31,0.15,0.07,0.07,0.8
AST,0.75,0.94,0.0,0.23,0.15,0.83,0.48,0.76,0.93,0.02,0.27,0.31,0.15,0.07,0.07,0.8
Albumin,0.55,0.69,0.0,0.17,0.09,0.62,0.37,0.58,0.68,0.02,0.2,0.21,0.1,0.05,0.05,0.62
BUN,3.26,4.09,0.01,1.06,1.39,2.79,2.11,3.84,3.51,0.11,1.15,1.38,0.77,0.35,0.29,3.31
Bilirubin,0.75,0.94,0.0,0.23,0.14,0.86,0.46,0.76,0.93,0.02,0.26,0.31,0.15,0.07,0.08,0.81
Cholesterol,0.07,0.1,0.0,0.08,0.0,0.04,0.04,0.11,0.06,0.0,0.03,0.03,0.01,0.0,0.01,0.08
Creatinine,3.27,4.11,0.01,1.08,1.4,2.8,2.11,3.86,3.53,0.11,1.15,1.39,0.77,0.35,0.29,3.32
DiasABP,23.15,30.94,0.08,5.86,16.61,12.67,19.02,29.92,24.25,0.95,9.79,12.82,7.07,3.09,2.14,18.3
FiO2,6.85,8.99,0.03,1.54,3.85,5.33,5.14,8.69,7.17,0.22,2.53,3.21,1.83,0.84,0.67,6.57
