In [1]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

<h2>Loading dataset</h2>

In [2]:
from pypots.benchpots.datasets import preprocess_physionet2012
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-04 22:14:59 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-04 22:14:59 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-04 22:14:59 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-04 22:14:59 [INFO]: Loaded successfully!


<h3>Training data</h3>

<h4>Loading training dataset</h4>

In [3]:
train_X = physionet2012_dataset['train_X']

In [73]:
female_gender_ids = train_X[train_X["Gender"] == 0.0]
female_gender_ids = female_gender_ids["RecordID"]
female_gender_missing_rate = train_X[train_X["RecordID"].isin(female_gender_ids)]
female_gender_missing_rate = female_gender_missing_rate.isna().sum()
female_gender_missing_rate

RecordID            0
level_1             0
Time                0
ALP            159014
ALT            158936
AST            158940
Age              9360
Albumin        159632
BUN            150057
Bilirubin      158919
Cholesterol    161339
Creatinine     150001
DiasABP         77819
FiO2           136476
GCS            109199
Gender         158249
Glucose        150569
HCO3           150247
HCT            147021
HR              15340
Height           9360
ICUType        158249
K              149354
Lactate        155394
MAP             78401
MechVent       137154
Mg             150225
NIDiasABP       90550
NIMAP           91547
NISysABP        90493
Na             150134
PaCO2          144376
PaO2           144400
Platelets      150163
RespRate       117502
SaO2           155599
SysABP          77816
Temp           104388
TroponinI      161247
TroponinT      159840
Urine           49302
WBC            150968
Weight          74191
pH             143721
dtype: int64

In [72]:
male_gender_ids = train_X[train_X["Gender"] == 1.0]
male_gender_ids = male_gender_ids["RecordID"]
male_gender_missing_rate = train_X[train_X["RecordID"].isin(male_gender_ids)]
male_gender_missing_rate = male_gender_missing_rate.isna().sum()
male_gender_missing_rate

RecordID            0
level_1             0
Time                0
ALP            202634
ALT            202531
AST            202524
Age             11632
Albumin        203508
BUN            190989
Bilirubin      202501
Cholesterol    205692
Creatinine     190914
DiasABP         90514
FiO2           173411
GCS            140375
Gender         201771
Glucose        191984
HCO3           191373
HCT            185779
HR              20207
Height          11632
ICUType        201771
K              190395
Lactate        197453
MAP             91189
MechVent       175018
Mg             191339
NIDiasABP      122215
NIMAP          123380
NISysABP       122136
Na             191440
PaCO2          180984
PaO2           181038
Platelets      190395
RespRate       160607
SaO2           197305
SysABP          90506
Temp           126732
TroponinI      205649
TroponinT      203799
Urine           63636
WBC            191968
Weight          97959
pH             179699
dtype: int64

In [94]:
ICUType_1_training_ids = train_X[train_X['ICUType'] == 1.0]
ICUType_1_training_ids = ICUType_1_training_ids[ICUType_1_training_ids["Gender"] != -1.0]
ICUType_1_training_ids = ICUType_1_training_ids[ICUType_1_training_ids["Time"] == 0.0]
ICUType_1_training_ids = ICUType_1_training_ids["RecordID"]
ICUType_1_training_missing = train_X[train_X["RecordID"].isin(ICUType_1_training_ids)]
ICUType_1_training_missing = ICUType_1_training_missing.isna().sum()
ICUType_1_training_missing

RecordID           0
level_1            0
Time               0
ALP            53860
ALT            53812
AST            53813
Age             4823
Albumin        54066
BUN            50784
Bilirubin      53834
Cholesterol    54328
Creatinine     50719
DiasABP        32260
FiO2           48690
GCS            40577
Gender         53533
Glucose        50941
HCO3           50917
HCT            49887
HR              7302
Height          4823
ICUType        53533
K              50124
Lactate        53449
MAP            32302
MechVent       49138
Mg             50779
NIDiasABP      28533
NIMAP          28676
NISysABP       28518
Na             50931
PaCO2          50168
PaO2           50171
Platelets      50700
RespRate       35558
SaO2           51968
SysABP         32259
Temp           38449
TroponinI      54482
TroponinT      53494
Urine          23636
WBC            51117
Weight         30083
pH             50077
dtype: int64

In [95]:
ICUType_2_training_ids = train_X[train_X['ICUType'] == 2.0]
ICUType_2_training_ids = ICUType_2_training_ids[ICUType_2_training_ids["Gender"] != -1.0]
ICUType_2_training_ids = ICUType_2_training_ids[ICUType_2_training_ids["Time"] == 0.0]
ICUType_2_training_ids = ICUType_2_training_ids["RecordID"]
ICUType_2_training_missing = train_X[train_X["RecordID"].isin(ICUType_2_training_ids)]
ICUType_2_training_missing = ICUType_2_training_missing.isna().sum()
ICUType_2_training_missing

RecordID           0
level_1            0
Time               0
ALP            76938
ALT            76921
AST            76921
Age             2520
Albumin        77117
BUN            72449
Bilirubin      76931
Cholesterol    77448
Creatinine     72435
DiasABP        16437
FiO2           64269
GCS            56884
Gender         75858
Glucose        73836
HCO3           72821
HCT            69049
HR              6345
Height          2520
ICUType        75858
K              73369
Lactate        74831
MAP            16366
MechVent       64606
Mg             72620
NIDiasABP      60841
NIMAP          60998
NISysABP       60807
Na             73575
PaCO2          61716
PaO2           61768
Platelets      71208
RespRate       74007
SaO2           69155
SysABP         16435
Temp           32066
TroponinI      77359
TroponinT      77276
Urine          12121
WBC            72289
Weight         37086
pH             60205
dtype: int64

In [96]:
ICUType_3_training_ids = train_X[train_X['ICUType'] == 3.0]
ICUType_3_training_ids = ICUType_3_training_ids[ICUType_3_training_ids["Gender"] != -1.0]
ICUType_3_training_ids = ICUType_3_training_ids[ICUType_3_training_ids["Time"] == 0.0]
ICUType_3_training_ids = ICUType_3_training_ids["RecordID"]
ICUType_3_training_missing = train_X[train_X["RecordID"].isin(ICUType_3_training_ids)]
ICUType_3_training_missing = ICUType_3_training_missing.isna().sum()
ICUType_3_training_missing

RecordID            0
level_1             0
Time                0
ALP            129170
ALT            129085
AST            129084
Age              9065
Albumin        129876
BUN            122023
Bilirubin      128977
Cholesterol    131914
Creatinine     121980
DiasABP         85170
FiO2           112584
GCS             96587
Gender         129297
Glucose        122036
HCO3           121993
HCT            120291
HR              13576
Height           9065
ICUType        129297
K              121201
Lactate        126965
MAP             85860
MechVent       113974
Mg             122598
NIDiasABP       56615
NIMAP           57988
NISysABP        56574
Na             121762
PaCO2          121979
PaO2           121973
Platelets      122983
RespRate        90588
SaO2           130473
SysABP          85168
Temp            93940
TroponinI      131706
TroponinT      130158
Urine           52002
WBC            123365
Weight          45879
pH             121821
dtype: int64

In [97]:
ICUType_4_training_ids = train_X[train_X['ICUType'] == 4.0]
ICUType_4_training_ids = ICUType_4_training_ids[ICUType_4_training_ids["Gender"] != -1.0]
ICUType_4_training_ids = ICUType_4_training_ids[ICUType_4_training_ids["Time"] == 0.0]
ICUType_4_training_ids = ICUType_4_training_ids["RecordID"]
ICUType_4_training_missing = train_X[train_X["RecordID"].isin(ICUType_4_training_ids)]
ICUType_4_training_missing = ICUType_4_training_missing.isna().sum()
ICUType_4_training_missing

RecordID            0
level_1             0
Time                0
ALP            101680
ALT            101649
AST            101646
Age              4584
Albumin        102081
BUN             95790
Bilirubin      101678
Cholesterol    103341
Creatinine      95781
DiasABP         34466
FiO2            84344
GCS             55526
Gender         101332
Glucose         95740
HCO3            95889
HCT             93573
HR               8324
Height           4584
ICUType        101332
K               95055
Lactate         97602
MAP             35062
MechVent        84454
Mg              95567
NIDiasABP       66776
NIMAP           67265
NISysABP        66730
Na              95306
PaCO2           91497
PaO2            91526
Platelets       95667
RespRate        77956
SaO2           101308
SysABP          34460
Temp            66665
TroponinI      103349
TroponinT      102711
Urine           25179
WBC             96165
Weight          59102
pH              91317
dtype: int64

In [86]:
more_than_or_equal_to_65_train_ids = train_X[train_X["Age"] >= 65]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids[more_than_or_equal_to_65_train_ids["Gender"] != -1.0]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids[more_than_or_equal_to_65_train_ids["Time"] == 0.0]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids["RecordID"]
more_than_or_equal_to_65_train_missing = train_X[train_X["RecordID"].isin(more_than_or_equal_to_65_train_ids)]
more_than_or_equal_to_65_train_missing = more_than_or_equal_to_65_train_missing.isna().sum()
more_than_or_equal_to_65_train_missing

RecordID            0
level_1             0
Time                0
ALP            196327
ALT            196255
AST            196254
Age             10815
Albumin        196843
BUN            184955
Bilirubin      196231
Cholesterol    198615
Creatinine     184879
DiasABP         89651
FiO2           167564
GCS            136918
Gender         194862
Glucose        185945
HCO3           185278
HCT            180264
HR              18374
Height          10815
ICUType        194862
K              184408
Lactate        191318
MAP             90161
MechVent       169376
Mg             185179
NIDiasABP      115217
NIMAP          116207
NISysABP       115143
Na             185405
PaCO2          175592
PaO2           175635
Platelets      184585
RespRate       149825
SaO2           189876
SysABP          89648
Temp           122046
TroponinI      198465
TroponinT      196236
Urine           57959
WBC            185858
Weight          92169
pH             174496
dtype: int64

In [88]:
less_than_65_train_ids = train_X[train_X["Age"] < 65]
less_than_65_train_ids = less_than_65_train_ids[less_than_65_train_ids["Gender"] != -1.0]
less_than_65_train_ids = less_than_65_train_ids[less_than_65_train_ids["Time"] == 0.0]
less_than_65_train_ids = less_than_65_train_ids["RecordID"]
less_than_65_train_missing = train_X[train_X["RecordID"].isin(less_than_65_train_ids)]
less_than_65_train_missing = less_than_65_train_missing.isna().sum()
less_than_65_train_missing


RecordID            0
level_1             0
Time                0
ALP            165321
ALT            165212
AST            165210
Age             10177
Albumin        166297
BUN            156091
Bilirubin      165189
Cholesterol    168416
Creatinine     156036
DiasABP         78682
FiO2           142323
GCS            112656
Gender         165158
Glucose        156608
HCO3           156342
HCT            152536
HR              17173
Height          10177
ICUType        165158
K              155341
Lactate        161529
MAP             79429
MechVent       142796
Mg             156385
NIDiasABP       97548
NIMAP           98720
NISysABP        97486
Na             156169
PaCO2          149768
PaO2           149803
Platelets      155973
RespRate       128284
SaO2           163028
SysABP          78674
Temp           109074
TroponinI      168431
TroponinT      167403
Urine           54979
WBC            157078
Weight          79981
pH             148924
dtype: int64

In [12]:
filtered_train_X = train_X[(train_X['Height'] != -1) & (train_X['Weight'] != -1) & (train_X['Height'].notna()) & (train_X['Weight'].notna())] 

In [13]:
filtered_train_X = filtered_train_X.groupby("RecordID").first().reset_index()

In [14]:
def classify_BMI(BMI):
    if BMI < 18.5:
        return "Baixo peso"
    elif BMI >= 18.6 and BMI <= 24.9:
        return "Peso normal"
    elif BMI >= 25 and BMI <= 29.9:
        return "Sobrepeso"
    elif BMI >= 30 and BMI <= 34.9:
        return "Obesidade grau 1"
    elif BMI >= 35 and BMI <= 39.9:
        return "Obesidade grau 2"
    elif BMI >= 40:
        return "Obesidade grau 3"

In [15]:
filtered_train_X_metros = filtered_train_X.copy()
filtered_train_X_metros["Height"] = filtered_train_X["Height"]/100
filtered_train_X_metros["Height"]

0       1.803
1       1.803
2       1.626
3       1.753
4       1.575
        ...  
4005    1.702
4006    1.524
4007    1.600
4008    1.727
4009    1.727
Name: Height, Length: 4010, dtype: float64

In [16]:
bmi_data_train = filtered_train_X_metros
bmi_data_train["BMI"] = filtered_train_X_metros["Weight"] / (filtered_train_X_metros["Height"]**2)
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,600.0,11.5,84.6,,26.024291,Sobrepeso
1,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.068194,Obesidade grau 2
2,132548,0,0.0,,,,68.0,,32.0,,...,205.0,36.3,0.7,,120.0,6.2,87.0,,32.906233,Obesidade grau 1
3,132555,0,0.0,,,,74.0,,19.0,,...,98.0,34.8,,,35.0,9.0,66.1,7.39,21.509862,Peso normal
4,132567,0,0.0,,,,71.0,,9.0,,...,111.5,35.6,,,15.0,9.0,56.0,7.44,22.574956,Peso normal


In [17]:
classificacao_baixo_peso = bmi_data_train[bmi_data_train["Classificacao"] == "Baixo peso"].groupby("RecordID").first().reset_index()
classificacao_baixo_peso = classificacao_baixo_peso[classificacao_baixo_peso["Gender"] != -1.0]
classificacao_baixo_peso_missing = classificacao_baixo_peso.isna().sum()
classificacao_baixo_peso_missing

RecordID           0
level_1            0
Time               0
ALP               75
ALT               76
AST               76
Age                0
Albumin           84
BUN               39
Bilirubin         76
Cholesterol       98
Creatinine        39
DiasABP           36
FiO2              47
GCS               23
Gender             0
Glucose           43
HCO3              39
HCT               39
HR                17
Height             0
ICUType            0
K                 41
Lactate           65
MAP               36
MechVent          50
Mg                41
NIDiasABP         31
NIMAP             32
NISysABP          31
Na                39
PaCO2             38
PaO2              38
Platelets         39
RespRate          92
SaO2              69
SysABP            36
Temp              19
TroponinI        104
TroponinT         87
Urine             26
WBC               40
Weight             0
pH                38
BMI                0
Classificacao      0
dtype: int64

In [18]:
classificacao_peso_normal = bmi_data_train[bmi_data_train["Classificacao"] == "Peso normal"].groupby("RecordID").first().reset_index()
classificacao_peso_normal = classificacao_peso_normal[classificacao_peso_normal["Gender"] != -1.0]
classificacao_peso_normal_missing = classificacao_peso_normal.isna().sum()
classificacao_peso_normal_missing

RecordID            0
level_1             0
Time                0
ALP               892
ALT               881
AST               880
Age                 0
Albumin           898
BUN               354
Bilirubin         888
Cholesterol      1102
Creatinine        354
DiasABP           386
FiO2              544
GCS               212
Gender              0
Glucose           388
HCO3              367
HCT               342
HR                164
Height              0
ICUType             0
K                 374
Lactate           724
MAP               389
MechVent          549
Mg                381
NIDiasABP         394
NIMAP             393
NISysABP          393
Na                371
PaCO2             412
PaO2              416
Platelets         346
RespRate          961
SaO2              736
SysABP            386
Temp              202
TroponinI        1114
TroponinT        1009
Urine             243
WBC               358
Weight              0
pH                397
BMI                 0
Classifica

In [19]:
classificacao_sobrepeso = bmi_data_train[bmi_data_train["Classificacao"] == "Sobrepeso"].groupby("RecordID").first().reset_index()
classificacao_sobrepeso = classificacao_sobrepeso[classificacao_sobrepeso["Gender"] != -1.0]
classificacao_sobrepeso_missing = classificacao_sobrepeso.isna().sum()
classificacao_sobrepeso_missing

RecordID            0
level_1             0
Time                0
ALP              1047
ALT              1041
AST              1043
Age                 0
Albumin          1088
BUN               429
Bilirubin        1039
Cholesterol      1318
Creatinine        426
DiasABP           400
FiO2              623
GCS               235
Gender              0
Glucose           486
HCO3              449
HCT               406
HR                169
Height              0
ICUType             0
K                 464
Lactate           884
MAP               407
MechVent          618
Mg                457
NIDiasABP         471
NIMAP             476
NISysABP          470
Na                454
PaCO2             447
PaO2              452
Platelets         408
RespRate         1160
SaO2              790
SysABP            400
Temp              218
TroponinI        1331
TroponinT        1227
Urine             251
WBC               437
Weight              0
pH                434
BMI                 0
Classifica

In [20]:
classificacao_obesidade_1 = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 1"].groupby("RecordID").first().reset_index()
classificacao_obesidade_1 = classificacao_obesidade_1[classificacao_obesidade_1["Gender"] != -1.0]
classificacao_obesidade_1_missing = classificacao_obesidade_1.isna().sum()
classificacao_obesidade_1_missing

RecordID           0
level_1            0
Time               0
ALP              524
ALT              521
AST              521
Age                0
Albumin          550
BUN              226
Bilirubin        520
Cholesterol      687
Creatinine       225
DiasABP          222
FiO2             326
GCS              140
Gender             0
Glucose          258
HCO3             232
HCT              212
HR               100
Height             0
ICUType            0
K                250
Lactate          458
MAP              224
MechVent         332
Mg               248
NIDiasABP        257
NIMAP            259
NISysABP         256
Na               240
PaCO2            224
PaO2             224
Platelets        222
RespRate         607
SaO2             433
SysABP           222
Temp             128
TroponinI        692
TroponinT        621
Urine            133
WBC              227
Weight             0
pH               216
BMI                0
Classificacao      0
dtype: int64

In [21]:
classificacao_obesidade_2 = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 2"].groupby("RecordID").first().reset_index()
classificacao_obesidade_2 = classificacao_obesidade_2[classificacao_obesidade_2["Gender"] != -1.0]
classificacao_obesidade_2_missing = classificacao_obesidade_2.isna().sum()
classificacao_obesidade_2_missing

RecordID           0
level_1            0
Time               0
ALP              233
ALT              231
AST              231
Age                0
Albumin          250
BUN               82
Bilirubin        233
Cholesterol      296
Creatinine        81
DiasABP           96
FiO2             128
GCS               50
Gender             0
Glucose          102
HCO3              85
HCT               79
HR                40
Height             0
ICUType            0
K                 94
Lactate          177
MAP               96
MechVent         137
Mg                91
NIDiasABP        105
NIMAP            106
NISysABP         104
Na                88
PaCO2             94
PaO2              94
Platelets         77
RespRate         268
SaO2             179
SysABP            96
Temp              49
TroponinI        300
TroponinT        263
Urine             59
WBC               84
Weight             0
pH                92
BMI                0
Classificacao      0
dtype: int64

In [22]:
classificacao_obesidade_3 = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 3"].groupby("RecordID").first().reset_index()
classificacao_obesidade_3 = classificacao_obesidade_3[classificacao_obesidade_3["Gender"] != -1.0]
classificacao_obesidade_3_missing = classificacao_obesidade_3.isna().sum()
classificacao_obesidade_3_missing

RecordID           0
level_1            0
Time               0
ALP              185
ALT              184
AST              184
Age                0
Albumin          193
BUN               75
Bilirubin        187
Cholesterol      258
Creatinine        76
DiasABP           91
FiO2             112
GCS               44
Gender             0
Glucose           86
HCO3              75
HCT               71
HR                30
Height             0
ICUType            0
K                 80
Lactate          163
MAP               93
MechVent         117
Mg                79
NIDiasABP         92
NIMAP             92
NISysABP          91
Na                78
PaCO2             88
PaO2              88
Platelets         72
RespRate         229
SaO2             160
SysABP            91
Temp              40
TroponinI        264
TroponinT        236
Urine             42
WBC               72
Weight             0
pH                88
BMI                0
Classificacao      0
dtype: int64

In [23]:
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [24]:
df_missing = pd.DataFrame(columns=df_columns)
df_missing_transpose = df_missing.T
df_missing_transpose["Female"] = female_gender_missing_rate
df_missing_transpose["Male"] = male_gender_missing_rate
df_missing_transpose["ICUType 1"] = ICUType_1_training_missing
df_missing_transpose["ICUType 2"] = ICUType_2_training_missing
df_missing_transpose["ICUType 3"] = ICUType_3_training_missing
df_missing_transpose["ICUType 4"] = ICUType_4_training_missing
df_missing_transpose["Age 65+"] = more_than_or_equal_to_65_train_missing
df_missing_transpose["Age 65-"] = less_than_65_train_missing
df_missing_transpose["Low Weight"] = classificacao_baixo_peso_missing
df_missing_transpose["Normal Weight"] = classificacao_peso_normal_missing
df_missing_transpose["Overweight"] = classificacao_sobrepeso_missing
df_missing_transpose["Obesity Grade 1"] = classificacao_obesidade_1_missing
df_missing_transpose["Obesity Grade 2"] = classificacao_obesidade_2_missing
df_missing_transpose["Obesity Grade 3"] = classificacao_obesidade_3_missing
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographics - Train</h2>"))
df_missing_transpose

Unnamed: 0,Female,Male,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,0,0,0,0,0,0,0,0,0,0,0,0,0,0
level_1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Time,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ALP,3219,4079,1068,1594,2580,2067,2530,1886,75,892,1047,524,233,185
ALT,3213,4072,1064,1592,2576,2064,2488,1848,76,881,1041,521,231,184
AST,3215,4069,1064,1592,2575,2064,2487,1850,76,880,1043,521,231,184
Age,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Albumin,3239,4109,1077,1597,2601,2084,2585,2020,84,898,1088,550,250,193
BUN,2950,3787,982,1542,2393,1829,58,55,39,354,429,226,82,75
Bilirubin,3220,4078,1073,1592,2577,2067,2482,1865,76,888,1039,520,233,187


<h3>Validation data</h3>

In [25]:
validation_X = physionet2012_dataset['val_X']

In [26]:
female_gender_distribution_validation = validation_X[validation_X['Gender'] == 0.0]
female_gender_missing_rate_validation = female_gender_distribution_validation.isna().sum()
female_gender_missing_rate_validation

RecordID         0
level_1          0
Time             0
ALP            828
ALT            824
AST            824
Age              0
Albumin        831
BUN            762
Bilirubin      828
Cholesterol    850
Creatinine     762
DiasABP        712
FiO2           705
GCS            555
Gender           0
Glucose        777
HCO3           773
HCT            755
HR             407
Height           0
ICUType          0
K              772
Lactate        747
MAP            716
MechVent       713
Mg             798
NIDiasABP      508
NIMAP          511
NISysABP       508
Na             775
PaCO2          689
PaO2           692
Platelets      756
RespRate       725
SaO2           832
SysABP         712
Temp           529
TroponinI      855
TroponinT      845
Urine          578
WBC            765
Weight           0
pH             688
dtype: int64

In [27]:
male_gender_distribution_validation = validation_X[validation_X['Gender'] == 1.0]
male_gender_missing_rate_validation = male_gender_distribution_validation.isna().sum()
male_gender_missing_rate_validation

RecordID          0
level_1           0
Time              0
ALP            1018
ALT            1019
AST            1019
Age               0
Albumin        1016
BUN             934
Bilirubin      1020
Cholesterol    1054
Creatinine      934
DiasABP         866
FiO2            862
GCS             703
Gender            0
Glucose         951
HCO3            946
HCT             918
HR              551
Height            0
ICUType           0
K               948
Lactate         939
MAP             873
MechVent        870
Mg              968
NIDiasABP       681
NIMAP           683
NISysABP        681
Na              949
PaCO2           795
PaO2            798
Platelets       914
RespRate        918
SaO2           1026
SysABP          866
Temp            685
TroponinI      1057
TroponinT      1029
Urine           764
WBC             927
Weight            0
pH              790
dtype: int64

In [28]:
ICUType_1_validation = validation_X[validation_X['ICUType'] == 1.0]
ICUType_1_validation_missing = ICUType_1_validation.isna().sum()
ICUType_1_validation_missing

RecordID         0
level_1          0
Time             0
ALP            255
ALT            255
AST            255
Age              0
Albumin        257
BUN            231
Bilirubin      255
Cholesterol    263
Creatinine     231
DiasABP        195
FiO2           233
GCS            142
Gender           0
Glucose        234
HCO3           233
HCT            231
HR              97
Height           0
ICUType          0
K              231
Lactate        259
MAP            197
MechVent       235
Mg             243
NIDiasABP      147
NIMAP          147
NISysABP       147
Na             233
PaCO2          242
PaO2           243
Platelets      230
RespRate       199
SaO2           253
SysABP         195
Temp           153
TroponinI      268
TroponinT      255
Urine          188
WBC            234
Weight           0
pH             241
dtype: int64

In [29]:
ICUType_2_validation = validation_X[validation_X['ICUType'] == 2.0]
ICUType_2_validation_missing = ICUType_2_validation.isna().sum()
ICUType_2_validation_missing

RecordID         0
level_1          0
Time             0
ALP            415
ALT            415
AST            415
Age              0
Albumin        415
BUN            400
Bilirubin      416
Cholesterol    417
Creatinine     400
DiasABP        362
FiO2           375
GCS            361
Gender           0
Glucose        406
HCO3           400
HCT            389
HR             324
Height           0
ICUType          0
K              406
Lactate        393
MAP            364
MechVent       368
Mg             412
NIDiasABP      376
NIMAP          376
NISysABP       376
Na             406
PaCO2          183
PaO2           186
Platelets      382
RespRate       405
SaO2           408
SysABP         362
Temp           342
TroponinI      419
TroponinT      418
Urine          348
WBC            391
Weight           0
pH             183
dtype: int64

In [30]:
ICUType_3_validation = validation_X[validation_X['ICUType'] == 3.0]
ICUType_3_validation_missing = ICUType_3_validation.isna().sum()
ICUType_3_validation_missing

RecordID         0
level_1          0
Time             0
ALP            642
ALT            638
AST            638
Age              0
Albumin        641
BUN            597
Bilirubin      641
Cholesterol    671
Creatinine     597
DiasABP        624
FiO2           553
GCS            433
Gender           0
Glucose        600
HCO3           599
HCT            596
HR             312
Height           0
ICUType          0
K              597
Lactate        586
MAP            625
MechVent       568
Mg             610
NIDiasABP      342
NIMAP          345
NISysABP       342
Na             598
PaCO2          605
PaO2           605
Platelets      601
RespRate       563
SaO2           663
SysABP         624
Temp           423
TroponinI      672
TroponinT      656
Urine          475
WBC            601
Weight           0
pH             601
dtype: int64

In [31]:
ICUType_4_validation = validation_X[validation_X['ICUType'] == 4.0]
ICUType_4_validation_missing = ICUType_4_validation.isna().sum()
ICUType_4_validation_missing

RecordID         0
level_1          0
Time             0
ALP            534
ALT            535
AST            535
Age              0
Albumin        534
BUN            468
Bilirubin      536
Cholesterol    553
Creatinine     468
DiasABP        397
FiO2           406
GCS            322
Gender           0
Glucose        488
HCO3           487
HCT            457
HR             225
Height           0
ICUType          0
K              486
Lactate        448
MAP            403
MechVent       412
Mg             501
NIDiasABP      324
NIMAP          326
NISysABP       324
Na             487
PaCO2          454
PaO2           456
Platelets      457
RespRate       476
SaO2           534
SysABP         397
Temp           296
TroponinI      553
TroponinT      545
Urine          331
WBC            466
Weight           0
pH             453
dtype: int64

In [32]:
more_than_or_equal_to_65_validation = validation_X[validation_X["Age"] >= 65]
more_than_or_equal_to_65_validation_missing = more_than_or_equal_to_65_validation.isna().sum()
more_than_or_equal_to_65_validation_missing

RecordID           0
level_1            0
Time               0
ALP            46108
ALT            46097
AST            46095
Age                0
Albumin        46167
BUN            43247
Bilirubin      46062
Cholesterol    46650
Creatinine     43226
DiasABP        19719
FiO2           39042
GCS            31395
Gender         45706
Glucose        43521
HCO3           43352
HCT            42072
HR              1925
Height             0
ICUType        45706
K              43075
Lactate        44747
MAP            19873
MechVent       39647
Mg             43316
NIDiasABP      25867
NIMAP          26091
NISysABP       25850
Na             43396
PaCO2          40977
PaO2           40985
Platelets      43200
RespRate       34615
SaO2           44457
SysABP         19716
Temp           27565
TroponinI      46626
TroponinT      46038
Urine          11488
WBC            43540
Weight         20933
pH             40703
dtype: int64

In [33]:
less_than_65_validation = validation_X[validation_X["Age"] < 65]
less_than_65_validation_missing = less_than_65_validation.isna().sum()
less_than_65_validation_missing

RecordID           0
level_1            0
Time               0
ALP            38977
ALT            38960
AST            38962
Age                0
Albumin        39219
BUN            36681
Bilirubin      38956
Cholesterol    39787
Creatinine     36674
DiasABP        17023
FiO2           33031
GCS            25727
Gender         38966
Glucose        36816
HCO3           36730
HCT            35701
HR              1743
Height             0
ICUType        38966
K              36491
Lactate        37911
MAP            17125
MechVent       32951
Mg             36732
NIDiasABP      22040
NIMAP          22320
NISysABP       22019
Na             36637
PaCO2          34961
PaO2           34980
Platelets      36606
RespRate       30787
SaO2           38437
SysABP         17021
Temp           24611
TroponinI      39794
TroponinT      39576
Urine          11273
WBC            36877
Weight         18562
pH             34765
dtype: int64

In [34]:
filtered_validation_X = validation_X[(validation_X['Height'] != -1) & (validation_X['Weight'] != -1) & (validation_X['Height'].notna()) & (validation_X['Weight'].notna())] 

In [35]:
filtered_validation_X_metros = filtered_validation_X.copy()
filtered_validation_X_metros["Height"] = filtered_validation_X["Height"]/100
filtered_validation_X_metros["Height"]

336       1.626
337       1.626
341       1.626
342       1.626
343       1.626
          ...  
573884    1.600
573885    1.600
573886    1.600
573887    1.600
575184    1.727
Name: Height, Length: 24282, dtype: float64

In [36]:
bmi_data_validation = filtered_validation_X_metros
bmi_data_validation["BMI"] = filtered_validation_X_metros["Weight"] / (filtered_validation_X_metros["Height"]**2)
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
336,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.0,3.5,,,16.1,48.4,7.4,18.306456,Baixo peso
337,132551,1,1.0,,,,78.0,,,,...,114.5,,,,120.0,,48.4,,18.306456,Baixo peso
341,132551,5,5.0,,,,78.0,,,,...,104.0,,,,130.0,,48.4,7.29,18.306456,Baixo peso
342,132551,6,6.0,,,,78.0,,67.0,,...,141.0,35.6,3.1,,60.0,20.4,48.4,7.25,18.306456,Baixo peso
343,132551,7,7.0,,,,78.0,,,,...,132.0,,,,,,48.4,,18.306456,Baixo peso


In [37]:
classificacao_baixo_peso_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_missing_validation = classificacao_baixo_peso_validation.isna().sum()
classificacao_baixo_peso_missing_validation

RecordID           0
level_1            0
Time               0
ALP              703
ALT              703
AST              703
Age                0
Albumin          703
BUN              652
Bilirubin        702
Cholesterol      707
Creatinine       652
DiasABP          383
FiO2             603
GCS              468
Gender           675
Glucose          655
HCO3             654
HCT              648
HR                14
Height             0
ICUType          675
K                654
Lactate          678
MAP              379
MechVent         618
Mg               661
NIDiasABP        268
NIMAP            278
NISysABP         268
Na               655
PaCO2            644
PaO2             644
Platelets        659
RespRate         507
SaO2             690
SysABP           383
Temp             476
TroponinI        704
TroponinT        705
Urine            260
WBC              660
Weight             0
pH               644
BMI                0
Classificacao      0
dtype: int64

In [38]:
classificacao_peso_normal_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Peso normal"]
classificacao_peso_normal_missing_validation = classificacao_peso_normal_validation.isna().sum()
classificacao_peso_normal_missing_validation

RecordID            0
level_1             0
Time                0
ALP              5624
ALT              5624
AST              5624
Age                 0
Albumin          5649
BUN              5312
Bilirubin        5618
Cholesterol      5742
Creatinine       5308
DiasABP          2126
FiO2             4792
GCS              3868
Gender           5437
Glucose          5338
HCO3             5323
HCT              5182
HR                162
Height              0
ICUType          5437
K                5294
Lactate          5454
MAP              2153
MechVent         4763
Mg               5318
NIDiasABP        3293
NIMAP            3311
NISysABP         3292
Na               5317
PaCO2            5003
PaO2             5004
Platelets        5297
RespRate         4892
SaO2             5455
SysABP           2126
Temp             3268
TroponinI        5725
TroponinT        5687
Urine            1560
WBC              5330
Weight              0
pH               4967
BMI                 0
Classifica

In [39]:
classificacao_sobrepeso_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_missing_validation = classificacao_sobrepeso_validation.isna().sum()
classificacao_sobrepeso_missing_validation

RecordID            0
level_1             0
Time                0
ALP              7481
ALT              7479
AST              7479
Age                 0
Albumin          7499
BUN              7033
Bilirubin        7471
Cholesterol      7571
Creatinine       7032
DiasABP          2619
FiO2             6364
GCS              5152
Gender           7265
Glucose          7075
HCO3             7049
HCT              6813
HR                172
Height              0
ICUType          7265
K                7027
Lactate          7263
MAP              2582
MechVent         6397
Mg               7032
NIDiasABP        4622
NIMAP            4647
NISysABP         4617
Na               7056
PaCO2            6631
PaO2             6633
Platelets        7024
RespRate         6271
SaO2             7120
SysABP           2618
Temp             4086
TroponinI        7573
TroponinT        7505
Urine            1742
WBC              7070
Weight              0
pH               6549
BMI                 0
Classifica

In [40]:
classificacao_obesidade_grau_1_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_grau_1_missing_validation = classificacao_obesidade_grau_1_validation.isna().sum()
classificacao_obesidade_grau_1_missing_validation

RecordID            0
level_1             0
Time                0
ALP              5337
ALT              5337
AST              5338
Age                 0
Albumin          5344
BUN              5039
Bilirubin        5337
Cholesterol      5393
Creatinine       5035
DiasABP          1374
FiO2             4416
GCS              3835
Gender           5210
Glucose          5081
HCO3             5058
HCT              4924
HR                111
Height              0
ICUType          5210
K                5013
Lactate          5200
MAP              1382
MechVent         4441
Mg               5005
NIDiasABP        3609
NIMAP            3646
NISysABP         3609
Na               5059
PaCO2            4655
PaO2             4655
Platelets        5031
RespRate         4892
SaO2             5045
SysABP           1374
Temp             2591
TroponinI        5392
TroponinT        5368
Urine             987
WBC              5061
Weight              0
pH               4589
BMI                 0
Classifica

In [41]:
classificacao_obesidade_grau_2_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_grau_2_missing_validation = classificacao_obesidade_grau_2_validation.isna().sum()
classificacao_obesidade_grau_2_missing_validation

RecordID            0
level_1             0
Time                0
ALP              2509
ALT              2510
AST              2510
Age                 0
Albumin          2517
BUN              2383
Bilirubin        2509
Cholesterol      2536
Creatinine       2382
DiasABP           578
FiO2             2082
GCS              1767
Gender           2468
Glucose          2386
HCO3             2385
HCT              2325
HR                 42
Height              0
ICUType          2468
K                2363
Lactate          2411
MAP               601
MechVent         2089
Mg               2380
NIDiasABP        1693
NIMAP            1713
NISysABP         1693
Na               2387
PaCO2            2182
PaO2             2182
Platelets        2383
RespRate         2267
SaO2             2360
SysABP            578
Temp             1182
TroponinI        2538
TroponinT        2518
Urine             482
WBC              2393
Weight              0
pH               2155
BMI                 0
Classifica

In [42]:
classificacao_obesidade_grau_3_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_grau_3_missing_validation = classificacao_obesidade_grau_3_validation.isna().sum()
classificacao_obesidade_grau_3_missing_validation

RecordID            0
level_1             0
Time                0
ALP              1980
ALT              1978
AST              1978
Age                 0
Albumin          1985
BUN              1873
Bilirubin        1976
Cholesterol      2011
Creatinine       1872
DiasABP           682
FiO2             1614
GCS              1371
Gender           1957
Glucose          1888
HCO3             1877
HCT              1823
HR                 28
Height              0
ICUType          1957
K                1871
Lactate          1931
MAP               679
MechVent         1644
Mg               1872
NIDiasABP        1318
NIMAP            1327
NISysABP         1316
Na               1869
PaCO2            1731
PaO2             1731
Platelets        1864
RespRate         1798
SaO2             1898
SysABP            682
Temp             1075
TroponinI        2015
TroponinT        1987
Urine             440
WBC              1886
Weight              0
pH               1722
BMI                 0
Classifica

In [43]:
df_missing_validation = pd.DataFrame(columns=df_columns)
df_missing_transpose_validation = df_missing_validation.T
df_missing_transpose_validation["Female"] = female_gender_missing_rate_validation
df_missing_transpose_validation["Male"] = male_gender_missing_rate_validation
df_missing_transpose_validation["ICUType 1"] = ICUType_1_validation_missing
df_missing_transpose_validation["ICUType 2"] = ICUType_2_validation_missing
df_missing_transpose_validation["ICUType 3"] = ICUType_3_validation_missing
df_missing_transpose_validation["ICUType 4"] = ICUType_4_validation_missing
df_missing_transpose_validation["Age 65+"] = more_than_or_equal_to_65_validation_missing
df_missing_transpose_validation["Age 65-"] = less_than_65_validation_missing
df_missing_transpose_validation["Low Weight"] = classificacao_baixo_peso_missing_validation
df_missing_transpose_validation["Normal Weight"] = classificacao_peso_normal_missing_validation
df_missing_transpose_validation["Overweight"] = classificacao_sobrepeso_missing_validation
df_missing_transpose_validation["Obesity Grade 1"] = classificacao_obesidade_grau_1_missing_validation
df_missing_transpose_validation["Obesity Grade 2"] = classificacao_obesidade_grau_2_missing_validation
df_missing_transpose_validation["Obesity Grade 3"] = classificacao_obesidade_grau_3_missing_validation
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographcs - Validation</h2>"))
df_missing_transpose_validation

Unnamed: 0,Female,Male,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,0,0,0,0,0,0,0,0,0,0,0,0,0,0
level_1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Time,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ALP,828,1018,255,415,642,534,46108,38977,703,5624,7481,5337,2509,1980
ALT,824,1019,255,415,638,535,46097,38960,703,5624,7479,5337,2510,1978
AST,824,1019,255,415,638,535,46095,38962,703,5624,7479,5338,2510,1978
Age,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Albumin,831,1016,257,415,641,534,46167,39219,703,5649,7499,5344,2517,1985
BUN,762,934,231,400,597,468,43247,36681,652,5312,7033,5039,2383,1873
Bilirubin,828,1020,255,416,641,536,46062,38956,702,5618,7471,5337,2509,1976


<h3>Test data</h3>

In [44]:
test_X = physionet2012_dataset['test_X']

In [45]:
female_gender_distribution_test = test_X[test_X['Gender'] == 0.0]
female_gender_missing_rate_test = female_gender_distribution_test.isna().sum()
female_gender_missing_rate_test

RecordID          0
level_1           0
Time              0
ALP             986
ALT             986
AST             987
Age               0
Albumin         995
BUN             902
Bilirubin       986
Cholesterol    1032
Creatinine      903
DiasABP         850
FiO2            834
GCS             640
Gender            0
Glucose         914
HCO3            910
HCT             893
HR              501
Height            0
ICUType           0
K               911
Lactate         916
MAP             855
MechVent        841
Mg              947
NIDiasABP       633
NIMAP           637
NISysABP        632
Na              913
PaCO2           816
PaO2            822
Platelets       894
RespRate        876
SaO2           1000
SysABP          850
Temp            634
TroponinI      1031
TroponinT      1009
Urine           676
WBC             907
Weight            0
pH              816
dtype: int64

In [46]:
male_gender_distribution_test = test_X[test_X['Gender'] == 1.0]
male_gender_missing_rate_test = male_gender_distribution_test.isna().sum()
male_gender_missing_rate_test

RecordID          0
level_1           0
Time              0
ALP            1310
ALT            1309
AST            1309
Age               0
Albumin        1318
BUN            1212
Bilirubin      1309
Cholesterol    1356
Creatinine     1212
DiasABP        1103
FiO2           1122
GCS             875
Gender            0
Glucose        1236
HCO3           1231
HCT            1185
HR              678
Height            0
ICUType           0
K              1234
Lactate        1204
MAP            1109
MechVent       1132
Mg             1271
NIDiasABP       837
NIMAP           842
NISysABP        837
Na             1233
PaCO2          1030
PaO2           1033
Platelets      1186
RespRate       1154
SaO2           1306
SysABP         1102
Temp            848
TroponinI      1359
TroponinT      1334
Urine           935
WBC            1203
Weight            0
pH             1025
dtype: int64

In [47]:
ICUType_1_test = test_X[test_X['ICUType'] == 1.0]
ICUType_1_test_missing = ICUType_1_test.isna().sum()
ICUType_1_test_missing

RecordID         0
level_1          0
Time             0
ALP            329
ALT            329
AST            329
Age              0
Albumin        332
BUN            299
Bilirubin      328
Cholesterol    348
Creatinine     299
DiasABP        279
FiO2           314
GCS            201
Gender           0
Glucose        303
HCO3           302
HCT            295
HR             149
Height           0
ICUType          0
K              300
Lactate        340
MAP            278
MechVent       319
Mg             325
NIDiasABP      200
NIMAP          200
NISysABP       199
Na             301
PaCO2          312
PaO2           311
Platelets      298
RespRate       269
SaO2           325
SysABP         279
Temp           219
TroponinI      353
TroponinT      335
Urine          238
WBC            299
Weight           0
pH             311
dtype: int64

In [48]:
ICUType_2_test = test_X[test_X['ICUType'] == 2.0]
ICUType_2_test_missing = ICUType_2_test.isna().sum()
ICUType_2_test_missing

RecordID         0
level_1          0
Time             0
ALP            488
ALT            488
AST            488
Age              0
Albumin        490
BUN            462
Bilirubin      488
Cholesterol    492
Creatinine     462
DiasABP        394
FiO2           421
GCS            408
Gender           0
Glucose        471
HCO3           464
HCT            438
HR             360
Height           0
ICUType          0
K              471
Lactate        459
MAP            395
MechVent       409
Mg             472
NIDiasABP      443
NIMAP          443
NISysABP       443
Na             471
PaCO2          193
PaO2           197
Platelets      433
RespRate       482
SaO2           468
SysABP         394
Temp           386
TroponinI      492
TroponinT      489
Urine          391
WBC            451
Weight           0
pH             192
dtype: int64

In [49]:
ICUType_3_test = test_X[test_X['ICUType'] == 3.0]
ICUType_3_test_missing = ICUType_3_test.isna().sum()
ICUType_3_test_missing

RecordID         0
level_1          0
Time             0
ALP            810
ALT            810
AST            811
Age              0
Albumin        817
BUN            765
Bilirubin      811
Cholesterol    854
Creatinine     765
DiasABP        788
FiO2           711
GCS            508
Gender           0
Glucose        764
HCO3           765
HCT            769
HR             379
Height           0
ICUType          0
K              763
Lactate        747
MAP            796
MechVent       724
Mg             790
NIDiasABP      409
NIMAP          415
NISysABP       409
Na             764
PaCO2          778
PaO2           780
Platelets      774
RespRate       698
SaO2           841
SysABP         787
Temp           507
TroponinI      851
TroponinT      833
Urine          576
WBC            776
Weight           0
pH             775
dtype: int64

In [50]:
ICUType_4_test = test_X[test_X['ICUType'] == 4.0]
ICUType_4_test_missing = ICUType_4_test.isna().sum()
ICUType_4_test_missing

RecordID         0
level_1          0
Time             0
ALP            670
ALT            669
AST            669
Age              0
Albumin        675
BUN            589
Bilirubin      669
Cholesterol    695
Creatinine     590
DiasABP        493
FiO2           511
GCS            399
Gender           0
Glucose        613
HCO3           611
HCT            577
HR             292
Height           0
ICUType          0
K              612
Lactate        575
MAP            496
MechVent       522
Mg             632
NIDiasABP      419
NIMAP          422
NISysABP       419
Na             611
PaCO2          564
PaO2           568
Platelets      576
RespRate       582
SaO2           673
SysABP         493
Temp           371
TroponinI      695
TroponinT      687
Urine          407
WBC            585
Weight           0
pH             564
dtype: int64

In [51]:
more_than_or_equal_to_65_test = test_X[test_X["Age"] >= 65]
more_than_or_equal_to_65_test_missing = more_than_or_equal_to_65_test.isna().sum()
more_than_or_equal_to_65_test_missing

RecordID           0
level_1            0
Time               0
ALP            61319
ALT            61304
AST            61299
Age                0
Albumin        61437
BUN            57543
Bilirubin      61282
Cholesterol    62082
Creatinine     57530
DiasABP        26116
FiO2           51968
GCS            41740
Gender         60818
Glucose        57856
HCO3           57626
HCT            55937
HR              2430
Height             0
ICUType        60818
K              57337
Lactate        59570
MAP            26277
MechVent       52489
Mg             57585
NIDiasABP      34226
NIMAP          34552
NISysABP       34202
Na             57660
PaCO2          54619
PaO2           54631
Platelets      57448
RespRate       46399
SaO2           59287
SysABP         26111
Temp           36941
TroponinI      62030
TroponinT      61309
Urine          15220
WBC            57833
Weight         26936
pH             54248
dtype: int64

In [52]:
less_than_65_test = test_X[test_X["Age"] < 65]
less_than_65_test_missing = less_than_65_test.isna().sum()
less_than_65_test_missing

RecordID           0
level_1            0
Time               0
ALP            44893
ALT            44848
AST            44855
Age                0
Albumin        45168
BUN            42213
Bilirubin      44857
Cholesterol    45821
Creatinine     42196
DiasABP        19628
FiO2           38155
GCS            29619
Gender         44878
Glucose        42384
HCO3           42287
HCT            41206
HR              2072
Height             0
ICUType        44878
K              42086
Lactate        43625
MAP            19727
MechVent       38434
Mg             42365
NIDiasABP      25559
NIMAP          25901
NISysABP       25541
Na             42255
PaCO2          40082
PaO2           40085
Platelets      42179
RespRate       34148
SaO2           44160
SysABP         19626
Temp           28610
TroponinI      45846
TroponinT      45565
Urine          13107
WBC            42468
Weight         20015
pH             39836
dtype: int64

In [53]:
filtered_test_X = test_X[(test_X['Height'] != -1) & (test_X['Weight'] != -1) & (test_X['Height'].notna()) & (test_X['Weight'].notna())] 

In [54]:
filtered_test_X_metros = filtered_test_X.copy()
filtered_test_X_metros["Height"] = filtered_test_X["Height"]/100
filtered_test_X_metros["Height"]

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
574412    1.702
574413    1.702
574414    1.702
574415    1.702
574560    1.689
Name: Height, Length: 31889, dtype: float64

In [55]:
bmi_data_test = filtered_test_X_metros
bmi_data_test["BMI"] = filtered_test_X_metros["Weight"] / (filtered_test_X_metros["Height"]**2)
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.73146,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.228364,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.228364,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.228364,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.228364,Sobrepeso


In [56]:
classificacao_baixo_peso_test = bmi_data_test[bmi_data_test["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_missing_test = classificacao_baixo_peso_test.isna().sum()
classificacao_baixo_peso_missing_test

RecordID           0
level_1            0
Time               0
ALP              807
ALT              807
AST              807
Age                0
Albumin          811
BUN              760
Bilirubin        807
Cholesterol      821
Creatinine       760
DiasABP          305
FiO2             678
GCS              552
Gender           780
Glucose          761
HCO3             760
HCT              742
HR                18
Height             0
ICUType          780
K                759
Lactate          793
MAP              305
MechVent         657
Mg               765
NIDiasABP        499
NIMAP            499
NISysABP         498
Na               761
PaCO2            722
PaO2             722
Platelets        762
RespRate         698
SaO2             810
SysABP           305
Temp             535
TroponinI        816
TroponinT        817
Urine            326
WBC              770
Weight             0
pH               720
BMI                0
Classificacao      0
dtype: int64

In [57]:
classificacao_peso_normal_test = bmi_data_test[bmi_data_test["Classificacao"] == "Peso normal"]
classificacao_peso_normal_missing_test = classificacao_peso_normal_test.isna().sum()
classificacao_peso_normal_missing_test

RecordID            0
level_1             0
Time                0
ALP              7202
ALT              7202
AST              7203
Age                 0
Albumin          7237
BUN              6746
Bilirubin        7203
Cholesterol      7321
Creatinine       6744
DiasABP          2870
FiO2             6038
GCS              4849
Gender           6967
Glucose          6783
HCO3             6766
HCT              6614
HR                186
Height              0
ICUType          6967
K                6727
Lactate          7002
MAP              2892
MechVent         6096
Mg               6754
NIDiasABP        4196
NIMAP            4232
NISysABP         4194
Na               6759
PaCO2            6410
PaO2             6407
Platelets        6766
RespRate         5808
SaO2             6946
SysABP           2870
Temp             4236
TroponinI        7309
TroponinT        7246
Urine            1950
WBC              6814
Weight              0
pH               6347
BMI                 0
Classifica

In [58]:
classificacao_sobrepeso_test = bmi_data_test[bmi_data_test["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_missing_test = classificacao_sobrepeso_test.isna().sum()
classificacao_sobrepeso_missing_test

RecordID            0
level_1             0
Time                0
ALP              9863
ALT              9860
AST              9860
Age                 0
Albumin          9903
BUN              9339
Bilirubin        9858
Cholesterol      9981
Creatinine       9333
DiasABP          3144
FiO2             8252
GCS              6839
Gender           9583
Glucose          9386
HCO3             9361
HCT              9061
HR                247
Height              0
ICUType          9583
K                9324
Lactate          9632
MAP              3165
MechVent         8411
Mg               9314
NIDiasABP        6202
NIMAP            6235
NISysABP         6198
Na               9388
PaCO2            8672
PaO2             8669
Platelets        9297
RespRate         8392
SaO2             9325
SysABP           3144
Temp             5335
TroponinI        9983
TroponinT        9913
Urine            2317
WBC              9368
Weight              0
pH               8565
BMI                 0
Classifica

In [59]:
classificacao_obesidade_grau_1_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_grau_1_missing_test = classificacao_obesidade_grau_1_test.isna().sum()
classificacao_obesidade_grau_1_missing_test

RecordID            0
level_1             0
Time                0
ALP              6962
ALT              6961
AST              6961
Age                 0
Albumin          6990
BUN              6608
Bilirubin        6960
Cholesterol      7086
Creatinine       6610
DiasABP          1723
FiO2             5780
GCS              4913
Gender           6880
Glucose          6650
HCO3             6623
HCT              6423
HR                100
Height              0
ICUType          6880
K                6589
Lactate          6795
MAP              1754
MechVent         5834
Mg               6558
NIDiasABP        4837
NIMAP            4854
NISysABP         4836
Na               6629
PaCO2            6005
PaO2             6007
Platelets        6580
RespRate         6377
SaO2             6626
SysABP           1723
Temp             3393
TroponinI        7073
TroponinT        7028
Urine            1166
WBC              6628
Weight              0
pH               5930
BMI                 0
Classifica

In [60]:
classificacao_obesidade_grau_2_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_grau_2_missing_test = classificacao_obesidade_grau_2_test.isna().sum()
classificacao_obesidade_grau_2_missing_test

RecordID            0
level_1             0
Time                0
ALP              3044
ALT              3044
AST              3046
Age                 0
Albumin          3061
BUN              2873
Bilirubin        3042
Cholesterol      3092
Creatinine       2873
DiasABP           820
FiO2             2541
GCS              2135
Gender           3005
Glucose          2898
HCO3             2887
HCT              2789
HR                 47
Height              0
ICUType          3005
K                2885
Lactate          2944
MAP               794
MechVent         2605
Mg               2874
NIDiasABP        1996
NIMAP            2011
NISysABP         1995
Na               2886
PaCO2            2659
PaO2             2660
Platelets        2881
RespRate         2554
SaO2             2857
SysABP            819
Temp             1498
TroponinI        3089
TroponinT        3075
Urine             507
WBC              2889
Weight              0
pH               2624
BMI                 0
Classifica

In [61]:
classificacao_obesidade_grau_3_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_grau_3_missing_test = classificacao_obesidade_grau_3_test.isna().sum()
classificacao_obesidade_grau_3_missing_test

RecordID            0
level_1             0
Time                0
ALP              3107
ALT              3107
AST              3107
Age                 0
Albumin          3123
BUN              2970
Bilirubin        3104
Cholesterol      3156
Creatinine       2969
DiasABP           866
FiO2             2539
GCS              2273
Gender           3060
Glucose          2987
HCO3             2977
HCT              2891
HR                 54
Height              0
ICUType          3060
K                2976
Lactate          3004
MAP               838
MechVent         2569
Mg               2957
NIDiasABP        2181
NIMAP            2190
NISysABP         2178
Na               2980
PaCO2            2686
PaO2             2684
Platelets        2967
RespRate         2806
SaO2             2928
SysABP            866
Temp             1606
TroponinI        3156
TroponinT        3129
Urine             668
WBC              2984
Weight              0
pH               2656
BMI                 0
Classifica

In [62]:
df_missing_test = pd.DataFrame(columns=df_columns)
df_missing_transpose_test = df_missing_test.T
df_missing_transpose_test ["Female"] = female_gender_missing_rate_test
df_missing_transpose_test ["Male"] = male_gender_missing_rate_test
df_missing_transpose_test ["ICUType 2"] = ICUType_2_test_missing
df_missing_transpose_test ["ICUType 3"] = ICUType_3_test_missing
df_missing_transpose_test ["ICUType 4"] = ICUType_4_test_missing
df_missing_transpose_test["Age 65+"] = more_than_or_equal_to_65_test_missing
df_missing_transpose_test["Age 65-"] = less_than_65_test_missing
df_missing_transpose_test ["Low Weight"] = classificacao_baixo_peso_missing_test
df_missing_transpose_test ["Normal Weight"] = classificacao_peso_normal_missing_test
df_missing_transpose_test ["Overweight"] = classificacao_sobrepeso_missing_test
df_missing_transpose_test ["Obesity Grade 1"] = classificacao_obesidade_grau_1_missing_test
df_missing_transpose_test ["Obesity Grade 2"] = classificacao_obesidade_grau_2_missing_test
df_missing_transpose_test ["Obesity Grade 3"] = classificacao_obesidade_grau_3_missing_test
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>original Missing rate per Variable by demographics - Test</h2>"))
df_missing_transpose_test 

Unnamed: 0,Female,Male,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,0,0,0,0,0,0,0,0,0,0,0,0,0
level_1,0,0,0,0,0,0,0,0,0,0,0,0,0
Time,0,0,0,0,0,0,0,0,0,0,0,0,0
ALP,986,1310,488,810,670,61319,44893,807,7202,9863,6962,3044,3107
ALT,986,1309,488,810,669,61304,44848,807,7202,9860,6961,3044,3107
AST,987,1309,488,811,669,61299,44855,807,7203,9860,6961,3046,3107
Age,0,0,0,0,0,0,0,0,0,0,0,0,0
Albumin,995,1318,490,817,675,61437,45168,811,7237,9903,6990,3061,3123
BUN,902,1212,462,765,589,57543,42213,760,6746,9339,6608,2873,2970
Bilirubin,986,1309,488,811,669,61282,44857,807,7203,9858,6960,3042,3104
