In [51]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

<h2>Loading dataset</h2>

In [52]:
from pypots.benchpots.datasets import preprocess_physionet2012
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-05 21:00:07 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-05 21:00:07 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-05 21:00:07 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-05 21:00:08 [INFO]: Loaded successfully!


<h3>Training data</h3>

<h4>Loading training dataset</h4>

In [53]:
train_X = physionet2012_dataset['train_X']

In [54]:
female_gender_ids = train_X[train_X["Gender"] == 0.0]
female_gender_ids = female_gender_ids["RecordID"]
female_gender_missing_rate = train_X[train_X["RecordID"].isin(female_gender_ids)]
female_gender_missing_rate = female_gender_missing_rate.isna().sum()
female_gender_missing_rate

RecordID            0
level_1             0
Time                0
ALP            160614
ALT            160533
AST            160538
Age              9681
Albumin        161194
BUN            151503
Bilirubin      160505
Cholesterol    163043
Creatinine     151452
DiasABP         78459
FiO2           138206
GCS            110583
Gender         159894
Glucose        152007
HCO3           151688
HCT            148304
HR              15709
Height           9681
ICUType        159894
K              150817
Lactate        156908
MAP             78967
MechVent       138813
Mg             151730
NIDiasABP       91659
NIMAP           92670
NISysABP        91600
Na             151523
PaCO2          145854
PaO2           145873
Platelets      151672
RespRate       119939
SaO2           157311
SysABP          78454
Temp           105298
TroponinI      162913
TroponinT      161531
Urine           49708
WBC            152481
Weight          75546
pH             145190
dtype: int64

In [55]:
male_gender_ids = train_X[train_X["Gender"] == 1.0]
male_gender_ids = male_gender_ids["RecordID"]
male_gender_missing_rate = train_X[train_X["RecordID"].isin(male_gender_ids)]
male_gender_missing_rate = male_gender_missing_rate.isna().sum()
male_gender_missing_rate

RecordID            0
level_1             0
Time                0
ALP            201179
ALT            201081
AST            201074
Age             11917
Albumin        201961
BUN            189683
Bilirubin      201061
Cholesterol    204118
Creatinine     189619
DiasABP         89989
FiO2           172026
GCS            138866
Gender         200220
Glucose        190695
HCO3           190036
HCT            184553
HR              20388
Height          11917
ICUType        200220
K              189089
Lactate        196001
MAP             90616
MechVent       173763
Mg             190009
NIDiasABP      121122
NIMAP          122198
NISysABP       121044
Na             190098
PaCO2          179656
PaO2           179692
Platelets      189128
RespRate       158373
SaO2           195961
SysABP          89979
Temp           126309
TroponinI      204077
TroponinT      202318
Urine           63454
WBC            190656
Weight          98049
pH             178372
dtype: int64

In [56]:
ICUType_1_training_ids = train_X[train_X['ICUType'] == 1.0]
ICUType_1_training_ids = ICUType_1_training_ids[ICUType_1_training_ids["Gender"] != -1.0]
ICUType_1_training_ids = ICUType_1_training_ids[ICUType_1_training_ids["Time"] == 0.0]
ICUType_1_training_ids = ICUType_1_training_ids["RecordID"]
ICUType_1_training_missing = train_X[train_X["RecordID"].isin(ICUType_1_training_ids)]
ICUType_1_training_missing = ICUType_1_training_missing.isna().sum()
ICUType_1_training_missing

RecordID           0
level_1            0
Time               0
ALP            52366
ALT            52316
AST            52318
Age             4392
Albumin        52548
BUN            49342
Bilirubin      52328
Cholesterol    52793
Creatinine     49279
DiasABP        31587
FiO2           47406
GCS            39434
Gender         52029
Glucose        49504
HCO3           49466
HCT            48500
HR              6679
Height          4392
ICUType        52029
K              48720
Lactate        52009
MAP            31715
MechVent       47796
Mg             49384
NIDiasABP      27052
NIMAP          27194
NISysABP       27038
Na             49481
PaCO2          48813
PaO2           48807
Platelets      49351
RespRate       33956
SaO2           50574
SysABP         31586
Temp           37393
TroponinI      52967
TroponinT      51990
Urine          23130
WBC            49723
Weight         29600
pH             48731
dtype: int64

In [57]:
ICUType_2_training_ids = train_X[train_X['ICUType'] == 2.0]
ICUType_2_training_ids = ICUType_2_training_ids[ICUType_2_training_ids["Gender"] != -1.0]
ICUType_2_training_ids = ICUType_2_training_ids[ICUType_2_training_ids["Time"] == 0.0]
ICUType_2_training_ids = ICUType_2_training_ids["RecordID"]
ICUType_2_training_missing = train_X[train_X["RecordID"].isin(ICUType_2_training_ids)]
ICUType_2_training_missing = ICUType_2_training_missing.isna().sum()
ICUType_2_training_missing

RecordID           0
level_1            0
Time               0
ALP            76753
ALT            76738
AST            76739
Age             2380
Albumin        76939
BUN            72308
Bilirubin      76752
Cholesterol    77262
Creatinine     72301
DiasABP        16520
FiO2           64212
GCS            56734
Gender         75670
Glucose        73686
HCO3           72663
HCT            68857
HR              6112
Height          2380
ICUType        75670
K              73230
Lactate        74620
MAP            16410
MechVent       64352
Mg             72414
NIDiasABP      60276
NIMAP          60408
NISysABP       60238
Na             73400
PaCO2          61523
PaO2           61566
Platelets      71026
RespRate       73932
SaO2           68955
SysABP         16519
Temp           31922
TroponinI      77160
TroponinT      77086
Urine          11905
WBC            72114
Weight         36958
pH             60012
dtype: int64

In [58]:
ICUType_3_training_ids = train_X[train_X['ICUType'] == 3.0]
ICUType_3_training_ids = ICUType_3_training_ids[ICUType_3_training_ids["Gender"] != -1.0]
ICUType_3_training_ids = ICUType_3_training_ids[ICUType_3_training_ids["Time"] == 0.0]
ICUType_3_training_ids = ICUType_3_training_ids["RecordID"]
ICUType_3_training_missing = train_X[train_X["RecordID"].isin(ICUType_3_training_ids)]
ICUType_3_training_missing = ICUType_3_training_missing.isna().sum()
ICUType_3_training_missing

RecordID            0
level_1             0
Time                0
ALP            130897
ALT            130808
AST            130805
Age              9804
Albumin        131596
BUN            123692
Bilirubin      130702
Cholesterol    133750
Creatinine     123653
DiasABP         85912
FiO2           114381
GCS             98337
Gender         131083
Glucose        123712
HCO3           123654
HCT            121902
HR              14454
Height           9804
ICUType        131083
K              122848
Lactate        128688
MAP             86567
MechVent       115752
Mg             124288
NIDiasABP       58103
NIMAP           59496
NISysABP        58060
Na             123425
PaCO2          123776
PaO2           123768
Platelets      124659
RespRate        92091
SaO2           132316
SysABP          85904
Temp            95655
TroponinI      133534
TroponinT      132063
Urine           52860
WBC            125048
Weight          47058
pH             123613
dtype: int64

In [59]:
ICUType_4_training_ids = train_X[train_X['ICUType'] == 4.0]
ICUType_4_training_ids = ICUType_4_training_ids[ICUType_4_training_ids["Gender"] != -1.0]
ICUType_4_training_ids = ICUType_4_training_ids[ICUType_4_training_ids["Time"] == 0.0]
ICUType_4_training_ids = ICUType_4_training_ids["RecordID"]
ICUType_4_training_missing = train_X[train_X["RecordID"].isin(ICUType_4_training_ids)]
ICUType_4_training_missing = ICUType_4_training_missing.isna().sum()
ICUType_4_training_missing

RecordID            0
level_1             0
Time                0
ALP            101777
ALT            101752
AST            101750
Age              5022
Albumin        102072
BUN             95844
Bilirubin      101784
Cholesterol    103356
Creatinine      95838
DiasABP         34429
FiO2            84233
GCS             54944
Gender         101332
Glucose         95800
HCO3            95941
HCT             93598
HR               8852
Height           5022
ICUType        101332
K               95108
Lactate         97592
MAP             34891
MechVent        84676
Mg              95653
NIDiasABP       67350
NIMAP           67770
NISysABP        67308
Na              95315
PaCO2           91398
PaO2            91424
Platelets       95764
RespRate        78333
SaO2           101427
SysABP          34424
Temp            66637
TroponinI      103329
TroponinT      102710
Urine           25267
WBC             96252
Weight          59979
pH              91206
dtype: int64

In [60]:
more_than_or_equal_to_65_train_ids = train_X[train_X["Age"] >= 65]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids[more_than_or_equal_to_65_train_ids["Gender"] != -1.0]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids[more_than_or_equal_to_65_train_ids["Time"] == 0.0]
more_than_or_equal_to_65_train_ids = more_than_or_equal_to_65_train_ids["RecordID"]
more_than_or_equal_to_65_train_missing = train_X[train_X["RecordID"].isin(more_than_or_equal_to_65_train_ids)]
more_than_or_equal_to_65_train_missing = more_than_or_equal_to_65_train_missing.isna().sum()
more_than_or_equal_to_65_train_missing

RecordID            0
level_1             0
Time                0
ALP            199526
ALT            199463
AST            199459
Age             11223
Albumin        199933
BUN            187980
Bilirubin      199408
Cholesterol    201751
Creatinine     187905
DiasABP         91379
FiO2           170766
GCS            138750
Gender         197917
Glucose        188957
HCO3           188297
HCT            183204
HR              18879
Height          11223
ICUType        197917
K              187353
Lactate        194490
MAP             91942
MechVent       172658
Mg             188203
NIDiasABP      116631
NIMAP          117566
NISysABP       116555
Na             188364
PaCO2          178736
PaO2           178767
Platelets      187679
RespRate       151588
SaO2           192999
SysABP          91373
Temp           124507
TroponinI      201595
TroponinT      199373
Urine           58858
WBC            188955
Weight          94762
pH             177621
dtype: int64

In [61]:
less_than_65_train_ids = train_X[train_X["Age"] < 65]
less_than_65_train_ids = less_than_65_train_ids[less_than_65_train_ids["Gender"] != -1.0]
less_than_65_train_ids = less_than_65_train_ids[less_than_65_train_ids["Time"] == 0.0]
less_than_65_train_ids = less_than_65_train_ids["RecordID"]
less_than_65_train_missing = train_X[train_X["RecordID"].isin(less_than_65_train_ids)]
less_than_65_train_missing = less_than_65_train_missing.isna().sum()
less_than_65_train_missing


RecordID            0
level_1             0
Time                0
ALP            162267
ALT            162151
AST            162153
Age             10375
Albumin        163222
BUN            153206
Bilirubin      162158
Cholesterol    165410
Creatinine     153166
DiasABP         77069
FiO2           139466
GCS            110699
Gender         162197
Glucose        153745
HCO3           153427
HCT            149653
HR              17218
Height          10375
ICUType        162197
K              152553
Lactate        158419
MAP             77641
MechVent       139918
Mg             153536
NIDiasABP       96150
NIMAP           97302
NISysABP        96089
Na             153257
PaCO2          146774
PaO2           146798
Platelets      153121
RespRate       126724
SaO2           160273
SysABP          77060
Temp           107100
TroponinI      165395
TroponinT      164476
Urine           54304
WBC            154182
Weight          78833
pH             145941
dtype: int64

In [62]:
filtered_train_X = train_X[(train_X['Height'] != -1) & (train_X['Weight'] != -1) & (train_X['Height'].notna()) & (train_X['Weight'].notna())] 

In [63]:
def classify_BMI(BMI):
    if BMI < 18.5:
        return "Baixo peso"
    elif BMI >= 18.6 and BMI <= 24.9:
        return "Peso normal"
    elif BMI >= 25 and BMI <= 29.9:
        return "Sobrepeso"
    elif BMI >= 30 and BMI <= 34.9:
        return "Obesidade grau 1"
    elif BMI >= 35 and BMI <= 39.9:
        return "Obesidade grau 2"
    elif BMI >= 40:
        return "Obesidade grau 3"

In [64]:
filtered_train_X_metros = filtered_train_X.copy()
filtered_train_X_metros["Height"] = filtered_train_X["Height"]/100
filtered_train_X_metros["Height"]

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
574940    1.702
574941    1.702
574942    1.702
574943    1.702
575184    1.727
Name: Height, Length: 101584, dtype: float64

In [65]:
bmi_data_train = filtered_train_X_metros
bmi_data_train["BMI"] = filtered_train_X_metros["Weight"] / (filtered_train_X_metros["Height"]**2)
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.73146,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.228364,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.228364,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.228364,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.228364,Sobrepeso


In [66]:
classificacao_baixo_peso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_ids = classificacao_baixo_peso_ids[classificacao_baixo_peso_ids["Gender"] != -1.0]
classificacao_baixo_peso_ids = classificacao_baixo_peso_ids["RecordID"]
classificacao_baixo_peso_missing = train_X[train_X["RecordID"].isin(classificacao_baixo_peso_ids)]
classificacao_baixo_peso_missing = classificacao_baixo_peso_missing.isna().sum()
classificacao_baixo_peso_missing

RecordID          0
level_1           0
Time              0
ALP            6431
ALT            6430
AST            6429
Age             267
Albumin        6441
BUN            6036
Bilirubin      6425
Cholesterol    6519
Creatinine     6034
DiasABP        2423
FiO2           5448
GCS            4533
Gender         6392
Glucose        6062
HCO3           6045
HCT            5886
HR              490
Height          267
ICUType        6392
K              6020
Lactate        6219
MAP            2377
MechVent       5462
Mg             6046
NIDiasABP      4147
NIMAP          4180
NISysABP       4147
Na             6050
PaCO2          5656
PaO2           5653
Platelets      6025
RespRate       5401
SaO2           6158
SysABP         2422
Temp           3671
TroponinI      6503
TroponinT      6477
Urine          1748
WBC            6073
Weight         2913
pH             5601
dtype: int64

In [67]:
classificacao_normal_peso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Peso normal"]
classificacao_normal_peso_ids = classificacao_normal_peso_ids[classificacao_normal_peso_ids["Gender"] != -1.0]
classificacao_normal_peso_ids = classificacao_normal_peso_ids["RecordID"]
classificacao_normal_peso_missing = train_X[train_X["RecordID"].isin(classificacao_normal_peso_ids)]
classificacao_normal_peso_missing = classificacao_normal_peso_missing.isna().sum()
classificacao_normal_peso_missing

RecordID           0
level_1            0
Time               0
ALP            57642
ALT            57616
AST            57616
Age             2336
Albumin        57852
BUN            54225
Bilirubin      57610
Cholesterol    58407
Creatinine     54205
DiasABP        19777
FiO2           48788
GCS            39489
Gender         57293
Glucose        54611
HCO3           54357
HCT            52608
HR              4690
Height          2336
ICUType        57293
K              54096
Lactate        55798
MAP            19936
MechVent       48718
Mg             54243
NIDiasABP      38955
NIMAP          39079
NISysABP       38930
Na             54437
PaCO2          50033
PaO2           50056
Platelets      53874
RespRate       48722
SaO2           55061
SysABP         19777
Temp           32070
TroponinI      58341
TroponinT      57968
Urine          15318
WBC            54433
Weight         27812
pH             49501
dtype: int64

In [68]:
classificacao_sobrepeso_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_ids = classificacao_sobrepeso_ids[classificacao_sobrepeso_ids["Gender"] != -1.0]
classificacao_sobrepeso_ids = classificacao_sobrepeso_ids["RecordID"]
classificacao_sobrepeso_missing = train_X[train_X["RecordID"].isin(classificacao_sobrepeso_ids)]
classificacao_sobrepeso_missing = classificacao_sobrepeso_missing.isna().sum()
classificacao_sobrepeso_missing

RecordID           0
level_1            0
Time               0
ALP            76582
ALT            76540
AST            76540
Age             2993
Albumin        76915
BUN            72037
Bilirubin      76539
Cholesterol    77638
Creatinine     72005
DiasABP        22699
FiO2           64343
GCS            54209
Gender         76140
Glucose        72811
HCO3           72255
HCT            69358
HR              6502
Height          2993
ICUType        76140
K              72231
Lactate        74298
MAP            22826
MechVent       64515
Mg             72119
NIDiasABP      54990
NIMAP          55244
NISysABP       54961
Na             72583
PaCO2          64819
PaO2           64850
Platelets      71210
RespRate       67769
SaO2           71941
SysABP         22696
Temp           39005
TroponinI      77577
TroponinT      77142
Urine          18505
WBC            72113
Weight         34964
pH             63919
dtype: int64

In [69]:
classificacao_obesidade_1_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_1_ids = classificacao_obesidade_1_ids[classificacao_obesidade_1_ids["Gender"] != -1.0]
classificacao_obesidade_1_ids = classificacao_obesidade_1_ids["RecordID"]
classificacao_obesidade_1_missing = train_X[train_X["RecordID"].isin(classificacao_obesidade_1_ids)]
classificacao_obesidade_1_missing = classificacao_obesidade_1_missing.isna().sum()
classificacao_obesidade_1_missing

RecordID           0
level_1            0
Time               0
ALP            49653
ALT            49635
AST            49640
Age             1637
Albumin        49879
BUN            46668
Bilirubin      49644
Cholesterol    50379
Creatinine     46656
DiasABP        12877
FiO2           40952
GCS            35391
Gender         49397
Glucose        47268
HCO3           46831
HCT            44866
HR              3937
Height          1637
ICUType        49397
K              46862
Lactate        47867
MAP            13020
MechVent       41254
Mg             46726
NIDiasABP      36915
NIMAP          37052
NISysABP       36900
Na             47071
PaCO2          41022
PaO2           41036
Platelets      46150
RespRate       45384
SaO2           46230
SysABP         12876
Temp           23573
TroponinI      50324
TroponinT      50015
Urine          10595
WBC            46775
Weight         22147
pH             40441
dtype: int64

In [70]:
classificacao_obesidade_2_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_2_ids = classificacao_obesidade_2_ids[classificacao_obesidade_2_ids["Gender"] != -1.0]
classificacao_obesidade_2_ids = classificacao_obesidade_2_ids["RecordID"]
classificacao_obesidade_2_missing = train_X[train_X["RecordID"].isin(classificacao_obesidade_2_ids)]
classificacao_obesidade_2_missing = classificacao_obesidade_2_missing.isna().sum()
classificacao_obesidade_2_missing

RecordID           0
level_1            0
Time               0
ALP            22126
ALT            22120
AST            22119
Age              763
Albumin        22254
BUN            20772
Bilirubin      22113
Cholesterol    22491
Creatinine     20761
DiasABP         6067
FiO2           18071
GCS            16101
Gender         22043
Glucose        21049
HCO3           20851
HCT            19929
HR              1813
Height           763
ICUType        22043
K              20881
Lactate        21159
MAP             6107
MechVent       18163
Mg             20805
NIDiasABP      16507
NIMAP          16608
NISysABP       16498
Na             20960
PaCO2          18062
PaO2           18077
Platelets      20511
RespRate       19983
SaO2           20593
SysABP          6065
Temp           10115
TroponinI      22459
TroponinT      22332
Urine           4981
WBC            20771
Weight          9114
pH             17852
dtype: int64

In [71]:
classificacao_obesidade_3_ids = bmi_data_train[bmi_data_train["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_3_ids = classificacao_obesidade_3_ids[classificacao_obesidade_3_ids["Gender"] != -1.0]
classificacao_obesidade_3_ids = classificacao_obesidade_3_ids["RecordID"]
classificacao_obesidade_3_missing = train_X[train_X["RecordID"].isin(classificacao_obesidade_3_ids)]
classificacao_obesidade_3_missing = classificacao_obesidade_3_missing.isna().sum()
classificacao_obesidade_3_missing

RecordID           0
level_1            0
Time               0
ALP            18022
ALT            18014
AST            18017
Age              688
Albumin        18121
BUN            16972
Bilirubin      18015
Cholesterol    18308
Creatinine     16971
DiasABP         5548
FiO2           14570
GCS            13066
Gender         17954
Glucose        17129
HCO3           17026
HCT            16484
HR              1459
Height           688
ICUType        17954
K              17011
Lactate        17296
MAP             5573
MechVent       14585
Mg             16987
NIDiasABP      13149
NIMAP          13211
NISysABP       13139
Na             17093
PaCO2          14884
PaO2           14883
Platelets      16911
RespRate       16252
SaO2           16829
SysABP          5547
Temp            9456
TroponinI      18304
TroponinT      18182
Urine           4390
WBC            17069
Weight          7529
pH             14763
dtype: int64

In [72]:
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [73]:
df_missing = pd.DataFrame(columns=df_columns)
df_missing_transpose = df_missing.T
df_missing_transpose["Female"] = female_gender_missing_rate
df_missing_transpose["Male"] = male_gender_missing_rate
df_missing_transpose["ICUType 1"] = ICUType_1_training_missing
df_missing_transpose["ICUType 2"] = ICUType_2_training_missing
df_missing_transpose["ICUType 3"] = ICUType_3_training_missing
df_missing_transpose["ICUType 4"] = ICUType_4_training_missing
df_missing_transpose["Age 65+"] = more_than_or_equal_to_65_train_missing
df_missing_transpose["Age 65-"] = less_than_65_train_missing
df_missing_transpose["Low Weight"] = classificacao_baixo_peso_missing
df_missing_transpose["Normal Weight"] = classificacao_normal_peso_missing
df_missing_transpose["Overweight"] = classificacao_sobrepeso_missing
df_missing_transpose["Obesity Grade 1"] = classificacao_obesidade_1_missing
df_missing_transpose["Obesity Grade 2"] = classificacao_obesidade_2_missing
df_missing_transpose["Obesity Grade 3"] = classificacao_obesidade_3_missing
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographics - Train</h2>"))
df_missing_transpose

Unnamed: 0,Female,Male,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,0,0,0,0,0,0,0,0,0,0,0,0,0,0
level_1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Time,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ALP,160614,201179,52366,76753,130897,101777,199526,162267,6431,57642,76582,49653,22126,18022
ALT,160533,201081,52316,76738,130808,101752,199463,162151,6430,57616,76540,49635,22120,18014
AST,160538,201074,52318,76739,130805,101750,199459,162153,6429,57616,76540,49640,22119,18017
Age,9681,11917,4392,2380,9804,5022,11223,10375,267,2336,2993,1637,763,688
Albumin,161194,201961,52548,76939,131596,102072,199933,163222,6441,57852,76915,49879,22254,18121
BUN,151503,189683,49342,72308,123692,95844,187980,153206,6036,54225,72037,46668,20772,16972
Bilirubin,160505,201061,52328,76752,130702,101784,199408,162158,6425,57610,76539,49644,22113,18015


<h3>Validation data</h3>

In [74]:
validation_X = physionet2012_dataset['val_X']

In [75]:
female_gender_distribution_validation = validation_X[validation_X['Gender'] == 0.0]
female_gender_missing_rate_validation = female_gender_distribution_validation.isna().sum()
female_gender_missing_rate_validation

RecordID         0
level_1          0
Time             0
ALP            793
ALT            792
AST            793
Age              0
Albumin        796
BUN            728
Bilirubin      792
Cholesterol    827
Creatinine     728
DiasABP        679
FiO2           675
GCS            522
Gender           0
Glucose        740
HCO3           738
HCT            716
HR             393
Height           0
ICUType          0
K              737
Lactate        723
MAP            688
MechVent       683
Mg             754
NIDiasABP      493
NIMAP          497
NISysABP       493
Na             739
PaCO2          673
PaO2           679
Platelets      717
RespRate       687
SaO2           807
SysABP         679
Temp           500
TroponinI      830
TroponinT      811
Urine          556
WBC            724
Weight           0
pH             671
dtype: int64

In [76]:
male_gender_distribution_validation = validation_X[validation_X['Gender'] == 1.0]
male_gender_missing_rate_validation = male_gender_distribution_validation.isna().sum()
male_gender_missing_rate_validation

RecordID          0
level_1           0
Time              0
ALP            1040
ALT            1040
AST            1040
Age               0
Albumin        1046
BUN             958
Bilirubin      1041
Cholesterol    1081
Creatinine      958
DiasABP         877
FiO2            875
GCS             723
Gender            0
Glucose         979
HCO3            977
HCT             938
HR              547
Height            0
ICUType           0
K               977
Lactate         956
MAP             885
MechVent        887
Mg             1000
NIDiasABP       696
NIMAP           698
NISysABP        696
Na              978
PaCO2           825
PaO2            831
Platelets       943
RespRate        949
SaO2           1046
SysABP          877
Temp            685
TroponinI      1080
TroponinT      1056
Urine           756
WBC             951
Weight            0
pH              823
dtype: int64

In [77]:
ICUType_1_validation = validation_X[validation_X['ICUType'] == 1.0]
ICUType_1_validation_missing = ICUType_1_validation.isna().sum()
ICUType_1_validation_missing

RecordID         0
level_1          0
Time             0
ALP            273
ALT            273
AST            274
Age              0
Albumin        278
BUN            244
Bilirubin      272
Cholesterol    289
Creatinine     244
DiasABP        222
FiO2           252
GCS            160
Gender           0
Glucose        247
HCO3           247
HCT            244
HR             116
Height           0
ICUType          0
K              246
Lactate        278
MAP            224
MechVent       257
Mg             269
NIDiasABP      164
NIMAP          164
NISysABP       164
Na             247
PaCO2          260
PaO2           261
Platelets      245
RespRate       226
SaO2           275
SysABP         222
Temp           171
TroponinI      293
TroponinT      278
Urine          201
WBC            245
Weight           0
pH             259
dtype: int64

In [78]:
ICUType_2_validation = validation_X[validation_X['ICUType'] == 2.0]
ICUType_2_validation_missing = ICUType_2_validation.isna().sum()
ICUType_2_validation_missing

RecordID         0
level_1          0
Time             0
ALP            401
ALT            401
AST            401
Age              0
Albumin        401
BUN            386
Bilirubin      402
Cholesterol    405
Creatinine     386
DiasABP        340
FiO2           358
GCS            348
Gender           0
Glucose        393
HCO3           389
HCT            367
HR             308
Height           0
ICUType          0
K              393
Lactate        376
MAP            342
MechVent       355
Mg             396
NIDiasABP      367
NIMAP          366
NISysABP       367
Na             392
PaCO2          184
PaO2           189
Platelets      364
RespRate       393
SaO2           396
SysABP         340
Temp           323
TroponinI      406
TroponinT      404
Urine          324
WBC            373
Weight           0
pH             184
dtype: int64

In [79]:
ICUType_3_validation = validation_X[validation_X['ICUType'] == 3.0]
ICUType_3_validation_missing = ICUType_3_validation.isna().sum()
ICUType_3_validation_missing

RecordID         0
level_1          0
Time             0
ALP            646
ALT            645
AST            645
Age              0
Albumin        648
BUN            608
Bilirubin      646
Cholesterol    680
Creatinine     608
DiasABP        625
FiO2           549
GCS            430
Gender           0
Glucose        608
HCO3           609
HCT            611
HR             305
Height           0
ICUType          0
K              604
Lactate        589
MAP            632
MechVent       563
Mg             617
NIDiasABP      336
NIMAP          342
NISysABP       336
Na             608
PaCO2          620
PaO2           624
Platelets      614
RespRate       558
SaO2           672
SysABP         625
Temp           419
TroponinI      678
TroponinT      659
Urine          487
WBC            614
Weight           0
pH             618
dtype: int64

In [80]:
ICUType_4_validation = validation_X[validation_X['ICUType'] == 4.0]
ICUType_4_validation_missing = ICUType_4_validation.isna().sum()
ICUType_4_validation_missing

RecordID         0
level_1          0
Time             0
ALP            514
ALT            514
AST            514
Age              0
Albumin        516
BUN            449
Bilirubin      514
Cholesterol    535
Creatinine     449
DiasABP        370
FiO2           392
GCS            308
Gender           0
Glucose        472
HCO3           471
HCT            433
HR             212
Height           0
ICUType          0
K              472
Lactate        437
MAP            376
MechVent       396
Mg             473
NIDiasABP      323
NIMAP          324
NISysABP       323
Na             471
PaCO2          434
PaO2           436
Platelets      438
RespRate       460
SaO2           511
SysABP         370
Temp           273
TroponinI      534
TroponinT      527
Urine          301
WBC            444
Weight           0
pH             433
dtype: int64

In [81]:
more_than_or_equal_to_65_validation = validation_X[validation_X["Age"] >= 65]
more_than_or_equal_to_65_validation_missing = more_than_or_equal_to_65_validation.isna().sum()
more_than_or_equal_to_65_validation_missing

RecordID           0
level_1            0
Time               0
ALP            45457
ALT            45445
AST            45444
Age                0
Albumin        45585
BUN            42635
Bilirubin      45431
Cholesterol    46052
Creatinine     42623
DiasABP        19466
FiO2           38123
GCS            31541
Gender         45125
Glucose        42923
HCO3           42724
HCT            41421
HR              1907
Height             0
ICUType        45125
K              42512
Lactate        44012
MAP            19594
MechVent       38658
Mg             42707
NIDiasABP      25521
NIMAP          25811
NISysABP       25507
Na             42786
PaCO2          40368
PaO2           40384
Platelets      42513
RespRate       34583
SaO2           43897
SysABP         19463
Temp           27227
TroponinI      46028
TroponinT      45448
Urine          11650
WBC            42838
Weight         19511
pH             40122
dtype: int64

In [82]:
less_than_65_validation = validation_X[validation_X["Age"] < 65]
less_than_65_validation_missing = less_than_65_validation.isna().sum()
less_than_65_validation_missing

RecordID           0
level_1            0
Time               0
ALP            39624
ALT            39600
AST            39602
Age                0
Albumin        39842
BUN            37305
Bilirubin      39583
Cholesterol    40366
Creatinine     37294
DiasABP        17731
FiO2           33830
GCS            26109
Gender         39542
Glucose        37422
HCO3           37371
HCT            36325
HR              1769
Height             0
ICUType        39542
K              37098
Lactate        38675
MAP            17922
MechVent       33943
Mg             37383
NIDiasABP      22024
NIMAP          22262
NISysABP       22008
Na             37308
PaCO2          35669
PaO2           35684
Platelets      37164
RespRate       30109
SaO2           38940
SysABP         17730
Temp           25292
TroponinI      40390
TroponinT      40085
Urine          11771
WBC            37469
Weight         17865
pH             35458
dtype: int64

In [83]:
filtered_validation_X = validation_X[(validation_X['Height'] != -1) & (validation_X['Weight'] != -1) & (validation_X['Height'].notna()) & (validation_X['Weight'].notna())] 

In [84]:
filtered_validation_X_metros = filtered_validation_X.copy()
filtered_validation_X_metros["Height"] = filtered_validation_X["Height"]/100
filtered_validation_X_metros["Height"]

960       1.549
2688      1.676
2736      1.803
2754      1.803
2755      1.803
          ...  
574988    1.524
574989    1.524
574990    1.524
574991    1.524
575088    1.727
Name: Height, Length: 25843, dtype: float64

In [85]:
bmi_data_validation = filtered_validation_X_metros
bmi_data_validation["BMI"] = filtered_validation_X_metros["Weight"] / (filtered_validation_X_metros["Height"]**2)
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
960,132588,0,0.0,,,,48.0,,,,...,,,,,,,42.3,,17.6294,Baixo peso
2688,132666,0,0.0,,,,53.0,,81.0,,...,,,,,,,62.0,,22.0721,Peso normal
2736,132669,0,0.0,,,,74.0,,,,...,,,,,,,81.8,,25.162967,Sobrepeso
2754,132669,18,18.0,,,,74.0,,,,...,87.0,37.0,,,60.0,,91.5,,28.14684,Sobrepeso
2755,132669,19,19.0,,,,74.0,,10.0,,...,95.666667,37.1,,,23.0,8.6,91.5,7.41,28.14684,Sobrepeso


In [86]:
classificacao_baixo_peso_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_missing_validation = classificacao_baixo_peso_validation.isna().sum()
classificacao_baixo_peso_missing_validation

RecordID           0
level_1            0
Time               0
ALP              560
ALT              560
AST              560
Age                0
Albumin          567
BUN              530
Bilirubin        560
Cholesterol      573
Creatinine       529
DiasABP          194
FiO2             457
GCS              352
Gender           548
Glucose          530
HCO3             530
HCT              524
HR                12
Height             0
ICUType          548
K                528
Lactate          535
MAP              195
MechVent         461
Mg               532
NIDiasABP        337
NIMAP            337
NISysABP         336
Na               530
PaCO2            516
PaO2             517
Platelets        533
RespRate         417
SaO2             559
SysABP           194
Temp             378
TroponinI        568
TroponinT        558
Urine            200
WBC              539
Weight             0
pH               514
BMI                0
Classificacao      0
dtype: int64

In [87]:
classificacao_peso_normal_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Peso normal"]
classificacao_peso_normal_missing_validation = classificacao_peso_normal_validation.isna().sum()
classificacao_peso_normal_missing_validation

RecordID            0
level_1             0
Time                0
ALP              6077
ALT              6074
AST              6075
Age                 0
Albumin          6092
BUN              5735
Bilirubin        6077
Cholesterol      6174
Creatinine       5734
DiasABP          2440
FiO2             5121
GCS              4108
Gender           5877
Glucose          5764
HCO3             5745
HCT              5559
HR                156
Height              0
ICUType          5877
K                5707
Lactate          5906
MAP              2484
MechVent         5158
Mg               5748
NIDiasABP        3455
NIMAP            3491
NISysABP         3453
Na               5746
PaCO2            5430
PaO2             5431
Platelets        5686
RespRate         4927
SaO2             5858
SysABP           2440
Temp             3498
TroponinI        6174
TroponinT        6107
Urine            1761
WBC              5732
Weight              0
pH               5382
BMI                 0
Classifica

In [88]:
classificacao_sobrepeso_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_missing_validation = classificacao_sobrepeso_validation.isna().sum()
classificacao_sobrepeso_missing_validation

RecordID            0
level_1             0
Time                0
ALP              8451
ALT              8449
AST              8450
Age                 0
Albumin          8510
BUN              7971
Bilirubin        8447
Cholesterol      8618
Creatinine       7969
DiasABP          2570
FiO2             7018
GCS              6018
Gender           8279
Glucose          8034
HCO3             7995
HCT              7739
HR                189
Height              0
ICUType          8279
K                7954
Lactate          8195
MAP              2570
MechVent         7118
Mg               7974
NIDiasABP        5520
NIMAP            5564
NISysABP         5514
Na               8017
PaCO2            7429
PaO2             7431
Platelets        7942
RespRate         7319
SaO2             8034
SysABP           2569
Temp             4472
TroponinI        8607
TroponinT        8531
Urine            1940
WBC              8007
Weight              0
pH               7355
BMI                 0
Classifica

In [89]:
classificacao_obesidade_grau_1_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_grau_1_missing_validation = classificacao_obesidade_grau_1_validation.isna().sum()
classificacao_obesidade_grau_1_missing_validation

RecordID            0
level_1             0
Time                0
ALP              5898
ALT              5899
AST              5898
Age                 0
Albumin          5928
BUN              5582
Bilirubin        5896
Cholesterol      5980
Creatinine       5579
DiasABP          1849
FiO2             4926
GCS              4182
Gender           5797
Glucose          5623
HCO3             5609
HCT              5401
HR                101
Height              0
ICUType          5797
K                5572
Lactate          5740
MAP              1844
MechVent         4966
Mg               5554
NIDiasABP        3796
NIMAP            3805
NISysABP         3795
Na               5605
PaCO2            5187
PaO2             5189
Platelets        5563
RespRate         5275
SaO2             5635
SysABP           1849
Temp             2972
TroponinI        5970
TroponinT        5926
Urine            1137
WBC              5599
Weight              0
pH               5120
BMI                 0
Classifica

In [90]:
classificacao_obesidade_grau_2_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_grau_2_missing_validation = classificacao_obesidade_grau_2_validation.isna().sum()
classificacao_obesidade_grau_2_missing_validation

RecordID            0
level_1             0
Time                0
ALP              2096
ALT              2096
AST              2096
Age                 0
Albumin          2096
BUN              1988
Bilirubin        2097
Cholesterol      2118
Creatinine       1986
DiasABP           581
FiO2             1824
GCS              1531
Gender           2057
Glucose          1994
HCO3             1993
HCT              1955
HR                 33
Height              0
ICUType          2057
K                1978
Lactate          2047
MAP               585
MechVent         1846
Mg               1987
NIDiasABP        1331
NIMAP            1352
NISysABP         1331
Na               1995
PaCO2            1849
PaO2             1848
Platelets        1990
RespRate         1892
SaO2             1931
SysABP            581
Temp             1097
TroponinI        2119
TroponinT        2110
Urine             284
WBC              1997
Weight              0
pH               1805
BMI                 0
Classifica

In [91]:
classificacao_obesidade_grau_3_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_grau_3_missing_validation = classificacao_obesidade_grau_3_validation.isna().sum()
classificacao_obesidade_grau_3_missing_validation

RecordID            0
level_1             0
Time                0
ALP              1921
ALT              1920
AST              1920
Age                 0
Albumin          1925
BUN              1807
Bilirubin        1915
Cholesterol      1946
Creatinine       1807
DiasABP           569
FiO2             1599
GCS              1360
Gender           1890
Glucose          1820
HCO3             1812
HCT              1762
HR                 35
Height              0
ICUType          1890
K                1798
Lactate          1825
MAP               581
MechVent         1615
Mg               1805
NIDiasABP        1311
NIMAP            1332
NISysABP         1309
Na               1813
PaCO2            1681
PaO2             1681
Platelets        1815
RespRate         1623
SaO2             1858
SysABP            569
Temp             1032
TroponinI        1945
TroponinT        1918
Urine             361
WBC              1829
Weight              0
pH               1666
BMI                 0
Classifica

In [92]:
df_missing_validation = pd.DataFrame(columns=df_columns)
df_missing_transpose_validation = df_missing_validation.T
df_missing_transpose_validation["Female"] = female_gender_missing_rate_validation
df_missing_transpose_validation["Male"] = male_gender_missing_rate_validation
df_missing_transpose_validation["ICUType 1"] = ICUType_1_validation_missing
df_missing_transpose_validation["ICUType 2"] = ICUType_2_validation_missing
df_missing_transpose_validation["ICUType 3"] = ICUType_3_validation_missing
df_missing_transpose_validation["ICUType 4"] = ICUType_4_validation_missing
df_missing_transpose_validation["Age 65+"] = more_than_or_equal_to_65_validation_missing
df_missing_transpose_validation["Age 65-"] = less_than_65_validation_missing
df_missing_transpose_validation["Low Weight"] = classificacao_baixo_peso_missing_validation
df_missing_transpose_validation["Normal Weight"] = classificacao_peso_normal_missing_validation
df_missing_transpose_validation["Overweight"] = classificacao_sobrepeso_missing_validation
df_missing_transpose_validation["Obesity Grade 1"] = classificacao_obesidade_grau_1_missing_validation
df_missing_transpose_validation["Obesity Grade 2"] = classificacao_obesidade_grau_2_missing_validation
df_missing_transpose_validation["Obesity Grade 3"] = classificacao_obesidade_grau_3_missing_validation
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Original Missing Rate per Variable by demographcs - Validation</h2>"))
df_missing_transpose_validation

Unnamed: 0,Female,Male,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,0,0,0,0,0,0,0,0,0,0,0,0,0,0
level_1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Time,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ALP,793,1040,273,401,646,514,45457,39624,560,6077,8451,5898,2096,1921
ALT,792,1040,273,401,645,514,45445,39600,560,6074,8449,5899,2096,1920
AST,793,1040,274,401,645,514,45444,39602,560,6075,8450,5898,2096,1920
Age,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Albumin,796,1046,278,401,648,516,45585,39842,567,6092,8510,5928,2096,1925
BUN,728,958,244,386,608,449,42635,37305,530,5735,7971,5582,1988,1807
Bilirubin,792,1041,272,402,646,514,45431,39583,560,6077,8447,5896,2097,1915


<h3>Test data</h3>

In [93]:
test_X = physionet2012_dataset['test_X']

In [94]:
female_gender_distribution_test = test_X[test_X['Gender'] == 0.0]
female_gender_missing_rate_test = female_gender_distribution_test.isna().sum()
female_gender_missing_rate_test

RecordID          0
level_1           0
Time              0
ALP             981
ALT             978
AST             979
Age               0
Albumin         984
BUN             891
Bilirubin       982
Cholesterol    1021
Creatinine      891
DiasABP         852
FiO2            832
GCS             655
Gender            0
Glucose         904
HCO3            903
HCT             877
HR              500
Height            0
ICUType           0
K               904
Lactate         903
MAP             857
MechVent        837
Mg              930
NIDiasABP       633
NIMAP           639
NISysABP        632
Na              904
PaCO2           812
PaO2            816
Platelets       877
RespRate        865
SaO2            991
SysABP          852
Temp            631
TroponinI      1023
TroponinT       999
Urine           674
WBC             891
Weight            0
pH              811
dtype: int64

In [95]:
male_gender_distribution_test = test_X[test_X['Gender'] == 1.0]
male_gender_missing_rate_test = male_gender_distribution_test.isna().sum()
male_gender_missing_rate_test

RecordID          0
level_1           0
Time              0
ALP            1308
ALT            1302
AST            1302
Age               0
Albumin        1312
BUN            1198
Bilirubin      1308
Cholesterol    1363
Creatinine     1197
DiasABP        1097
FiO2           1130
GCS             879
Gender            0
Glucose        1219
HCO3           1214
HCT            1183
HR              690
Height            0
ICUType           0
K              1217
Lactate        1218
MAP            1101
MechVent       1136
Mg             1258
NIDiasABP       863
NIMAP           865
NISysABP        863
Na             1216
PaCO2          1038
PaO2           1043
Platelets      1173
RespRate       1183
SaO2           1317
SysABP         1097
Temp            847
TroponinI      1366
TroponinT      1325
Urine           957
WBC            1193
Weight            0
pH             1036
dtype: int64

In [96]:
ICUType_1_test = test_X[test_X['ICUType'] == 1.0]
ICUType_1_test_missing = ICUType_1_test.isna().sum()
ICUType_1_test_missing

RecordID         0
level_1          0
Time             0
ALP            340
ALT            338
AST            338
Age              0
Albumin        340
BUN            313
Bilirubin      345
Cholesterol    353
Creatinine     313
DiasABP        273
FiO2           320
GCS            208
Gender           0
Glucose        314
HCO3           313
HCT            312
HR             149
Height           0
ICUType          0
K              312
Lactate        346
MAP            272
MechVent       326
Mg             335
NIDiasABP      209
NIMAP          210
NISysABP       208
Na             313
PaCO2          326
PaO2           326
Platelets      311
RespRate       269
SaO2           336
SysABP         273
Temp           214
TroponinI      358
TroponinT      339
Urine          231
WBC            314
Weight           0
pH             325
dtype: int64

In [97]:
ICUType_2_test = test_X[test_X['ICUType'] == 2.0]
ICUType_2_test_missing = ICUType_2_test.isna().sum()
ICUType_2_test_missing

RecordID         0
level_1          0
Time             0
ALP            502
ALT            501
AST            501
Age              0
Albumin        502
BUN            480
Bilirubin      501
Cholesterol    507
Creatinine     479
DiasABP        419
FiO2           448
GCS            415
Gender           0
Glucose        489
HCO3           483
HCT            457
HR             382
Height           0
ICUType          0
K              489
Lactate        478
MAP            420
MechVent       433
Mg             486
NIDiasABP      457
NIMAP          457
NISysABP       457
Na             489
PaCO2          209
PaO2           213
Platelets      447
RespRate       495
SaO2           487
SysABP         419
Temp           407
TroponinI      509
TroponinT      503
Urine          415
WBC            466
Weight           0
pH             209
dtype: int64

In [98]:
ICUType_3_test = test_X[test_X['ICUType'] == 3.0]
ICUType_3_test_missing = ICUType_3_test.isna().sum()
ICUType_3_test_missing

RecordID         0
level_1          0
Time             0
ALP            763
ALT            760
AST            761
Age              0
Albumin        768
BUN            703
Bilirubin      762
Cholesterol    810
Creatinine     703
DiasABP        754
FiO2           654
GCS            491
Gender           0
Glucose        704
HCO3           706
HCT            702
HR             347
Height           0
ICUType          0
K              705
Lactate        706
MAP            759
MechVent       669
Mg             731
NIDiasABP      383
NIMAP          388
NISysABP       383
Na             704
PaCO2          750
PaO2           752
Platelets      710
RespRate       681
SaO2           803
SysABP         754
Temp           477
TroponinI      808
TroponinT      781
Urine          548
WBC            710
Weight           0
pH             748
dtype: int64

In [99]:
ICUType_4_test = test_X[test_X['ICUType'] == 4.0]
ICUType_4_test_missing = ICUType_4_test.isna().sum()
ICUType_4_test_missing

RecordID         0
level_1          0
Time             0
ALP            686
ALT            683
AST            683
Age              0
Albumin        688
BUN            595
Bilirubin      684
Cholesterol    716
Creatinine     595
DiasABP        505
FiO2           542
GCS            420
Gender           0
Glucose        618
HCO3           617
HCT            591
HR             312
Height           0
ICUType          0
K              617
Lactate        593
MAP            509
MechVent       547
Mg             638
NIDiasABP      447
NIMAP          449
NISysABP       447
Na             616
PaCO2          567
PaO2           570
Platelets      584
RespRate       604
SaO2           684
SysABP         505
Temp           381
TroponinI      716
TroponinT      703
Urine          439
WBC            596
Weight           0
pH             567
dtype: int64

In [100]:
more_than_or_equal_to_65_test = test_X[test_X["Age"] >= 65]
more_than_or_equal_to_65_test_missing = more_than_or_equal_to_65_test.isna().sum()
more_than_or_equal_to_65_test_missing

RecordID           0
level_1            0
Time               0
ALP            59176
ALT            59153
AST            59150
Age                0
Albumin        59334
BUN            55533
Bilirubin      59141
Cholesterol    59949
Creatinine     55510
DiasABP        25043
FiO2           50094
GCS            40147
Gender         58749
Glucose        55846
HCO3           55638
HCT            54054
HR              2351
Height             0
ICUType        58749
K              55359
Lactate        57538
MAP            25177
MechVent       50603
Mg             55573
NIDiasABP      33553
NIMAP          33868
NISysABP       33528
Na             55714
PaCO2          52491
PaO2           52507
Platelets      55446
RespRate       45031
SaO2           57129
SysABP         25041
Temp           35253
TroponinI      59903
TroponinT      59167
Urine          14577
WBC            55842
Weight         26170
pH             52114
dtype: int64

In [101]:
less_than_65_test = test_X[test_X["Age"] < 65]
less_than_65_test_missing = less_than_65_test.isna().sum()
less_than_65_test_missing

RecordID           0
level_1            0
Time               0
ALP            47547
ALT            47516
AST            47519
Age                0
Albumin        47867
BUN            44719
Bilirubin      47508
Cholesterol    48495
Creatinine     44691
DiasABP        20780
FiO2           40459
GCS            31429
Gender         47508
Glucose        44886
HCO3           44806
HCT            43710
HR              2201
Height             0
ICUType        47508
K              44512
Lactate        46217
MAP            20965
MechVent       40560
Mg             44809
NIDiasABP      27182
NIMAP          27586
NISysABP       27158
Na             44741
PaCO2          42613
PaO2           42631
Platelets      44718
RespRate       36633
SaO2           46659
SysABP         20778
Temp           30139
TroponinI      48533
TroponinT      48230
Urine          13501
WBC            45017
Weight         22062
pH             42371
dtype: int64

In [102]:
filtered_test_X = test_X[(test_X['Height'] != -1) & (test_X['Weight'] != -1) & (test_X['Height'].notna()) & (test_X['Weight'].notna())] 

In [103]:
filtered_test_X_metros = filtered_test_X.copy()
filtered_test_X_metros["Height"] = filtered_test_X["Height"]/100
filtered_test_X_metros["Height"]

144       1.803
145       1.803
146       1.803
147       1.803
148       1.803
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 31372, dtype: float64

In [104]:
bmi_data_test = filtered_test_X_metros
bmi_data_test["BMI"] = filtered_test_X_metros["Weight"] / (filtered_test_X_metros["Height"]**2)
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
144,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,,11.5,84.6,,26.024291,Sobrepeso
145,132543,1,1.0,,,,68.0,,,,...,,,,,,,84.6,,26.024291,Sobrepeso
146,132543,2,2.0,,,,68.0,,,,...,,,,,,,84.6,,26.024291,Sobrepeso
147,132543,3,3.0,,,,68.0,,,,...,,36.4,,,,,84.6,,26.024291,Sobrepeso
148,132543,4,4.0,,,,68.0,,,,...,,,,,,,84.6,,26.024291,Sobrepeso


In [105]:
classificacao_baixo_peso_test = bmi_data_test[bmi_data_test["Classificacao"] == "Baixo peso"]
classificacao_baixo_peso_missing_test = classificacao_baixo_peso_test.isna().sum()
classificacao_baixo_peso_missing_test

RecordID           0
level_1            0
Time               0
ALP              528
ALT              528
AST              528
Age                0
Albumin          532
BUN              497
Bilirubin        528
Cholesterol      535
Creatinine       497
DiasABP          201
FiO2             442
GCS              347
Gender           509
Glucose          502
HCO3             498
HCT              490
HR                15
Height             0
ICUType          509
K                500
Lactate          510
MAP              202
MechVent         404
Mg               500
NIDiasABP        307
NIMAP            315
NISysABP         307
Na               499
PaCO2            452
PaO2             451
Platelets        501
RespRate         452
SaO2             515
SysABP           201
Temp             361
TroponinI        538
TroponinT        536
Urine            158
WBC              502
Weight             0
pH               451
BMI                0
Classificacao      0
dtype: int64

In [106]:
classificacao_peso_normal_test = bmi_data_test[bmi_data_test["Classificacao"] == "Peso normal"]
classificacao_peso_normal_missing_test = classificacao_peso_normal_test.isna().sum()
classificacao_peso_normal_missing_test

RecordID            0
level_1             0
Time                0
ALP              6625
ALT              6623
AST              6623
Age                 0
Albumin          6650
BUN              6253
Bilirubin        6620
Cholesterol      6753
Creatinine       6254
DiasABP          2846
FiO2             5681
GCS              4571
Gender           6406
Glucose          6282
HCO3             6273
HCT              6093
HR                197
Height              0
ICUType          6406
K                6236
Lactate          6505
MAP              2874
MechVent         5731
Mg               6268
NIDiasABP        3610
NIMAP            3652
NISysABP         3608
Na               6277
PaCO2            5976
PaO2             5973
Platelets        6255
RespRate         5561
SaO2             6457
SysABP           2846
Temp             4062
TroponinI        6747
TroponinT        6683
Urine            1878
WBC              6302
Weight              0
pH               5911
BMI                 0
Classifica

In [107]:
classificacao_sobrepeso_test = bmi_data_test[bmi_data_test["Classificacao"] == "Sobrepeso"]
classificacao_sobrepeso_missing_test = classificacao_sobrepeso_test.isna().sum()
classificacao_sobrepeso_missing_test

RecordID             0
level_1              0
Time                 0
ALP               9995
ALT               9990
AST               9990
Age                  0
Albumin          10033
BUN               9437
Bilirubin         9985
Cholesterol      10121
Creatinine        9433
DiasABP           3194
FiO2              8527
GCS               6948
Gender            9721
Glucose           9495
HCO3              9474
HCT               9147
HR                 213
Height               0
ICUType           9721
K                 9415
Lactate           9785
MAP               3211
MechVent          8532
Mg                9380
NIDiasABP         6257
NIMAP             6296
NISysABP          6250
Na                9480
PaCO2             8829
PaO2              8831
Platelets         9402
RespRate          8490
SaO2              9463
SysABP            3193
Temp              5080
TroponinI        10117
TroponinT        10044
Urine             2150
WBC               9469
Weight               0
pH         

In [108]:
classificacao_obesidade_grau_1_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 1"]
classificacao_obesidade_grau_1_missing_test = classificacao_obesidade_grau_1_test.isna().sum()
classificacao_obesidade_grau_1_missing_test

RecordID            0
level_1             0
Time                0
ALP              6490
ALT              6489
AST              6488
Age                 0
Albumin          6514
BUN              6157
Bilirubin        6486
Cholesterol      6592
Creatinine       6156
DiasABP          1870
FiO2             5561
GCS              4613
Gender           6373
Glucose          6204
HCO3             6172
HCT              6027
HR                127
Height              0
ICUType          6373
K                6138
Lactate          6312
MAP              1903
MechVent         5446
Mg               6129
NIDiasABP        4312
NIMAP            4344
NISysABP         4310
Na               6184
PaCO2            5614
PaO2             5615
Platelets        6157
RespRate         5736
SaO2             6092
SysABP           1870
Temp             3257
TroponinI        6580
TroponinT        6552
Urine            1231
WBC              6201
Weight              0
pH               5543
BMI                 0
Classifica

In [109]:
classificacao_obesidade_grau_2_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 2"]
classificacao_obesidade_grau_2_missing_test = classificacao_obesidade_grau_2_test.isna().sum()
classificacao_obesidade_grau_2_missing_test

RecordID            0
level_1             0
Time                0
ALP              3659
ALT              3660
AST              3660
Age                 0
Albumin          3677
BUN              3445
Bilirubin        3656
Cholesterol      3704
Creatinine       3444
DiasABP          1025
FiO2             2997
GCS              2526
Gender           3604
Glucose          3468
HCO3             3459
HCT              3366
HR                 62
Height              0
ICUType          3604
K                3434
Lactate          3510
MAP               972
MechVent         3050
Mg               3448
NIDiasABP        2463
NIMAP            2478
NISysABP         2463
Na               3464
PaCO2            3180
PaO2             3180
Platelets        3450
RespRate         3276
SaO2             3460
SysABP           1025
Temp             1736
TroponinI        3712
TroponinT        3664
Urine             752
WBC              3480
Weight              0
pH               3151
BMI                 0
Classifica

In [110]:
classificacao_obesidade_grau_3_test = bmi_data_test[bmi_data_test["Classificacao"] == "Obesidade grau 3"]
classificacao_obesidade_grau_3_missing_test = classificacao_obesidade_grau_3_test.isna().sum()
classificacao_obesidade_grau_3_missing_test

RecordID            0
level_1             0
Time                0
ALP              3105
ALT              3104
AST              3104
Age                 0
Albumin          3126
BUN              2942
Bilirubin        3102
Cholesterol      3163
Creatinine       2942
DiasABP          1003
FiO2             2534
GCS              2296
Gender           3074
Glucose          2968
HCO3             2948
HCT              2884
HR                 42
Height              0
ICUType          3074
K                2937
Lactate          3015
MAP               996
MechVent         2577
Mg               2935
NIDiasABP        2098
NIMAP            2105
NISysABP         2095
Na               2955
PaCO2            2673
PaO2             2673
Platelets        2945
RespRate         2745
SaO2             2910
SysABP           1003
Temp             1542
TroponinI        3161
TroponinT        3139
Urine             603
WBC              2966
Weight              0
pH               2648
BMI                 0
Classifica

In [111]:
df_missing_test = pd.DataFrame(columns=df_columns)
df_missing_transpose_test = df_missing_test.T
df_missing_transpose_test ["Female"] = female_gender_missing_rate_test
df_missing_transpose_test ["Male"] = male_gender_missing_rate_test
df_missing_transpose_test ["ICUType 2"] = ICUType_2_test_missing
df_missing_transpose_test ["ICUType 3"] = ICUType_3_test_missing
df_missing_transpose_test ["ICUType 4"] = ICUType_4_test_missing
df_missing_transpose_test["Age 65+"] = more_than_or_equal_to_65_test_missing
df_missing_transpose_test["Age 65-"] = less_than_65_test_missing
df_missing_transpose_test ["Low Weight"] = classificacao_baixo_peso_missing_test
df_missing_transpose_test ["Normal Weight"] = classificacao_peso_normal_missing_test
df_missing_transpose_test ["Overweight"] = classificacao_sobrepeso_missing_test
df_missing_transpose_test ["Obesity Grade 1"] = classificacao_obesidade_grau_1_missing_test
df_missing_transpose_test ["Obesity Grade 2"] = classificacao_obesidade_grau_2_missing_test
df_missing_transpose_test ["Obesity Grade 3"] = classificacao_obesidade_grau_3_missing_test
display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>original Missing rate per Variable by demographics - Test</h2>"))
df_missing_transpose_test 

Unnamed: 0,Female,Male,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,0,0,0,0,0,0,0,0,0,0,0,0,0
level_1,0,0,0,0,0,0,0,0,0,0,0,0,0
Time,0,0,0,0,0,0,0,0,0,0,0,0,0
ALP,981,1308,502,763,686,59176,47547,528,6625,9995,6490,3659,3105
ALT,978,1302,501,760,683,59153,47516,528,6623,9990,6489,3660,3104
AST,979,1302,501,761,683,59150,47519,528,6623,9990,6488,3660,3104
Age,0,0,0,0,0,0,0,0,0,0,0,0,0
Albumin,984,1312,502,768,688,59334,47867,532,6650,10033,6514,3677,3126
BUN,891,1198,480,703,595,55533,44719,497,6253,9437,6157,3445,2942
Bilirubin,982,1308,501,762,684,59141,47508,528,6620,9985,6486,3656,3102
