In [2]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

<h2>Loading dataset</h2>

In [3]:
from pypots.benchpots.datasets import preprocess_physionet2012
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-10 18:37:34 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-10 18:37:34 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-10 18:37:34 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-10 18:37:35 [INFO]: Loaded successfully!


<h3>Training data</h3>

<h4>Loading training dataset</h4>

In [4]:
train_X = physionet2012_dataset['train_X']

In [5]:
training_female_gender = train_X[train_X["Gender"] == 0.0]
training_female_gender_ids = training_female_gender["RecordID"]
female_gender_measurements_training = train_X[train_X["RecordID"].isin(training_female_gender_ids)]
female_gender_measurements_training = female_gender_measurements_training.count()
female_gender_measurements_training

RecordID       160368
level_1        160368
Time           160368
ALP              2599
ALT              2665
AST              2658
Age            151142
Albumin          2080
BUN             11514
Bilirubin        2687
Cholesterol       258
Creatinine      11572
DiasABP         81790
FiO2            24727
GCS             51670
Gender           3341
Glucose         10979
HCO3            11296
HCT             14696
HR             145135
Height         151142
ICUType          3341
K               12134
Lactate          6304
MAP             81273
MechVent        24139
Mg              11225
NIDiasABP       71817
NIMAP           70652
NISysABP        71866
Na              11417
PaCO2           17100
PaO2            17069
Platelets       11380
RespRate        42371
SaO2             5905
SysABP          81797
Temp            56675
TroponinI         371
TroponinT        1685
Urine          111623
WBC             10605
Weight          87191
pH              17722
dtype: int64

In [6]:
training_male_gender = train_X[train_X['Gender'] == 1.0]
training_male_gender_ids = training_male_gender["RecordID"]
male_gender_measurements_training = train_X[train_X["RecordID"].isin(training_male_gender_ids)]
male_gender_measurements_training  = male_gender_measurements_training.count()
male_gender_measurements_training 

RecordID       207600
level_1        207600
Time           207600
ALP              3414
ALT              3513
AST              3513
Age            195520
Albumin          2574
BUN             15113
Bilirubin        3552
Cholesterol       373
Creatinine      15175
DiasABP        116635
FiO2            33265
GCS             66825
Gender           4325
Glucose         14148
HCO3            14722
HCT             20276
HR             186975
Height         195520
ICUType          4325
K               15777
Lactate          8856
MAP            115831
MechVent        31363
Mg              14846
NIDiasABP       84670
NIMAP           83666
NISysABP        84743
Na              14715
PaCO2           25271
PaO2            25228
Platelets       15691
RespRate        46307
SaO2             8624
SysABP         116643
Temp            79926
TroponinI         412
TroponinT        2301
Urine          143363
WBC             14194
Weight         107901
pH              26587
dtype: int64

In [7]:
training_undefined_gender = train_X[train_X['Gender'] == -1.0]
undefined_gender_training_ids = training_undefined_gender['RecordID']
undefined_gender_measurements_training = train_X[train_X["RecordID"].isin(undefined_gender_training_ids)]
undefined_gender_measurements_training = undefined_gender_measurements_training.count()
undefined_gender_measurements_training

RecordID       240
level_1        240
Time           240
ALP              0
ALT              1
AST              1
Age            191
Albumin          0
BUN             10
Bilirubin        0
Cholesterol      0
Creatinine      10
DiasABP         70
FiO2             7
GCS             43
Gender           5
Glucose         11
HCO3            10
HCT             11
HR             185
Height         191
ICUType          5
K               12
Lactate          0
MAP             68
MechVent         9
Mg               9
NIDiasABP      113
NIMAP          113
NISysABP       113
Na              10
PaCO2            7
PaO2             7
Platelets       10
RespRate        93
SaO2             0
SysABP          70
Temp            72
TroponinI        1
TroponinT        4
Urine          140
WBC              9
Weight         134
pH              11
dtype: int64

In [8]:
training_ICUType_1 = train_X[train_X['ICUType'] == 1.0]
training_ICUType_1 = training_ICUType_1[training_ICUType_1["Time"] == 0.0]
training_ICUType_1_ids = training_ICUType_1["RecordID"]
ICUType_1_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_1_ids)]
ICUType_1_measurements_training = ICUType_1_measurements_training.count() 
ICUType_1_measurements_training

RecordID       52512
level_1        52512
Time           52512
ALP              772
ALT              817
AST              816
Age            48169
Albumin          608
BUN             3788
Bilirubin        804
Cholesterol      338
Creatinine      3861
DiasABP        21615
FiO2            5722
GCS            13637
Gender          1094
Glucose         3617
HCO3            3654
HCT             4638
HR             45845
Height         48169
ICUType         1094
K               4394
Lactate         1214
MAP            21539
MechVent        5253
Mg              3770
NIDiasABP      25212
NIMAP          25097
NISysABP       25227
Na              3645
PaCO2           4397
PaO2            4398
Platelets       3817
RespRate       18819
SaO2            2660
SysABP         21616
Temp           15779
TroponinI        173
TroponinT       1113
Urine          29904
WBC             3431
Weight         23779
pH              4492
dtype: int64

In [9]:
training_ICUType_2 = train_X[train_X['ICUType'] == 2.0]
training_ICUType_2 = training_ICUType_2[training_ICUType_2["Time"] == 0.0]
training_ICUType2_ids = training_ICUType_2["RecordID"]
ICUType_2_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType2_ids)]
ICUType_2_measurements_training = ICUType_2_measurements_training.count() 
ICUType_2_measurements_training

RecordID       77040
level_1        77040
Time           77040
ALP              514
ALT              534
AST              531
Age            74532
Albumin          344
BUN             4980
Bilirubin        523
Cholesterol       24
Creatinine      4991
DiasABP        60595
FiO2           12836
GCS            20388
Gender          1605
Glucose         3617
HCO3            4590
HCT             8402
HR             70782
Height         74532
ICUType         1605
K               4089
Lactate         2721
MAP            60593
MechVent       12447
Mg              4830
NIDiasABP      17133
NIMAP          16961
NISysABP       17162
Na              3838
PaCO2          15430
PaO2           15384
Platelets       6199
RespRate        3456
SaO2            8145
SysABP         60597
Temp           44946
TroponinI        100
TroponinT        202
Urine          64753
WBC             5172
Weight         40276
pH             16960
dtype: int64

In [10]:
training_ICUType_3 = train_X[train_X['ICUType'] == 3.0]
training_ICUType_3 = training_ICUType_3[training_ICUType_3["Time"] == 0.0]
training_ICUType_3_ids = training_ICUType_3["RecordID"]
ICUType_3_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_3_ids)]
ICUType_3_measurements_training = ICUType_3_measurements_training.count() 
ICUType_3_measurements_training

RecordID       132096
level_1        132096
Time           132096
ALP              2854
ALT              2924
AST              2921
Age            122411
Albumin          2200
BUN              9892
Bilirubin        3039
Cholesterol       126
Creatinine       9924
DiasABP         45036
FiO2            19324
GCS             35118
Gender           2752
Glucose          9866
HCO3             9897
HCT             11575
HR             117840
Height         122411
ICUType          2752
K               10689
Lactate          5073
MAP             44354
MechVent        17910
Mg               9315
NIDiasABP       76614
NIMAP           75234
NISysABP        76648
Na              10132
PaCO2            9898
PaO2             9908
Platelets        8961
RespRate        41597
SaO2             1599
SysABP          45043
Temp            37483
TroponinI         349
TroponinT        1810
Urine           79481
WBC              8624
Weight          86348
pH              10034
dtype: int64

In [11]:
training_ICUType_4 = train_X[train_X['ICUType'] == 4.0]
training_ICUType_4 = training_ICUType_4[training_ICUType_4["Time"] == 0.0]
training_ICUType_4_ids = training_ICUType_4["RecordID"]
ICUType_4_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_4_ids)]
ICUType_4_measurements_training = ICUType_4_measurements_training.count() 
ICUType_4_measurements_training 

RecordID       106560
level_1        106560
Time           106560
ALP              1873
ALT              1904
AST              1904
Age            101741
Albumin          1502
BUN              7977
Bilirubin        1873
Cholesterol       143
Creatinine       7981
DiasABP         71249
FiO2            20117
GCS             49395
Gender           2220
Glucose          8038
HCO3             7887
HCT             10368
HR              97828
Height         101741
ICUType          2220
K                8751
Lactate          6152
MAP             70686
MechVent        19901
Mg               8165
NIDiasABP       37641
NIMAP           37139
NISysABP        37685
Na               8527
PaCO2           12653
PaO2            12614
Platelets        8104
RespRate        24899
SaO2             2125
SysABP          71254
Temp            38465
TroponinI         162
TroponinT         865
Urine           80988
WBC              7581
Weight          44823
pH              12834
dtype: int64

In [12]:
age_65_and_above_training = train_X[train_X["Age"] >= 65]
age_65_and_above_training = age_65_and_above_training[age_65_and_above_training ["Time"] == 0.0]
age_65_and_above_training_ids = age_65_and_above_training ["RecordID"]
age_65_and_above_measurements_training = train_X[train_X["RecordID"].isin(age_65_and_above_training_ids)]
age_65_and_above_measurements_training = age_65_and_above_measurements_training.count()
age_65_and_above_measurements_training

RecordID       199872
level_1        199872
Time           199872
ALP              2686
ALT              2752
AST              2750
Age            188723
Albumin          2241
BUN             14122
Bilirubin        2784
Cholesterol       367
Creatinine      14193
DiasABP        109330
FiO2            31448
GCS             62207
Gender           4164
Glucose         13139
HCO3            13774
HCT             18854
HR             181126
Height         188723
ICUType          4164
K               14693
Lactate          7860
MAP            108690
MechVent        29376
Mg              13884
NIDiasABP       84765
NIMAP           83807
NISysABP        84826
Na              13657
PaCO2           23330
PaO2            23300
Platelets       14456
RespRate        48750
SaO2             8922
SysABP         109337
Temp            77163
TroponinI         529
TroponinT        2735
Urine          141993
WBC             13171
Weight         107826
pH              24458
dtype: int64

In [None]:
# Motivação para o teste: A quantidade de valores no grupo de idade que estava na tabela, era muito maior do que a quantidade de 
# valores para o grupo de gênero, o que não fazia sentido, pois, já que não temos dados faltantes dessas variáveis em ambos os grupos
# se gênero tem n ocorrências, idade deve ter n ocorrências também, e vice-versa. 

# Descoberta: Através do teste, vimos que tinha casos em que o valor do campo idade, se repete durante a série temporal do paciente, e visto isso, nós ajustamos o código para pegar apenas a primeira ocorrência de valor para cada recordID.
# Também descobrimos que, como estamos filtrando apenas os gêneros female e male e há um campo para gênero indefinido, o grupo de Age estava pegando as idades desse campo de gênero indefinido também, o que estava fazendo com que 
# a soma total de ocorrências para o grupo de Age estivesse maior do que para o grupo de Gender.

# teste2 = train_X[train_X['Age'] >= 65]
# teste2["RecordID"].value_counts()   

In [13]:
age_under_65_training  = train_X[train_X["Age"] < 65]
age_under_65_training  = age_under_65_training[age_under_65_training["Time"] == 0.0]
age_under_65_training_ids = age_under_65_training["RecordID"]
age_under_65_measurements_training = train_X[train_X["RecordID"].isin(age_under_65_training_ids)]
age_under_65_measurements_training  = age_under_65_measurements_training .count()
age_under_65_measurements_training 

RecordID       168336
level_1        168336
Time           168336
ALP              3327
ALT              3427
AST              3422
Age            158130
Albumin          2413
BUN             12515
Bilirubin        3455
Cholesterol       264
Creatinine      12564
DiasABP         89165
FiO2            26551
GCS             56331
Gender           3507
Glucose         11999
HCO3            12254
HCT             16129
HR             151169
Height         158130
ICUType          3507
K               13230
Lactate          7300
MAP             88482
MechVent        26135
Mg              12196
NIDiasABP       71835
NIMAP           70624
NISysABP        71896
Na              12485
PaCO2           19048
PaO2            19004
Platelets       12625
RespRate        40021
SaO2             5607
SysABP          89173
Temp            59510
TroponinI         255
TroponinT        1255
Urine          113133
WBC             11637
Weight          87400
pH              19862
dtype: int64

In [14]:
filtered_train_X = train_X[(train_X['Height'] != -1) & (train_X['Weight'] != -1) & (train_X['Height'].notna()) & (train_X['Weight'].notna())] 

In [15]:
def classify_BMI(BMI):
    if BMI <= 18.5:
        return "Baixo peso"
    elif BMI >= 18.6 and BMI <= 24.9:
        return "Peso normal"
    elif BMI >= 25 and BMI <= 29.9:
        return "Sobrepeso"
    elif BMI >= 30 and BMI <= 34.9:
        return "Obesidade grau 1"
    elif BMI >= 35 and BMI <= 39.9:
        return "Obesidade grau 2"
    elif BMI >= 40:
        return "Obesidade grau 3"

In [16]:
filtered_train_X_meters = filtered_train_X.copy()
filtered_train_X_meters['Height'] = filtered_train_X['Height'] / 100 # Converting Height from cm to meters
filtered_train_X_meters['Height']

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 101956, dtype: float64

In [17]:
bmi_data_train = filtered_train_X_meters
bmi_data_train["BMI"] = round(filtered_train_X_meters["Weight"] / (filtered_train_X_meters["Height"] ** 2), 1)
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.7,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.2,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.2,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.2,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.2,Sobrepeso


In [18]:
bmi_data_train = bmi_data_train.groupby("RecordID").first().reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132540,0,0.0,,,,76.0,,21.0,,...,122.0,37.5,,,50.0,13.3,76.0,7.45,24.7,Peso normal
1,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
2,132548,0,0.0,,,,68.0,,32.0,,...,205.0,36.3,0.7,,120.0,6.2,87.0,,32.9,Obesidade grau 1
3,132555,0,0.0,,,,74.0,,19.0,,...,98.0,34.8,,,35.0,9.0,66.1,7.39,21.5,Peso normal
4,132567,0,0.0,,,,71.0,,9.0,,...,111.5,35.6,,,15.0,9.0,56.0,7.44,22.6,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3996,163008,0,0.0,,,,59.0,,24.0,,...,97.0,37.6,,,45.0,6.9,98.5,7.38,34.0,Obesidade grau 1
3997,163016,0,0.0,,27.0,120.0,65.0,,29.0,0.4,...,101.0,38.1,,,75.0,8.0,63.6,7.37,24.8,Peso normal
3998,163021,0,0.0,,,,72.0,,9.0,,...,,,,,,8.6,62.0,,20.8,Peso normal
3999,163029,0,0.0,,,,61.0,,,,...,,,,,,,85.0,,28.5,Sobrepeso


In [None]:
classification_low_weight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Baixo peso']
classification_low_weight_training_ids = classification_low_weight_training["RecordID"]
classification_measurements_l_w_t = train_X[train_X["RecordID"].isin(classification_low_weight_training_ids)]
classification_measurements_l_w_t = classification_measurements_l_w_t.count() 
classification_measurements_l_w_t 

RecordID       6144
level_1        6144
Time           6144
ALP              91
ALT              93
AST              93
Age            5921
Albumin          74
BUN             448
Bilirubin        94
Cholesterol      11
Creatinine      449
DiasABP        3798
FiO2           1040
GCS            1968
Gender          128
Glucose         427
HCO3            446
HCT             601
HR             5701
Height         5921
ICUType         128
K               473
Lactate         280
MAP            3819
MechVent       1024
Mg              445
NIDiasABP      2274
NIMAP          2239
NISysABP       2275
Na              434
PaCO2           736
PaO2            745
Platelets       467
RespRate       1299
SaO2            309
SysABP         3799
Temp           2508
TroponinI        17
TroponinT        62
Urine          4281
WBC             420
Weight         3361
pH              780
dtype: int64

In [20]:
classification_normal_weight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Peso normal']
classification_normal_weight_training_ids = classification_normal_weight_training["RecordID"]
classification_measurements_n_w_t = train_X[train_X["RecordID"].isin(classification_normal_weight_training_ids)]
classification_measurements_n_w_t = classification_measurements_n_w_t.count() 
classification_measurements_n_w_t 

RecordID       56160
level_1        56160
Time           56160
ALP              900
ALT              925
AST              924
Age            53976
Albumin          673
BUN             4166
Bilirubin        919
Cholesterol       99
Creatinine      4178
DiasABP        36549
FiO2            9655
GCS            18095
Gender          1170
Glucose         3785
HCO3            4022
HCT             5871
HR             51642
Height         53976
ICUType         1170
K               4260
Lactate         2623
MAP            36324
MechVent        9533
Mg              4129
NIDiasABP      19711
NIMAP          19556
NISysABP       19725
Na              3935
PaCO2           8187
PaO2            8162
Platelets       4594
RespRate        8563
SaO2            3297
SysABP         36549
Temp           25233
TroponinI        149
TroponinT        569
Urine          41479
WBC             4063
Weight         28755
pH              8691
dtype: int64

In [21]:
classification_overweight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Sobrepeso']
classification_overweight_training_ids = classification_overweight_training['RecordID']
classification_measurements_o_w_t = train_X[train_X["RecordID"].isin(classification_overweight_training_ids)]
classification_measurements_o_w_t = classification_measurements_o_w_t.count() 
classification_measurements_o_w_t

RecordID       66624
level_1        66624
Time           66624
ALP             1087
ALT             1116
AST             1114
Age            63864
Albumin          784
BUN             5000
Bilirubin       1112
Cholesterol      121
Creatinine      5026
DiasABP        45372
FiO2           11437
GCS            20562
Gender          1388
Glucose         4427
HCO3            4803
HCT             7067
HR             60917
Height         63864
ICUType         1388
K               4930
Lactate         3074
MAP            45248
MechVent       11100
Mg              4973
NIDiasABP      21425
NIMAP          21204
NISysABP       21449
Na              4572
PaCO2          10568
PaO2           10546
Platelets       5555
RespRate       10083
SaO2            4591
SysABP         45376
Temp           32053
TroponinI        137
TroponinT        574
Urine          50097
WBC             4889
Weight         35259
pH             11256
dtype: int64

In [22]:
classification_obesity_grade1_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_training_ids = classification_obesity_grade1_training["RecordID"]
classification_measurements_ob1_t = train_X[train_X["RecordID"].isin(classification_obesity_grade1_training_ids)]
classification_measurements_ob1_t  = classification_measurements_ob1_t .count() 
classification_measurements_ob1_t 

RecordID       35280
level_1        35280
Time           35280
ALP              547
ALT              560
AST              558
Age            33888
Albumin          410
BUN             2576
Bilirubin        574
Cholesterol       57
Creatinine      2585
DiasABP        23907
FiO2            6225
GCS            10912
Gender           735
Glucose         2301
HCO3            2472
HCT             3603
HR             32419
Height         33888
ICUType          735
K               2601
Lactate         1610
MAP            23772
MechVent        6081
Mg              2510
NIDiasABP      11045
NIMAP          10928
NISysABP       11057
Na              2392
PaCO2           5602
PaO2            5599
Platelets       2775
RespRate        5390
SaO2            2346
SysABP         23907
Temp           16773
TroponinI        106
TroponinT        346
Urine          26962
WBC             2435
Weight         18632
pH              5918
dtype: int64

In [23]:
classification_obesity_grade2_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_training_ids = classification_obesity_grade2_training["RecordID"]
classification_measurements_ob2_t = train_X[train_X["RecordID"].isin(classification_obesity_grade2_training_ids)]
classification_measurements_ob2_t = classification_measurements_ob2_t.count() 
classification_measurements_ob2_t 

RecordID       14400
level_1        14400
Time           14400
ALP              251
ALT              251
AST              250
Age            13744
Albumin          170
BUN             1110
Bilirubin        259
Cholesterol       30
Creatinine      1121
DiasABP         9765
FiO2            2567
GCS             4203
Gender           300
Glucose         1015
HCO3            1073
HCT             1512
HR             13157
Height         13744
ICUType          300
K               1136
Lactate          755
MAP             9753
MechVent        2509
Mg              1078
NIDiasABP       4304
NIMAP           4222
NISysABP        4306
Na              1039
PaCO2           2439
PaO2            2434
Platelets       1192
RespRate        2442
SaO2            1048
SysABP          9766
Temp            6970
TroponinI         32
TroponinT        166
Urine          10532
WBC             1061
Weight          8056
pH              2537
dtype: int64

In [24]:
classification_obesity_grade3_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_training_ids = classification_obesity_grade3_training["RecordID"]
classification_measurements_ob3_t = train_X[train_X["RecordID"].isin(classification_obesity_grade3_training_ids)]
classification_measurements_ob3_t = classification_measurements_ob3_t.count() 
classification_measurements_ob3_t 

RecordID       13440
level_1        13440
Time           13440
ALP              220
ALT              223
AST              222
Age            12905
Albumin          153
BUN             1011
Bilirubin        228
Cholesterol       28
Creatinine      1008
DiasABP         8729
FiO2            2669
GCS             3825
Gender           280
Glucose          901
HCO3             979
HCT             1326
HR             12380
Height         12905
ICUType          280
K               1034
Lactate          711
MAP             8714
MechVent        2630
Mg              1008
NIDiasABP       4343
NIMAP           4266
NISysABP        4350
Na               930
PaCO2           2250
PaO2            2256
Platelets       1018
RespRate        1820
SaO2             908
SysABP          8729
Temp            5822
TroponinI         31
TroponinT        152
Urine          10006
WBC              936
Weight          7893
pH              2337
dtype: int64

In [25]:
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [26]:
df_test = pd.DataFrame(columns=df_columns)
df_test_transpose = df_test.T
df_test_transpose["Female"] = female_gender_measurements_training
df_test_transpose["Male"] = male_gender_measurements_training
df_test_transpose["Undefined Gender"] = undefined_gender_measurements_training
df_test_transpose["ICUType 1"] = ICUType_1_measurements_training 
df_test_transpose["ICUType 2"] = ICUType_2_measurements_training 
df_test_transpose["ICUType 3"] = ICUType_3_measurements_training 
df_test_transpose["ICUType 4"] = ICUType_4_measurements_training 
df_test_transpose["Age 65+"] = age_65_and_above_measurements_training
df_test_transpose["Age 65-"] = age_under_65_measurements_training 
df_test_transpose['Low Weight'] = classification_measurements_l_w_t 
df_test_transpose['Normal Weight'] = classification_measurements_n_w_t 
df_test_transpose['Overweight'] = classification_measurements_o_w_t 
df_test_transpose['Obesity Grade 1'] = classification_measurements_ob1_t 
df_test_transpose['Obesity Grade 2'] = classification_measurements_ob2_t 
df_test_transpose['Obesity Grade 3'] = classification_measurements_ob3_t 

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_test_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,160368,207600,240,52512,77040,132096,106560,199872,168336,6144,56160,66624,35280,14400,13440
level_1,160368,207600,240,52512,77040,132096,106560,199872,168336,6144,56160,66624,35280,14400,13440
Time,160368,207600,240,52512,77040,132096,106560,199872,168336,6144,56160,66624,35280,14400,13440
ALP,2599,3414,0,772,514,2854,1873,2686,3327,91,900,1087,547,251,220
ALT,2665,3513,1,817,534,2924,1904,2752,3427,93,925,1116,560,251,223
AST,2658,3513,1,816,531,2921,1904,2750,3422,93,924,1114,558,250,222
Age,151142,195520,191,48169,74532,122411,101741,188723,158130,5921,53976,63864,33888,13744,12905
Albumin,2080,2574,0,608,344,2200,1502,2241,2413,74,673,784,410,170,153
BUN,11514,15113,10,3788,4980,9892,7977,14122,12515,448,4166,5000,2576,1110,1011
Bilirubin,2687,3552,0,804,523,3039,1873,2784,3455,94,919,1112,574,259,228


<h3>Validation data</h3>

<h4>Loading validation dataset</h4>

In [27]:
validation_X = physionet2012_dataset['val_X']

In [28]:
validation_female_gender = validation_X[validation_X['Gender'] == 0.0]
validation_female_gender_ids = validation_female_gender["RecordID"]
female_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_female_gender_ids)]
female_gender_measurements_validation = female_gender_measurements_validation.count()
female_gender_measurements_validation

RecordID       41664
level_1        41664
Time           41664
ALP              670
ALT              695
AST              700
Age            39207
Albumin          490
BUN             3018
Bilirubin        719
Cholesterol       68
Creatinine      3029
DiasABP        22235
FiO2            6397
GCS            13422
Gender           868
Glucose         2885
HCO3            2983
HCT             3867
HR             37662
Height         39207
ICUType          868
K               3187
Lactate         1681
MAP            22168
MechVent        6238
Mg              2993
NIDiasABP      17828
NIMAP          17587
NISysABP       17847
Na              2980
PaCO2           4525
PaO2            4511
Platelets       2993
RespRate       11285
SaO2            1631
SysABP         22235
Temp           15304
TroponinI         87
TroponinT        411
Urine          29089
WBC             2755
Weight         22460
pH              4692
dtype: int64

In [None]:
validation_male_gender = validation_X[validation_X['Gender'] == 1.0]
validation_male_gender_ids = validation_male_gender["RecordID"]
male_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_male_gender_ids)]
male_gender_measurements_validation = male_gender_measurements_validation.count()
male_gender_measurements_validation

RecordID       50208
level_1        50208
Time           50208
ALP              830
ALT              854
AST              856
Age            47168
Albumin          650
BUN             3626
Bilirubin        843
Cholesterol       84
Creatinine      3656
DiasABP        27486
FiO2            7777
GCS            15664
Gender          1046
Glucose         3362
HCO3            3544
HCT             4874
HR             45046
Height         47168
ICUType         1046
K               3759
Lactate         2033
MAP            27362
MechVent        7360
Mg              3538
NIDiasABP      20538
NIMAP          20266
NISysABP       20561
Na              3519
PaCO2           6074
PaO2            6060
Platelets       3772
RespRate       11335
SaO2            2196
SysABP         27489
Temp           19260
TroponinI         95
TroponinT        528
Urine          33934
WBC             3352
Weight         26203
pH              6374
dtype: int64

In [30]:
validation_undefined_gender = validation_X[validation_X['Gender'] == -1.0]
validation_undefined_gender_ids = validation_undefined_gender["RecordID"]
undefined_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_undefined_gender_ids)]
undefined_gender_measurements_validation = undefined_gender_measurements_validation.count()
undefined_gender_measurements_validation

RecordID       192
level_1        192
Time           192
ALP              3
ALT              3
AST              3
Age            187
Albumin          3
BUN             23
Bilirubin        3
Cholesterol      0
Creatinine      23
DiasABP        176
FiO2            30
GCS             67
Gender           4
Glucose         22
HCO3            23
HCT             19
HR             182
Height         187
ICUType          4
K               22
Lactate         27
MAP            173
MechVent        23
Mg              22
NIDiasABP       51
NIMAP           51
NISysABP        51
Na              22
PaCO2           40
PaO2            40
Platelets       20
RespRate        42
SaO2             3
SysABP         176
Temp            69
TroponinI        0
TroponinT        5
Urine          107
WBC             19
Weight         118
pH              41
dtype: int64

In [31]:
validation_ICUType_1 = validation_X[validation_X['ICUType'] == 1.0]
validation_ICUType_1 = validation_ICUType_1[validation_ICUType_1['Time'] == 0.0]
validation_ICUType_1_ids = validation_ICUType_1['RecordID']
ICUType_1_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_1_ids)]
ICUType_1_measurements_validation = ICUType_1_measurements_validation.count()
ICUType_1_measurements_validation

RecordID       14112
level_1        14112
Time           14112
ALP              200
ALT              213
AST              211
Age            12928
Albumin          137
BUN              966
Bilirubin        210
Cholesterol       90
Creatinine       980
DiasABP         5689
FiO2            1397
GCS             3658
Gender           294
Glucose          934
HCO3             936
HCT             1161
HR             12307
Height         12928
ICUType          294
K               1136
Lactate          280
MAP             5658
MechVent        1316
Mg               970
NIDiasABP       6986
NIMAP           6969
NISysABP        6994
Na               930
PaCO2           1067
PaO2            1066
Platelets        944
RespRate        5399
SaO2             626
SysABP          5689
Temp            4182
TroponinI         43
TroponinT        316
Urine           7840
WBC              846
Weight          6098
pH              1086
dtype: int64

In [34]:
validation_ICUType_2 = validation_X[validation_X['ICUType'] == 2.0]
validation_ICUType_2 = validation_ICUType_2[validation_ICUType_2['Time'] == 0.0]
validation_ICUType_2_ids = validation_ICUType_2['RecordID']
ICUType_2_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_2_ids)]
ICUType_2_measurements_validation = ICUType_2_measurements_validation.count()
ICUType_2_measurements_validation

RecordID       20016
level_1        20016
Time           20016
ALP              168
ALT              172
AST              173
Age            19238
Albumin          103
BUN             1336
Bilirubin        167
Cholesterol        5
Creatinine      1343
DiasABP        15510
FiO2            3583
GCS             5312
Gender           417
Glucose          938
HCO3            1250
HCT             2234
HR             18213
Height         19238
ICUType          417
K               1073
Lactate          735
MAP            15626
MechVent        3453
Mg              1314
NIDiasABP       4007
NIMAP           3987
NISysABP        4017
Na              1055
PaCO2           4259
PaO2            4245
Platelets       1686
RespRate         731
SaO2            2314
SysABP         15510
Temp           11699
TroponinI         19
TroponinT         39
Urine          16752
WBC             1384
Weight         10285
pH              4622
dtype: int64

In [38]:
validation_ICUType_3 = validation_X[validation_X['ICUType'] == 3.0]
validation_ICUType_3 = validation_ICUType_3[validation_ICUType_3['Time'] == 0.0]
validation_ICUType_3_ids = validation_ICUType_3['RecordID']
ICUType_3_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_3_ids)]
ICUType_3_measurements_validation = ICUType_3_measurements_validation.count()
ICUType_3_measurements_validation

RecordID       33216
level_1        33216
Time           33216
ALP              738
ALT              765
AST              768
Age            30865
Albumin          583
BUN             2513
Bilirubin        791
Cholesterol       23
Creatinine      2528
DiasABP        12434
FiO2            4959
GCS             8841
Gender           692
Glucose         2511
HCO3            2530
HCT             3042
HR             29695
Height         30865
ICUType          692
K               2737
Lactate         1308
MAP            12314
MechVent        4670
Mg              2388
NIDiasABP      18389
NIMAP          18048
NISysABP       18402
Na              2585
PaCO2           2556
PaO2            2548
Platelets       2325
RespRate        9310
SaO2             404
SysABP         12435
Temp            9982
TroponinI         79
TroponinT        422
Urine          20046
WBC             2175
Weight         21267
pH              2592
dtype: int64

In [36]:
validation_ICUType_4 = validation_X[validation_X['ICUType'] == 4.0]
validation_ICUType_4 = validation_ICUType_4[validation_ICUType_4['Time'] == 0.0]
validation_ICUType_4_ids = validation_ICUType_4['RecordID']
ICUType_4_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_4_ids)]
ICUType_4_measurements_validation = ICUType_4_measurements_validation.count()
ICUType_4_measurements_validation

RecordID       24720
level_1        24720
Time           24720
ALP              397
ALT              402
AST              407
Age            23531
Albumin          320
BUN             1852
Bilirubin        397
Cholesterol       34
Creatinine      1857
DiasABP        16264
FiO2            4265
GCS            11342
Gender           515
Glucose         1886
HCO3            1834
HCT             2323
HR             22675
Height         23531
ICUType          515
K               2022
Lactate         1418
MAP            16105
MechVent        4182
Mg              1881
NIDiasABP       9035
NIMAP           8900
NISysABP        9046
Na              1951
PaCO2           2757
PaO2            2752
Platelets       1830
RespRate        7222
SaO2             486
SysABP         16266
Temp            8770
TroponinI         41
TroponinT        167
Urine          18492
WBC             1721
Weight         11131
pH              2807
dtype: int64

In [39]:
age_65_and_above_validation = validation_X[validation_X['Age'] >= 65]
age_65_and_above_validation = age_65_and_above_validation[age_65_and_above_validation['Time'] == 0.0]
age_65_and_above_validation_ids = age_65_and_above_validation['RecordID']
age_65_and_above_measurements_validation = validation_X[validation_X['RecordID'].isin(age_65_and_above_validation_ids)]
age_65_and_above_measurements_validation = age_65_and_above_measurements_validation.count()
age_65_and_above_measurements_validation

RecordID       50928
level_1        50928
Time           50928
ALP              648
ALT              656
AST              663
Age            47957
Albumin          544
BUN             3530
Bilirubin        679
Cholesterol       98
Creatinine      3554
DiasABP        27553
FiO2            7633
GCS            15786
Gender          1061
Glucose         3256
HCO3            3449
HCT             4801
HR             45954
Height         47957
ICUType         1061
K               3682
Lactate         1897
MAP            27485
MechVent        7292
Mg              3489
NIDiasABP      21498
NIMAP          21239
NISysABP       21523
Na              3402
PaCO2           5832
PaO2            5815
Platelets       3609
RespRate       13249
SaO2            2406
SysABP         27554
Temp           20041
TroponinI        125
TroponinT        634
Urine          35675
WBC             3274
Weight         26710
pH              6090
dtype: int64

In [41]:
age_under_65_validation = validation_X[validation_X['Age'] < 65]
age_under_65_validation = age_under_65_validation[age_under_65_validation['Time'] == 0.0]
age_under_65_validation_ids = age_under_65_validation["RecordID"]
age_under_65__measurements_validation = validation_X[validation_X["RecordID"].isin(age_under_65_validation_ids)]
age_under_65__measurements_validation = age_under_65__measurements_validation.count()
age_under_65__measurements_validation

RecordID       41136
level_1        41136
Time           41136
ALP              855
ALT              896
AST              896
Age            38605
Albumin          599
BUN             3137
Bilirubin        886
Cholesterol       54
Creatinine      3154
DiasABP        22344
FiO2            6571
GCS            13367
Gender           857
Glucose         3013
HCO3            3101
HCT             3959
HR             36936
Height         38605
ICUType          857
K               3286
Lactate         1844
MAP            22218
MechVent        6329
Mg              3064
NIDiasABP      16919
NIMAP          16665
NISysABP       16936
Na              3119
PaCO2           4807
PaO2            4796
Platelets       3176
RespRate        9413
SaO2            1424
SysABP         22346
Temp           14592
TroponinI         57
TroponinT        310
Urine          27455
WBC             2852
Weight         22071
pH              5017
dtype: int64

In [42]:
filtered_validation_X = validation_X[(validation_X['Height'] != -1) & (validation_X['Weight'] != -1) & (validation_X['Height'].notna()) & (validation_X['Weight'].notna())] 

In [43]:
filtered_validation_X_meters = filtered_validation_X.copy()
filtered_validation_X_meters['Height'] = filtered_validation_X['Height'] / 100 # Converting Height from cm to meters
filtered_validation_X_meters['Height']

144       1.803
145       1.803
146       1.803
147       1.803
148       1.803
          ...  
574890    1.829
574891    1.829
574892    1.829
574893    1.829
574894    1.829
Name: Height, Length: 25526, dtype: float64

In [44]:
bmi_data_validation = filtered_validation_X_meters
bmi_data_validation["BMI"] = filtered_validation_X_meters["Weight"] / (filtered_validation_X_meters["Height"] ** 2)
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
144,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,,11.5,84.6,,26.024291,Sobrepeso
145,132543,1,1.0,,,,68.0,,,,...,,,,,,,84.6,,26.024291,Sobrepeso
146,132543,2,2.0,,,,68.0,,,,...,,,,,,,84.6,,26.024291,Sobrepeso
147,132543,3,3.0,,,,68.0,,,,...,,36.4,,,,,84.6,,26.024291,Sobrepeso
148,132543,4,4.0,,,,68.0,,,,...,,,,,,,84.6,,26.024291,Sobrepeso


In [46]:
classification_low_weight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Baixo peso']
classification_low_weight_validation_ids = classification_low_weight_validation["RecordID"]
classification_measurements_l_w_v = validation_X[validation_X["RecordID"].isin(classification_low_weight_validation_ids)]
classification_measurements_l_w_v = classification_measurements_l_w_v.count() 
classification_measurements_l_w_v

RecordID       1536
level_1        1536
Time           1536
ALP              20
ALT              20
AST              20
Age            1461
Albumin          17
BUN             117
Bilirubin        19
Cholesterol       1
Creatinine      118
DiasABP        1120
FiO2            242
GCS             513
Gender           32
Glucose         113
HCO3            116
HCT             152
HR             1398
Height         1461
ICUType          32
K               129
Lactate          74
MAP            1135
MechVent        253
Mg              120
NIDiasABP       487
NIMAP           466
NISysABP        491
Na              121
PaCO2           242
PaO2            242
Platelets       118
RespRate        217
SaO2            104
SysABP         1120
Temp            687
TroponinI        15
TroponinT         6
Urine          1176
WBC             108
Weight          654
pH              252
dtype: int64

In [54]:
classification_normal_weight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Peso normal']
classification_normal_weight_validation_ids = classification_normal_weight_validation["RecordID"]
classification_measurements_n_w_v = validation_X[validation_X["RecordID"].isin(classification_normal_weight_validation_ids)]
classification_measurements_n_w_v = classification_measurements_n_w_v.count() 
classification_measurements_n_w_v

RecordID       15696
level_1        15696
Time           15696
ALP              234
ALT              239
AST              240
Age            14971
Albumin          175
BUN             1116
Bilirubin        245
Cholesterol       29
Creatinine      1121
DiasABP        10567
FiO2            2461
GCS             4964
Gender           327
Glucose         1022
HCO3            1084
HCT             1545
HR             14310
Height         14971
ICUType          327
K               1160
Lactate          719
MAP            10522
MechVent        2564
Mg              1116
NIDiasABP       4942
NIMAP           4892
NISysABP        4951
Na              1050
PaCO2           2296
PaO2            2289
Platelets       1227
RespRate        2943
SaO2             961
SysABP         10567
Temp            7173
TroponinI         35
TroponinT        125
Urine          11378
WBC             1066
Weight          8080
pH              2434
dtype: int64

In [53]:
classification_overweight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Sobrepeso']
classification_overweight_validation_ids = classification_overweight_validation["RecordID"]
classification_measurements_o_w_v = validation_X[validation_X["RecordID"].isin(classification_overweight_validation_ids)]
classification_measurements_o_w_v = classification_measurements_o_w_v.count() 
classification_measurements_o_w_v

RecordID       18768
level_1        18768
Time           18768
ALP              306
ALT              313
AST              314
Age            17962
Albumin          205
BUN             1373
Bilirubin        316
Cholesterol       36
Creatinine      1381
DiasABP        13315
FiO2            3252
GCS             5517
Gender           391
Glucose         1166
HCO3            1318
HCT             2046
HR             17132
Height         17962
ICUType          391
K               1323
Lactate          833
MAP            13347
MechVent        3293
Mg              1348
NIDiasABP       5016
NIMAP           4971
NISysABP        5024
Na              1226
PaCO2           3200
PaO2            3190
Platelets       1597
RespRate        2402
SaO2            1497
SysABP         13315
Temp            9536
TroponinI         35
TroponinT        163
Urine          14089
WBC             1357
Weight         10259
pH              3401
dtype: int64

In [52]:
classification_obesity_grade1_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_validation_ids = classification_obesity_grade1_validation["RecordID"]
classification_measurements_ob1_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade1_validation_ids)]
classification_measurements_ob1_v  = classification_measurements_ob1_v .count() 
classification_measurements_ob1_v 

RecordID       14016
level_1        14016
Time           14016
ALP              246
ALT              252
AST              252
Age            13526
Albumin          172
BUN             1063
Bilirubin        254
Cholesterol       16
Creatinine      1067
DiasABP        10298
FiO2            2750
GCS             4047
Gender           292
Glucose          868
HCO3            1012
HCT             1600
HR             12843
Height         13526
ICUType          292
K                979
Lactate          856
MAP            10307
MechVent        2679
Mg              1058
NIDiasABP       3542
NIMAP           3496
NISysABP        3547
Na               943
PaCO2           2781
PaO2            2773
Platelets       1234
RespRate         980
SaO2            1241
SysABP         10298
Temp            7548
TroponinI         32
TroponinT        124
Urine          10981
WBC             1048
Weight          8066
pH              2933
dtype: int64

In [57]:
classification_obesity_grade2_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_validation_ids = classification_obesity_grade2_validation["RecordID"]
classification_measurements_ob2_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade2_validation_ids)]
classification_measurements_ob2_v  = classification_measurements_ob2_v .count() 
classification_measurements_ob2_v 

RecordID       5760
level_1        5760
Time           5760
ALP              92
ALT              93
AST              95
Age            5585
Albumin          67
BUN             445
Bilirubin        96
Cholesterol       4
Creatinine      447
DiasABP        4146
FiO2           1111
GCS            1703
Gender          120
Glucose         363
HCO3            430
HCT             656
HR             5313
Height         5585
ICUType         120
K               392
Lactate         379
MAP            4142
MechVent       1082
Mg              422
NIDiasABP      1690
NIMAP          1678
NISysABP       1693
Na              394
PaCO2          1060
PaO2           1058
Platelets       510
RespRate        608
SaO2            491
SysABP         4146
Temp           3223
TroponinI         9
TroponinT        32
Urine          4547
WBC             436
Weight         3401
pH             1113
dtype: int64

In [50]:
classification_obesity_grade3_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_validation_ids = classification_obesity_grade3_validation["RecordID"]
classification_measurements_ob3_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade3_validation_ids)]
classification_measurements_ob3_v  = classification_measurements_ob3_v .count() 
classification_measurements_ob3_v 

RecordID       4176
level_1        4176
Time           4176
ALP              58
ALT              62
AST              61
Age            3981
Albumin          40
BUN             298
Bilirubin        58
Cholesterol       4
Creatinine      301
DiasABP        2817
FiO2            809
GCS            1200
Gender           87
Glucose         269
HCO3            294
HCT             382
HR             3807
Height         3981
ICUType          87
K               286
Lactate         237
MAP            2839
MechVent        791
Mg              288
NIDiasABP      1139
NIMAP          1131
NISysABP       1142
Na              279
PaCO2           747
PaO2            746
Platelets       302
RespRate        505
SaO2            330
SysABP         2817
Temp           1962
TroponinI         2
TroponinT        31
Urine          3218
WBC             271
Weight         2294
pH              773
dtype: int64

In [None]:
df_columns = validation_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [58]:
df_test = pd.DataFrame(columns=df_columns)
df_test_transpose = df_test.T
df_test_transpose["Female"] = female_gender_measurements_validation
df_test_transpose["Male"] = male_gender_measurements_validation
df_test_transpose["Undefined Gender"] = undefined_gender_measurements_validation
df_test_transpose["ICUType 1"] = ICUType_1_measurements_validation
df_test_transpose["ICUType 2"] = ICUType_2_measurements_validation
df_test_transpose["ICUType 3"] = ICUType_3_measurements_validation
df_test_transpose["ICUType 4"] = ICUType_4_measurements_validation
df_test_transpose["Age 65+"] = age_65_and_above_measurements_validation
df_test_transpose["Age 65-"] = age_under_65__measurements_validation
df_test_transpose['Low Weight'] = classification_measurements_l_w_v
df_test_transpose['Normal Weight'] = classification_measurements_n_w_v
df_test_transpose['Overweight'] = classification_measurements_o_w_v
df_test_transpose['Obesity Grade 1'] = classification_measurements_ob1_v
df_test_transpose['Obesity Grade 2'] = classification_measurements_ob2_v
df_test_transpose['Obesity Grade 3'] = classification_measurements_ob3_v

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_test_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,41664,50208,192,14112,20016,33216,24720,50928,41136,1536,15696,18768,14016,5760,4176
level_1,41664,50208,192,14112,20016,33216,24720,50928,41136,1536,15696,18768,14016,5760,4176
Time,41664,50208,192,14112,20016,33216,24720,50928,41136,1536,15696,18768,14016,5760,4176
ALP,670,830,3,200,168,738,397,648,855,20,234,306,246,92,58
ALT,695,854,3,213,172,765,402,656,896,20,239,313,252,93,62
AST,700,856,3,211,173,768,407,663,896,20,240,314,252,95,61
Age,39207,47168,187,12928,19238,30865,23531,47957,38605,1461,14971,17962,13526,5585,3981
Albumin,490,650,3,137,103,583,320,544,599,17,175,205,172,67,40
BUN,3018,3626,23,966,1336,2513,1852,3530,3137,117,1116,1373,1063,445,298
Bilirubin,719,843,3,210,167,791,397,679,886,19,245,316,254,96,58


<h3> Test data</h3>

<h4>Loading test dataset</h4>

In [216]:
test_X = physionet2012_dataset['test_X']

In [217]:
female_gender_test = test_X[test_X['Gender'] == 0.0]
female_gender_test = female_gender_test["RecordID"]
female_gender_distribution_test = test_X[test_X["RecordID"].isin(female_gender_test)]
female_gender_distribution_test = female_gender_distribution_test.count()
female_gender_distribution_test

RecordID       51312
level_1        51312
Time           51312
ALP              809
ALT              830
AST              829
Age            48409
Albumin          634
BUN             3679
Bilirubin        841
Cholesterol       78
Creatinine      3693
DiasABP        26544
FiO2            7914
GCS            16963
Gender          1069
Glucose         3542
HCO3            3631
HCT             4667
HR             46506
Height         48409
ICUType         1069
K               3892
Lactate         2123
MAP            26450
MechVent        7803
Mg              3583
NIDiasABP      22696
NIMAP          22377
NISysABP       22711
Na              3709
PaCO2           5628
PaO2            5626
Platelets       3598
RespRate       13478
SaO2            1955
SysABP         26547
Temp           18667
TroponinI        133
TroponinT        549
Urine          35939
WBC             3351
Weight         28065
pH              5856
dtype: int64

In [219]:
male_gender_test = test_X[test_X['Gender'] == 1.0]
male_gender_test = male_gender_test["RecordID"]
male_gender_distribution_test = test_X[test_X["RecordID"].isin(male_gender_test)]
male_gender_distribution_test = male_gender_distribution_test.count()
male_gender_distribution_test

RecordID       63792
level_1        63792
Time           63792
ALP             1063
ALT             1095
AST             1092
Age            59905
Albumin          783
BUN             4695
Bilirubin       1116
Cholesterol      130
Creatinine      4726
DiasABP        36067
FiO2           10073
GCS            20043
Gender          1329
Glucose         4414
HCO3            4581
HCT             6302
HR             57368
Height         59905
ICUType         1329
K               4911
Lactate         2561
MAP            35868
MechVent        9569
Mg              4598
NIDiasABP      25734
NIMAP          25343
NISysABP       25765
Na              4550
PaCO2           7601
PaO2            7585
Platelets       4793
RespRate       14014
SaO2            2682
SysABP         36069
Temp           23925
TroponinI        116
TroponinT        694
Urine          44209
WBC             4320
Weight         32734
pH              8017
dtype: int64

In [220]:
undefined_gender_test = test_X[test_X['Gender'] == -1.0]
undefined_gender_test = undefined_gender_test["RecordID"]
undefined_gender_distribution_test = test_X[test_X["RecordID"].isin(undefined_gender_test)]
undefined_gender_distribution_test = undefined_gender_distribution_test.count()
undefined_gender_distribution_test

RecordID       48
level_1        48
Time           48
ALP             0
ALT             0
AST             0
Age             2
Albumin         0
BUN             0
Bilirubin       0
Cholesterol     0
Creatinine      0
DiasABP         0
FiO2            0
GCS             0
Gender          1
Glucose         0
HCO3            0
HCT             0
HR              0
Height          2
ICUType         1
K               0
Lactate         1
MAP             0
MechVent        0
Mg              0
NIDiasABP       0
NIMAP           0
NISysABP        0
Na              0
PaCO2           2
PaO2            2
Platelets       0
RespRate        0
SaO2            0
SysABP          0
Temp            0
TroponinI       0
TroponinT       0
Urine           0
WBC             0
Weight          1
pH              2
dtype: int64

In [None]:
ICUType_1_test = test_X[test_X['ICUType'] == 1.0].count() 
ICUType_1_test

RecordID       347
level_1        347
Time           347
ALP             17
ALT             17
AST             18
Age            347
Albumin         12
BUN             46
Bilirubin       16
Cholesterol      5
Creatinine      46
DiasABP         80
FiO2            43
GCS            149
Gender         347
Glucose         43
HCO3            43
HCT             47
HR             201
Height         347
ICUType        347
K               44
Lactate         23
MAP             79
MechVent        44
Mg              29
NIDiasABP      147
NIMAP          147
NISysABP       147
Na              43
PaCO2           44
PaO2            43
Platelets       46
RespRate        82
SaO2            23
SysABP          80
Temp           136
TroponinI        4
TroponinT       18
Urine          103
WBC             44
Weight         347
pH              46
dtype: int64

In [None]:
ICUType_2_test = test_X[test_X['ICUType'] == 2.0].count() 
ICUType_2_test

RecordID       515
level_1        515
Time           515
ALP              6
ALT              6
AST              6
Age            515
Albumin          4
BUN             26
Bilirubin        6
Cholesterol      2
Creatinine      27
DiasABP        102
FiO2            63
GCS             83
Gender         515
Glucose         14
HCO3            23
HCT             48
HR             136
Height         515
ICUType        515
K               14
Lactate         30
MAP            101
MechVent        74
Mg              16
NIDiasABP       50
NIMAP           51
NISysABP        50
Na              15
PaCO2          308
PaO2           303
Platelets       57
RespRate        15
SaO2            20
SysABP         102
Temp           111
TroponinI        1
TroponinT        2
Urine          100
WBC             41
Weight         515
pH             308
dtype: int64

In [None]:
ICUType_3_test = test_X[test_X['ICUType'] == 3.0].count() 
ICUType_3_test

RecordID       872
level_1        872
Time           872
ALP             51
ALT             52
AST             52
Age            872
Albumin         49
BUN            108
Bilirubin       51
Cholesterol      2
Creatinine     108
DiasABP         65
FiO2           172
GCS            344
Gender         872
Glucose        107
HCO3           107
HCT            111
HR             488
Height         872
ICUType        872
K              108
Lactate        114
MAP             62
MechVent       153
Mg              85
NIDiasABP      444
NIMAP          441
NISysABP       446
Na             108
PaCO2           85
PaO2            83
Platelets      104
RespRate       140
SaO2            16
SysABP          65
Temp           353
TroponinI        2
TroponinT       25
Urine          283
WBC            103
Weight         872
pH              87
dtype: int64

In [None]:
ICUType_4_test = test_X[test_X['ICUType'] == 4.0].count() 
ICUType_4_test

RecordID       665
level_1        665
Time           665
ALP             32
ALT             34
AST             34
Age            665
Albumin         29
BUN            114
Bilirubin       31
Cholesterol      0
Creatinine     114
DiasABP        195
FiO2           178
GCS            289
Gender         665
Glucose         85
HCO3            87
HCT            126
HR             394
Height         665
ICUType        665
K               87
Lactate        123
MAP            192
MechVent       168
Mg              70
NIDiasABP      276
NIMAP          272
NISysABP       276
Na              87
PaCO2          128
PaO2           124
Platelets      125
RespRate       108
SaO2            30
SysABP         195
Temp           335
TroponinI        1
TroponinT        8
Urine          279
WBC            117
Weight         665
pH             128
dtype: int64

In [None]:
more_than_or_equal_to_65_test = test_X[test_X['Age'] >= 65].groupby('RecordID').first().reset_index()
more_than_or_equal_to_65_test = more_than_or_equal_to_65_test.count()
more_than_or_equal_to_65_test

RecordID       1348
level_1        1348
Time           1348
ALP             518
ALT             526
AST             527
Age            1348
Albumin         513
BUN            1339
Bilirubin       539
Cholesterol     109
Creatinine     1339
DiasABP         959
FiO2            940
GCS            1332
Gender         1348
Glucose        1329
HCO3           1339
HCT            1337
HR             1332
Height         1348
ICUType        1348
K              1330
Lactate         730
MAP             958
MechVent        869
Mg             1333
NIDiasABP      1168
NIMAP          1165
NISysABP       1169
Na             1339
PaCO2          1039
PaO2           1039
Platelets      1336
RespRate        362
SaO2            664
SysABP          959
Temp           1332
TroponinI        93
TroponinT       372
Urine          1318
WBC            1333
Weight         1348
pH             1044
dtype: int64

In [None]:
less_than_or_equal_to_65_test = test_X[test_X['Age'] < 65].groupby('RecordID').first().reset_index()
less_than_or_equal_to_65_test = less_than_or_equal_to_65_test.count()
less_than_or_equal_to_65_test

RecordID       1046
level_1        1046
Time           1046
ALP             497
ALT             506
AST             505
Age            1046
Albumin         450
BUN            1025
Bilirubin       505
Cholesterol      75
Creatinine     1025
DiasABP         743
FiO2            732
GCS            1035
Gender         1046
Glucose        1017
HCO3           1023
HCT            1026
HR             1035
Height         1046
ICUType        1046
K              1019
Lactate         595
MAP             740
MechVent        694
Mg             1010
NIDiasABP       921
NIMAP           919
NISysABP        923
Na             1022
PaCO2           791
PaO2            791
Platelets      1024
RespRate        273
SaO2            409
SysABP          743
Temp           1035
TroponinI        37
TroponinT       160
Urine          1026
WBC            1023
Weight         1046
pH              795
dtype: int64

In [None]:
filtered_test_X = test_X[(test_X['Height'] != -1) & (test_X['Weight'] != -1) & (test_X['Height'].notna()) & (test_X['Weight'].notna())] 

In [None]:
filtered_test_X_meters = filtered_test_X.copy()
filtered_test_X_meters['Height'] = filtered_test_X['Height'] / 100 # Converting Height from cm to meters
filtered_test_X_meters['Height']

336       1.626
337       1.626
341       1.626
342       1.626
343       1.626
          ...  
574841    1.651
574842    1.651
574843    1.651
574844    1.651
574845    1.651
Name: Height, Length: 31734, dtype: float64

In [None]:
bmi_data_test = filtered_test_X_meters
bmi_data_test["BMI"] = filtered_test_X_meters["Weight"] / (filtered_test_X_meters["Height"] ** 2)
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
336,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.0,3.5,,,16.1,48.4,7.4,18.306456,Baixo peso
337,132551,1,1.0,,,,78.0,,,,...,114.5,,,,120.0,,48.4,,18.306456,Baixo peso
341,132551,5,5.0,,,,78.0,,,,...,104.0,,,,130.0,,48.4,7.29,18.306456,Baixo peso
342,132551,6,6.0,,,,78.0,,67.0,,...,141.0,35.6,3.1,,60.0,20.4,48.4,7.25,18.306456,Baixo peso
343,132551,7,7.0,,,,78.0,,,,...,132.0,,,,,,48.4,,18.306456,Baixo peso


In [None]:
classification_low_weight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Baixo peso'].groupby('RecordID').first().reset_index()
classification_l_w_t_test = classification_low_weight_test.count() 
classification_l_w_t_test

RecordID         43
level_1          43
Time             43
ALP              10
ALT               9
AST               9
Age              43
Albumin           9
BUN              24
Bilirubin        10
Cholesterol       2
Creatinine       24
DiasABP          22
FiO2             20
GCS              27
Gender           38
Glucose          20
HCO3             24
HCT              26
HR               30
Height           43
ICUType          38
K                20
Lactate          19
MAP              22
MechVent         19
Mg               22
NIDiasABP        22
NIMAP            22
NISysABP         22
Na               20
PaCO2            29
PaO2             29
Platelets        26
RespRate          4
SaO2             12
SysABP           22
Temp             29
TroponinI         2
TroponinT         6
Urine            27
WBC              24
Weight           43
pH               29
BMI              43
Classificacao    43
dtype: int64

In [None]:
classification_normal_weight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Peso normal'].groupby('RecordID').first().reset_index()
classification_n_w_t_test = classification_normal_weight_test.count() 
classification_n_w_t_test

RecordID         380
level_1          380
Time             380
ALP               78
ALT               79
AST               79
Age              380
Albumin           82
BUN              212
Bilirubin         78
Cholesterol       12
Creatinine       212
DiasABP          207
FiO2             175
GCS              272
Gender           362
Glucose          200
HCO3             206
HCT              217
HR               287
Height           380
ICUType          363
K                201
Lactate          122
MAP              207
MechVent         169
Mg               198
NIDiasABP        212
NIMAP            213
NISysABP         214
Na               203
PaCO2            223
PaO2             223
Platelets        218
RespRate          56
SaO2             103
SysABP           207
Temp             270
TroponinI         13
TroponinT         46
Urine            254
WBC              213
Weight           380
pH               229
BMI              380
Classificacao    380
dtype: int64

In [None]:
classification_overweight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Sobrepeso'].groupby('RecordID').first().reset_index()
classification_o_w_t_test = classification_overweight_test.count() 
classification_o_w_t_test

RecordID         529
level_1          529
Time             529
ALP              101
ALT              107
AST              108
Age              529
Albumin           82
BUN              303
Bilirubin        109
Cholesterol       23
Creatinine       304
DiasABP          343
FiO2             253
GCS              392
Gender           426
Glucose          283
HCO3             298
HCT              319
HR               424
Height           529
ICUType          426
K                290
Lactate          156
MAP              341
MechVent         261
Mg               291
NIDiasABP        293
NIMAP            292
NISysABP         293
Na               294
PaCO2            332
PaO2             328
Platelets        315
RespRate          67
SaO2             184
SysABP           343
Temp             403
TroponinI         12
TroponinT         51
Urine            380
WBC              307
Weight           529
pH               337
BMI              529
Classificacao    529
dtype: int64

In [None]:
classification_obesity_grade1_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 1'].groupby('RecordID').first().reset_index()
classification_ob1_t_test = classification_obesity_grade1_test.count() 
classification_ob1_t_test

RecordID         344
level_1          344
Time             344
ALP               64
ALT               64
AST               64
Age              344
Albumin           58
BUN              207
Bilirubin         63
Cholesterol        4
Creatinine       210
DiasABP          240
FiO2             178
GCS              262
Gender           221
Glucose          187
HCO3             202
HCT              213
HR               282
Height           344
ICUType          221
K                195
Lactate          104
MAP              240
MechVent         177
Mg               201
NIDiasABP        182
NIMAP            181
NISysABP         182
Na               201
PaCO2            232
PaO2             232
Platelets        207
RespRate          27
SaO2             142
SysABP           240
Temp             270
TroponinI         12
TroponinT         32
Urine            269
WBC              202
Weight           344
pH               243
BMI              344
Classificacao    344
dtype: int64

In [None]:
classification_obesity_grade2_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 2'].groupby('RecordID').first().reset_index()
classification_ob2_t_test = classification_obesity_grade2_test.count() 
classification_ob2_t_test

RecordID         143
level_1          143
Time             143
ALP               32
ALT               32
AST               32
Age              143
Albumin           24
BUN               90
Bilirubin         34
Cholesterol        6
Creatinine        90
DiasABP           97
FiO2              72
GCS              114
Gender            94
Glucose           81
HCO3              88
HCT              100
HR               118
Height           143
ICUType           94
K                 82
Lactate           47
MAP               97
MechVent          74
Mg                85
NIDiasABP         81
NIMAP             81
NISysABP          83
Na                87
PaCO2             94
PaO2              94
Platelets         97
RespRate          10
SaO2              60
SysABP            97
Temp             113
TroponinI          7
TroponinT         12
Urine            112
WBC               93
Weight           143
pH               100
BMI              143
Classificacao    143
dtype: int64

In [None]:
classification_obesity_grade3_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 3'].groupby('RecordID').first().reset_index()
classification_ob3_t_test = classification_obesity_grade3_test.count() 
classification_ob3_t_test 

RecordID         119
level_1          119
Time             119
ALP               28
ALT               30
AST               30
Age              119
Albumin           28
BUN               76
Bilirubin         28
Cholesterol        4
Creatinine        77
DiasABP           92
FiO2              72
GCS               99
Gender            81
Glucose           69
HCO3              76
HCT               84
HR               105
Height           119
ICUType           81
K                 70
Lactate           46
MAP               92
MechVent          74
Mg                73
NIDiasABP         60
NIMAP             60
NISysABP          61
Na                76
PaCO2             91
PaO2              91
Platelets         82
RespRate           8
SaO2              60
SysABP            92
Temp             101
TroponinI          1
TroponinT         12
Urine             98
WBC               81
Weight           119
pH                93
BMI              119
Classificacao    119
dtype: int64

In [None]:
df_columns = test_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [None]:
df_test = pd.DataFrame(columns=df_columns)
df_test_transpose = df_test.T
df_test_transpose["Female"] = female_gender_distribution_test
df_test_transpose["Male"] = male_gender_distribution_test
df_test_transpose["Undefined Gender"] = undefined_gender_distribution_test
df_test_transpose["ICUType 1"] = ICUType_1_test
df_test_transpose["ICUType 2"] = ICUType_2_test
df_test_transpose["ICUType 3"] = ICUType_3_test
df_test_transpose["ICUType 4"] = ICUType_4_test
df_test_transpose["Age 65+"] = more_than_or_equal_to_65_test
df_test_transpose["Age 65-"] = less_than_or_equal_to_65_test
df_test_transpose['Low Weight'] = classification_l_w_t_test 
df_test_transpose['Normal Weight'] = classification_n_w_t_test
df_test_transpose['Overweight'] = classification_o_w_t_test
df_test_transpose['Obesity Grade 1'] = classification_ob1_t_test 
df_test_transpose['Obesity Grade 2'] = classification_ob2_t_test
df_test_transpose['Obesity Grade 3'] = classification_ob3_t_test

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Test Set</h2>"))
df_test_transpose

Unnamed: 0,Female,Male,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
RecordID,1009,1385,347,515,872,665,1348,1046,43,380,529,344,143,119
level_1,1009,1385,347,515,872,665,1348,1046,43,380,529,344,143,119
Time,1009,1385,347,515,872,665,1348,1046,43,380,529,344,143,119
ALP,48,58,17,6,51,32,518,497,10,78,101,64,32,28
ALT,49,60,17,6,52,34,526,506,9,79,107,64,32,30
AST,49,61,18,6,52,34,527,505,9,79,108,64,32,30
Age,1009,1385,347,515,872,665,1348,1046,43,380,529,344,143,119
Albumin,40,54,12,4,49,29,513,450,9,82,82,58,24,28
BUN,140,153,46,26,108,114,1339,1025,24,212,303,207,90,76
Bilirubin,47,57,16,6,51,31,539,505,10,78,109,63,34,28
