In [79]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

<h2>Loading dataset</h2>

In [80]:
from pypots.benchpots.datasets import preprocess_physionet2012
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-11 22:01:11 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-11 22:01:11 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-11 22:01:11 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-11 22:01:11 [INFO]: Loaded successfully!


<h3>Training data</h3>

<h4>Loading training dataset</h4>

In [81]:
train_X = physionet2012_dataset['train_X']

In [82]:
training_female_gender = train_X[train_X["Gender"] == 0.0]
training_female_gender_ids = training_female_gender["RecordID"]
female_gender_measurements_training = train_X[train_X["RecordID"].isin(training_female_gender_ids)]
female_gender_measurements_training = female_gender_measurements_training.count()
female_gender_measurements_training

RecordID       164832
level_1        164832
Time           164832
ALP              2780
ALT              2851
AST              2846
Age            155270
Albumin          2161
BUN             11981
Bilirubin        2883
Cholesterol       258
Creatinine      12032
DiasABP         86185
FiO2            25713
GCS             53132
Gender           3434
Glucose         11412
HCO3            11773
HCT             15154
HR             149229
Height         155270
ICUType          3434
K               12624
Lactate          6641
MAP             85693
MechVent        24781
Mg              11717
NIDiasABP       72068
NIMAP           70993
NISysABP        72129
Na              11876
PaCO2           17964
PaO2            17931
Platelets       11790
RespRate        44225
SaO2             6238
SysABP          86191
Temp            59057
TroponinI         365
TroponinT        1774
Urine          114821
WBC             10989
Weight          88871
pH              18598
dtype: int64

In [83]:
training_male_gender = train_X[train_X['Gender'] == 1.0]
training_male_gender_ids = training_male_gender["RecordID"]
male_gender_measurements_training = train_X[train_X["RecordID"].isin(training_male_gender_ids)]
male_gender_measurements_training  = male_gender_measurements_training.count()
male_gender_measurements_training 

RecordID       202944
level_1        202944
Time           202944
ALP              3360
ALT              3461
AST              3460
Age            191024
Albumin          2538
BUN             14780
Bilirubin        3466
Cholesterol       374
Creatinine      14848
DiasABP        113600
FiO2            31886
GCS             64798
Gender           4228
Glucose         13781
HCO3            14391
HCT             19662
HR             182580
Height         191024
ICUType          4228
K               15363
Lactate          8528
MAP            113049
MechVent        30325
Mg              14383
NIDiasABP       82110
NIMAP           81056
NISysABP        82179
Na              14374
PaCO2           24573
PaO2            24527
Platelets       15287
RespRate        45926
SaO2             8735
SysABP         113607
Temp            78242
TroponinI         403
TroponinT        2173
Urine          139938
WBC             13797
Weight         105287
pH              25881
dtype: int64

In [84]:
training_undefined_gender = train_X[train_X['Gender'] == -1.0]
undefined_gender_training_ids = training_undefined_gender['RecordID']
undefined_gender_measurements_training = train_X[train_X["RecordID"].isin(undefined_gender_training_ids)]
undefined_gender_measurements_training = undefined_gender_measurements_training.count()
undefined_gender_measurements_training

RecordID       432
level_1        432
Time           432
ALP              3
ALT              4
AST              4
Age            292
Albumin          4
BUN             29
Bilirubin        3
Cholesterol      0
Creatinine      29
DiasABP        205
FiO2            36
GCS             69
Gender           9
Glucose         29
HCO3            29
HCT             27
HR             278
Height         292
ICUType          9
K               30
Lactate         28
MAP            200
MechVent        25
Mg              27
NIDiasABP      102
NIMAP          102
NISysABP       102
Na              28
PaCO2           48
PaO2            48
Platelets       27
RespRate        93
SaO2             3
SysABP         205
Temp           120
TroponinI        1
TroponinT        9
Urine          186
WBC             25
Weight         209
pH              52
dtype: int64

In [85]:
training_ICUType_1 = train_X[train_X['ICUType'] == 1.0]
training_ICUType_1 = training_ICUType_1[training_ICUType_1["Time"] == 0.0]
training_ICUType_1_ids = training_ICUType_1["RecordID"]
ICUType_1_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_1_ids)]
ICUType_1_measurements_training = ICUType_1_measurements_training.count() 
ICUType_1_measurements_training

RecordID       54000
level_1        54000
Time           54000
ALP              832
ALT              873
AST              871
Age            49592
Albumin          626
BUN             3837
Bilirubin        857
Cholesterol      333
Creatinine      3892
DiasABP        22513
FiO2            5947
GCS            13885
Gender          1125
Glucose         3672
HCO3            3719
HCT             4722
HR             47211
Height         49592
ICUType         1125
K               4445
Lactate         1265
MAP            22470
MechVent        5509
Mg              3799
NIDiasABP      25890
NIMAP          25750
NISysABP       25912
Na              3696
PaCO2           4529
PaO2            4531
Platelets       3859
RespRate       19368
SaO2            2740
SysABP         22514
Temp           16569
TroponinI        160
TroponinT       1155
Urine          30883
WBC             3479
Weight         24609
pH              4640
dtype: int64

In [86]:
training_ICUType_2 = train_X[train_X['ICUType'] == 2.0]
training_ICUType_2 = training_ICUType_2[training_ICUType_2["Time"] == 0.0]
training_ICUType2_ids = training_ICUType_2["RecordID"]
ICUType_2_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType2_ids)]
ICUType_2_measurements_training = ICUType_2_measurements_training.count() 
ICUType_2_measurements_training

RecordID       78000
level_1        78000
Time           78000
ALP              548
ALT              564
AST              563
Age            75301
Albumin          374
BUN             5095
Bilirubin        554
Cholesterol       25
Creatinine      5114
DiasABP        61215
FiO2           13181
GCS            20581
Gender          1625
Glucose         3694
HCO3            4707
HCT             8430
HR             71576
Height         75301
ICUType         1625
K               4188
Lactate         2748
MAP            61327
MechVent       12939
Mg              4918
NIDiasABP      16871
NIMAP          16746
NISysABP       16902
Na              3960
PaCO2          15787
PaO2           15739
Platelets       6286
RespRate        3148
SaO2            8539
SysABP         61218
Temp           45560
TroponinI        113
TroponinT        172
Urine          65633
WBC             5226
Weight         40156
pH             17290
dtype: int64

In [87]:
training_ICUType_3 = train_X[train_X['ICUType'] == 3.0]
training_ICUType_3 = training_ICUType_3[training_ICUType_3["Time"] == 0.0]
training_ICUType_3_ids = training_ICUType_3["RecordID"]
ICUType_3_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_3_ids)]
ICUType_3_measurements_training = ICUType_3_measurements_training.count() 
ICUType_3_measurements_training

RecordID       131760
level_1        131760
Time           131760
ALP              2888
ALT              2976
AST              2976
Age            122035
Albumin          2222
BUN              9973
Bilirubin        3070
Cholesterol       132
Creatinine      10008
DiasABP         46493
FiO2            19501
GCS             35014
Gender           2745
Glucose          9940
HCO3             9990
HCT             11621
HR             117422
Height         122035
ICUType          2745
K               10791
Lactate          5066
MAP             45913
MechVent        17813
Mg               9387
NIDiasABP       74633
NIMAP           73244
NISysABP        74672
Na              10218
PaCO2           10084
PaO2            10084
Platelets        9010
RespRate        41416
SaO2             1601
SysABP          46498
Temp            37735
TroponinI         330
TroponinT        1816
Urine           79577
WBC              8654
Weight          85423
pH              10238
dtype: int64

In [88]:
training_ICUType_4 = train_X[train_X['ICUType'] == 4.0]
training_ICUType_4 = training_ICUType_4[training_ICUType_4["Time"] == 0.0]
training_ICUType_4_ids = training_ICUType_4["RecordID"]
ICUType_4_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_4_ids)]
ICUType_4_measurements_training = ICUType_4_measurements_training.count() 
ICUType_4_measurements_training 

RecordID       104448
level_1        104448
Time           104448
ALP              1875
ALT              1903
AST              1900
Age             99658
Albumin          1481
BUN              7885
Bilirubin        1871
Cholesterol       142
Creatinine       7895
DiasABP         69769
FiO2            19006
GCS             48519
Gender           2176
Glucose          7916
HCO3             7777
HCT             10070
HR              95878
Height          99658
ICUType          2176
K                8593
Lactate          6118
MAP             69232
MechVent        18870
Mg               8023
NIDiasABP       36886
NIMAP           36411
NISysABP        36924
Na               8404
PaCO2           12185
PaO2            12152
Platelets        7949
RespRate        26312
SaO2             2096
SysABP          69773
Temp            37555
TroponinI         166
TroponinT         813
Urine           78852
WBC              7452
Weight          44179
pH              12363
dtype: int64

In [89]:
age_65_and_above_training = train_X[train_X["Age"] >= 65]
age_65_and_above_training = age_65_and_above_training[age_65_and_above_training ["Time"] == 0.0]
age_65_and_above_training_ids = age_65_and_above_training ["RecordID"]
age_65_and_above_measurements_training = train_X[train_X["RecordID"].isin(age_65_and_above_training_ids)]
age_65_and_above_measurements_training = age_65_and_above_measurements_training.count()
age_65_and_above_measurements_training

RecordID       201264
level_1        201264
Time           201264
ALP              2705
ALT              2766
AST              2768
Age            190014
Albumin          2277
BUN             14155
Bilirubin        2811
Cholesterol       371
Creatinine      14225
DiasABP        110217
FiO2            31292
GCS             62648
Gender           4193
Glucose         13143
HCO3            13812
HCT             18843
HR             182503
Height         190014
ICUType          4193
K               14743
Lactate          7915
MAP            109733
MechVent        29477
Mg              13894
NIDiasABP       84605
NIMAP           83664
NISysABP        84670
Na              13677
PaCO2           23453
PaO2            23416
Platelets       14400
RespRate        50573
SaO2             9244
SysABP         110223
Temp            77908
TroponinI         560
TroponinT        2760
Urine          142763
WBC             13157
Weight         107595
pH              24570
dtype: int64

In [90]:
# Motivação para o teste: A quantidade de valores no grupo de idade que estava na tabela, era muito maior do que a quantidade de 
# valores para o grupo de gênero, o que não fazia sentido, pois, já que não temos dados faltantes dessas variáveis em ambos os grupos
# se gênero tem n ocorrências, idade deve ter n ocorrências também, e vice-versa. 

# Descoberta: Através do teste, vimos que tinha casos em que o valor do campo idade, se repete durante a série temporal do paciente, e visto isso, nós ajustamos o código para pegar apenas a primeira ocorrência de valor para cada recordID.
# Também descobrimos que, como estamos filtrando apenas os gêneros female e male e há um campo para gênero indefinido, o grupo de Age estava pegando as idades desse campo de gênero indefinido também, o que estava fazendo com que 
# a soma total de ocorrências para o grupo de Age estivesse maior do que para o grupo de Gender.

# teste2 = train_X[train_X['Age'] >= 65]
# teste2["RecordID"].value_counts()   

In [91]:
age_under_65_training  = train_X[train_X["Age"] < 65]
age_under_65_training  = age_under_65_training[age_under_65_training["Time"] == 0.0]
age_under_65_training_ids = age_under_65_training["RecordID"]
age_under_65_measurements_training = train_X[train_X["RecordID"].isin(age_under_65_training_ids)]
age_under_65_measurements_training  = age_under_65_measurements_training .count()
age_under_65_measurements_training 

RecordID       166944
level_1        166944
Time           166944
ALP              3438
ALT              3550
AST              3542
Age            156572
Albumin          2426
BUN             12635
Bilirubin        3541
Cholesterol       261
Creatinine      12684
DiasABP         89773
FiO2            26343
GCS             55351
Gender           3478
Glucose         12079
HCO3            12381
HCT             16000
HR             149584
Height         156572
ICUType          3478
K               13274
Lactate          7282
MAP             89209
MechVent        25654
Mg              12233
NIDiasABP       69675
NIMAP           68487
NISysABP        69740
Na              12601
PaCO2           19132
PaO2            19090
Platelets       12704
RespRate        39671
SaO2             5732
SysABP          89780
Temp            59511
TroponinI         209
TroponinT        1196
Urine          112182
WBC             11654
Weight          86772
pH              19961
dtype: int64

In [92]:
filtered_train_X = train_X[(train_X['Height'] != -1) & (train_X['Weight'] != -1) & (train_X['Height'].notna()) & (train_X['Weight'].notna())] 

In [93]:
def classify_BMI(BMI):
    if BMI <= 18.5:
        return "Baixo peso"
    elif BMI >= 18.6 and BMI <= 24.9:
        return "Peso normal"
    elif BMI >= 25 and BMI <= 29.9:
        return "Sobrepeso"
    elif BMI >= 30 and BMI <= 34.9:
        return "Obesidade grau 1"
    elif BMI >= 35 and BMI <= 39.9:
        return "Obesidade grau 2"
    elif BMI >= 40:
        return "Obesidade grau 3"

In [94]:
filtered_train_X_meters = filtered_train_X.copy()
filtered_train_X_meters['Height'] = filtered_train_X['Height'] / 100 # Converting Height from cm to meters
filtered_train_X_meters['Height']

240       1.803
336       1.626
337       1.626
341       1.626
342       1.626
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 101886, dtype: float64

In [95]:
bmi_data_train = filtered_train_X_meters
bmi_data_train["BMI"] = round(filtered_train_X_meters["Weight"] / (filtered_train_X_meters["Height"] ** 2), 1)
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
240,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
336,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.0,3.5,,,16.1,48.4,7.4,18.3,Baixo peso
337,132551,1,1.0,,,,78.0,,,,...,114.5,,,,120.0,,48.4,,18.3,Baixo peso
341,132551,5,5.0,,,,78.0,,,,...,104.0,,,,130.0,,48.4,7.29,18.3,Baixo peso
342,132551,6,6.0,,,,78.0,,67.0,,...,141.0,35.6,3.1,,60.0,20.4,48.4,7.25,18.3,Baixo peso


In [96]:
bmi_data_train = bmi_data_train.groupby("RecordID").first().reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
1,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.0,3.5,,120.0,16.1,48.4,7.40,18.3,Baixo peso
2,132567,0,0.0,,,,71.0,,9.0,,...,111.50,35.6,,,15.0,9.0,56.0,7.44,22.6,Peso normal
3,132570,0,0.0,19.0,15.0,20.0,84.0,,83.0,0.1,...,,36.6,,,600.0,8.8,102.6,,35.4,Obesidade grau 2
4,132575,0,0.0,,,,78.0,,18.0,,...,122.00,37.4,,,38.0,12.5,63.0,7.34,22.4,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4041,163008,0,0.0,,,,59.0,,24.0,,...,97.00,37.6,,,45.0,6.9,98.5,7.38,34.0,Obesidade grau 1
4042,163013,0,0.0,82.0,11.0,30.0,74.0,2.5,30.0,1.2,...,118.00,36.5,,0.03,40.0,9.6,68.6,7.35,29.5,Sobrepeso
4043,163016,0,0.0,,27.0,120.0,65.0,,29.0,0.4,...,101.00,38.1,,,75.0,8.0,63.6,7.37,24.8,Peso normal
4044,163021,0,0.0,,,,72.0,,9.0,,...,,,,,,8.6,62.0,,20.8,Peso normal


In [97]:
bmi_data_train["Classificacao"].value_counts()

Classificacao
Sobrepeso           1426
Peso normal         1145
Obesidade grau 1     749
Obesidade grau 2     313
Obesidade grau 3     293
Baixo peso           120
Name: count, dtype: int64

In [98]:
classification_undefined_training_ids = bmi_data_train["RecordID"]
classification_undefined_measurements_training = train_X[~train_X["RecordID"].isin(classification_undefined_training_ids)]
classification_undefined_measurements_training = classification_undefined_measurements_training.count()
classification_undefined_measurements_training

RecordID       174000
level_1        174000
Time           174000
ALP              2909
ALT              3006
AST              3007
Age            160441
Albumin          2355
BUN             12276
Bilirubin        3028
Cholesterol       282
Creatinine      12334
DiasABP         68873
FiO2            23751
GCS             57664
Gender           3625
Glucose         12234
HCO3            12182
HCT             14810
HR             154082
Height         160441
ICUType          3625
K               13453
Lactate          5917
MAP             68225
MechVent        21909
Mg              11888
NIDiasABP       92757
NIMAP           91244
NISysABP        92823
Na              12760
PaCO2           12274
PaO2            12247
Platelets       11341
RespRate        60654
SaO2             1914
SysABP          68878
Temp            46707
TroponinI         296
TroponinT        2104
Urine          110137
WBC             10870
Weight          92481
pH              12500
dtype: int64

In [99]:
classification_low_weight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Baixo peso']
classification_low_weight_training_ids = classification_low_weight_training["RecordID"]
classification_measurements_l_w_t = train_X[train_X["RecordID"].isin(classification_low_weight_training_ids)]
classification_measurements_l_w_t = classification_measurements_l_w_t.count() 
classification_measurements_l_w_t 

RecordID       5760
level_1        5760
Time           5760
ALP              87
ALT              90
AST              90
Age            5534
Albumin          75
BUN             446
Bilirubin        93
Cholesterol      10
Creatinine      447
DiasABP        3651
FiO2            963
GCS            1839
Gender          120
Glucose         432
HCO3            446
HCT             580
HR             5331
Height         5534
ICUType         120
K               472
Lactate         286
MAP            3693
MechVent        959
Mg              443
NIDiasABP      2049
NIMAP          2017
NISysABP       2050
Na              440
PaCO2           784
PaO2            790
Platelets       446
RespRate       1002
SaO2            288
SysABP         3652
Temp           2392
TroponinI        25
TroponinT        59
Urine          4124
WBC             402
Weight         3047
pH              817
dtype: int64

In [100]:
classification_normal_weight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Peso normal']
classification_normal_weight_training_ids = classification_normal_weight_training["RecordID"]
classification_measurements_n_w_t = train_X[train_X["RecordID"].isin(classification_normal_weight_training_ids)]
classification_measurements_n_w_t = classification_measurements_n_w_t.count() 
classification_measurements_n_w_t 

RecordID       54960
level_1        54960
Time           54960
ALP              868
ALT              894
AST              892
Age            52743
Albumin          660
BUN             4049
Bilirubin        890
Cholesterol       99
Creatinine      4063
DiasABP        36479
FiO2            9247
GCS            17953
Gender          1145
Glucose         3672
HCO3            3907
HCT             5550
HR             50540
Height         52743
ICUType         1145
K               4132
Lactate         2554
MAP            36326
MechVent        9271
Mg              3994
NIDiasABP      18256
NIMAP          18091
NISysABP       18273
Na              3847
PaCO2           8185
PaO2            8162
Platelets       4378
RespRate        8900
SaO2            3331
SysABP         36480
Temp           24912
TroponinI        150
TroponinT        466
Urine          40735
WBC             3879
Weight         28423
pH              8681
dtype: int64

In [101]:
classification_overweight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Sobrepeso']
classification_overweight_training_ids = classification_overweight_training['RecordID']
classification_measurements_o_w_t = train_X[train_X["RecordID"].isin(classification_overweight_training_ids)]
classification_measurements_o_w_t = classification_measurements_o_w_t.count() 
classification_measurements_o_w_t

RecordID       68448
level_1        68448
Time           68448
ALP             1158
ALT             1181
AST             1180
Age            65430
Albumin          828
BUN             5102
Bilirubin       1192
Cholesterol      136
Creatinine      5128
DiasABP        46518
FiO2           11527
GCS            21036
Gender          1426
Glucose         4506
HCO3            4927
HCT             7203
HR             62388
Height         65430
ICUType         1426
K               5046
Lactate         3033
MAP            46438
MechVent       11325
Mg              5019
NIDiasABP      21360
NIMAP          21180
NISysABP       21387
Na              4678
PaCO2          10574
PaO2           10549
Platelets       5676
RespRate       10473
SaO2            4810
SysABP         46522
Temp           32342
TroponinI        161
TroponinT        645
Urine          50692
WBC             4984
Weight         35280
pH             11229
dtype: int64

In [102]:
classification_obesity_grade1_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_training_ids = classification_obesity_grade1_training["RecordID"]
classification_measurements_ob1_t = train_X[train_X["RecordID"].isin(classification_obesity_grade1_training_ids)]
classification_measurements_ob1_t  = classification_measurements_ob1_t .count() 
classification_measurements_ob1_t 

RecordID       35952
level_1        35952
Time           35952
ALP              603
ALT              619
AST              617
Age            34562
Albumin          433
BUN             2669
Bilirubin        615
Cholesterol       54
Creatinine      2678
DiasABP        25011
FiO2            6486
GCS            10907
Gender           749
Glucose         2385
HCO3            2569
HCT             3690
HR             33055
Height         34562
ICUType          749
K               2687
Lactate         1760
MAP            24849
MechVent        6281
Mg              2604
NIDiasABP      10946
NIMAP          10846
NISysABP       10958
Na              2475
PaCO2           5841
PaO2            5837
Platelets       2904
RespRate        4692
SaO2            2504
SysABP         25011
Temp           17426
TroponinI         96
TroponinT        368
Urine          27471
WBC             2554
Weight         18668
pH              6179
dtype: int64

In [103]:
classification_obesity_grade2_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_training_ids = classification_obesity_grade2_training["RecordID"]
classification_measurements_ob2_t = train_X[train_X["RecordID"].isin(classification_obesity_grade2_training_ids)]
classification_measurements_ob2_t = classification_measurements_ob2_t.count() 
classification_measurements_ob2_t 

RecordID       15024
level_1        15024
Time           15024
ALP              264
ALT              266
AST              265
Age            14337
Albumin          186
BUN             1166
Bilirubin        272
Cholesterol       29
Creatinine      1177
DiasABP        10155
FiO2            2749
GCS             4533
Gender           313
Glucose         1030
HCO3            1123
HCT             1622
HR             13727
Height         14337
ICUType          313
K               1144
Lactate          805
MAP            10153
MechVent        2583
Mg              1114
NIDiasABP       4632
NIMAP           4545
NISysABP        4635
Na              1085
PaCO2           2504
PaO2            2499
Platelets       1285
RespRate        2629
SaO2            1137
SysABP         10157
Temp            7501
TroponinI         20
TroponinT        163
Urine          11161
WBC             1129
Weight          8454
pH              2618
dtype: int64

In [104]:
classification_obesity_grade3_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_training_ids = classification_obesity_grade3_training["RecordID"]
classification_measurements_ob3_t = train_X[train_X["RecordID"].isin(classification_obesity_grade3_training_ids)]
classification_measurements_ob3_t = classification_measurements_ob3_t.count() 
classification_measurements_ob3_t 

RecordID       14064
level_1        14064
Time           14064
ALP              254
ALT              260
AST              259
Age            13539
Albumin          166
BUN             1082
Bilirubin        262
Cholesterol       22
Creatinine      1082
DiasABP         9303
FiO2            2912
GCS             4067
Gender           293
Glucose          963
HCO3            1039
HCT             1388
HR             12964
Height         13539
ICUType          293
K               1083
Lactate          842
MAP             9258
MechVent        2803
Mg              1065
NIDiasABP       4280
NIMAP           4228
NISysABP        4284
Na               993
PaCO2           2423
PaO2            2422
Platelets       1074
RespRate        1894
SaO2             992
SysABP          9303
Temp            6139
TroponinI         21
TroponinT        151
Urine          10625
WBC              993
Weight          8014
pH              2507
dtype: int64

In [105]:
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [106]:
df_train = pd.DataFrame(columns=df_columns)
df_train_transpose = df_train.T
df_train_transpose["Female"] = female_gender_measurements_training
df_train_transpose["Male"] = male_gender_measurements_training
df_train_transpose["Undefined Gender"] = undefined_gender_measurements_training
df_train_transpose["ICUType 1"] = ICUType_1_measurements_training 
df_train_transpose["ICUType 2"] = ICUType_2_measurements_training 
df_train_transpose["ICUType 3"] = ICUType_3_measurements_training 
df_train_transpose["ICUType 4"] = ICUType_4_measurements_training 
df_train_transpose["Age 65+"] = age_65_and_above_measurements_training
df_train_transpose["Age 65-"] = age_under_65_measurements_training 
df_train_transpose['Undefined Classification'] = classification_undefined_measurements_training
df_train_transpose['Low Weight'] = classification_measurements_l_w_t 
df_train_transpose['Normal Weight'] = classification_measurements_n_w_t 
df_train_transpose['Overweight'] = classification_measurements_o_w_t 
df_train_transpose['Obesity Grade 1'] = classification_measurements_ob1_t 
df_train_transpose['Obesity Grade 2'] = classification_measurements_ob2_t 
df_train_transpose['Obesity Grade 3'] = classification_measurements_ob3_t 
df_train_transpose = df_train_transpose.drop('RecordID',axis=0)
df_train_transpose = df_train_transpose.drop('level_1',axis=0)
df_train_transpose = df_train_transpose.drop('Time',axis=0)
df_train_transpose = df_train_transpose.drop('Age',axis=0)
df_train_transpose = df_train_transpose.drop('Gender',axis=0)
df_train_transpose = df_train_transpose.drop('Height',axis=0)
df_train_transpose = df_train_transpose.drop('ICUType',axis=0)

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_train_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,2780,3360,3,832,548,2888,1875,2705,3438,2909,87,868,1158,603,264,254
ALT,2851,3461,4,873,564,2976,1903,2766,3550,3006,90,894,1181,619,266,260
AST,2846,3460,4,871,563,2976,1900,2768,3542,3007,90,892,1180,617,265,259
Albumin,2161,2538,4,626,374,2222,1481,2277,2426,2355,75,660,828,433,186,166
BUN,11981,14780,29,3837,5095,9973,7885,14155,12635,12276,446,4049,5102,2669,1166,1082
Bilirubin,2883,3466,3,857,554,3070,1871,2811,3541,3028,93,890,1192,615,272,262
Cholesterol,258,374,0,333,25,132,142,371,261,282,10,99,136,54,29,22
Creatinine,12032,14848,29,3892,5114,10008,7895,14225,12684,12334,447,4063,5128,2678,1177,1082
DiasABP,86185,113600,205,22513,61215,46493,69769,110217,89773,68873,3651,36479,46518,25011,10155,9303
FiO2,25713,31886,36,5947,13181,19501,19006,31292,26343,23751,963,9247,11527,6486,2749,2912


<h3>Validation data</h3>

<h4>Loading validation dataset</h4>

In [107]:
validation_X = physionet2012_dataset['val_X']

In [108]:
validation_female_gender = validation_X[validation_X['Gender'] == 0.0]
validation_female_gender_ids = validation_female_gender["RecordID"]
female_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_female_gender_ids)]
female_gender_measurements_validation = female_gender_measurements_validation.count()
female_gender_measurements_validation

RecordID       37344
level_1        37344
Time           37344
ALP              582
ALT              610
AST              611
Age            35296
Albumin          459
BUN             2666
Bilirubin        610
Cholesterol       65
Creatinine      2679
DiasABP        19082
FiO2            5816
GCS            12115
Gender           778
Glucose         2571
HCO3            2642
HCT             3553
HR             33911
Height         35296
ICUType          778
K               2841
Lactate         1522
MAP            18985
MechVent        5870
Mg              2622
NIDiasABP      16711
NIMAP          16481
NISysABP       16727
Na              2665
PaCO2           3949
PaO2            3936
Platelets       2706
RespRate        9613
SaO2            1387
SysABP         19083
Temp           13574
TroponinI         64
TroponinT        437
Urine          26532
WBC             2492
Weight         21177
pH              4106
dtype: int64

In [109]:
validation_male_gender = validation_X[validation_X['Gender'] == 1.0]
validation_male_gender_ids = validation_male_gender["RecordID"]
male_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_male_gender_ids)]
male_gender_measurements_validation = male_gender_measurements_validation.count()
male_gender_measurements_validation

RecordID       54576
level_1        54576
Time           54576
ALP              918
ALT              943
AST              946
Age            51371
Albumin          695
BUN             4021
Bilirubin        967
Cholesterol      111
Creatinine      4047
DiasABP        30861
FiO2            8781
GCS            17465
Gender          1137
Glucose         3771
HCO3            3953
HCT             5445
HR             49130
Height         51371
ICUType         1137
K               4205
Lactate         2436
MAP            30683
MechVent        8369
Mg              3925
NIDiasABP      22355
NIMAP          22141
NISysABP       22381
Na              3905
PaCO2           6714
PaO2            6705
Platelets       4197
RespRate       11722
SaO2            2222
SysABP         30867
Temp           20689
TroponinI        110
TroponinT        607
Urine          37120
WBC             3772
Weight         28895
pH              7023
dtype: int64

In [110]:
validation_undefined_gender = validation_X[validation_X['Gender'] == -1.0]
validation_undefined_gender_ids = validation_undefined_gender["RecordID"]
undefined_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_undefined_gender_ids)]
undefined_gender_measurements_validation = undefined_gender_measurements_validation.count()
undefined_gender_measurements_validation

RecordID       144
level_1        144
Time           144
ALP              2
ALT              2
AST              2
Age            138
Albumin          1
BUN              8
Bilirubin        2
Cholesterol      0
Creatinine       8
DiasABP         85
FiO2            16
GCS             61
Gender           3
Glucose          8
HCO3             8
HCT              7
HR             135
Height         138
ICUType          3
K                8
Lactate          1
MAP             84
MechVent        26
Mg               7
NIDiasABP       75
NIMAP           75
NISysABP        75
Na               8
PaCO2            8
PaO2             8
Platelets        7
RespRate        42
SaO2             0
SysABP          85
Temp            35
TroponinI        0
TroponinT        0
Urine           97
WBC              8
Weight          91
pH               9
dtype: int64

In [111]:
validation_ICUType_1 = validation_X[validation_X['ICUType'] == 1.0]
validation_ICUType_1 = validation_ICUType_1[validation_ICUType_1['Time'] == 0.0]
validation_ICUType_1_ids = validation_ICUType_1['RecordID']
ICUType_1_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_1_ids)]
ICUType_1_measurements_validation = ICUType_1_measurements_validation.count()
ICUType_1_measurements_validation

RecordID       14064
level_1        14064
Time           14064
ALP              216
ALT              233
AST              233
Age            12853
Albumin          163
BUN             1014
Bilirubin        224
Cholesterol      104
Creatinine      1038
DiasABP         6112
FiO2            1464
GCS             3632
Gender           293
Glucose          986
HCO3             989
HCT             1268
HR             12229
Height         12853
ICUType          293
K               1187
Lactate          321
MAP             6081
MechVent        1418
Mg               991
NIDiasABP       6557
NIMAP           6511
NISysABP        6559
Na               982
PaCO2           1135
PaO2            1137
Platelets       1033
RespRate        4882
SaO2             648
SysABP          6112
Temp            4109
TroponinI         46
TroponinT        313
Urine           7965
WBC              919
Weight          6185
pH              1159
dtype: int64

In [112]:
validation_ICUType_2 = validation_X[validation_X['ICUType'] == 2.0]
validation_ICUType_2 = validation_ICUType_2[validation_ICUType_2['Time'] == 0.0]
validation_ICUType_2_ids = validation_ICUType_2['RecordID']
ICUType_2_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_2_ids)]
ICUType_2_measurements_validation = ICUType_2_measurements_validation.count()
ICUType_2_measurements_validation

RecordID       19008
level_1        19008
Time           19008
ALP              153
ALT              158
AST              158
Age            18370
Albumin           92
BUN             1238
Bilirubin        157
Cholesterol        3
Creatinine      1237
DiasABP        14747
FiO2            3154
GCS             5075
Gender           396
Glucose          887
HCO3            1163
HCT             2133
HR             17375
Height         18370
ICUType          396
K               1002
Lactate          672
MAP            14803
MechVent        3201
Mg              1192
NIDiasABP       4154
NIMAP           4099
NISysABP        4166
Na               977
PaCO2           3872
PaO2            3859
Platelets       1591
RespRate         607
SaO2            1951
SysABP         14747
Temp           11005
TroponinI         19
TroponinT         52
Urine          15878
WBC             1335
Weight         10160
pH              4219
dtype: int64

In [113]:
validation_ICUType_3 = validation_X[validation_X['ICUType'] == 3.0]
validation_ICUType_3 = validation_ICUType_3[validation_ICUType_3['Time'] == 0.0]
validation_ICUType_3_ids = validation_ICUType_3['RecordID']
ICUType_3_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_3_ids)]
ICUType_3_measurements_validation = ICUType_3_measurements_validation.count()
ICUType_3_measurements_validation

RecordID       32928
level_1        32928
Time           32928
ALP              692
ALT              714
AST              715
Age            30716
Albumin          553
BUN             2510
Bilirubin        757
Cholesterol       30
Creatinine      2523
DiasABP        11863
FiO2            4878
GCS             8972
Gender           686
Glucose         2523
HCO3            2528
HCT             2997
HR             29625
Height         30716
ICUType          686
K               2732
Lactate         1337
MAP            11656
MechVent        4649
Mg              2376
NIDiasABP      19019
NIMAP          18757
NISysABP       19031
Na              2575
PaCO2           2493
PaO2            2495
Platelets       2287
RespRate        9920
SaO2             401
SysABP         11867
Temp            9615
TroponinI         66
TroponinT        473
Urine          20181
WBC             2172
Weight         22253
pH              2537
dtype: int64

In [114]:
validation_ICUType_4 = validation_X[validation_X['ICUType'] == 4.0]
validation_ICUType_4 = validation_ICUType_4[validation_ICUType_4['Time'] == 0.0]
validation_ICUType_4_ids = validation_ICUType_4['RecordID']
ICUType_4_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_4_ids)]
ICUType_4_measurements_validation = ICUType_4_measurements_validation.count()
ICUType_4_measurements_validation

RecordID       26064
level_1        26064
Time           26064
ALP              441
ALT              450
AST              453
Age            24866
Albumin          347
BUN             1933
Bilirubin        441
Cholesterol       39
Creatinine      1936
DiasABP        17306
FiO2            5117
GCS            11962
Gender           543
Glucose         1954
HCO3            1923
HCT             2607
HR             23947
Height         24866
ICUType          543
K               2133
Lactate         1629
MAP            17212
MechVent        4997
Mg              1995
NIDiasABP       9411
NIMAP           9330
NISysABP        9427
Na              2044
PaCO2           3171
PaO2            3158
Platelets       1999
RespRate        5968
SaO2             609
SysABP         17309
Temp            9569
TroponinI         43
TroponinT        206
Urine          19725
WBC             1846
Weight         11565
pH              3223
dtype: int64

In [115]:
age_65_and_above_validation = validation_X[validation_X['Age'] >= 65]
age_65_and_above_validation = age_65_and_above_validation[age_65_and_above_validation['Time'] == 0.0]
age_65_and_above_validation_ids = age_65_and_above_validation['RecordID']
age_65_and_above_measurements_validation = validation_X[validation_X['RecordID'].isin(age_65_and_above_validation_ids)]
age_65_and_above_measurements_validation = age_65_and_above_measurements_validation.count()
age_65_and_above_measurements_validation

RecordID       50784
level_1        50784
Time           50784
ALP              694
ALT              711
AST              714
Age            48099
Albumin          560
BUN             3631
Bilirubin        732
Cholesterol      106
Creatinine      3652
DiasABP        27904
FiO2            8082
GCS            15826
Gender          1058
Glucose         3390
HCO3            3580
HCT             4985
HR             46114
Height         48099
ICUType         1058
K               3801
Lactate         1988
MAP            27763
MechVent        7719
Mg              3585
NIDiasABP      21658
NIMAP          21425
NISysABP       21681
Na              3542
PaCO2           5996
PaO2            5986
Platelets       3783
RespRate       11947
SaO2            2254
SysABP         27907
Temp           19730
TroponinI        102
TroponinT        736
Urine          35896
WBC             3423
Weight         27904
pH              6269
dtype: int64

In [116]:
age_under_65_validation = validation_X[validation_X['Age'] < 65]
age_under_65_validation = age_under_65_validation[age_under_65_validation['Time'] == 0.0]
age_under_65_validation_ids = age_under_65_validation["RecordID"]
age_under_65__measurements_validation = validation_X[validation_X["RecordID"].isin(age_under_65_validation_ids)]
age_under_65__measurements_validation = age_under_65__measurements_validation.count()
age_under_65__measurements_validation

RecordID       41280
level_1        41280
Time           41280
ALP              808
ALT              844
AST              845
Age            38706
Albumin          595
BUN             3064
Bilirubin        847
Cholesterol       70
Creatinine      3082
DiasABP        22124
FiO2            6531
GCS            13815
Gender           860
Glucose         2960
HCO3            3023
HCT             4020
HR             37062
Height         38706
ICUType          860
K               3253
Lactate         1971
MAP            21989
MechVent        6546
Mg              2969
NIDiasABP      17483
NIMAP          17272
NISysABP       17502
Na              3036
PaCO2           4675
PaO2            4663
Platelets       3127
RespRate        9430
SaO2            1355
SysABP         22128
Temp           14568
TroponinI         72
TroponinT        308
Urine          27853
WBC             2849
Weight         22259
pH              4869
dtype: int64

In [117]:
filtered_validation_X = validation_X[(validation_X['Height'] != -1) & (validation_X['Weight'] != -1) & (validation_X['Height'].notna()) & (validation_X['Weight'].notna())] 

In [118]:
filtered_validation_X_meters = filtered_validation_X.copy()
filtered_validation_X_meters['Height'] = filtered_validation_X['Height'] / 100 # Converting Height from cm to meters
filtered_validation_X_meters['Height']

288       1.626
289       1.626
290       1.626
291       1.626
293       1.626
          ...  
574891    1.829
574892    1.829
574893    1.829
574894    1.829
575184    1.727
Name: Height, Length: 26104, dtype: float64

In [119]:
bmi_data_validation = filtered_validation_X_meters
bmi_data_validation["BMI"] = round(filtered_validation_X_meters["Weight"] / (filtered_validation_X_meters["Height"] ** 2), 1)
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
288,132548,0,0.0,,,,68.0,,,,...,,36.3,,,,,87.0,,32.9,Obesidade grau 1
289,132548,1,1.0,,,,68.0,,,,...,205.0,35.8,,,120.0,,87.0,,32.9,Obesidade grau 1
290,132548,2,2.0,,,,68.0,,32.0,,...,212.142857,,,,60.0,6.2,87.0,,32.9,Obesidade grau 1
291,132548,3,3.0,,,,68.0,,,,...,201.25,,,,140.0,,87.0,,32.9,Obesidade grau 1
293,132548,5,5.0,,,,68.0,,,,...,175.0,36.6,,,190.0,,87.0,,32.9,Obesidade grau 1


In [120]:
classification_undefined_validation_ids = bmi_data_validation["RecordID"]
classification_undefined_measurements_validation = validation_X[~validation_X["RecordID"].isin(classification_undefined_validation_ids)]
classification_undefined_measurements_validation = classification_undefined_measurements_validation.count()
classification_undefined_measurements_validation

RecordID       43632
level_1        43632
Time           43632
ALP              709
ALT              737
AST              737
Age            40333
Albumin          582
BUN             3023
Bilirubin        748
Cholesterol       84
Creatinine      3045
DiasABP        17339
FiO2            6097
GCS            14753
Gender           909
Glucose         3045
HCO3            3026
HCT             3801
HR             38806
Height         40333
ICUType          909
K               3350
Lactate         1516
MAP            17120
MechVent        5695
Mg              2940
NIDiasABP      23567
NIMAP          23352
NISysABP       23587
Na              3145
PaCO2           3040
PaO2            3031
Platelets       2885
RespRate       14189
SaO2             551
SysABP         17346
Temp           11866
TroponinI         60
TroponinT        554
Urine          27485
WBC             2729
Weight         24059
pH              3103
dtype: int64

In [121]:
classification_low_weight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Baixo peso']
classification_low_weight_validation_ids = classification_low_weight_validation["RecordID"]
classification_measurements_l_w_v = validation_X[validation_X["RecordID"].isin(classification_low_weight_validation_ids)]
classification_measurements_l_w_v = classification_measurements_l_w_v.count() 
classification_measurements_l_w_v

RecordID       1824
level_1        1824
Time           1824
ALP              34
ALT              34
AST              34
Age            1773
Albumin          23
BUN             132
Bilirubin        34
Cholesterol       2
Creatinine      134
DiasABP        1130
FiO2            279
GCS             579
Gender           38
Glucose         117
HCO3            126
HCT             191
HR             1693
Height         1773
ICUType          38
K               134
Lactate         113
MAP            1123
MechVent        312
Mg              138
NIDiasABP       633
NIMAP           621
NISysABP        633
Na              121
PaCO2           259
PaO2            258
Platelets       151
RespRate        254
SaO2            135
SysABP         1130
Temp            856
TroponinI         7
TroponinT        22
Urine          1361
WBC             133
Weight          997
pH              285
dtype: int64

In [122]:
classification_normal_weight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Peso normal']
classification_normal_weight_validation_ids = classification_normal_weight_validation["RecordID"]
classification_measurements_n_w_v = validation_X[validation_X["RecordID"].isin(classification_normal_weight_validation_ids)]
classification_measurements_n_w_v = classification_measurements_n_w_v.count() 
classification_measurements_n_w_v

RecordID       16800
level_1        16800
Time           16800
ALP              269
ALT              277
AST              277
Age            16117
Albumin          198
BUN             1253
Bilirubin        280
Cholesterol       32
Creatinine      1260
DiasABP        11178
FiO2            2887
GCS             5137
Gender           350
Glucose         1126
HCO3            1222
HCT             1814
HR             15349
Height         16117
ICUType          350
K               1278
Lactate          887
MAP            11139
MechVent        2879
Mg              1262
NIDiasABP       5601
NIMAP           5564
NISysABP        5609
Na              1175
PaCO2           2513
PaO2            2504
Platelets       1424
RespRate        2419
SaO2            1042
SysABP         11178
Temp            7686
TroponinI         33
TroponinT        173
Urine          12312
WBC             1238
Weight          8662
pH              2672
dtype: int64

In [123]:
classification_overweight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Sobrepeso']
classification_overweight_validation_ids = classification_overweight_validation["RecordID"]
classification_measurements_o_w_v = validation_X[validation_X["RecordID"].isin(classification_overweight_validation_ids)]
classification_measurements_o_w_v = classification_measurements_o_w_v.count() 
classification_measurements_o_w_v

RecordID       19152
level_1        19152
Time           19152
ALP              282
ALT              297
AST              297
Age            18467
Albumin          207
BUN             1449
Bilirubin        298
Cholesterol       27
Creatinine      1455
DiasABP        13617
FiO2            3398
GCS             5830
Gender           399
Glucose         1246
HCO3            1401
HCT             2163
HR             17615
Height         18467
ICUType          399
K               1376
Lactate          885
MAP            13597
MechVent        3348
Mg              1425
NIDiasABP       5782
NIMAP           5697
NISysABP        5787
Na              1314
PaCO2           3216
PaO2            3207
Platelets       1652
RespRate        2466
SaO2            1307
SysABP         13617
Temp            9570
TroponinI         47
TroponinT        150
Urine          14660
WBC             1448
Weight         10745
pH              3413
dtype: int64

In [124]:
classification_obesity_grade1_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_validation_ids = classification_obesity_grade1_validation["RecordID"]
classification_measurements_ob1_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade1_validation_ids)]
classification_measurements_ob1_v  = classification_measurements_ob1_v .count() 
classification_measurements_ob1_v 

RecordID       13968
level_1        13968
Time           13968
ALP              242
ALT              249
AST              250
Age            13485
Albumin          169
BUN             1092
Bilirubin        253
Cholesterol       23
Creatinine      1095
DiasABP        10194
FiO2            2684
GCS             4294
Gender           291
Glucose          942
HCO3            1059
HCT             1649
HR             12833
Height         13485
ICUType          291
K               1054
Lactate          807
MAP            10167
MechVent        2764
Mg              1041
NIDiasABP       3883
NIMAP           3800
NISysABP        3889
Na              1003
PaCO2           2640
PaO2            2634
Platelets       1235
RespRate        1492
SaO2            1073
SysABP         10194
Temp            7108
TroponinI         23
TroponinT        115
Urine          11148
WBC             1071
Weight          7867
pH              2783
dtype: int64

In [125]:
classification_obesity_grade2_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_validation_ids = classification_obesity_grade2_validation["RecordID"]
classification_measurements_ob2_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade2_validation_ids)]
classification_measurements_ob2_v  = classification_measurements_ob2_v .count() 
classification_measurements_ob2_v 

RecordID       5136
level_1        5136
Time           5136
ALP              84
ALT              84
AST              86
Age            4992
Albumin          48
BUN             386
Bilirubin        87
Cholesterol       8
Creatinine      387
DiasABP        3832
FiO2            970
GCS            1414
Gender          107
Glucose         318
HCO3            373
HCT             557
HR             4753
Height         4992
ICUType         107
K               374
Lactate         260
MAP            3866
MechVent        986
Mg              371
NIDiasABP      1254
NIMAP          1230
NISysABP       1255
Na              344
PaCO2           959
PaO2            957
Platelets       419
RespRate        466
SaO2            380
SysABP         3832
Temp           2607
TroponinI        15
TroponinT        64
Urine          4123
WBC             379
Weight         3383
pH             1006
dtype: int64

In [126]:
classification_obesity_grade3_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_validation_ids = classification_obesity_grade3_validation["RecordID"]
classification_measurements_ob3_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade3_validation_ids)]
classification_measurements_ob3_v  = classification_measurements_ob3_v .count() 
classification_measurements_ob3_v 

RecordID       3888
level_1        3888
Time           3888
ALP              59
ALT              60
AST              61
Age            3738
Albumin          46
BUN             288
Bilirubin        60
Cholesterol       8
Creatinine      288
DiasABP        2683
FiO2            753
GCS            1117
Gender           81
Glucose         265
HCO3            282
HCT             356
HR             3608
Height         3738
ICUType          81
K               293
Lactate         213
MAP            2712
MechVent        812
Mg              282
NIDiasABP      1191
NIMAP          1163
NISysABP       1198
Na              272
PaCO2           693
PaO2            697
Platelets       283
RespRate        544
SaO2            292
SysABP         2683
Temp           1867
TroponinI         6
TroponinT        52
Urine          3004
WBC             257
Weight         2505
pH              716
dtype: int64

In [127]:
df_columns = validation_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [128]:
df_validation = pd.DataFrame(columns=df_columns)
df_validation_transpose = df_validation.T
df_validation_transpose["Female"] = female_gender_measurements_validation
df_validation_transpose["Male"] = male_gender_measurements_validation
df_validation_transpose["Undefined Gender"] = undefined_gender_measurements_validation
df_validation_transpose["ICUType 1"] = ICUType_1_measurements_validation
df_validation_transpose["ICUType 2"] = ICUType_2_measurements_validation
df_validation_transpose["ICUType 3"] = ICUType_3_measurements_validation
df_validation_transpose["ICUType 4"] = ICUType_4_measurements_validation
df_validation_transpose["Age 65+"] = age_65_and_above_measurements_validation
df_validation_transpose["Age 65-"] = age_under_65__measurements_validation
df_validation_transpose['Undefined Classification'] = classification_undefined_measurements_validation
df_validation_transpose['Low Weight'] = classification_measurements_l_w_v
df_validation_transpose['Normal Weight'] = classification_measurements_n_w_v
df_validation_transpose['Overweight'] = classification_measurements_o_w_v
df_validation_transpose['Obesity Grade 1'] = classification_measurements_ob1_v
df_validation_transpose['Obesity Grade 2'] = classification_measurements_ob2_v
df_validation_transpose['Obesity Grade 3'] = classification_measurements_ob3_v
df_validation_transpose = df_validation_transpose.drop('RecordID',axis=0)
df_validation_transpose = df_validation_transpose.drop('level_1',axis=0)
df_validation_transpose = df_validation_transpose.drop('Time',axis=0)
df_validation_transpose = df_validation_transpose.drop('Age',axis=0)
df_validation_transpose = df_validation_transpose.drop('Gender',axis=0)
df_validation_transpose = df_validation_transpose.drop('Height',axis=0)
df_validation_transpose = df_validation_transpose.drop('ICUType',axis=0)

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_validation_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,582,918,2,216,153,692,441,694,808,709,34,269,282,242,84,59
ALT,610,943,2,233,158,714,450,711,844,737,34,277,297,249,84,60
AST,611,946,2,233,158,715,453,714,845,737,34,277,297,250,86,61
Albumin,459,695,1,163,92,553,347,560,595,582,23,198,207,169,48,46
BUN,2666,4021,8,1014,1238,2510,1933,3631,3064,3023,132,1253,1449,1092,386,288
Bilirubin,610,967,2,224,157,757,441,732,847,748,34,280,298,253,87,60
Cholesterol,65,111,0,104,3,30,39,106,70,84,2,32,27,23,8,8
Creatinine,2679,4047,8,1038,1237,2523,1936,3652,3082,3045,134,1260,1455,1095,387,288
DiasABP,19082,30861,85,6112,14747,11863,17306,27904,22124,17339,1130,11178,13617,10194,3832,2683
FiO2,5816,8781,16,1464,3154,4878,5117,8082,6531,6097,279,2887,3398,2684,970,753


<h3> Test data</h3>

<h4>Loading test dataset</h4>

In [152]:
test_X = physionet2012_dataset['test_X']

In [153]:
var1 = test_X.count()
var1

RecordID       115152
level_1        115152
Time           115152
ALP              1780
ALT              1824
AST              1827
Age            108365
Albumin          1381
BUN              8185
Bilirubin        1862
Cholesterol       186
Creatinine       8216
DiasABP         61780
FiO2            18082
GCS             36791
Gender           2399
Glucose          7697
HCO3             7989
HCT             10831
HR             103753
Height         108365
ICUType          2399
K                8591
Lactate          4538
MAP             61325
MechVent        17325
Mg               8154
NIDiasABP       48650
NIMAP           47889
NISysABP        48694
Na               8021
PaCO2           13163
PaO2            13144
Platelets        8391
RespRate        26821
SaO2             4538
SysABP          61783
Temp            41916
TroponinI         231
TroponinT        1247
Urine           79884
WBC              7659
Weight          59538
pH              13778
dtype: int64

In [131]:
test_female_gender = test_X[test_X['Gender'] == 0.0]
test_female_gender_ids = test_female_gender["RecordID"]
female_gender_measurements_test = test_X[test_X["RecordID"].isin(test_female_gender_ids)]
female_gender_measurements_test = female_gender_measurements_test.count()
female_gender_measurements_test

RecordID       50256
level_1        50256
Time           50256
ALP              740
ALT              760
AST              759
Age            47157
Albumin          594
BUN             3574
Bilirubin        780
Cholesterol       92
Creatinine      3584
DiasABP        25190
FiO2            7425
GCS            16277
Gender          1047
Glucose         3405
HCO3            3509
HCT             4514
HR             45197
Height         47157
ICUType         1047
K               3784
Lactate         1859
MAP            25018
MechVent        7406
Mg              3529
NIDiasABP      22446
NIMAP          22078
NISysABP       22464
Na              3560
PaCO2           5142
PaO2            5137
Platelets       3536
RespRate       13255
SaO2            1738
SysABP         25192
Temp           17196
TroponinI        129
TroponinT        478
Urine          34706
WBC             3289
Weight         25630
pH              5355
dtype: int64

In [132]:
test_male_gender = test_X[test_X['Gender'] == 1.0]
test_male_gender_ids = test_male_gender["RecordID"]
male_gender_measurements_test = test_X[test_X["RecordID"].isin(test_male_gender_ids)]
male_gender_measurements_test = male_gender_measurements_test.count()
male_gender_measurements_test

RecordID       64896
level_1        64896
Time           64896
ALP             1040
ALT             1064
AST             1068
Age            61208
Albumin          787
BUN             4611
Bilirubin       1082
Cholesterol       94
Creatinine      4632
DiasABP        36590
FiO2           10657
GCS            20514
Gender          1352
Glucose         4292
HCO3            4480
HCT             6317
HR             58556
Height         61208
ICUType         1352
K               4807
Lactate         2679
MAP            36307
MechVent        9919
Mg              4625
NIDiasABP      26204
NIMAP          25811
NISysABP       26230
Na              4461
PaCO2           8021
PaO2            8007
Platelets       4855
RespRate       13566
SaO2            2800
SysABP         36591
Temp           24720
TroponinI        102
TroponinT        769
Urine          45178
WBC             4370
Weight         33908
pH              8423
dtype: int64

In [133]:
test_undefined_gender = test_X[test_X['Gender'] == -1.0]
test_undefined_gender_ids = test_undefined_gender["RecordID"]
undefined_gender_measurements_test = test_X[test_X["RecordID"].isin(test_undefined_gender_ids)]
undefined_gender_measurements_test = undefined_gender_measurements_test.count()
undefined_gender_measurements_test

RecordID       0
level_1        0
Time           0
ALP            0
ALT            0
AST            0
Age            0
Albumin        0
BUN            0
Bilirubin      0
Cholesterol    0
Creatinine     0
DiasABP        0
FiO2           0
GCS            0
Gender         0
Glucose        0
HCO3           0
HCT            0
HR             0
Height         0
ICUType        0
K              0
Lactate        0
MAP            0
MechVent       0
Mg             0
NIDiasABP      0
NIMAP          0
NISysABP       0
Na             0
PaCO2          0
PaO2           0
Platelets      0
RespRate       0
SaO2           0
SysABP         0
Temp           0
TroponinI      0
TroponinT      0
Urine          0
WBC            0
Weight         0
pH             0
dtype: int64

In [134]:
test_ICUType_1 = test_X[test_X['ICUType'] == 1.0]
test_ICUType_1 = test_ICUType_1[test_ICUType_1['Time'] == 0.0]
test_ICUType_1_ids = test_ICUType_1['RecordID']
ICUType_1_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_1_ids)]
ICUType_1_measurements_test = ICUType_1_measurements_test.count()
ICUType_1_measurements_test

RecordID       16656
level_1        16656
Time           16656
ALP              211
ALT              228
AST              228
Age            15207
Albumin          170
BUN             1202
Bilirubin        229
Cholesterol      101
Creatinine      1224
DiasABP         6376
FiO2            1827
GCS             4352
Gender           347
Glucose         1132
HCO3            1141
HCT             1418
HR             14473
Height         15207
ICUType          347
K               1412
Lactate          330
MAP             6313
MechVent        1660
Mg              1226
NIDiasABP       8260
NIMAP           8232
NISysABP        8264
Na              1144
PaCO2           1332
PaO2            1329
Platelets       1193
RespRate        5622
SaO2             792
SysABP          6377
Temp            4765
TroponinI         51
TroponinT        373
Urine           9315
WBC             1065
Weight          7159
pH              1345
dtype: int64

In [135]:
test_ICUType_2 = test_X[test_X['ICUType'] == 2.0]
test_ICUType_2 = test_ICUType_2[test_ICUType_2['Time'] == 0.0]
test_ICUType_2_ids = test_ICUType_2['RecordID']
ICUType_2_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_2_ids)]
ICUType_2_measurements_test = ICUType_2_measurements_test.count()
ICUType_2_measurements_test

RecordID       24336
level_1        24336
Time           24336
ALP              156
ALT              159
AST              159
Age            23559
Albumin           99
BUN             1540
Bilirubin        155
Cholesterol        5
Creatinine      1540
DiasABP        19238
FiO2            4248
GCS             6476
Gender           507
Glucose         1091
HCO3            1423
HCT             2668
HR             22356
Height         23559
ICUType          507
K               1240
Lactate          843
MAP            19231
MechVent        4031
Mg              1543
NIDiasABP       5352
NIMAP           5303
NISysABP        5362
Na              1177
PaCO2           4929
PaO2            4913
Platelets       1948
RespRate        1390
SaO2            2666
SysABP         19238
Temp           14065
TroponinI         27
TroponinT         83
Urine          20569
WBC             1621
Weight         12598
pH              5441
dtype: int64

In [136]:
test_ICUType_3 = test_X[test_X['ICUType'] == 3.0]
test_ICUType_3 = test_ICUType_3[test_ICUType_3['Time'] == 0.0]
test_ICUType_3_ids = test_ICUType_3['RecordID']
ICUType_3_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_3_ids)]
ICUType_3_measurements_test = ICUType_3_measurements_test.count()
ICUType_3_measurements_test

RecordID       41088
level_1        41088
Time           41088
ALP              873
ALT              890
AST              891
Age            38079
Albumin          659
BUN             3029
Bilirubin        939
Cholesterol       40
Creatinine      3038
DiasABP        14479
FiO2            5849
GCS            10814
Gender           856
Glucose         3024
HCO3            3033
HCT             3617
HR             36687
Height         38079
ICUType          856
K               3267
Lactate         1601
MAP            14285
MechVent        5575
Mg              2875
NIDiasABP      23220
NIMAP          22752
NISysABP       23237
Na              3116
PaCO2           3094
PaO2            3102
Platelets       2802
RespRate       12088
SaO2             470
SysABP         14480
Temp           11601
TroponinI        104
TroponinT        535
Urine          24914
WBC             2666
Weight         25915
pH              3133
dtype: int64

In [137]:
test_ICUType_4 = test_X[test_X['ICUType'] == 4.0]
test_ICUType_4 = test_ICUType_4[test_ICUType_4['Time'] == 0.0]
test_ICUType_4_ids = test_ICUType_4['RecordID']
ICUType_4_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_4_ids)]
ICUType_4_measurements_test = ICUType_4_measurements_test.count()
ICUType_4_measurements_test

RecordID       33072
level_1        33072
Time           33072
ALP              540
ALT              547
AST              549
Age            31520
Albumin          453
BUN             2414
Bilirubin        539
Cholesterol       40
Creatinine      2414
DiasABP        21687
FiO2            6158
GCS            15149
Gender           689
Glucose         2450
HCO3            2392
HCT             3128
HR             30237
Height         31520
ICUType          689
K               2672
Lactate         1764
MAP            21496
MechVent        6059
Mg              2510
NIDiasABP      11818
NIMAP          11602
NISysABP       11831
Na              2584
PaCO2           3808
PaO2            3800
Platelets       2448
RespRate        7721
SaO2             610
SysABP         21688
Temp           11485
TroponinI         49
TroponinT        256
Urine          25086
WBC             2307
Weight         13866
pH              3859
dtype: int64

In [138]:
age_65_and_above_test= test_X[test_X['Age'] >= 65]
age_65_and_above_test = age_65_and_above_test[age_65_and_above_test['Time'] == 0.0]
age_65_and_above_test_ids = age_65_and_above_test['RecordID']
age_65_and_above_measurements_test= test_X[test_X['RecordID'].isin(age_65_and_above_test_ids)]
age_65_and_above_measurements_test = age_65_and_above_measurements_test.count()
age_65_and_above_measurements_test

RecordID       62592
level_1        62592
Time           62592
ALP              784
ALT              805
AST              808
Age            59150
Albumin          653
BUN             4416
Bilirubin        819
Cholesterol      113
Creatinine      4435
DiasABP        34403
FiO2            9991
GCS            19461
Gender          1304
Glucose         4092
HCO3            4299
HCT             5844
HR             56726
Height         59150
ICUType         1304
K               4584
Lactate         2399
MAP            34201
MechVent        9229
Mg              4388
NIDiasABP      26449
NIMAP          26083
NISysABP       26476
Na              4267
PaCO2           7305
PaO2            7289
Platelets       4529
RespRate       14713
SaO2            2819
SysABP         34405
Temp           23778
TroponinI        155
TroponinT        862
Urine          44708
WBC             4134
Weight         32490
pH              7657
dtype: int64

In [139]:
age_under_65_test = test_X[test_X['Age'] < 65]
age_under_65_test = age_under_65_test[age_under_65_test['Time'] == 0.0]
age_under_65_test_ids = age_under_65_test["RecordID"]
age_under_65_measurements_test = test_X[test_X["RecordID"].isin(age_under_65_test_ids)]
age_under_65_measurements_test = age_under_65_measurements_test.count()
age_under_65_measurements_test

RecordID       52560
level_1        52560
Time           52560
ALP              996
ALT             1019
AST             1019
Age            49215
Albumin          728
BUN             3769
Bilirubin       1043
Cholesterol       73
Creatinine      3781
DiasABP        27377
FiO2            8091
GCS            17330
Gender          1095
Glucose         3605
HCO3            3690
HCT             4987
HR             47027
Height         49215
ICUType         1095
K               4007
Lactate         2139
MAP            27124
MechVent        8096
Mg              3766
NIDiasABP      22201
NIMAP          21806
NISysABP       22218
Na              3754
PaCO2           5858
PaO2            5855
Platelets       3862
RespRate       12108
SaO2            1719
SysABP         27378
Temp           18138
TroponinI         76
TroponinT        385
Urine          35176
WBC             3525
Weight         27048
pH              6121
dtype: int64

In [140]:
filtered_test_X = test_X[(test_X['Height'] != -1) & (test_X['Weight'] != -1) & (test_X['Height'].notna()) & (test_X['Weight'].notna())] 

In [141]:
filtered_test_X_meters = filtered_test_X.copy()
filtered_test_X_meters['Height'] = filtered_test_X['Height'] / 100 # Converting Height from cm to meters
filtered_test_X_meters['Height']

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
574841    1.651
574842    1.651
574843    1.651
574844    1.651
574845    1.651
Name: Height, Length: 30809, dtype: float64

In [142]:
bmi_data_test = filtered_test_X_meters
bmi_data_test["BMI"] = round(filtered_test_X_meters["Weight"] / (filtered_test_X_meters["Height"] ** 2), 1)
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.7,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.2,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.2,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.2,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.2,Sobrepeso


In [143]:
classification_undefined_test_ids = bmi_data_test["RecordID"]
classification_undefined_measurements_test = test_X[~test_X["RecordID"].isin(classification_undefined_test_ids)]
classification_undefined_measurements_test = classification_undefined_measurements_test.count()
classification_undefined_measurements_test

RecordID       56928
level_1        56928
Time           56928
ALP              928
ALT              948
AST              953
Age            52645
Albumin          745
BUN             4011
Bilirubin        982
Cholesterol       89
Creatinine      4029
DiasABP        23263
FiO2            8091
GCS            19201
Gender          1186
Glucose         4003
HCO3            3983
HCT             4803
HR             50600
Height         52645
ICUType         1186
K               4388
Lactate         1880
MAP            22950
MechVent        7511
Mg              3955
NIDiasABP      29690
NIMAP          29149
NISysABP       29706
Na              4172
PaCO2           4121
PaO2            4116
Platelets       3722
RespRate       18255
SaO2             652
SysABP         23265
Temp           15349
TroponinI         95
TroponinT        682
Urine          36664
WBC             3574
Weight         28729
pH              4184
dtype: int64

In [144]:
classification_low_weight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Baixo peso']
classification_low_weight_test_ids = classification_low_weight_test["RecordID"]
classification_measurements_l_w_test = test_X[test_X["RecordID"].isin(classification_low_weight_test_ids)]
classification_measurements_l_w_test = classification_measurements_l_w_test.count() 
classification_measurements_l_w_test

RecordID       1968
level_1        1968
Time           1968
ALP              23
ALT              22
AST              23
Age            1866
Albumin          17
BUN             133
Bilirubin        22
Cholesterol       3
Creatinine      132
DiasABP        1339
FiO2            351
GCS             554
Gender           41
Glucose         120
HCO3            130
HCT             155
HR             1805
Height         1866
ICUType          41
K               137
Lactate          92
MAP            1334
MechVent        358
Mg              133
NIDiasABP       669
NIMAP           649
NISysABP        673
Na              132
PaCO2           243
PaO2            246
Platelets       128
RespRate        351
SaO2            111
SysABP         1339
Temp            795
TroponinI         4
TroponinT        18
Urine          1409
WBC             123
Weight          927
pH              254
dtype: int64

In [145]:
classification_normal_weight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Peso normal']
classification_normal_weight_test_ids = classification_normal_weight_test["RecordID"]
classification_measurements_n_w_test = test_X[test_X["RecordID"].isin(classification_normal_weight_test_ids)]
classification_measurements_n_w_test = classification_measurements_n_w_test.count() 
classification_measurements_n_w_test

RecordID       19152
level_1        19152
Time           19152
ALP              273
ALT              278
AST              279
Age            18348
Albumin          220
BUN             1348
Bilirubin        282
Cholesterol       31
Creatinine      1351
DiasABP        12335
FiO2            3113
GCS             6032
Gender           399
Glucose         1231
HCO3            1306
HCT             1987
HR             17537
Height         18348
ICUType          399
K               1400
Lactate          774
MAP            12271
MechVent        3060
Mg              1369
NIDiasABP       6573
NIMAP           6512
NISysABP        6580
Na              1262
PaCO2           2649
PaO2            2640
Platelets       1516
RespRate        3219
SaO2            1150
SysABP         12335
Temp            8421
TroponinI         43
TroponinT        207
Urine          13975
WBC             1339
Weight          9853
pH              2840
dtype: int64

In [146]:
classification_overweight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Sobrepeso']
classification_overweight_test_ids = classification_overweight_test["RecordID"]
classification_measurements_o_w_test = test_X[test_X["RecordID"].isin(classification_overweight_test_ids)]
classification_measurements_o_w_test = classification_measurements_o_w_test.count() 
classification_measurements_o_w_test

RecordID       24096
level_1        24096
Time           24096
ALP              358
ALT              371
AST              370
Age            23111
Albumin          246
BUN             1736
Bilirubin        366
Cholesterol       32
Creatinine      1745
DiasABP        16989
FiO2            4251
GCS             7100
Gender           502
Glucose         1491
HCO3            1643
HCT             2629
HR             21941
Height         23111
ICUType          502
K               1682
Lactate         1191
MAP            16925
MechVent        4198
Mg              1774
NIDiasABP       6989
NIMAP           6885
NISysABP        7000
Na              1543
PaCO2           4175
PaO2            4162
Platelets       2031
RespRate        2779
SaO2            1904
SysABP         16990
Temp           12134
TroponinI         55
TroponinT        207
Urine          18467
WBC             1720
Weight         13206
pH              4503
dtype: int64

In [147]:
classification_obesity_grade1_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_test_ids = classification_obesity_grade1_test["RecordID"]
classification_measurements_ob1_test = test_X[test_X["RecordID"].isin(classification_obesity_grade1_test_ids)]
classification_measurements_ob1_test = classification_measurements_ob1_test .count() 
classification_measurements_ob1_test

RecordID       14736
level_1        14736
Time           14736
ALP              218
ALT              221
AST              220
Age            14263
Albumin          152
BUN             1078
Bilirubin        225
Cholesterol       21
Creatinine      1080
DiasABP        10582
FiO2            2763
GCS             4306
Gender           307
Glucose          869
HCO3            1011
HCT             1637
HR             13559
Height         14263
ICUType          307
K               1000
Lactate          759
MAP            10547
MechVent        2719
Mg              1076
NIDiasABP       4156
NIMAP           4136
NISysABP        4158
Na               941
PaCO2           2765
PaO2            2761
Platelets       1222
RespRate        1507
SaO2            1243
SysABP         10582
Temp            7862
TroponinI         41
TroponinT        128
Urine          11612
WBC             1047
Weight          8791
pH              2950
dtype: int64

In [148]:
classification_obesity_grade2_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_test_ids = classification_obesity_grade2_test["RecordID"]
classification_measurements_ob2_test = test_X[test_X["RecordID"].isin(classification_obesity_grade2_test_ids)]
classification_measurements_ob2_test = classification_measurements_ob2_test .count() 
classification_measurements_ob2_test

RecordID       7392
level_1        7392
Time           7392
ALP             106
ALT             111
AST             111
Age            7114
Albumin          83
BUN             551
Bilirubin       110
Cholesterol       5
Creatinine      553
DiasABP        5288
FiO2           1465
GCS            2036
Gender          154
Glucose         448
HCO3            517
HCT             850
HR             6734
Height         7114
ICUType         154
K               494
Lactate         422
MAP            5289
MechVent       1448
Mg              544
NIDiasABP      2000
NIMAP          1982
NISysABP       2005
Na              468
PaCO2          1508
PaO2           1507
Platelets       661
RespRate        528
SaO2            658
SysABP         5288
Temp           3903
TroponinI        13
TroponinT        49
Urine          5689
WBC             579
Weight         4051
pH             1568
dtype: int64

In [149]:
classification_obesity_grade3_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_test_ids = classification_obesity_grade3_test["RecordID"]
classification_measurements_ob3_test = test_X[test_X["RecordID"].isin(classification_obesity_grade3_test_ids)]
classification_measurements_ob3_test = classification_measurements_ob3_test .count() 
classification_measurements_ob3_test

RecordID       5280
level_1        5280
Time           5280
ALP              77
ALT              80
AST              79
Age            5031
Albumin          61
BUN             380
Bilirubin        83
Cholesterol       8
Creatinine      379
DiasABP        3648
FiO2           1022
GCS            1460
Gender          110
Glucose         323
HCO3            370
HCT             525
HR             4782
Height         5031
ICUType         110
K               354
Lactate         285
MAP            3665
MechVent        989
Mg              381
NIDiasABP      1439
NIMAP          1419
NISysABP       1442
Na              342
PaCO2           991
PaO2            989
Platelets       431
RespRate        434
SaO2            410
SysABP         3648
Temp           2454
TroponinI        10
TroponinT        37
Urine          3927
WBC             365
Weight         2983
pH             1020
dtype: int64

In [150]:
df_columns = test_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [151]:
df_test = pd.DataFrame(columns=df_columns)
df_test_transpose = df_test.T
df_test_transpose["Female"] = female_gender_measurements_test
df_test_transpose["Male"] = male_gender_measurements_test
df_test_transpose["Undefined Gender"] = undefined_gender_measurements_test
df_test_transpose["ICUType 1"] = ICUType_1_measurements_test
df_test_transpose["ICUType 2"] = ICUType_2_measurements_test
df_test_transpose["ICUType 3"] = ICUType_3_measurements_test
df_test_transpose["ICUType 4"] = ICUType_4_measurements_test
df_test_transpose["Age 65+"] = age_65_and_above_measurements_test
df_test_transpose["Age 65-"] = age_under_65_measurements_test
df_test_transpose['Undefined Classification'] = classification_undefined_measurements_test
df_test_transpose['Low Weight'] = classification_measurements_l_w_test 
df_test_transpose['Normal Weight'] = classification_measurements_n_w_test
df_test_transpose['Overweight'] = classification_measurements_o_w_test
df_test_transpose['Obesity Grade 1'] = classification_measurements_ob1_test 
df_test_transpose['Obesity Grade 2'] = classification_measurements_ob2_test
df_test_transpose['Obesity Grade 3'] = classification_measurements_ob3_test
df_test_transpose = df_test_transpose.drop('RecordID',axis=0)
df_test_transpose = df_test_transpose.drop('level_1',axis=0)
df_test_transpose = df_test_transpose.drop('Time',axis=0)
df_test_transpose = df_test_transpose.drop('Age',axis=0)
df_test_transpose = df_test_transpose.drop('Gender',axis=0)
df_test_transpose = df_test_transpose.drop('Height',axis=0)
df_test_transpose = df_test_transpose.drop('ICUType',axis=0)

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Test Set</h2>"))
df_test_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,740,1040,0,211,156,873,540,784,996,928,23,273,358,218,106,77
ALT,760,1064,0,228,159,890,547,805,1019,948,22,278,371,221,111,80
AST,759,1068,0,228,159,891,549,808,1019,953,23,279,370,220,111,79
Albumin,594,787,0,170,99,659,453,653,728,745,17,220,246,152,83,61
BUN,3574,4611,0,1202,1540,3029,2414,4416,3769,4011,133,1348,1736,1078,551,380
Bilirubin,780,1082,0,229,155,939,539,819,1043,982,22,282,366,225,110,83
Cholesterol,92,94,0,101,5,40,40,113,73,89,3,31,32,21,5,8
Creatinine,3584,4632,0,1224,1540,3038,2414,4435,3781,4029,132,1351,1745,1080,553,379
DiasABP,25190,36590,0,6376,19238,14479,21687,34403,27377,23263,1339,12335,16989,10582,5288,3648
FiO2,7425,10657,0,1827,4248,5849,6158,9991,8091,8091,351,3113,4251,2763,1465,1022
