In [1]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
# Make the parent directory importable so packages that live next to this
# notebook's folder can be imported by the cells below.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

<h2>Loading dataset</h2>

In [2]:
# NOTE(review): presumably pypotsModify lives in the parent directory that the
# first cell appends to sys.path, which is why this import comes after it — confirm.
from pypotsModify.benchpots.datasets import preprocess_physionet2012
# The result is indexed by split name; this notebook reads 'train_X' and 'val_X' from it.
# NOTE(review): the meaning of rate=0.1 is not visible here — confirm against
# preprocess_physionet2012 before changing it.
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-28 09:00:07 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-28 09:00:07 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-28 09:00:07 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-28 09:00:07 [INFO]: Loaded successfully!


<h3>Training data</h3>

<h4>Loading training dataset</h4>

In [3]:
# Training split. The cells below treat it as a DataFrame holding RecordID, Time
# and one column per recorded variable.
train_X = physionet2012_dataset['train_X']

In [4]:
# Rows that carry Gender == 0.0 (the value this notebook labels as female).
female_rows_mask = train_X["Gender"].eq(0.0)
training_female_gender = train_X.loc[female_rows_mask]
training_female_gender_ids = training_female_gender["RecordID"]

# Pull every row of those records and count the non-null entries per column.
in_female_records = train_X["RecordID"].isin(training_female_gender_ids)
female_gender_measurements_training = train_X.loc[in_female_records].count()
female_gender_measurements_training

RecordID       160656
level_1        160656
Time           160656
ALP              2629
ALT              2696
AST              2689
Age            151408
Albumin          2032
BUN             11637
Bilirubin        2742
Cholesterol       255
Creatinine      11674
DiasABP         82364
FiO2            24750
GCS             51695
Gender           3347
Glucose         11075
HCO3            11410
HCT             14775
HR             145425
Height         151408
ICUType          3347
K               12272
Lactate          6341
MAP             81992
MechVent        24201
Mg              11463
NIDiasABP       70772
NIMAP           69632
NISysABP        70831
Na              11510
PaCO2           17265
PaO2            17232
Platelets       11470
RespRate        42573
SaO2             6008
SysABP          82368
Temp            57300
TroponinI         387
TroponinT        1743
Urine          112226
WBC             10669
Weight          86449
pH              17887
dtype: int64

In [5]:
# Rows that carry Gender == 1.0 (the value this notebook labels as male).
male_rows_mask = train_X["Gender"].eq(1.0)
training_male_gender = train_X.loc[male_rows_mask]
training_male_gender_ids = training_male_gender["RecordID"]

# Pull every row of those records and count the non-null entries per column.
in_male_records = train_X["RecordID"].isin(training_male_gender_ids)
male_gender_measurements_training = train_X.loc[in_male_records].count()
male_gender_measurements_training

RecordID       207168
level_1        207168
Time           207168
ALP              3474
ALT              3559
AST              3566
Age            194990
Albumin          2638
BUN             15057
Bilirubin        3616
Cholesterol       361
Creatinine      15117
DiasABP        117057
FiO2            33054
GCS             65843
Gender           4316
Glucose         14007
HCO3            14690
HCT             20190
HR             186349
Height         194990
ICUType          4316
K               15630
Lactate          8816
MAP            116306
MechVent        31168
Mg              14775
NIDiasABP       82883
NIMAP           81789
NISysABP        82960
Na              14629
PaCO2           25191
PaO2            25151
Platelets       15634
RespRate        45383
SaO2             8973
SysABP         117068
Temp            79151
TroponinI         405
TroponinT        2244
Urine          142601
WBC             14152
Weight         108538
pH              26462
dtype: int64

In [6]:
# Rows that carry Gender == -1.0 (treated in this notebook as "undefined gender").
undefined_rows_mask = train_X["Gender"].eq(-1.0)
training_undefined_gender = train_X.loc[undefined_rows_mask]
undefined_gender_training_ids = training_undefined_gender["RecordID"]

# Pull every row of those records and count the non-null entries per column.
in_undefined_gender_records = train_X["RecordID"].isin(undefined_gender_training_ids)
undefined_gender_measurements_training = train_X.loc[in_undefined_gender_records].count()
undefined_gender_measurements_training

RecordID       384
level_1        384
Time           384
ALP              3
ALT              3
AST              3
Age            241
Albumin          2
BUN             17
Bilirubin        3
Cholesterol      0
Creatinine      17
DiasABP        164
FiO2            20
GCS             82
Gender           8
Glucose         17
HCO3            17
HCT             17
HR             228
Height         241
ICUType          8
K               17
Lactate          3
MAP            162
MechVent        21
Mg              16
NIDiasABP      111
NIMAP          111
NISysABP       111
Na              16
PaCO2           15
PaO2            15
Platelets       16
RespRate        89
SaO2             1
SysABP         164
Temp            84
TroponinI        1
TroponinT        0
Urine          157
WBC             16
Weight         130
pH              20
dtype: int64

In [7]:
# Records whose row at Time == 0.0 reports ICUType == 1.0.
icu1_at_admission = (train_X["ICUType"] == 1.0) & (train_X["Time"] == 0.0)
training_ICUType_1 = train_X[icu1_at_admission]
training_ICUType_1_ids = training_ICUType_1["RecordID"]

# Non-null count per column over every row belonging to those records.
ICUType_1_measurements_training = (
    train_X[train_X["RecordID"].isin(training_ICUType_1_ids)].count()
)
ICUType_1_measurements_training

RecordID       54384
level_1        54384
Time           54384
ALP              816
ALT              849
AST              847
Age            49878
Albumin          619
BUN             3904
Bilirubin        848
Cholesterol      335
Creatinine      3963
DiasABP        22483
FiO2            6007
GCS            13928
Gender          1133
Glucose         3728
HCO3            3770
HCT             4820
HR             47430
Height         49878
ICUType         1133
K               4542
Lactate         1217
MAP            22427
MechVent        5528
Mg              3896
NIDiasABP      26003
NIMAP          25873
NISysABP       26016
Na              3758
PaCO2           4494
PaO2            4491
Platelets       3932
RespRate       18857
SaO2            2704
SysABP         22485
Temp           16338
TroponinI        157
TroponinT       1178
Urine          30772
WBC             3532
Weight         24021
pH              4579
dtype: int64

In [8]:
# Records whose row at Time == 0.0 reports ICUType == 2.0.
icu2_at_admission = (train_X["ICUType"] == 2.0) & (train_X["Time"] == 0.0)
training_ICUType_2 = train_X[icu2_at_admission]
training_ICUType2_ids = training_ICUType_2["RecordID"]

# Non-null count per column over every row belonging to those records.
ICUType_2_measurements_training = (
    train_X[train_X["RecordID"].isin(training_ICUType2_ids)].count()
)
ICUType_2_measurements_training

RecordID       79200
level_1        79200
Time           79200
ALP              554
ALT              568
AST              569
Age            76694
Albumin          366
BUN             5146
Bilirubin        563
Cholesterol       17
Creatinine      5158
DiasABP        62479
FiO2           13364
GCS            20884
Gender          1650
Glucose         3673
HCO3            4768
HCT             8617
HR             72885
Height         76694
ICUType         1650
K               4167
Lactate         2798
MAP            62587
MechVent       13069
Mg              5042
NIDiasABP      17309
NIMAP          17150
NISysABP       17345
Na              3981
PaCO2          15992
PaO2           15951
Platelets       6356
RespRate        3477
SaO2            8639
SysABP         62482
Temp           46250
TroponinI        119
TroponinT        219
Urine          66663
WBC             5304
Weight         41249
pH             17490
dtype: int64

In [9]:
# Records whose row at Time == 0.0 reports ICUType == 3.0.
icu3_at_admission = (train_X["ICUType"] == 3.0) & (train_X["Time"] == 0.0)
training_ICUType_3 = train_X[icu3_at_admission]
training_ICUType_3_ids = training_ICUType_3["RecordID"]

# Non-null count per column over every row belonging to those records.
ICUType_3_measurements_training = (
    train_X[train_X["RecordID"].isin(training_ICUType_3_ids)].count()
)
ICUType_3_measurements_training

RecordID       132240
level_1        132240
Time           132240
ALP              2910
ALT              2986
AST              2988
Age            122384
Albumin          2244
BUN              9974
Bilirubin        3140
Cholesterol       121
Creatinine       9997
DiasABP         46350
FiO2            19341
GCS             35206
Gender           2755
Glucose          9960
HCO3             9983
HCT             11660
HR             117782
Height         122384
ICUType          2755
K               10785
Lactate          5175
MAP             45771
MechVent        17870
Mg               9445
NIDiasABP       74706
NIMAP           73269
NISysABP        74748
Na              10217
PaCO2           10087
PaO2            10086
Platelets        9011
RespRate        41097
SaO2             1617
SysABP          46356
Temp            37859
TroponinI         342
TroponinT        1759
Urine           80024
WBC              8657
Weight          86317
pH              10227
dtype: int64

In [10]:
# Records whose row at Time == 0.0 reports ICUType == 4.0.
icu4_at_admission = (train_X["ICUType"] == 4.0) & (train_X["Time"] == 0.0)
training_ICUType_4 = train_X[icu4_at_admission]
training_ICUType_4_ids = training_ICUType_4["RecordID"]

# Non-null count per column over every row belonging to those records.
ICUType_4_measurements_training = (
    train_X[train_X["RecordID"].isin(training_ICUType_4_ids)].count()
)
ICUType_4_measurements_training

RecordID       102384
level_1        102384
Time           102384
ALP              1826
ALT              1855
AST              1854
Age             97683
Albumin          1443
BUN              7687
Bilirubin        1810
Cholesterol       143
Creatinine       7690
DiasABP         68273
FiO2            19112
GCS             47602
Gender           2133
Glucose          7738
HCO3             7596
HCT              9885
HR              93905
Height          97683
ICUType          2133
K                8425
Lactate          5970
MAP             67675
MechVent        18923
Mg               7871
NIDiasABP       35748
NIMAP           35240
NISysABP        35793
Na               8199
PaCO2           11898
PaO2            11870
Platelets        7821
RespRate        24614
SaO2             2022
SysABP          68277
Temp            36088
TroponinI         175
TroponinT         831
Urine           77525
WBC              7344
Weight          43530
pH              12073
dtype: int64

In [11]:
# Age can appear on several rows of the same record, so patients are selected
# from the Time == 0.0 row only (one row per record).
aged_65_plus_at_start = (train_X["Age"] >= 65) & (train_X["Time"] == 0.0)
age_65_and_above_training = train_X[aged_65_plus_at_start]
age_65_and_above_training_ids = age_65_and_above_training["RecordID"]

# Non-null count per column over every row belonging to those records.
age_65_and_above_measurements_training = (
    train_X[train_X["RecordID"].isin(age_65_and_above_training_ids)].count()
)
age_65_and_above_measurements_training

RecordID       202032
level_1        202032
Time           202032
ALP              2722
ALT              2776
AST              2781
Age            190883
Albumin          2266
BUN             14267
Bilirubin        2850
Cholesterol       358
Creatinine      14324
DiasABP        110920
FiO2            31691
GCS             63079
Gender           4209
Glucose         13184
HCO3            13920
HCT             19022
HR             183256
Height         190883
ICUType          4209
K               14831
Lactate          7880
MAP            110507
MechVent        29690
Mg              14094
NIDiasABP       84652
NIMAP           83664
NISysABP        84718
Na              13775
PaCO2           23514
PaO2            23467
Platelets       14564
RespRate        49789
SaO2             9322
SysABP         110928
Temp            77632
TroponinI         536
TroponinT        2819
Urine          143480
WBC             13314
Weight         107723
pH              24622
dtype: int64

In [12]:
# Motivation for the test: the number of values for the age group shown in the table was much larger than the number of
# values for the gender group, which made no sense: since neither variable has missing data in either group,
# if gender has n occurrences, age must have n occurrences as well, and vice versa.

# Finding: the test showed that there are cases where the value of the age field repeats along the patient's time series; given that, we adjusted the code to take only the first occurrence of the value for each RecordID.
# We also found that, because we were filtering only the female and male genders and there is also a value for undefined gender, the Age group was picking up the ages of the undefined-gender records as well, which made
# the total number of occurrences for the Age group larger than for the Gender group.

# teste2 = train_X[train_X['Age'] >= 65]
# teste2["RecordID"].value_counts()

In [13]:
# Age can appear on several rows of the same record, so patients are selected
# from the Time == 0.0 row only (one row per record).
under_65_at_start = (train_X["Age"] < 65) & (train_X["Time"] == 0.0)
age_under_65_training = train_X[under_65_at_start]
age_under_65_training_ids = age_under_65_training["RecordID"]

# Non-null count per column over every row belonging to those records.
age_under_65_measurements_training = (
    train_X[train_X["RecordID"].isin(age_under_65_training_ids)].count()
)
age_under_65_measurements_training

RecordID       166176
level_1        166176
Time           166176
ALP              3384
ALT              3482
AST              3477
Age            155756
Albumin          2406
BUN             12444
Bilirubin        3511
Cholesterol       258
Creatinine      12484
DiasABP         88665
FiO2            26133
GCS             54541
Gender           3462
Glucose         11915
HCO3            12197
HCT             15960
HR             148746
Height         155756
ICUType          3462
K               13088
Lactate          7280
MAP             87953
MechVent        25700
Mg              12160
NIDiasABP       69114
NIMAP           67868
NISysABP        69184
Na              12380
PaCO2           18957
PaO2            18931
Platelets       12556
RespRate        38256
SaO2             5660
SysABP          88672
Temp            58903
TroponinI         257
TroponinT        1168
Urine          111504
WBC             11523
Weight          87394
pH              19747
dtype: int64

In [14]:
# Keep only rows where both Height and Weight are present and are not the
# -1 placeholder, so that a BMI can be computed from the row.
height_is_usable = train_X["Height"].notna() & train_X["Height"].ne(-1)
weight_is_usable = train_X["Weight"].notna() & train_X["Weight"].ne(-1)
filtered_train_X = train_X[height_is_usable & weight_is_usable]

In [15]:
def classify_BMI(BMI):
    """Map a body-mass-index value to its weight-class label.

    The bands are contiguous: every non-NaN number falls in exactly one of
    them. The previous version compared against one-decimal limits
    (``<= 24.9`` then ``>= 25``), so a value such as 24.95 matched no branch
    and silently produced ``None``. Values already rounded to one decimal are
    classified exactly as before.

    Parameters
    ----------
    BMI : float
        Body-mass index.

    Returns
    -------
    str or None
        One of "Baixo peso", "Peso normal", "Sobrepeso", "Obesidade grau 1",
        "Obesidade grau 2", "Obesidade grau 3"; ``None`` when ``BMI`` is NaN.
    """
    # NOTE(review): 18.5 itself is kept in "Baixo peso" to match the original
    # behaviour; confirm whether the intended cut-off is "< 18.5" instead.
    if BMI <= 18.5:
        return "Baixo peso"
    elif BMI < 25:
        return "Peso normal"
    elif BMI < 30:
        return "Sobrepeso"
    elif BMI < 35:
        return "Obesidade grau 1"
    elif BMI < 40:
        return "Obesidade grau 2"
    elif BMI >= 40:
        return "Obesidade grau 3"
    # Only NaN reaches this point: every comparison against NaN is False.
    return None

In [16]:
# Work on a separate frame whose Height column is converted from cm to meters;
# filtered_train_X itself is left unchanged.
filtered_train_X_meters = filtered_train_X.assign(
    Height=filtered_train_X["Height"] / 100
)
filtered_train_X_meters["Height"]

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
574842    1.651
574843    1.651
574844    1.651
574845    1.651
575088    1.727
Name: Height, Length: 101262, dtype: float64

In [17]:
# NOTE: this is an alias, not a copy. The two columns added below also appear
# on filtered_train_X_meters.
bmi_data_train = filtered_train_X_meters

# BMI = weight / height², with height already expressed in meters; one decimal place.
weight = bmi_data_train["Weight"]
height_in_meters = bmi_data_train["Height"]
bmi_data_train["BMI"] = weight.div(height_in_meters.pow(2)).round(1)

# Label every row with its BMI class.
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.7,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.2,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.2,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.2,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.2,Sobrepeso


In [18]:
# Collapse to one row per RecordID. GroupBy.first() keeps the first non-null
# value of each column independently, so the cells of one output row may come
# from different time steps of the same record.
first_values_per_record = bmi_data_train.groupby("RecordID").first()
bmi_data_train = first_values_per_record.reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132540,0,0.0,,,,76.0,,21.0,,...,122.00,37.5,,,50.0,13.3,76.0,7.45,24.7,Peso normal
1,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,600.0,11.5,84.6,,26.0,Sobrepeso
2,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
3,132548,0,0.0,,,,68.0,,32.0,,...,205.00,36.3,0.7,,120.0,6.2,87.0,,32.9,Obesidade grau 1
4,132555,0,0.0,,,,74.0,,19.0,,...,98.00,34.8,,,35.0,9.0,66.1,7.39,21.5,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4026,162991,0,0.0,,,,56.0,,,,...,155.25,,,,,,96.2,,33.7,Obesidade grau 1
4027,162995,0,0.0,60.0,21.0,20.0,84.0,,93.0,0.4,...,121.00,37.1,0.6,,60.0,17.1,96.5,7.31,28.8,Sobrepeso
4028,163002,0,0.0,,,,53.0,,,,...,,,,,,2.7,68.0,7.27,31.3,Obesidade grau 1
4029,163003,0,0.0,124.0,254.0,204.0,36.0,2.6,3.0,30.3,...,,36.3,,,50.0,10.6,57.7,7.47,21.2,Peso normal


In [19]:
# Number of records in each BMI class (bmi_data_train holds one row per RecordID at this point).
bmi_data_train["Classificacao"].value_counts()

Classificacao
Sobrepeso           1436
Peso normal         1190
Obesidade grau 1     700
Obesidade grau 2     294
Obesidade grau 3     285
Baixo peso           126
Name: count, dtype: int64

In [20]:
# Records that received a BMI class; every other record is "undefined".
classification_undefined_training_ids = bmi_data_train["RecordID"]
has_bmi_class = train_X["RecordID"].isin(classification_undefined_training_ids)

# Non-null count per column over the rows of records WITHOUT a BMI class.
classification_undefined_measurements_training = train_X[~has_bmi_class].count()
classification_undefined_measurements_training

RecordID       174720
level_1        174720
Time           174720
ALP              3020
ALT              3103
AST              3108
Age            161294
Albumin          2413
BUN             12403
Bilirubin        3166
Cholesterol       286
Creatinine      12450
DiasABP         69035
FiO2            24269
GCS             58161
Gender           3640
Glucose         12376
HCO3            12305
HCT             14972
HR             154984
Height         161294
ICUType          3640
K               13574
Lactate          6051
MAP             68261
MechVent        22442
Mg              12055
NIDiasABP       92846
NIMAP           91308
NISysABP        92917
Na              12853
PaCO2           12384
PaO2            12352
Platelets       11506
RespRate        59257
SaO2             1993
SysABP          69042
Temp            47056
TroponinI         292
TroponinT        2113
Urine          110894
WBC             11043
Weight          93855
pH              12596
dtype: int64

In [21]:
# Records classified as 'Baixo peso' (low weight).
is_low_weight = bmi_data_train["Classificacao"].eq('Baixo peso')
classification_low_weight_training = bmi_data_train.loc[is_low_weight]
classification_low_weight_training_ids = classification_low_weight_training["RecordID"]

# Non-null count per column over every train_X row of those records.
in_low_weight_records = train_X["RecordID"].isin(classification_low_weight_training_ids)
classification_measurements_l_w_t = train_X.loc[in_low_weight_records].count()
classification_measurements_l_w_t

RecordID       6048
level_1        6048
Time           6048
ALP              93
ALT              95
AST              96
Age            5773
Albumin          75
BUN             439
Bilirubin        96
Cholesterol      10
Creatinine      440
DiasABP        3826
FiO2            968
GCS            1873
Gender          126
Glucose         416
HCO3            433
HCT             567
HR             5541
Height         5773
ICUType         126
K               461
Lactate         285
MAP            3856
MechVent       1027
Mg              449
NIDiasABP      2066
NIMAP          2009
NISysABP       2066
Na              422
PaCO2           819
PaO2            821
Platelets       450
RespRate        898
SaO2            352
SysABP         3827
Temp           2530
TroponinI        31
TroponinT        55
Urine          4345
WBC             411
Weight         2836
pH              851
dtype: int64

In [22]:
# Records classified as 'Peso normal' (normal weight).
is_normal_weight = bmi_data_train["Classificacao"].eq('Peso normal')
classification_normal_weight_training = bmi_data_train.loc[is_normal_weight]
classification_normal_weight_training_ids = classification_normal_weight_training["RecordID"]

# Non-null count per column over every train_X row of those records.
in_normal_weight_records = train_X["RecordID"].isin(classification_normal_weight_training_ids)
classification_measurements_n_w_t = train_X.loc[in_normal_weight_records].count()
classification_measurements_n_w_t

RecordID       57120
level_1        57120
Time           57120
ALP              856
ALT              880
AST              875
Age            54842
Albumin          658
BUN             4158
Bilirubin        887
Cholesterol       85
Creatinine      4167
DiasABP        38084
FiO2            9532
GCS            18471
Gender          1190
Glucose         3782
HCO3            4026
HCT             5784
HR             52496
Height         54842
ICUType         1190
K               4262
Lactate         2649
MAP            37948
MechVent        9497
Mg              4129
NIDiasABP      18794
NIMAP          18630
NISysABP       18817
Na              3944
PaCO2           8257
PaO2            8233
Platelets       4530
RespRate        9092
SaO2            3450
SysABP         38085
Temp           25548
TroponinI        155
TroponinT        556
Urine          42071
WBC             3998
Weight         28923
pH              8779
dtype: int64

In [23]:
# Records classified as 'Sobrepeso' (overweight).
is_overweight = bmi_data_train["Classificacao"].eq('Sobrepeso')
classification_overweight_training = bmi_data_train.loc[is_overweight]
classification_overweight_training_ids = classification_overweight_training["RecordID"]

# Non-null count per column over every train_X row of those records.
in_overweight_records = train_X["RecordID"].isin(classification_overweight_training_ids)
classification_measurements_o_w_t = train_X.loc[in_overweight_records].count()
classification_measurements_o_w_t

RecordID       68928
level_1        68928
Time           68928
ALP             1137
ALT             1163
AST             1161
Age            65880
Albumin          825
BUN             5090
Bilirubin       1176
Cholesterol      124
Creatinine      5117
DiasABP        47211
FiO2           11809
GCS            20995
Gender          1436
Glucose         4445
HCO3            4897
HCT             7321
HR             62773
Height         65880
ICUType         1436
K               4996
Lactate         3141
MAP            47120
MechVent       11536
Mg              5100
NIDiasABP      21389
NIMAP          21166
NISysABP       21409
Na              4642
PaCO2          10923
PaO2           10905
Platelets       5707
RespRate        9932
SaO2            4902
SysABP         47215
Temp           32657
TroponinI        160
TroponinT        643
Urine          51451
WBC             4997
Weight         36300
pH             11594
dtype: int64

In [24]:
# Records classified as 'Obesidade grau 1' (obesity grade 1).
is_obesity_grade1 = bmi_data_train["Classificacao"].eq('Obesidade grau 1')
classification_obesity_grade1_training = bmi_data_train.loc[is_obesity_grade1]
classification_obesity_grade1_training_ids = classification_obesity_grade1_training["RecordID"]

# Non-null count per column over every train_X row of those records.
in_obesity_grade1_records = train_X["RecordID"].isin(classification_obesity_grade1_training_ids)
classification_measurements_ob1_t = train_X.loc[in_obesity_grade1_records].count()
classification_measurements_ob1_t

RecordID       33600
level_1        33600
Time           33600
ALP              519
ALT              529
AST              531
Age            32295
Albumin          374
BUN             2478
Bilirubin        546
Cholesterol       51
Creatinine      2483
DiasABP        23146
FiO2            5929
GCS            10037
Gender           700
Glucose         2187
HCO3            2384
HCT             3519
HR             30847
Height         32295
ICUType          700
K               2501
Lactate         1611
MAP            23020
MechVent        5809
Mg              2453
NIDiasABP      10019
NIMAP           9913
NISysABP       10029
Na              2294
PaCO2           5531
PaO2            5527
Platelets       2702
RespRate        4740
SaO2            2363
SysABP         23146
Temp           16296
TroponinI         94
TroponinT        316
Urine          25793
WBC             2371
Weight         17538
pH              5822
dtype: int64

In [25]:
# Records classified as 'Obesidade grau 2' (obesity grade 2).
is_obesity_grade2 = bmi_data_train["Classificacao"].eq('Obesidade grau 2')
classification_obesity_grade2_training = bmi_data_train.loc[is_obesity_grade2]
classification_obesity_grade2_training_ids = classification_obesity_grade2_training["RecordID"]

# Non-null count per column over every train_X row of those records.
in_obesity_grade2_records = train_X["RecordID"].isin(classification_obesity_grade2_training_ids)
classification_measurements_ob2_t = train_X.loc[in_obesity_grade2_records].count()
classification_measurements_ob2_t

RecordID       14112
level_1        14112
Time           14112
ALP              254
ALT              255
AST              254
Age            13504
Albumin          168
BUN             1103
Bilirubin        261
Cholesterol       32
Creatinine      1110
DiasABP         9657
FiO2            2512
GCS             4145
Gender           294
Glucose          955
HCO3            1063
HCT             1505
HR             12902
Height         13504
ICUType          294
K               1070
Lactate          681
MAP             9630
MechVent        2359
Mg              1051
NIDiasABP       4227
NIMAP           4150
NISysABP        4231
Na              1023
PaCO2           2336
PaO2            2332
Platelets       1196
RespRate        2498
SaO2            1049
SysABP          9659
Temp            6797
TroponinI         35
TroponinT        150
Urine          10421
WBC             1069
Weight          7850
pH              2421
dtype: int64

In [26]:
# Records classified as 'Obesidade grau 3' (obesity grade 3).
is_obesity_grade3 = bmi_data_train["Classificacao"].eq('Obesidade grau 3')
classification_obesity_grade3_training = bmi_data_train.loc[is_obesity_grade3]
classification_obesity_grade3_training_ids = classification_obesity_grade3_training["RecordID"]

# Non-null count per column over every train_X row of those records.
in_obesity_grade3_records = train_X["RecordID"].isin(classification_obesity_grade3_training_ids)
classification_measurements_ob3_t = train_X.loc[in_obesity_grade3_records].count()
classification_measurements_ob3_t

RecordID       13680
level_1        13680
Time           13680
ALP              227
ALT              233
AST              233
Age            13051
Albumin          159
BUN             1040
Bilirubin        229
Cholesterol       28
Creatinine      1041
DiasABP         8626
FiO2            2805
GCS             3938
Gender           285
Glucose          938
HCO3            1009
HCT             1314
HR             12459
Height         13051
ICUType          285
K               1055
Lactate          742
MAP             8625
MechVent        2720
Mg              1017
NIDiasABP       4425
NIMAP           4356
NISysABP        4433
Na               977
PaCO2           2221
PaO2            2228
Platelets       1029
RespRate        1628
SaO2             873
SysABP          8626
Temp            5651
TroponinI         26
TroponinT        154
Urine          10009
WBC              948
Weight          7815
pH              2306
dtype: int64

In [27]:
# Column labels of the training frame; they become the row labels of the summary table built in the next cell.
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [28]:
# Empty frame whose columns are the training variables; transposing it gives
# one (initially column-less) row per variable.
df_train = pd.DataFrame(columns=df_columns)

# Per-variable non-null counts for each demographic subgroup, in display order.
counts_by_group = {
    "Female": female_gender_measurements_training,
    "Male": male_gender_measurements_training,
    "Undefined Gender": undefined_gender_measurements_training,
    "ICUType 1": ICUType_1_measurements_training,
    "ICUType 2": ICUType_2_measurements_training,
    "ICUType 3": ICUType_3_measurements_training,
    "ICUType 4": ICUType_4_measurements_training,
    "Age 65+": age_65_and_above_measurements_training,
    "Age 65-": age_under_65_measurements_training,
    'Undefined Classification': classification_undefined_measurements_training,
    'Low Weight': classification_measurements_l_w_t,
    'Normal Weight': classification_measurements_n_w_t,
    'Overweight': classification_measurements_o_w_t,
    'Obesity Grade 1': classification_measurements_ob1_t,
    'Obesity Grade 2': classification_measurements_ob2_t,
    'Obesity Grade 3': classification_measurements_ob3_t,
}

# Rows that identify or describe a record rather than count a repeated measurement.
non_measurement_rows = ['RecordID', 'level_1', 'Time', 'Age', 'Gender', 'Height', 'ICUType']

# Each Series is aligned on the variable name; the descriptor rows are then removed.
df_train_transpose = df_train.T.assign(**counts_by_group).drop(non_measurement_rows, axis=0)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_train_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,2629,3474,3,816,554,2910,1826,2722,3384,3020,93,856,1137,519,254,227
ALT,2696,3559,3,849,568,2986,1855,2776,3482,3103,95,880,1163,529,255,233
AST,2689,3566,3,847,569,2988,1854,2781,3477,3108,96,875,1161,531,254,233
Albumin,2032,2638,2,619,366,2244,1443,2266,2406,2413,75,658,825,374,168,159
BUN,11637,15057,17,3904,5146,9974,7687,14267,12444,12403,439,4158,5090,2478,1103,1040
Bilirubin,2742,3616,3,848,563,3140,1810,2850,3511,3166,96,887,1176,546,261,229
Cholesterol,255,361,0,335,17,121,143,358,258,286,10,85,124,51,32,28
Creatinine,11674,15117,17,3963,5158,9997,7690,14324,12484,12450,440,4167,5117,2483,1110,1041
DiasABP,82364,117057,164,22483,62479,46350,68273,110920,88665,69035,3826,38084,47211,23146,9657,8626
FiO2,24750,33054,20,6007,13364,19341,19112,31691,26133,24269,968,9532,11809,5929,2512,2805


<h3>Validation data</h3>

<h4>Loading validation dataset</h4>

In [29]:
# Validation-split table taken from the dict returned by preprocess_physionet2012.
validation_X = physionet2012_dataset['val_X']

In [30]:
# Rows whose Gender value is 0.0 (reported as "Female" in the summary table).
# Gender is not populated on every row (compare the Gender and RecordID counts
# in the output), so group membership is propagated through RecordID.
validation_female_gender = validation_X.loc[validation_X["Gender"].eq(0.0)]
validation_female_gender_ids = validation_female_gender["RecordID"]

# All rows of those records, reduced to the non-null count of each column.
female_gender_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(validation_female_gender_ids)]
    .count()
)
female_gender_measurements_validation

RecordID       41328
level_1        41328
Time           41328
ALP              658
ALT              670
AST              673
Age            38880
Albumin          514
BUN             2931
Bilirubin        690
Cholesterol       77
Creatinine      2944
DiasABP        21371
FiO2            6249
GCS            13847
Gender           861
Glucose         2833
HCO3            2893
HCT             3889
HR             37316
Height         38880
ICUType          861
K               3136
Lactate         1626
MAP            21172
MechVent        6049
Mg              2887
NIDiasABP      18317
NIMAP          18057
NISysABP       18335
Na              2939
PaCO2           4303
PaO2            4294
Platelets       2973
RespRate       11632
SaO2            1486
SysABP         21373
Temp           14348
TroponinI         51
TroponinT        429
Urine          28754
WBC             2740
Weight         22825
pH              4460
dtype: int64

In [31]:
# Rows whose Gender value is 1.0 (reported as "Male" in the summary table);
# membership is then extended to every row sharing one of those RecordIDs.
validation_male_gender = validation_X.loc[validation_X["Gender"].eq(1.0)]
validation_male_gender_ids = validation_male_gender["RecordID"]

# Non-null count per column across all rows of the selected records.
male_gender_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(validation_male_gender_ids)]
    .count()
)
male_gender_measurements_validation

RecordID       50688
level_1        50688
Time           50688
ALP              796
ALT              832
AST              831
Age            47782
Albumin          603
BUN             3680
Bilirubin        820
Cholesterol      105
Creatinine      3703
DiasABP        27710
FiO2            7761
GCS            16454
Gender          1056
Glucose         3418
HCO3            3578
HCT             4990
HR             45725
Height         47782
ICUType         1056
K               3807
Lactate         2081
MAP            27569
MechVent        7492
Mg              3560
NIDiasABP      21457
NIMAP          21207
NISysABP       21476
Na              3550
PaCO2           6067
PaO2            6055
Platelets       3853
RespRate       11787
SaO2            2143
SysABP         27713
Temp           19495
TroponinI        103
TroponinT        595
Urine          34839
WBC             3430
Weight         25974
pH              6420
dtype: int64

In [32]:
# Rows whose Gender value is -1.0 (reported as "Undefined Gender" in the
# summary table); membership is extended to all rows of the same RecordIDs.
validation_undefined_gender = validation_X.loc[validation_X["Gender"].eq(-1.0)]
validation_undefined_gender_ids = validation_undefined_gender["RecordID"]

# Non-null count per column across all rows of the selected records.
undefined_gender_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(validation_undefined_gender_ids)]
    .count()
)
undefined_gender_measurements_validation

RecordID       48
level_1        48
Time           48
ALP             0
ALT             1
AST             1
Age            47
Albumin         0
BUN             3
Bilirubin       0
Cholesterol     0
Creatinine      3
DiasABP        32
FiO2            1
GCS            10
Gender          1
Glucose         3
HCO3            3
HCT             2
HR             46
Height         47
ICUType         1
K               4
Lactate         0
MAP            30
MechVent        0
Mg              3
NIDiasABP      17
NIMAP          17
NISysABP       17
Na              3
PaCO2           3
PaO2            3
Platelets       2
RespRate       46
SaO2            0
SysABP         32
Temp           11
TroponinI       0
TroponinT       4
Urine          28
WBC             2
Weight         45
pH              3
dtype: int64

In [33]:
# Records are assigned to ICU type 1 from the row where Time == 0.0 and
# ICUType == 1.0 (both conditions applied as one mask).
validation_ICUType_1 = validation_X.loc[
    validation_X["ICUType"].eq(1.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_1_ids = validation_ICUType_1["RecordID"]

# Every row of those records, reduced to the non-null count of each column.
ICUType_1_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(validation_ICUType_1_ids)]
    .count()
)
ICUType_1_measurements_validation

RecordID       13248
level_1        13248
Time           13248
ALP              170
ALT              188
AST              187
Age            12234
Albumin          140
BUN              921
Bilirubin        173
Cholesterol       96
Creatinine       938
DiasABP         5001
FiO2            1370
GCS             3490
Gender           276
Glucose          877
HCO3             887
HCT             1122
HR             11661
Height         12234
ICUType          276
K               1075
Lactate          287
MAP             4942
MechVent        1261
Mg               930
NIDiasABP       6814
NIMAP           6779
NISysABP        6822
Na               883
PaCO2           1024
PaO2            1024
Platelets        938
RespRate        5584
SaO2             644
SysABP          5001
Temp            3947
TroponinI         41
TroponinT        299
Urine           7594
WBC              847
Weight          6204
pH              1049
dtype: int64

In [34]:
# Records are assigned to ICU type 2 from the row where Time == 0.0 and
# ICUType == 2.0 (both conditions applied as one mask).
validation_ICUType_2 = validation_X.loc[
    validation_X["ICUType"].eq(2.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_2_ids = validation_ICUType_2["RecordID"]

# Every row of those records, reduced to the non-null count of each column.
ICUType_2_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(validation_ICUType_2_ids)]
    .count()
)
ICUType_2_measurements_validation

RecordID       18480
level_1        18480
Time           18480
ALP              129
ALT              134
AST              132
Age            17791
Albumin           87
BUN             1186
Bilirubin        128
Cholesterol        8
Creatinine      1185
DiasABP        14413
FiO2            3106
GCS             4926
Gender           385
Glucose          872
HCO3            1104
HCT             2005
HR             16917
Height         17791
ICUType          385
K                978
Lactate          607
MAP            14434
MechVent        3033
Mg              1140
NIDiasABP       3902
NIMAP           3877
NISysABP        3909
Na               930
PaCO2           3716
PaO2            3701
Platelets       1527
RespRate         744
SaO2            2035
SysABP         14413
Temp           10591
TroponinI         18
TroponinT         41
Urine          15496
WBC             1243
Weight          9513
pH              4100
dtype: int64

In [35]:
# Records are assigned to ICU type 3 from the row where Time == 0.0 and
# ICUType == 3.0 (both conditions applied as one mask).
validation_ICUType_3 = validation_X.loc[
    validation_X["ICUType"].eq(3.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_3_ids = validation_ICUType_3["RecordID"]

# Every row of those records, reduced to the non-null count of each column.
ICUType_3_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(validation_ICUType_3_ids)]
    .count()
)
ICUType_3_measurements_validation

RecordID       32496
level_1        32496
Time           32496
ALP              697
ALT              716
AST              719
Age            30195
Albumin          517
BUN             2414
Bilirubin        739
Cholesterol       34
Creatinine      2429
DiasABP        11266
FiO2            4598
GCS             8631
Gender           677
Glucose         2406
HCO3            2421
HCT             3004
HR             29055
Height         30195
ICUType          677
K               2614
Lactate         1266
MAP            11100
MechVent        4325
Mg              2260
NIDiasABP      18924
NIMAP          18621
NISysABP       18936
Na              2477
PaCO2           2425
PaO2            2431
Platelets       2258
RespRate       10078
SaO2             401
SysABP         11268
Temp            9250
TroponinI         66
TroponinT        488
Urine          19580
WBC             2125
Weight         20986
pH              2477
dtype: int64

In [36]:
# Records are assigned to ICU type 4 from the row where Time == 0.0 and
# ICUType == 4.0 (both conditions applied as one mask).
validation_ICUType_4 = validation_X.loc[
    validation_X["ICUType"].eq(4.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_4_ids = validation_ICUType_4["RecordID"]

# Every row of those records, reduced to the non-null count of each column.
ICUType_4_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(validation_ICUType_4_ids)]
    .count()
)
ICUType_4_measurements_validation

RecordID       27840
level_1        27840
Time           27840
ALP              458
ALT              465
AST              467
Age            26489
Albumin          373
BUN             2093
Bilirubin        470
Cholesterol       44
Creatinine      2098
DiasABP        18433
FiO2            4937
GCS            13264
Gender           580
Glucose         2099
HCO3            2062
HCT             2750
HR             25454
Height         26489
ICUType          580
K               2280
Lactate         1547
MAP            18295
MechVent        4922
Mg              2120
NIDiasABP      10151
NIMAP          10004
NISysABP       10161
Na              2202
PaCO2           3208
PaO2            3196
Platelets       2105
RespRate        7059
SaO2             549
SysABP         18436
Temp           10066
TroponinI         29
TroponinT        200
Urine          20951
WBC             1957
Weight         12141
pH              3257
dtype: int64

In [37]:
# The Age value on the Time == 0.0 row decides membership: records with
# Age >= 65 there form the "Age 65+" group.
age_65_and_above_validation = validation_X.loc[
    validation_X["Age"].ge(65) & validation_X["Time"].eq(0.0)
]
age_65_and_above_validation_ids = age_65_and_above_validation["RecordID"]

# Every row of those records, reduced to the non-null count of each column.
age_65_and_above_measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(age_65_and_above_validation_ids)]
    .count()
)
age_65_and_above_measurements_validation

RecordID       49968
level_1        49968
Time           49968
ALP              622
ALT              641
AST              641
Age            47180
Albumin          526
BUN             3484
Bilirubin        650
Cholesterol      109
Creatinine      3507
DiasABP        26348
FiO2            7499
GCS            15666
Gender          1041
Glucose         3266
HCO3            3396
HCT             4708
HR             45264
Height         47180
ICUType         1041
K               3643
Lactate         1952
MAP            26122
MechVent        7067
Mg              3400
NIDiasABP      22346
NIMAP          22093
NISysABP       22368
Na              3367
PaCO2           5640
PaO2            5630
Platelets       3592
RespRate       13246
SaO2            2177
SysABP         26350
Temp           18967
TroponinI        115
TroponinT        689
Urine          34938
WBC             3253
Weight         27543
pH              5909
dtype: int64

In [38]:
# The Age value on the Time == 0.0 row decides membership: records with
# Age < 65 there form the "Age 65-" group.
# NOTE(review): if Age uses -1 as an "unknown" marker (as Gender does), such
# records would also land in this group -- confirm.
age_under_65_validation = validation_X.loc[
    validation_X["Age"].lt(65) & validation_X["Time"].eq(0.0)
]
age_under_65_validation_ids = age_under_65_validation["RecordID"]

# The double underscore in the result name is kept on purpose: the validation
# summary cell reads age_under_65__measurements_validation by that exact name.
age_under_65__measurements_validation = (
    validation_X
    .loc[validation_X["RecordID"].isin(age_under_65_validation_ids)]
    .count()
)
age_under_65__measurements_validation

RecordID       42096
level_1        42096
Time           42096
ALP              832
ALT              862
AST              864
Age            39529
Albumin          591
BUN             3130
Bilirubin        860
Cholesterol       73
Creatinine      3143
DiasABP        22765
FiO2            6512
GCS            14645
Gender           877
Glucose         2988
HCO3            3078
HCT             4173
HR             37823
Height         39529
ICUType          877
K               3304
Lactate         1755
MAP            22649
MechVent        6474
Mg              3050
NIDiasABP      17445
NIMAP          17188
NISysABP       17460
Na              3125
PaCO2           4733
PaO2            4722
Platelets       3236
RespRate       10219
SaO2            1452
SysABP         22768
Temp           14887
TroponinI         39
TroponinT        339
Urine          28683
WBC             2919
Weight         21301
pH              4974
dtype: int64

In [39]:
# Keep only rows where Height and Weight are both present (not NaN) and both
# different from -1 (presumably a "missing" marker -- confirm), i.e. rows that
# can feed a BMI computation.
filtered_validation_X = validation_X.loc[
    validation_X['Height'].ne(-1)
    & validation_X['Weight'].ne(-1)
    & validation_X['Height'].notna()
    & validation_X['Weight'].notna()
]

In [40]:
# Work on a copy so filtered_validation_X keeps its original Height values;
# dividing by 100 converts Height from centimetres to metres.
filtered_validation_X_meters = filtered_validation_X.assign(
    Height=filtered_validation_X['Height'] / 100
)
filtered_validation_X_meters['Height']

336       1.626
337       1.626
341       1.626
342       1.626
343       1.626
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 25740, dtype: float64

In [41]:
# bmi_data_validation is an alias (not a copy) of filtered_validation_X_meters,
# so the two columns added below appear on both names.
bmi_data_validation = filtered_validation_X_meters

# BMI = Weight / Height**2 (Height already in metres), rounded to one decimal.
# NOTE(review): BMI is computed per row from that row's Weight, so a single
# RecordID can receive more than one class. The RecordID counts printed for
# the BMI groups below add up to more rows than the validation split contains
# -- confirm whether one BMI per patient was intended.
bmi_data_validation["BMI"] = (
    bmi_data_validation["Weight"] / (bmi_data_validation["Height"] ** 2)
).round(1)

# classify_BMI (defined earlier in the notebook) maps each BMI to a class label.
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
336,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.0,3.5,,,16.1,48.4,7.4,18.3,Baixo peso
337,132551,1,1.0,,,,78.0,,,,...,114.5,,,,120.0,,48.4,,18.3,Baixo peso
341,132551,5,5.0,,,,78.0,,,,...,104.0,,,,130.0,,48.4,7.29,18.3,Baixo peso
342,132551,6,6.0,,,,78.0,,67.0,,...,141.0,35.6,3.1,,60.0,20.4,48.4,7.25,18.3,Baixo peso
343,132551,7,7.0,,,,78.0,,,,...,132.0,,,,,,48.4,,18.3,Baixo peso


In [42]:
# Despite its name, this holds the RecordIDs that DO have a BMI row; the
# "undefined" group is obtained below by negating membership in it.
classification_undefined_validation_ids = bmi_data_validation["RecordID"]

# Rows of records with no usable Height/Weight pair at any time step, reduced
# to the non-null count of each column.
classification_undefined_measurements_validation = (
    validation_X
    .loc[~validation_X["RecordID"].isin(classification_undefined_validation_ids)]
    .count()
)
classification_undefined_measurements_validation

RecordID       44640
level_1        44640
Time           44640
ALP              673
ALT              704
AST              706
Age            41231
Albumin          551
BUN             3073
Bilirubin        705
Cholesterol       83
Creatinine      3091
DiasABP        17821
FiO2            5824
GCS            15232
Gender           930
Glucose         3062
HCO3            3050
HCT             3899
HR             39559
Height         41231
ICUType          930
K               3370
Lactate         1453
MAP            17639
MechVent        5503
Mg              3008
NIDiasABP      24092
NIMAP          23727
NISysABP       24107
Na              3192
PaCO2           3041
PaO2            3038
Platelets       2902
RespRate       15977
SaO2             483
SysABP         17825
Temp           11904
TroponinI         46
TroponinT        555
Urine          28342
WBC             2753
Weight         23104
pH              3113
dtype: int64

In [43]:
# BMI rows labelled 'Baixo peso' (shown as "Low Weight" in the summary table).
classification_low_weight_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Baixo peso')
]
classification_low_weight_validation_ids = classification_low_weight_validation["RecordID"]

# All validation rows of those records, reduced to non-null counts per column.
classification_measurements_l_w_v = (
    validation_X
    .loc[validation_X["RecordID"].isin(classification_low_weight_validation_ids)]
    .count()
)
classification_measurements_l_w_v

RecordID       1344
level_1        1344
Time           1344
ALP              14
ALT              14
AST              14
Age            1274
Albumin          12
BUN             100
Bilirubin        15
Cholesterol       0
Creatinine      101
DiasABP         801
FiO2            228
GCS             431
Gender           28
Glucose          97
HCO3             97
HCT             119
HR             1241
Height         1274
ICUType          28
K               108
Lactate          67
MAP             799
MechVent        211
Mg               97
NIDiasABP       575
NIMAP           574
NISysABP        579
Na              101
PaCO2           157
PaO2            157
Platelets        86
RespRate        344
SaO2             57
SysABP          801
Temp            462
TroponinI         5
TroponinT        16
Urine           955
WBC              81
Weight          720
pH              167
dtype: int64

In [44]:
# BMI rows labelled 'Peso normal' (shown as "Normal Weight" in the summary table).
classification_normal_weight_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Peso normal')
]
classification_normal_weight_validation_ids = classification_normal_weight_validation["RecordID"]

# All validation rows of those records, reduced to non-null counts per column.
classification_measurements_n_w_v = (
    validation_X
    .loc[validation_X["RecordID"].isin(classification_normal_weight_validation_ids)]
    .count()
)
classification_measurements_n_w_v

RecordID       15168
level_1        15168
Time           15168
ALP              237
ALT              246
AST              246
Age            14537
Albumin          175
BUN             1105
Bilirubin        245
Cholesterol       36
Creatinine      1114
DiasABP         9208
FiO2            2520
GCS             4808
Gender           316
Glucose          997
HCO3            1074
HCT             1555
HR             13897
Height         14537
ICUType          316
K               1136
Lactate          675
MAP             9146
MechVent        2464
Mg              1086
NIDiasABP       5646
NIMAP           5598
NISysABP        5649
Na              1045
PaCO2           2187
PaO2            2178
Platelets       1209
RespRate        2557
SaO2             888
SysABP          9208
Temp            6526
TroponinI         38
TroponinT        145
Urine          10859
WBC             1076
Weight          8461
pH              2329
dtype: int64

In [45]:
# BMI rows labelled 'Sobrepeso' (shown as "Overweight" in the summary table).
classification_overweight_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Sobrepeso')
]
classification_overweight_validation_ids = classification_overweight_validation["RecordID"]

# All validation rows of those records, reduced to non-null counts per column.
classification_measurements_o_w_v = (
    validation_X
    .loc[validation_X["RecordID"].isin(classification_overweight_validation_ids)]
    .count()
)
classification_measurements_o_w_v

RecordID       18240
level_1        18240
Time           18240
ALP              276
ALT              281
AST              281
Age            17522
Albumin          197
BUN             1352
Bilirubin        282
Cholesterol       32
Creatinine      1361
DiasABP        12815
FiO2            3064
GCS             5703
Gender           380
Glucose         1178
HCO3            1301
HCT             2015
HR             16743
Height         17522
ICUType          380
K               1313
Lactate          813
MAP            12775
MechVent        3055
Mg              1309
NIDiasABP       5370
NIMAP           5315
NISysABP        5379
Na              1222
PaCO2           2947
PaO2            2939
Platelets       1598
RespRate        2639
SaO2            1345
SysABP         12816
Temp            9210
TroponinI         31
TroponinT        146
Urine          13822
WBC             1364
Weight          9773
pH              3163
dtype: int64

In [46]:
# BMI rows labelled 'Obesidade grau 1' (shown as "Obesity Grade 1" in the summary table).
classification_obesity_grade1_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Obesidade grau 1')
]
classification_obesity_grade1_validation_ids = classification_obesity_grade1_validation["RecordID"]

# All validation rows of those records, reduced to non-null counts per column.
classification_measurements_ob1_v = (
    validation_X
    .loc[validation_X["RecordID"].isin(classification_obesity_grade1_validation_ids)]
    .count()
)
classification_measurements_ob1_v

RecordID       13056
level_1        13056
Time           13056
ALP              236
ALT              244
AST              242
Age            12559
Albumin          161
BUN             1012
Bilirubin        236
Cholesterol       25
Creatinine      1012
DiasABP         9594
FiO2            2397
GCS             4113
Gender           272
Glucose          880
HCO3             965
HCT             1455
HR             11947
Height         12559
ICUType          272
K                984
Lactate          677
MAP             9537
MechVent        2313
Mg               975
NIDiasABP       3556
NIMAP           3519
NISysABP        3559
Na               910
PaCO2           2330
PaO2            2324
Platelets       1128
RespRate        1421
SaO2            1006
SysABP          9594
Temp            6781
TroponinI         29
TroponinT        117
Urine          10356
WBC              963
Weight          7155
pH              2484
dtype: int64

In [47]:
# BMI rows labelled 'Obesidade grau 2' (shown as "Obesity Grade 2" in the summary table).
classification_obesity_grade2_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Obesidade grau 2')
]
classification_obesity_grade2_validation_ids = classification_obesity_grade2_validation["RecordID"]

# All validation rows of those records, reduced to non-null counts per column.
classification_measurements_ob2_v = (
    validation_X
    .loc[validation_X["RecordID"].isin(classification_obesity_grade2_validation_ids)]
    .count()
)
classification_measurements_ob2_v

RecordID       5424
level_1        5424
Time           5424
ALP              70
ALT              70
AST              72
Age            5258
Albumin          50
BUN             395
Bilirubin        74
Cholesterol       6
Creatinine      399
DiasABP        3859
FiO2           1052
GCS            1690
Gender          113
Glucose         339
HCO3            378
HCT             599
HR             5034
Height         5258
ICUType         113
K               373
Lactate         299
MAP            3870
MechVent       1033
Mg              390
NIDiasABP      1613
NIMAP          1599
NISysABP       1615
Na              350
PaCO2          1029
PaO2           1026
Platelets       477
RespRate        415
SaO2            500
SysABP         3859
Temp           2915
TroponinI         4
TroponinT        55
Urine          4305
WBC             392
Weight         3327
pH             1075
dtype: int64

In [48]:
# BMI rows labelled 'Obesidade grau 3' (shown as "Obesity Grade 3" in the summary table).
classification_obesity_grade3_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Obesidade grau 3')
]
classification_obesity_grade3_validation_ids = classification_obesity_grade3_validation["RecordID"]

# All validation rows of those records, reduced to non-null counts per column.
classification_measurements_ob3_v = (
    validation_X
    .loc[validation_X["RecordID"].isin(classification_obesity_grade3_validation_ids)]
    .count()
)
classification_measurements_ob3_v

RecordID       4464
level_1        4464
Time           4464
ALP              67
ALT              70
AST              69
Age            4352
Albumin          51
BUN             326
Bilirubin        76
Cholesterol       4
Creatinine      324
DiasABP        3412
FiO2            903
GCS            1323
Gender           93
Glucose         301
HCO3            317
HCT             457
HR             4188
Height         4352
ICUType          93
K               327
Lactate         303
MAP            3396
MechVent        940
Mg              329
NIDiasABP      1071
NIMAP          1064
NISysABP       1074
Na              297
PaCO2           902
PaO2            898
Platelets       374
RespRate        393
SaO2            391
SysABP         3412
Temp           2397
TroponinI         5
TroponinT        37
Urine          3554
WBC             311
Weight         2896
pH              932
dtype: int64

In [49]:
# Column labels of the validation split; they become the row index of the
# validation summary table. This rebinds df_columns, the same name the train
# summary cell reads.
df_columns = validation_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [50]:
# Validation-split summary table: one row per dataset column, one column per
# demographic subgroup, filled from the per-subgroup count Series built in the
# cells above.
df_validation = pd.DataFrame(columns=df_columns)

# Transposing the empty frame yields a frame indexed by the dataset column
# names; assign() then attaches every subgroup Series in one step, aligning
# each one on those labels (same alignment as column-by-column assignment).
df_validation_transpose = df_validation.T.assign(**{
    "Female": female_gender_measurements_validation,
    "Male": male_gender_measurements_validation,
    "Undefined Gender": undefined_gender_measurements_validation,
    "ICUType 1": ICUType_1_measurements_validation,
    "ICUType 2": ICUType_2_measurements_validation,
    "ICUType 3": ICUType_3_measurements_validation,
    "ICUType 4": ICUType_4_measurements_validation,
    "Age 65+": age_65_and_above_measurements_validation,
    # Double underscore matches the name defined by the under-65 cell.
    "Age 65-": age_under_65__measurements_validation,
    'Undefined Classification': classification_undefined_measurements_validation,
    'Low Weight': classification_measurements_l_w_v,
    'Normal Weight': classification_measurements_n_w_v,
    'Overweight': classification_measurements_o_w_v,
    'Obesity Grade 1': classification_measurements_ob1_v,
    'Obesity Grade 2': classification_measurements_ob2_v,
    'Obesity Grade 3': classification_measurements_ob3_v,
})

# Remove the identifier/time rows and the demographic rows that define the
# subgroups themselves, in a single drop instead of seven frame copies.
df_validation_transpose = df_validation_transpose.drop(
    index=['RecordID', 'level_1', 'Time', 'Age', 'Gender', 'Height', 'ICUType']
)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_validation_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,658,796,0,170,129,697,458,622,832,673,14,237,276,236,70,67
ALT,670,832,1,188,134,716,465,641,862,704,14,246,281,244,70,70
AST,673,831,1,187,132,719,467,641,864,706,14,246,281,242,72,69
Albumin,514,603,0,140,87,517,373,526,591,551,12,175,197,161,50,51
BUN,2931,3680,3,921,1186,2414,2093,3484,3130,3073,100,1105,1352,1012,395,326
Bilirubin,690,820,0,173,128,739,470,650,860,705,15,245,282,236,74,76
Cholesterol,77,105,0,96,8,34,44,109,73,83,0,36,32,25,6,4
Creatinine,2944,3703,3,938,1185,2429,2098,3507,3143,3091,101,1114,1361,1012,399,324
DiasABP,21371,27710,32,5001,14413,11266,18433,26348,22765,17821,801,9208,12815,9594,3859,3412
FiO2,6249,7761,1,1370,3106,4598,4937,7499,6512,5824,228,2520,3064,2397,1052,903


<h3>Test data</h3>

<h4>Loading test dataset</h4>

In [51]:
# Pull the test split out of the dataset dict returned by preprocess_physionet2012.
test_X = physionet2012_dataset['test_X']

In [52]:
# Non-null count of every column over the whole test split (baseline for the
# per-subgroup counts computed in the following cells).
var1 = test_X.count()
var1

RecordID       115152
level_1        115152
Time           115152
ALP              1865
ALT              1934
AST              1933
Age            108408
Albumin          1450
BUN              8345
Bilirubin        1922
Cholesterol       196
Creatinine       8401
DiasABP         63100
FiO2            18495
GCS             36500
Gender           2399
Glucose          7916
HCO3             8194
HCT             10816
HR             103927
Height         108408
ICUType          2399
K                8796
Lactate          4827
MAP             62788
MechVent        17790
Mg               8131
NIDiasABP       48514
NIMAP           47924
NISysABP        48557
Na               8230
PaCO2           13575
PaO2            13549
Platelets        8457
RespRate        26932
SaO2             4512
SysABP          63103
Temp            43244
TroponinI         227
TroponinT        1232
Urine           79973
WBC              7733
Weight          60107
pH              14195
dtype: int64

In [53]:
# Rows whose Gender value is 0.0 (the cohort this notebook labels "Female").
test_female_gender = test_X.loc[test_X["Gender"].eq(0.0)]
test_female_gender_ids = test_female_gender["RecordID"]

# Take every row belonging to those RecordIDs and count non-null values per column.
female_gender_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_female_gender_ids)].count()
)
female_gender_measurements_test

RecordID       50448
level_1        50448
Time           50448
ALP              815
ALT              855
AST              854
Age            47435
Albumin          668
BUN             3653
Bilirubin        841
Cholesterol       83
Creatinine      3677
DiasABP        26722
FiO2            7955
GCS            15982
Gender          1051
Glucose         3480
HCO3            3621
HCT             4557
HR             45596
Height         47435
ICUType         1051
K               3841
Lactate         2055
MAP            26532
MechVent        7807
Mg              3518
NIDiasABP      22136
NIMAP          21863
NISysABP       22154
Na              3652
PaCO2           5487
PaO2            5478
Platelets       3589
RespRate       12888
SaO2            1869
SysABP         26725
Temp           18179
TroponinI        120
TroponinT        517
Urine          35079
WBC             3361
Weight         26404
pH              5712
dtype: int64

In [54]:
# Rows whose Gender value is 1.0 (the cohort this notebook labels "Male").
test_male_gender = test_X.loc[test_X["Gender"].eq(1.0)]
test_male_gender_ids = test_male_gender["RecordID"]

# Take every row belonging to those RecordIDs and count non-null values per column.
male_gender_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_male_gender_ids)].count()
)
male_gender_measurements_test

RecordID       64560
level_1        64560
Time           64560
ALP             1048
ALT             1077
AST             1077
Age            60831
Albumin          779
BUN             4675
Bilirubin       1079
Cholesterol      113
Creatinine      4707
DiasABP        36284
FiO2           10509
GCS            20480
Gender          1345
Glucose         4419
HCO3            4556
HCT             6244
HR             58192
Height         60831
ICUType         1345
K               4938
Lactate         2746
MAP            36164
MechVent        9953
Mg              4598
NIDiasABP      26329
NIMAP          26012
NISysABP       26354
Na              4561
PaCO2           8050
PaO2            8033
Platelets       4852
RespRate       14044
SaO2            2641
SysABP         36284
Temp           25005
TroponinI        107
TroponinT        710
Urine          44796
WBC             4357
Weight         33578
pH              8445
dtype: int64

In [55]:
# Rows whose Gender value is -1.0 (the cohort this notebook labels "Undefined Gender").
test_undefined_gender = test_X.loc[test_X["Gender"].eq(-1.0)]
test_undefined_gender_ids = test_undefined_gender["RecordID"]

# Take every row belonging to those RecordIDs and count non-null values per column.
undefined_gender_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_undefined_gender_ids)].count()
)
undefined_gender_measurements_test

RecordID       144
level_1        144
Time           144
ALP              2
ALT              2
AST              2
Age            142
Albumin          3
BUN             17
Bilirubin        2
Cholesterol      0
Creatinine      17
DiasABP         94
FiO2            31
GCS             38
Gender           3
Glucose         17
HCO3            17
HCT             15
HR             139
Height         142
ICUType          3
K               17
Lactate         26
MAP             92
MechVent        30
Mg              15
NIDiasABP       49
NIMAP           49
NISysABP        49
Na              17
PaCO2           38
PaO2            38
Platelets       16
RespRate         0
SaO2             2
SysABP          94
Temp            60
TroponinI        0
TroponinT        5
Urine           98
WBC             15
Weight         125
pH              38
dtype: int64

In [56]:
# Rows that carry ICUType == 1.0 at Time == 0.0; their RecordIDs define the cohort.
test_ICUType_1 = test_X.loc[test_X["ICUType"].eq(1.0) & test_X["Time"].eq(0.0)]
test_ICUType_1_ids = test_ICUType_1["RecordID"]

# Non-null count per column over every row of the selected records.
ICUType_1_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_1_ids)].count()
)
ICUType_1_measurements_test

RecordID       17088
level_1        17088
Time           17088
ALP              273
ALT              297
AST              298
Age            15540
Albumin          200
BUN             1228
Bilirubin        289
Cholesterol      107
Creatinine      1253
DiasABP         7517
FiO2            1861
GCS             4451
Gender           356
Glucose         1185
HCO3            1192
HCT             1466
HR             14822
Height         15540
ICUType          356
K               1427
Lactate          412
MAP             7495
MechVent        1798
Mg              1190
NIDiasABP       7890
NIMAP           7841
NISysABP        7897
Na              1181
PaCO2           1478
PaO2            1482
Platelets       1215
RespRate        5431
SaO2             832
SysABP          7517
Temp            5158
TroponinI         59
TroponinT        364
Urine           9797
WBC             1084
Weight          7728
pH              1516
dtype: int64

In [57]:
# Rows that carry ICUType == 2.0 at Time == 0.0; their RecordIDs define the cohort.
test_ICUType_2 = test_X.loc[test_X["ICUType"].eq(2.0) & test_X["Time"].eq(0.0)]
test_ICUType_2_ids = test_ICUType_2["RecordID"]

# Non-null count per column over every row of the selected records.
ICUType_2_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_2_ids)].count()
)
ICUType_2_measurements_test

RecordID       23664
level_1        23664
Time           23664
ALP              174
ALT              179
AST              179
Age            22745
Albumin          112
BUN             1541
Bilirubin        175
Cholesterol        8
Creatinine      1548
DiasABP        18308
FiO2            4113
GCS             6322
Gender           493
Glucose         1127
HCO3            1421
HCT             2609
HR             21505
Height         22745
ICUType          493
K               1285
Lactate          858
MAP            18340
MechVent        4069
Mg              1471
NIDiasABP       5166
NIMAP           5121
NISysABP        5176
Na              1203
PaCO2           4880
PaO2            4859
Platelets       1942
RespRate         924
SaO2            2482
SysABP         18308
Temp           13789
TroponinI         22
TroponinT         47
Urine          19921
WBC             1635
Weight         12152
pH              5360
dtype: int64

In [58]:
# Rows that carry ICUType == 3.0 at Time == 0.0; their RecordIDs define the cohort.
test_ICUType_3 = test_X.loc[test_X["ICUType"].eq(3.0) & test_X["Time"].eq(0.0)]
test_ICUType_3_ids = test_ICUType_3["RecordID"]

# Non-null count per column over every row of the selected records.
ICUType_3_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_3_ids)].count()
)
ICUType_3_measurements_test

RecordID       41040
level_1        41040
Time           41040
ALP              846
ALT              878
AST              875
Age            38251
Albumin          673
BUN             3124
Bilirubin        887
Cholesterol       47
Creatinine      3143
DiasABP        15219
FiO2            6289
GCS            10963
Gender           855
Glucose         3121
HCO3            3147
HCT             3571
HR             36897
Height         38251
ICUType          855
K               3391
Lactate         1563
MAP            14983
MechVent        5842
Mg              2933
NIDiasABP      23242
NIMAP          22863
NISysABP       23256
Na              3215
PaCO2           3159
PaO2            3164
Platelets       2830
RespRate       12249
SaO2             454
SysABP         15221
Temp           11842
TroponinI         92
TroponinT        577
Urine          25068
WBC             2710
Weight         26288
pH              3204
dtype: int64

In [59]:
# Rows that carry ICUType == 4.0 at Time == 0.0; their RecordIDs define the cohort.
test_ICUType_4 = test_X.loc[test_X["ICUType"].eq(4.0) & test_X["Time"].eq(0.0)]
test_ICUType_4_ids = test_ICUType_4["RecordID"]

# Non-null count per column over every row of the selected records.
ICUType_4_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_4_ids)].count()
)
ICUType_4_measurements_test

RecordID       33360
level_1        33360
Time           33360
ALP              572
ALT              580
AST              581
Age            31872
Albumin          465
BUN             2452
Bilirubin        571
Cholesterol       34
Creatinine      2457
DiasABP        22056
FiO2            6232
GCS            14764
Gender           695
Glucose         2483
HCO3            2434
HCT             3170
HR             30703
Height         31872
ICUType          695
K               2693
Lactate         1994
MAP            21970
MechVent        6081
Mg              2537
NIDiasABP      12216
NIMAP          12099
NISysABP       12228
Na              2631
PaCO2           4058
PaO2            4044
Platelets       2470
RespRate        8328
SaO2             744
SysABP         22057
Temp           12455
TroponinI         54
TroponinT        244
Urine          25187
WBC             2304
Weight         13939
pH              4115
dtype: int64

In [60]:
# Records whose Age is at least 65 on the Time == 0.0 row.
age_65_and_above_test = test_X.loc[test_X["Age"].ge(65) & test_X["Time"].eq(0.0)]
age_65_and_above_test_ids = age_65_and_above_test["RecordID"]

# Non-null count per column over every row of the selected records.
age_65_and_above_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(age_65_and_above_test_ids)].count()
)
age_65_and_above_measurements_test

RecordID       62640
level_1        62640
Time           62640
ALP              839
ALT              865
AST              868
Age            59200
Albumin          698
BUN             4451
Bilirubin        862
Cholesterol      123
Creatinine      4481
DiasABP        35256
FiO2           10175
GCS            19190
Gender          1305
Glucose         4175
HCO3            4375
HCT             5942
HR             56823
Height         59200
ICUType         1305
K               4654
Lactate         2470
MAP            35068
MechVent        9668
Mg              4373
NIDiasABP      25714
NIMAP          25415
NISysABP       25741
Na              4344
PaCO2           7600
PaO2            7594
Platelets       4556
RespRate       14198
SaO2            2818
SysABP         35257
Temp           24817
TroponinI        166
TroponinT        850
Urine          44949
WBC             4147
Weight         32723
pH              7965
dtype: int64

In [61]:
# Records whose Age is below 65 on the Time == 0.0 row.
age_under_65_test = test_X.loc[test_X["Age"].lt(65) & test_X["Time"].eq(0.0)]
age_under_65_test_ids = age_under_65_test["RecordID"]

# Non-null count per column over every row of the selected records.
age_under_65_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(age_under_65_test_ids)].count()
)
age_under_65_measurements_test

RecordID       52512
level_1        52512
Time           52512
ALP             1026
ALT             1069
AST             1065
Age            49208
Albumin          752
BUN             3894
Bilirubin       1060
Cholesterol       73
Creatinine      3920
DiasABP        27844
FiO2            8320
GCS            17310
Gender          1094
Glucose         3741
HCO3            3819
HCT             4874
HR             47104
Height         49208
ICUType         1094
K               4142
Lactate         2357
MAP            27720
MechVent        8122
Mg              3758
NIDiasABP      22800
NIMAP          22509
NISysABP       22816
Na              3886
PaCO2           5975
PaO2            5955
Platelets       3901
RespRate       12734
SaO2            1694
SysABP         27846
Temp           18427
TroponinI         61
TroponinT        382
Urine          35024
WBC             3586
Weight         27384
pH              6230
dtype: int64

In [62]:
# Keep only rows where both Height and Weight are usable for a BMI computation:
# neither the -1 placeholder nor NaN (NaN != -1 is True, hence the explicit notna checks).
filtered_test_X = test_X.loc[
    test_X["Height"].ne(-1)
    & test_X["Weight"].ne(-1)
    & test_X["Height"].notna()
    & test_X["Weight"].notna()
]

In [63]:
# New frame (filtered_test_X itself is left untouched) with Height rescaled
# from centimetres to metres, as required by the BMI formula used next.
filtered_test_X_meters = filtered_test_X.assign(Height=filtered_test_X["Height"] / 100)
filtered_test_X_meters["Height"]

624       1.702
628       1.702
629       1.702
630       1.702
631       1.702
          ...  
574987    1.524
574988    1.524
574989    1.524
574990    1.524
574991    1.524
Name: Height, Length: 31797, dtype: float64

In [64]:
# NOTE: this is an alias, not a copy -- bmi_data_test and filtered_test_X_meters are the
# same object, so the two columns added below show up under both names.
bmi_data_test = filtered_test_X_meters

# BMI = weight / height^2 (Height was converted to metres above), rounded to one decimal.
bmi_data_test["BMI"] = (bmi_data_test["Weight"] / bmi_data_test["Height"].pow(2)).round(1)

# Row-wise label produced by classify_BMI (defined earlier in the notebook).
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
624,132570,0,0.0,,,,84.0,,,,...,,,,,,,102.6,,35.4,Obesidade grau 2
628,132570,4,4.0,,,,84.0,,,,...,,36.6,,,,,102.6,,35.4,Obesidade grau 2
629,132570,5,5.0,19.0,15.0,20.0,84.0,,83.0,0.1,...,,,,,,8.8,102.6,,35.4,Obesidade grau 2
630,132570,6,6.0,,,,84.0,,,,...,,,,,,,102.6,,35.4,Obesidade grau 2
631,132570,7,7.0,,,,84.0,,,,...,,,,,600.0,,102.6,,35.4,Obesidade grau 2


In [65]:
# "Undefined classification" = records with no row at all in bmi_data_test, i.e. no row
# that passed the Height/Weight filter. Count non-null values per column over them.
classification_undefined_test_ids = bmi_data_test["RecordID"]
classification_undefined_measurements_test = (
    test_X.loc[~test_X["RecordID"].isin(classification_undefined_test_ids)].count()
)
classification_undefined_measurements_test

RecordID       55200
level_1        55200
Time           55200
ALP              853
ALT              884
AST              883
Age            50894
Albumin          718
BUN             3834
Bilirubin        887
Cholesterol       86
Creatinine      3867
DiasABP        22619
FiO2            7846
GCS            18225
Gender          1150
Glucose         3844
HCO3            3836
HCT             4543
HR             48945
Height         50894
ICUType         1150
K               4247
Lactate         1809
MAP            22395
MechVent        7170
Mg              3720
NIDiasABP      29076
NIMAP          28710
NISysABP       29092
Na              4032
PaCO2           4010
PaO2            4004
Platelets       3540
RespRate       17864
SaO2             641
SysABP         22622
Temp           14962
TroponinI        113
TroponinT        672
Urine          35050
WBC             3377
Weight         28310
pH              4078
dtype: int64

In [66]:
# Rows labelled 'Baixo peso' (reported as "Low Weight" in the summary table).
# NOTE(review): membership is decided per row but expanded to whole RecordIDs, so a
# record whose label differs between rows would be counted in more than one BMI class --
# confirm this is intended.
classification_low_weight_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq('Baixo peso')
]
classification_low_weight_test_ids = classification_low_weight_test["RecordID"]

# Non-null count per column over every row of the selected records.
classification_measurements_l_w_test = (
    test_X.loc[test_X["RecordID"].isin(classification_low_weight_test_ids)].count()
)
classification_measurements_l_w_test

RecordID       2064
level_1        2064
Time           2064
ALP              36
ALT              36
AST              36
Age            2032
Albumin          27
BUN             166
Bilirubin        36
Cholesterol       5
Creatinine      166
DiasABP        1416
FiO2            379
GCS             644
Gender           43
Glucose         155
HCO3            167
HCT             230
HR             1959
Height         2032
ICUType          43
K               169
Lactate         137
MAP            1419
MechVent        378
Mg              161
NIDiasABP       712
NIMAP           706
NISysABP        713
Na              167
PaCO2           297
PaO2            303
Platelets       181
RespRate        365
SaO2            122
SysABP         1416
Temp            963
TroponinI         0
TroponinT        28
Urine          1498
WBC             158
Weight         1342
pH              322
dtype: int64

In [67]:
# Rows labelled 'Peso normal' (reported as "Normal Weight" in the summary table).
classification_normal_weight_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq('Peso normal')
]
classification_normal_weight_test_ids = classification_normal_weight_test["RecordID"]

# Non-null count per column over every row of the selected records.
classification_measurements_n_w_test = (
    test_X.loc[test_X["RecordID"].isin(classification_normal_weight_test_ids)].count()
)
classification_measurements_n_w_test

RecordID       18960
level_1        18960
Time           18960
ALP              325
ALT              332
AST              335
Age            18179
Albumin          252
BUN             1431
Bilirubin        328
Cholesterol       42
Creatinine      1436
DiasABP        12775
FiO2            3210
GCS             5981
Gender           395
Glucose         1298
HCO3            1379
HCT             2028
HR             17379
Height         18179
ICUType          395
K               1459
Lactate          938
MAP            12715
MechVent        3268
Mg              1422
NIDiasABP       6266
NIMAP           6222
NISysABP        6273
Na              1339
PaCO2           2913
PaO2            2905
Platelets       1605
RespRate        3023
SaO2            1155
SysABP         12775
Temp            9014
TroponinI         29
TroponinT        145
Urine          14191
WBC             1393
Weight          9800
pH              3100
dtype: int64

In [68]:
# Rows labelled 'Sobrepeso' (reported as "Overweight" in the summary table).
classification_overweight_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq('Sobrepeso')
]
classification_overweight_test_ids = classification_overweight_test["RecordID"]

# Non-null count per column over every row of the selected records.
classification_measurements_o_w_test = (
    test_X.loc[test_X["RecordID"].isin(classification_overweight_test_ids)].count()
)
classification_measurements_o_w_test

RecordID       24096
level_1        24096
Time           24096
ALP              387
ALT              408
AST              409
Age            23188
Albumin          254
BUN             1819
Bilirubin        398
Cholesterol       44
Creatinine      1831
DiasABP        16658
FiO2            4162
GCS             7129
Gender           502
Glucose         1594
HCO3            1745
HCT             2612
HR             22069
Height         23188
ICUType          502
K               1771
Lactate         1069
MAP            16644
MechVent        4133
Mg              1760
NIDiasABP       7488
NIMAP           7397
NISysABP        7502
Na              1645
PaCO2           4017
PaO2            3996
Platelets       2049
RespRate        3313
SaO2            1781
SysABP         16658
Temp           12124
TroponinI         61
TroponinT        202
Urine          18323
WBC             1773
Weight         13136
pH              4298
dtype: int64

In [69]:
# Rows labelled 'Obesidade grau 1' (reported as "Obesity Grade 1" in the summary table).
classification_obesity_grade1_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq('Obesidade grau 1')
]
classification_obesity_grade1_test_ids = classification_obesity_grade1_test["RecordID"]

# Non-null count per column over every row of the selected records.
classification_measurements_ob1_test = (
    test_X.loc[test_X["RecordID"].isin(classification_obesity_grade1_test_ids)].count()
)
classification_measurements_ob1_test

RecordID       16656
level_1        16656
Time           16656
ALP              259
ALT              267
AST              265
Age            16092
Albumin          182
BUN             1227
Bilirubin        262
Cholesterol       23
Creatinine      1236
DiasABP        11913
FiO2            3196
GCS             4978
Gender           347
Glucose         1056
HCO3            1172
HCT             1758
HR             15394
Height         16092
ICUType          347
K               1187
Lactate          822
MAP            11868
MechVent        3210
Mg              1170
NIDiasABP       5016
NIMAP           4946
NISysABP        5023
Na              1111
PaCO2           2974
PaO2            2967
Platelets       1356
RespRate        1543
SaO2            1214
SysABP         11913
Temp            8389
TroponinI         47
TroponinT        168
Urine          13019
WBC             1193
Weight          9718
pH              3151
dtype: int64

In [70]:
# Rows labelled 'Obesidade grau 2' (reported as "Obesity Grade 2" in the summary table).
classification_obesity_grade2_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq('Obesidade grau 2')
]
classification_obesity_grade2_test_ids = classification_obesity_grade2_test["RecordID"]

# Non-null count per column over every row of the selected records.
classification_measurements_ob2_test = (
    test_X.loc[test_X["RecordID"].isin(classification_obesity_grade2_test_ids)].count()
)
classification_measurements_ob2_test

RecordID       7536
level_1        7536
Time           7536
ALP             117
ALT             123
AST             122
Age            7208
Albumin          85
BUN             561
Bilirubin       122
Cholesterol       5
Creatinine      564
DiasABP        5392
FiO2           1477
GCS            2099
Gender          157
Glucose         500
HCO3            542
HCT             823
HR             6845
Height         7208
ICUType         157
K               556
Lactate         492
MAP            5387
MechVent       1453
Mg              550
NIDiasABP      2061
NIMAP          2024
NISysABP       2063
Na              510
PaCO2          1401
PaO2           1398
Platelets       633
RespRate        850
SaO2            552
SysABP         5392
Temp           3878
TroponinI        11
TroponinT        70
Urine          5822
WBC             577
Weight         4477
pH             1483
dtype: int64

In [71]:
# Rows labelled 'Obesidade grau 3' (reported as "Obesity Grade 3" in the summary table).
classification_obesity_grade3_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq('Obesidade grau 3')
]
classification_obesity_grade3_test_ids = classification_obesity_grade3_test["RecordID"]

# Non-null count per column over every row of the selected records.
classification_measurements_ob3_test = (
    test_X.loc[test_X["RecordID"].isin(classification_obesity_grade3_test_ids)].count()
)
classification_measurements_ob3_test

RecordID       5232
level_1        5232
Time           5232
ALP              86
ALT              87
AST              86
Age            5068
Albumin          58
BUN             372
Bilirubin        89
Cholesterol       5
Creatinine      373
DiasABP        3616
FiO2           1012
GCS            1461
Gender          109
Glucose         313
HCO3            353
HCT             521
HR             4872
Height         5068
ICUType         109
K               352
Lactate         302
MAP            3641
MechVent        978
Mg              371
NIDiasABP      1533
NIMAP          1509
NISysABP       1535
Na              326
PaCO2           981
PaO2            979
Platelets       388
RespRate        823
SaO2            447
SysABP         3616
Temp           2578
TroponinI         8
TroponinT        33
Urine          4140
WBC             352
Weight         2948
pH             1016
dtype: int64

In [72]:
# Column labels of the test split; they become the row index of the summary table
# built in the next cell.
df_columns = test_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [73]:
# Test-set summary table: one row per dataset column, one column per subgroup.
# Every value is the per-column `.count()` Series produced by the matching subgroup
# cell above; pandas aligns each Series on the row index.
df_test = pd.DataFrame(columns=df_columns)
df_test_transpose = df_test.T.assign(
    **{
        "Female": female_gender_measurements_test,
        "Male": male_gender_measurements_test,
        "Undefined Gender": undefined_gender_measurements_test,
        "ICUType 1": ICUType_1_measurements_test,
        "ICUType 2": ICUType_2_measurements_test,
        "ICUType 3": ICUType_3_measurements_test,
        "ICUType 4": ICUType_4_measurements_test,
        "Age 65+": age_65_and_above_measurements_test,
        "Age 65-": age_under_65_measurements_test,
        "Undefined Classification": classification_undefined_measurements_test,
        "Low Weight": classification_measurements_l_w_test,
        "Normal Weight": classification_measurements_n_w_test,
        "Overweight": classification_measurements_o_w_test,
        "Obesity Grade 1": classification_measurements_ob1_test,
        "Obesity Grade 2": classification_measurements_ob2_test,
        "Obesity Grade 3": classification_measurements_ob3_test,
    }
)

# Remove the rows for RecordID, level_1, Time, Age, Gender, Height and ICUType so that
# only the remaining variables are listed in the table.
df_test_transpose = df_test_transpose.drop(
    index=["RecordID", "level_1", "Time", "Age", "Gender", "Height", "ICUType"]
)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Test Set</h2>"))
df_test_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,815,1048,2,273,174,846,572,839,1026,853,36,325,387,259,117,86
ALT,855,1077,2,297,179,878,580,865,1069,884,36,332,408,267,123,87
AST,854,1077,2,298,179,875,581,868,1065,883,36,335,409,265,122,86
Albumin,668,779,3,200,112,673,465,698,752,718,27,252,254,182,85,58
BUN,3653,4675,17,1228,1541,3124,2452,4451,3894,3834,166,1431,1819,1227,561,372
Bilirubin,841,1079,2,289,175,887,571,862,1060,887,36,328,398,262,122,89
Cholesterol,83,113,0,107,8,47,34,123,73,86,5,42,44,23,5,5
Creatinine,3677,4707,17,1253,1548,3143,2457,4481,3920,3867,166,1436,1831,1236,564,373
DiasABP,26722,36284,94,7517,18308,15219,22056,35256,27844,22619,1416,12775,16658,11913,5392,3616
FiO2,7955,10509,31,1861,4113,6289,6232,10175,8320,7846,379,3210,4162,3196,1477,1012
