In [1]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
# Put the parent directory on sys.path (once) so packages located there can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

<h2>Loading dataset</h2>

In [2]:
# NOTE(review): pypotsModify is presumably resolved through the parent-directory
# entry appended to sys.path in the first cell — confirm.
from pypotsModify.benchpots.datasets import preprocess_physionet2012
# Load the preprocessed physionet_2012 dataset for subset "all".
# NOTE(review): rate=0.1 looks like an artificial missing rate — confirm against
# preprocess_physionet2012.
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-28 09:26:13 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-28 09:26:13 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-28 09:26:13 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-28 09:26:13 [INFO]: Loaded successfully!


<h3>Training data</h3>

<h4>Loading training dataset</h4>

In [3]:
# Training-split features, stored under the 'train_X' key of the loaded dataset.
train_X = physionet2012_dataset['train_X']

In [4]:
# Rows where Gender was recorded as 0.0 (the "Female" column of the summary table).
training_female_gender = train_X.loc[train_X["Gender"].eq(0.0)]
training_female_gender_ids = training_female_gender["RecordID"]
# Non-null count per column over every row belonging to those records.
female_gender_measurements_training = train_X.loc[
    train_X["RecordID"].isin(training_female_gender_ids)
].count()
female_gender_measurements_training

RecordID       162096
level_1        162096
Time           162096
ALP              2657
ALT              2740
AST              2732
Age            152664
Albumin          2057
BUN             11706
Bilirubin        2778
Cholesterol       265
Creatinine      11759
DiasABP         82964
FiO2            25006
GCS             52204
Gender           3377
Glucose         11177
HCO3            11522
HCT             14911
HR             146584
Height         152664
ICUType          3377
K               12379
Lactate          6591
MAP             82560
MechVent        24372
Mg              11478
NIDiasABP       72207
NIMAP           71043
NISysABP        72266
Na              11622
PaCO2           17558
PaO2            17526
Platelets       11601
RespRate        43681
SaO2             6102
SysABP          82968
Temp            57877
TroponinI         393
TroponinT        1721
Urine          112423
WBC             10786
Weight          87863
pH              18173
dtype: int64

In [5]:
# Rows where Gender was recorded as 1.0 (the "Male" column of the summary table).
training_male_gender = train_X.loc[train_X['Gender'].eq(1.0)]
training_male_gender_ids = training_male_gender["RecordID"]
# Non-null count per column over every row belonging to those records.
male_gender_measurements_training = train_X.loc[
    train_X["RecordID"].isin(training_male_gender_ids)
].count()
male_gender_measurements_training

RecordID       205632
level_1        205632
Time           205632
ALP              3418
ALT              3514
AST              3515
Age            194084
Albumin          2581
BUN             15028
Bilirubin        3544
Cholesterol       351
Creatinine      15095
DiasABP        116499
FiO2            32918
GCS             65453
Gender           4284
Glucose         14018
HCO3            14637
HCT             20200
HR             185633
Height         194084
ICUType          4284
K               15604
Lactate          8937
MAP            115786
MechVent        31165
Mg              14738
NIDiasABP       83085
NIMAP           82034
NISysABP        83158
Na              14593
PaCO2           25272
PaO2            25218
Platelets       15524
RespRate        44881
SaO2             8882
SysABP         116506
Temp            79546
TroponinI         379
TroponinT        2153
Urine          142740
WBC             14005
Weight         108487
pH              26570
dtype: int64

In [6]:
# Rows where Gender was recorded as -1.0 (the "Undefined Gender" column of the summary table).
training_undefined_gender = train_X.loc[train_X['Gender'].eq(-1.0)]
undefined_gender_training_ids = training_undefined_gender['RecordID']
# Non-null count per column over every row belonging to those records.
undefined_gender_measurements_training = train_X.loc[
    train_X["RecordID"].isin(undefined_gender_training_ids)
].count()
undefined_gender_measurements_training

RecordID       480
level_1        480
Time           480
ALP              5
ALT              5
AST              5
Age            339
Albumin          5
BUN             30
Bilirubin        5
Cholesterol      0
Creatinine      30
DiasABP        217
FiO2            51
GCS             91
Gender          10
Glucose         30
HCO3            30
HCT             29
HR             325
Height         339
ICUType         10
K               30
Lactate         29
MAP            213
MechVent        51
Mg              27
NIDiasABP      136
NIMAP          136
NISysABP       136
Na              29
PaCO2           52
PaO2            52
Platelets       29
RespRate        47
SaO2             3
SysABP         217
Temp           134
TroponinI        1
TroponinT        5
Urine          224
WBC             28
Weight         254
pH              56
dtype: int64

In [7]:
# Rows with ICUType == 1.0 taken at Time == 0.0 (both conditions in a single mask).
training_ICUType_1 = train_X[train_X['ICUType'].eq(1.0) & train_X["Time"].eq(0.0)]
training_ICUType_1_ids = training_ICUType_1["RecordID"]
# Non-null count per column over every row of the matching records.
ICUType_1_measurements_training = train_X.loc[
    train_X["RecordID"].isin(training_ICUType_1_ids)
].count()
ICUType_1_measurements_training

RecordID       53424
level_1        53424
Time           53424
ALP              780
ALT              830
AST              829
Age            49009
Albumin          624
BUN             3807
Bilirubin        806
Cholesterol      324
Creatinine      3865
DiasABP        21897
FiO2            5852
GCS            13730
Gender          1113
Glucose         3645
HCO3            3677
HCT             4665
HR             46636
Height         49009
ICUType         1113
K               4438
Lactate         1232
MAP            21783
MechVent        5398
Mg              3797
NIDiasABP      25842
NIMAP          25702
NISysABP       25857
Na              3662
PaCO2           4459
PaO2            4459
Platelets       3806
RespRate       19065
SaO2            2621
SysABP         21898
Temp           16047
TroponinI        173
TroponinT       1099
Urine          30624
WBC             3424
Weight         24071
pH              4557
dtype: int64

In [8]:
# Rows with ICUType == 2.0 taken at Time == 0.0 (both conditions in a single mask).
training_ICUType_2 = train_X[train_X['ICUType'].eq(2.0) & train_X["Time"].eq(0.0)]
training_ICUType2_ids = training_ICUType_2["RecordID"]
# Non-null count per column over every row of the matching records.
ICUType_2_measurements_training = train_X.loc[
    train_X["RecordID"].isin(training_ICUType2_ids)
].count()
ICUType_2_measurements_training

RecordID       79776
level_1        79776
Time           79776
ALP              568
ALT              581
AST              580
Age            77033
Albumin          380
BUN             5173
Bilirubin        572
Cholesterol       22
Creatinine      5191
DiasABP        62320
FiO2           13546
GCS            21170
Gender          1662
Glucose         3745
HCO3            4791
HCT             8689
HR             73127
Height         77033
ICUType         1662
K               4227
Lactate         2845
MAP            62485
MechVent       13298
Mg              5067
NIDiasABP      17660
NIMAP          17518
NISysABP       17691
Na              4049
PaCO2          16133
PaO2           16083
Platelets       6452
RespRate        3117
SaO2            8688
SysABP         62322
Temp           45907
TroponinI        118
TroponinT        195
Urine          67077
WBC             5399
Weight         41753
pH             17647
dtype: int64

In [9]:
# Rows with ICUType == 3.0 taken at Time == 0.0 (both conditions in a single mask).
training_ICUType_3 = train_X[train_X['ICUType'].eq(3.0) & train_X["Time"].eq(0.0)]
training_ICUType_3_ids = training_ICUType_3["RecordID"]
# Non-null count per column over every row of the matching records.
ICUType_3_measurements_training = train_X.loc[
    train_X["RecordID"].isin(training_ICUType_3_ids)
].count()
ICUType_3_measurements_training

RecordID       131952
level_1        131952
Time           131952
ALP              2869
ALT              2954
AST              2948
Age            122740
Albumin          2217
BUN             10022
Bilirubin        3093
Cholesterol       133
Creatinine      10054
DiasABP         47053
FiO2            19433
GCS             35260
Gender           2749
Glucose         10014
HCO3            10046
HCT             11671
HR             118217
Height         122740
ICUType          2749
K               10856
Lactate          5383
MAP             46418
MechVent        18016
Mg               9447
NIDiasABP       75164
NIMAP           73742
NISysABP        75207
Na              10288
PaCO2           10254
PaO2            10251
Platelets        8988
RespRate        41341
SaO2             1583
SysABP          47057
Temp            38190
TroponinI         313
TroponinT        1819
Urine           79797
WBC              8610
Weight          86534
pH              10380
dtype: int64

In [10]:
# Rows with ICUType == 4.0 taken at Time == 0.0 (both conditions in a single mask).
training_ICUType_4 = train_X[train_X['ICUType'].eq(4.0) & train_X["Time"].eq(0.0)]
training_ICUType_4_ids = training_ICUType_4["RecordID"]
# Non-null count per column over every row of the matching records.
ICUType_4_measurements_training = train_X.loc[
    train_X["RecordID"].isin(training_ICUType_4_ids)
].count()
ICUType_4_measurements_training

RecordID       103056
level_1        103056
Time           103056
ALP              1863
ALT              1894
AST              1895
Age             98305
Albumin          1422
BUN              7762
Bilirubin        1856
Cholesterol       137
Creatinine       7774
DiasABP         68410
FiO2            19144
GCS             47588
Gender           2147
Glucose          7821
HCO3             7675
HCT             10115
HR              94562
Height          98305
ICUType          2147
K                8492
Lactate          6097
MAP             67873
MechVent        18876
Mg               7932
NIDiasABP       36762
NIMAP           36251
NISysABP        36805
Na               8245
PaCO2           12036
PaO2            12003
Platelets        7908
RespRate        25086
SaO2             2095
SysABP          68414
Temp            37413
TroponinI         169
TroponinT         766
Urine           77889
WBC              7386
Weight          44246
pH              12215
dtype: int64

In [11]:
# Rows with Age >= 65 taken at Time == 0.0, so a repeated Age value is not picked up
# from later time steps of the same record.
age_65_and_above_training = train_X[train_X["Age"].ge(65) & train_X["Time"].eq(0.0)]
age_65_and_above_training_ids = age_65_and_above_training["RecordID"]
# Non-null count per column over every row of the matching records.
age_65_and_above_measurements_training = train_X.loc[
    train_X["RecordID"].isin(age_65_and_above_training_ids)
].count()
age_65_and_above_measurements_training

RecordID       201072
level_1        201072
Time           201072
ALP              2677
ALT              2744
AST              2744
Age            190270
Albumin          2213
BUN             14241
Bilirubin        2793
Cholesterol       356
Creatinine      14309
DiasABP        110061
FiO2            31568
GCS             62475
Gender           4189
Glucose         13230
HCO3            13927
HCT             19061
HR             182658
Height         190270
ICUType          4189
K               14822
Lactate          7971
MAP            109559
MechVent        29547
Mg              14051
NIDiasABP       85511
NIMAP           84478
NISysABP        85582
Na              13788
PaCO2           23558
PaO2            23517
Platelets       14520
RespRate        49401
SaO2             9310
SysABP         110067
Temp            77966
TroponinI         544
TroponinT        2676
Urine          142975
WBC             13244
Weight         108319
pH              24673
dtype: int64

In [12]:
# Motivation for the test: the number of values for the age group shown in the table was much larger than the number of
# values for the gender group, which made no sense: since neither variable has missing data in either group,
# if gender has n occurrences, age must have n occurrences too, and vice versa.

# Finding: the test showed that there are cases in which the value of the age field repeats along the patient's time series; given that, we adjusted the code to take only the first occurrence of the value for each RecordID.
# We also found that, because we were filtering only the female and male genders and there is also a value for undefined gender, the Age group was picking up the ages of those undefined-gender records as well, which made
# the total number of occurrences for the Age group larger than for the Gender group.

# teste2 = train_X[train_X['Age'] >= 65]
# teste2["RecordID"].value_counts()   

In [13]:
# Rows with Age < 65 taken at Time == 0.0, so a repeated Age value is not picked up
# from later time steps of the same record.
age_under_65_training = train_X[train_X["Age"].lt(65) & train_X["Time"].eq(0.0)]
age_under_65_training_ids = age_under_65_training["RecordID"]
# Non-null count per column over every row of the matching records.
age_under_65_measurements_training = train_X.loc[
    train_X["RecordID"].isin(age_under_65_training_ids)
].count()
age_under_65_measurements_training

RecordID       167136
level_1        167136
Time           167136
ALP              3403
ALT              3515
AST              3508
Age            156817
Albumin          2430
BUN             12523
Bilirubin        3534
Cholesterol       260
Creatinine      12575
DiasABP         89619
FiO2            26407
GCS             55273
Gender           3482
Glucose         11995
HCO3            12262
HCT             16079
HR             149884
Height         156817
ICUType          3482
K               13191
Lactate          7586
MAP             89000
MechVent        26041
Mg              12192
NIDiasABP       69917
NIMAP           68735
NISysABP        69978
Na              12456
PaCO2           19324
PaO2            19279
Platelets       12634
RespRate        39208
SaO2             5677
SysABP          89624
Temp            59591
TroponinI         229
TroponinT        1203
Urine          112412
WBC             11575
Weight          88285
pH              20126
dtype: int64

In [14]:
# Keep only rows where Height and Weight are both present and neither equals -1.
# NOTE(review): -1 is presumably the dataset's "unknown" marker — confirm.
filtered_train_X = train_X[
    train_X[['Height', 'Weight']].ne(-1).all(axis=1)
    & train_X[['Height', 'Weight']].notna().all(axis=1)
]

In [15]:
def classify_BMI(BMI):
    """Map a body-mass index value to its weight-class label.

    The classes are contiguous, so every non-missing value falls in exactly
    one of them. The previous closed bounds left gaps (for example
    24.9 < BMI < 25) for which nothing was returned. Values with a single
    decimal place are classified exactly as before; in particular 18.5 is
    still "Baixo peso".

    Args:
        BMI: Body-mass index as a number; may be NaN or None.

    Returns:
        One of "Baixo peso", "Peso normal", "Sobrepeso", "Obesidade grau 1",
        "Obesidade grau 2" or "Obesidade grau 3", or None when BMI is missing.
    """
    # Missing values have no class; without this guard NaN would fall through
    # every comparison and end up in the last branch.
    if pd.isna(BMI):
        return None
    if BMI <= 18.5:
        return "Baixo peso"
    if BMI < 25:
        return "Peso normal"
    if BMI < 30:
        return "Sobrepeso"
    if BMI < 35:
        return "Obesidade grau 1"
    if BMI < 40:
        return "Obesidade grau 2"
    return "Obesidade grau 3"

In [16]:
# New frame (filtered_train_X is left untouched) with Height converted from cm to meters.
filtered_train_X_meters = filtered_train_X.assign(Height=filtered_train_X['Height'] / 100)
filtered_train_X_meters['Height']

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
574989    1.524
574990    1.524
574991    1.524
575088    1.727
575184    1.727
Name: Height, Length: 103634, dtype: float64

In [17]:
# Work on a copy: a plain assignment would only alias filtered_train_X_meters,
# so adding the BMI columns below would silently modify that frame as well.
bmi_data_train = filtered_train_X_meters.copy()
# BMI = Weight / Height**2, rounded to one decimal place. Height is in meters here.
# NOTE(review): assumes Weight is in kilograms — confirm.
bmi_data_train["BMI"] = (bmi_data_train["Weight"] / bmi_data_train["Height"] ** 2).round(1)
# Label every row with its BMI class.
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.7,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.2,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.2,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.2,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.2,Sobrepeso


In [18]:
# Collapse to one row per RecordID. GroupBy.first() takes the first non-null value
# of each column independently, so a resulting row can combine values observed at
# different time steps of the same record.
bmi_data_train = bmi_data_train.groupby("RecordID").first().reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132540,0,0.0,,,,76.0,,21.0,,...,122.00,37.50,,,50.0,13.3,76.0,7.45,24.7,Peso normal
1,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.30,,,600.0,11.5,84.6,,26.0,Sobrepeso
2,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
3,132548,0,0.0,,,,68.0,,32.0,,...,205.00,36.30,0.7,,120.0,6.2,87.0,,32.9,Obesidade grau 1
4,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.00,3.5,,120.0,16.1,48.4,7.40,18.3,Baixo peso
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4046,163007,0,0.0,42.0,30.0,40.0,19.0,2.8,16.0,0.6,...,0.00,40.55,,1.00,150.0,14.1,114.3,7.36,34.2,Obesidade grau 1
4047,163008,0,0.0,,,,59.0,,24.0,,...,97.00,37.60,,,45.0,6.9,98.5,7.38,34.0,Obesidade grau 1
4048,163013,0,0.0,82.0,11.0,30.0,74.0,2.5,30.0,1.2,...,118.00,36.50,,0.03,40.0,9.6,68.6,7.35,29.5,Sobrepeso
4049,163021,0,0.0,,,,72.0,,9.0,,...,,,,,,8.6,62.0,,20.8,Peso normal


In [19]:
# Number of records in each BMI class (bmi_data_train holds one row per RecordID at this point).
bmi_data_train["Classificacao"].value_counts()

Classificacao
Sobrepeso           1380
Peso normal         1188
Obesidade grau 1     759
Obesidade grau 2     309
Obesidade grau 3     275
Baixo peso           140
Name: count, dtype: int64

In [20]:
# Despite its name, this holds the RecordIDs that DO have a BMI class; the
# "undefined" cohort is obtained by negating the membership test below.
classification_undefined_training_ids = bmi_data_train["RecordID"]
# Non-null count per column over the rows of records without a BMI class.
classification_undefined_measurements_training = train_X.loc[
    ~train_X["RecordID"].isin(classification_undefined_training_ids)
].count()
classification_undefined_measurements_training

RecordID       173760
level_1        173760
Time           173760
ALP              2886
ALT              2980
AST              2980
Age            160695
Albumin          2325
BUN             12299
Bilirubin        3023
Cholesterol       285
Creatinine      12354
DiasABP         69069
FiO2            23972
GCS             57936
Gender           3620
Glucose         12266
HCO3            12211
HCT             14891
HR             154498
Height         160695
ICUType          3620
K               13484
Lactate          5992
MAP             68296
MechVent        22085
Mg              11921
NIDiasABP       93092
NIMAP           91573
NISysABP        93156
Na              12765
PaCO2           12304
PaO2            12272
Platelets       11340
RespRate        59386
SaO2             1967
SysABP          69076
Temp            46985
TroponinI         271
TroponinT        2128
Urine          110289
WBC             10864
Weight          92970
pH              12510
dtype: int64

In [21]:
# Records classified as 'Baixo peso' (the "Low Weight" column of the summary table).
classification_low_weight_training = bmi_data_train.loc[bmi_data_train["Classificacao"].eq('Baixo peso')]
classification_low_weight_training_ids = classification_low_weight_training["RecordID"]
# Non-null count per column over every train_X row of those records.
classification_measurements_l_w_t = train_X.loc[
    train_X["RecordID"].isin(classification_low_weight_training_ids)
].count()
classification_measurements_l_w_t

RecordID       6720
level_1        6720
Time           6720
ALP              97
ALT              99
AST             100
Age            6440
Albumin          77
BUN             475
Bilirubin        99
Cholesterol      11
Creatinine      477
DiasABP        4205
FiO2           1072
GCS            2126
Gender          140
Glucose         438
HCO3            465
HCT             634
HR             6178
Height         6440
ICUType         140
K               489
Lactate         338
MAP            4230
MechVent       1061
Mg              478
NIDiasABP      2451
NIMAP          2398
NISysABP       2452
Na              457
PaCO2           855
PaO2            856
Platelets       483
RespRate       1132
SaO2            360
SysABP         4205
Temp           2769
TroponinI        30
TroponinT        51
Urine          4805
WBC             441
Weight         3517
pH              906
dtype: int64

In [22]:
# Records classified as 'Peso normal' (the "Normal Weight" column of the summary table).
classification_normal_weight_training = bmi_data_train.loc[bmi_data_train["Classificacao"].eq('Peso normal')]
classification_normal_weight_training_ids = classification_normal_weight_training["RecordID"]
# Non-null count per column over every train_X row of those records.
classification_measurements_n_w_t = train_X.loc[
    train_X["RecordID"].isin(classification_normal_weight_training_ids)
].count()
classification_measurements_n_w_t

RecordID       57024
level_1        57024
Time           57024
ALP              852
ALT              881
AST              876
Age            54663
Albumin          671
BUN             4175
Bilirubin        875
Cholesterol       88
Creatinine      4187
DiasABP        37197
FiO2            9597
GCS            18146
Gender          1188
Glucose         3787
HCO3            4044
HCT             5819
HR             52249
Height         54663
ICUType         1188
K               4263
Lactate         2748
MAP            37068
MechVent        9557
Mg              4123
NIDiasABP      19366
NIMAP          19217
NISysABP       19390
Na              3958
PaCO2           8353
PaO2            8330
Platelets       4505
RespRate        9345
SaO2            3416
SysABP         37198
Temp           25683
TroponinI        139
TroponinT        515
Urine          41986
WBC             3999
Weight         29961
pH              8858
dtype: int64

In [23]:
# Records classified as 'Sobrepeso' (the "Overweight" column of the summary table).
classification_overweight_training = bmi_data_train.loc[bmi_data_train["Classificacao"].eq('Sobrepeso')]
classification_overweight_training_ids = classification_overweight_training['RecordID']
# Non-null count per column over every train_X row of those records.
classification_measurements_o_w_t = train_X.loc[
    train_X["RecordID"].isin(classification_overweight_training_ids)
].count()
classification_measurements_o_w_t

RecordID       66240
level_1        66240
Time           66240
ALP             1128
ALT             1158
AST             1158
Age            63494
Albumin          778
BUN             4968
Bilirubin       1170
Cholesterol      120
Creatinine      5000
DiasABP        45026
FiO2           11465
GCS            20258
Gender          1380
Glucose         4399
HCO3            4797
HCT             7124
HR             60583
Height         63494
ICUType         1380
K               4913
Lactate         3096
MAP            44947
MechVent       11192
Mg              4961
NIDiasABP      21118
NIMAP          20874
NISysABP       21140
Na              4573
PaCO2          10557
PaO2           10532
Platelets       5608
RespRate        9459
SaO2            4661
SysABP         45028
Temp           31456
TroponinI        169
TroponinT        566
Urine          49420
WBC             4878
Weight         34649
pH             11210
dtype: int64

In [24]:
# Records classified as 'Obesidade grau 1' (the "Obesity Grade 1" column of the summary table).
classification_obesity_grade1_training = bmi_data_train.loc[bmi_data_train["Classificacao"].eq('Obesidade grau 1')]
classification_obesity_grade1_training_ids = classification_obesity_grade1_training["RecordID"]
# Non-null count per column over every train_X row of those records.
classification_measurements_ob1_t = train_X.loc[
    train_X["RecordID"].isin(classification_obesity_grade1_training_ids)
].count()
classification_measurements_ob1_t

RecordID       36432
level_1        36432
Time           36432
ALP              615
ALT              632
AST              631
Age            34984
Albumin          443
BUN             2721
Bilirubin        635
Cholesterol       60
Creatinine      2730
DiasABP        25305
FiO2            6526
GCS            11067
Gender           759
Glucose         2421
HCO3            2604
HCT             3819
HR             33399
Height         34984
ICUType          759
K               2734
Lactate         1817
MAP            25115
MechVent        6440
Mg              2685
NIDiasABP      11088
NIMAP          10960
NISysABP       11100
Na              2509
PaCO2           6073
PaO2            6065
Platelets       2961
RespRate        5286
SaO2            2594
SysABP         25305
Temp           17705
TroponinI        101
TroponinT        342
Urine          27991
WBC             2603
Weight         19537
pH              6422
dtype: int64

In [25]:
# Records classified as 'Obesidade grau 2' (the "Obesity Grade 2" column of the summary table).
classification_obesity_grade2_training = bmi_data_train.loc[bmi_data_train["Classificacao"].eq('Obesidade grau 2')]
classification_obesity_grade2_training_ids = classification_obesity_grade2_training["RecordID"]
# Non-null count per column over every train_X row of those records.
classification_measurements_ob2_t = train_X.loc[
    train_X["RecordID"].isin(classification_obesity_grade2_training_ids)
].count()
classification_measurements_ob2_t

RecordID       14832
level_1        14832
Time           14832
ALP              257
ALT              258
AST              258
Age            14201
Albumin          173
BUN             1149
Bilirubin        268
Cholesterol       29
Creatinine      1158
DiasABP        10535
FiO2            2797
GCS             4479
Gender           309
Glucose         1015
HCO3            1111
HCT             1585
HR             13574
Height         14201
ICUType          309
K               1132
Lactate          839
MAP            10536
MechVent        2687
Mg              1110
NIDiasABP       4065
NIMAP           3997
NISysABP        4071
Na              1064
PaCO2           2597
PaO2            2594
Platelets       1256
RespRate        2186
SaO2            1155
SysABP         10536
Temp            7455
TroponinI         32
TroponinT        153
Urine          11209
WBC             1122
Weight          8265
pH              2690
dtype: int64

In [26]:
# Records classified as 'Obesidade grau 3' (the "Obesity Grade 3" column of the summary table).
classification_obesity_grade3_training = bmi_data_train.loc[bmi_data_train["Classificacao"].eq('Obesidade grau 3')]
classification_obesity_grade3_training_ids = classification_obesity_grade3_training["RecordID"]
# Non-null count per column over every train_X row of those records.
classification_measurements_ob3_t = train_X.loc[
    train_X["RecordID"].isin(classification_obesity_grade3_training_ids)
].count()
classification_measurements_ob3_t

RecordID       13200
level_1        13200
Time           13200
ALP              245
ALT              251
AST              249
Age            12610
Albumin          176
BUN              977
Bilirubin        257
Cholesterol       23
Creatinine       978
DiasABP         8343
FiO2            2546
GCS             3736
Gender           275
Glucose          899
HCO3             957
HCT             1268
HR             12061
Height         12610
ICUType          275
K                998
Lactate          727
MAP             8367
MechVent        2566
Mg               965
NIDiasABP       4248
NIMAP           4194
NISysABP        4251
Na               918
PaCO2           2143
PaO2            2147
Platelets       1001
RespRate        1815
SaO2             834
SysABP          8343
Temp            5504
TroponinI         31
TroponinT        124
Urine           9687
WBC              912
Weight          7705
pH              2203
dtype: int64

In [27]:
# Column labels of train_X; they become the row index of the summary table built below.
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [28]:
# Empty frame whose rows (after transposing) are the train_X variable names;
# one column per demographic subgroup is added to it below.
df_train = pd.DataFrame(columns=df_columns)
df_train_transpose = df_train.T

# Subgroup label -> per-variable non-null counts, in the order the columns are displayed.
subgroup_counts_train = {
    "Female": female_gender_measurements_training,
    "Male": male_gender_measurements_training,
    "Undefined Gender": undefined_gender_measurements_training,
    "ICUType 1": ICUType_1_measurements_training,
    "ICUType 2": ICUType_2_measurements_training,
    "ICUType 3": ICUType_3_measurements_training,
    "ICUType 4": ICUType_4_measurements_training,
    "Age 65+": age_65_and_above_measurements_training,
    "Age 65-": age_under_65_measurements_training,
    'Undefined Classification': classification_undefined_measurements_training,
    'Low Weight': classification_measurements_l_w_t,
    'Normal Weight': classification_measurements_n_w_t,
    'Overweight': classification_measurements_o_w_t,
    'Obesity Grade 1': classification_measurements_ob1_t,
    'Obesity Grade 2': classification_measurements_ob2_t,
    'Obesity Grade 3': classification_measurements_ob3_t,
}
for subgroup_label, subgroup_counts in subgroup_counts_train.items():
    df_train_transpose[subgroup_label] = subgroup_counts

# Remove the identifier/time rows and the demographic rows in a single call.
# NOTE(review): Height is dropped but Weight is kept — confirm this is intended.
df_train_transpose = df_train_transpose.drop(
    index=['RecordID', 'level_1', 'Time', 'Age', 'Gender', 'Height', 'ICUType']
)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_train_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,2657,3418,5,780,568,2869,1863,2677,3403,2886,97,852,1128,615,257,245
ALT,2740,3514,5,830,581,2954,1894,2744,3515,2980,99,881,1158,632,258,251
AST,2732,3515,5,829,580,2948,1895,2744,3508,2980,100,876,1158,631,258,249
Albumin,2057,2581,5,624,380,2217,1422,2213,2430,2325,77,671,778,443,173,176
BUN,11706,15028,30,3807,5173,10022,7762,14241,12523,12299,475,4175,4968,2721,1149,977
Bilirubin,2778,3544,5,806,572,3093,1856,2793,3534,3023,99,875,1170,635,268,257
Cholesterol,265,351,0,324,22,133,137,356,260,285,11,88,120,60,29,23
Creatinine,11759,15095,30,3865,5191,10054,7774,14309,12575,12354,477,4187,5000,2730,1158,978
DiasABP,82964,116499,217,21897,62320,47053,68410,110061,89619,69069,4205,37197,45026,25305,10535,8343
FiO2,25006,32918,51,5852,13546,19433,19144,31568,26407,23972,1072,9597,11465,6526,2797,2546


<h3>Validation data</h3>

<h4>Loading validation dataset</h4>

In [29]:
# Validation split ('val_X') of the preprocessed PhysioNet-2012 dataset loaded at the top of the notebook.
validation_X = physionet2012_dataset['val_X']

In [30]:
# Rows flagged Gender == 0.0 (the "Female" column of the summary table).
validation_female_gender = validation_X.loc[validation_X["Gender"].eq(0.0)]
validation_female_gender_ids = validation_female_gender["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
female_gender_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(validation_female_gender_ids)].count()
)
female_gender_measurements_validation

RecordID       40512
level_1        40512
Time           40512
ALP              644
ALT              651
AST              654
Age            38144
Albumin          538
BUN             2895
Bilirubin        655
Cholesterol       61
Creatinine      2910
DiasABP        21584
FiO2            6198
GCS            13131
Gender           844
Glucose         2786
HCO3            2855
HCT             3683
HR             36726
Height         38144
ICUType          844
K               3082
Lactate         1514
MAP            21489
MechVent        6179
Mg              2875
NIDiasABP      17076
NIMAP          16794
NISysABP       17093
Na              2922
PaCO2           4214
PaO2            4211
Platelets       2868
RespRate       10212
SaO2            1448
SysABP         21587
Temp           14315
TroponinI         66
TroponinT        478
Urine          28623
WBC             2674
Weight         21422
pH              4411
dtype: int64

In [31]:
# Rows flagged Gender == 1.0 (the "Male" column of the summary table).
validation_male_gender = validation_X.loc[validation_X["Gender"].eq(1.0)]
validation_male_gender_ids = validation_male_gender["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
male_gender_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(validation_male_gender_ids)].count()
)
male_gender_measurements_validation

RecordID       51456
level_1        51456
Time           51456
ALP              870
ALT              899
AST              899
Age            48387
Albumin          629
BUN             3691
Bilirubin        899
Cholesterol       86
Creatinine      3700
DiasABP        28439
FiO2            8057
GCS            16362
Gender          1072
Glucose         3444
HCO3            3604
HCT             5096
HR             46169
Height         48387
ICUType         1072
K               3849
Lactate         2083
MAP            28278
MechVent        7769
Mg              3625
NIDiasABP      21031
NIMAP          20742
NISysABP       21051
Na              3580
PaCO2           6233
PaO2            6228
Platelets       3939
RespRate       11717
SaO2            2095
SysABP         28443
Temp           19633
TroponinI         77
TroponinT        531
Urine          35069
WBC             3529
Weight         26795
pH              6534
dtype: int64

In [32]:
# Rows flagged Gender == -1.0 (the "Undefined Gender" column of the summary table).
validation_undefined_gender = validation_X.loc[validation_X["Gender"].eq(-1.0)]
validation_undefined_gender_ids = validation_undefined_gender["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
undefined_gender_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(validation_undefined_gender_ids)].count()
)
undefined_gender_measurements_validation

RecordID       96
level_1        96
Time           96
ALP             0
ALT             1
AST             1
Age            91
Albumin         0
BUN             7
Bilirubin       0
Cholesterol     0
Creatinine      7
DiasABP        73
FiO2            1
GCS            39
Gender          2
Glucose         7
HCO3            7
HCT             5
HR             88
Height         91
ICUType         2
K               8
Lactate         0
MAP            71
MechVent        0
Mg              7
NIDiasABP      41
NIMAP          41
NISysABP       41
Na              7
PaCO2           4
PaO2            4
Platelets       5
RespRate       88
SaO2            0
SysABP         73
Temp           21
TroponinI       0
TroponinT       4
Urine          59
WBC             5
Weight         46
pH              5
dtype: int64

In [33]:
# Time == 0.0 rows whose ICUType is 1.0; they identify the records of this subgroup.
validation_ICUType_1 = validation_X.loc[
    validation_X["ICUType"].eq(1.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_1_ids = validation_ICUType_1["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
ICUType_1_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(validation_ICUType_1_ids)].count()
)
ICUType_1_measurements_validation

RecordID       13296
level_1        13296
Time           13296
ALP              179
ALT              190
AST              188
Age            12314
Albumin          123
BUN              933
Bilirubin        187
Cholesterol       89
Creatinine       945
DiasABP         5281
FiO2            1292
GCS             3437
Gender           277
Glucose          881
HCO3             899
HCT             1184
HR             11753
Height         12314
ICUType          277
K               1078
Lactate          220
MAP             5285
MechVent        1212
Mg               919
NIDiasABP       6752
NIMAP           6718
NISysABP        6760
Na               890
PaCO2            989
PaO2             994
Platelets        963
RespRate        4804
SaO2             624
SysABP          5282
Temp            3950
TroponinI         31
TroponinT        307
Urine           7433
WBC              854
Weight          6112
pH              1013
dtype: int64

In [34]:
# Time == 0.0 rows whose ICUType is 2.0; they identify the records of this subgroup.
validation_ICUType_2 = validation_X.loc[
    validation_X["ICUType"].eq(2.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_2_ids = validation_ICUType_2["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
ICUType_2_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(validation_ICUType_2_ids)].count()
)
ICUType_2_measurements_validation

RecordID       18096
level_1        18096
Time           18096
ALP              120
ALT              121
AST              121
Age            17456
Albumin           64
BUN             1152
Bilirubin        119
Cholesterol        4
Creatinine      1153
DiasABP        14290
FiO2            3077
GCS             4711
Gender           377
Glucose          827
HCO3            1071
HCT             1999
HR             16512
Height         17456
ICUType          377
K                947
Lactate          608
MAP            14286
MechVent        3030
Mg              1129
NIDiasABP       3589
NIMAP           3544
NISysABP        3597
Na               885
PaCO2           3705
PaO2            3693
Platelets       1472
RespRate         658
SaO2            1945
SysABP         14291
Temp           10806
TroponinI          4
TroponinT         51
Urine          15131
WBC             1209
Weight          9258
pH              4072
dtype: int64

In [35]:
# Time == 0.0 rows whose ICUType is 3.0; they identify the records of this subgroup.
validation_ICUType_3 = validation_X.loc[
    validation_X["ICUType"].eq(3.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_3_ids = validation_ICUType_3["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
ICUType_3_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(validation_ICUType_3_ids)].count()
)
ICUType_3_measurements_validation

RecordID       33744
level_1        33744
Time           33744
ALP              742
ALT              760
AST              764
Age            31228
Albumin          571
BUN             2515
Bilirubin        774
Cholesterol       24
Creatinine      2524
DiasABP        12512
FiO2            4965
GCS             8942
Gender           703
Glucose         2520
HCO3            2529
HCT             3060
HR             30105
Height         31228
ICUType          703
K               2739
Lactate         1194
MAP            12376
MechVent        4748
Mg              2399
NIDiasABP      18484
NIMAP          18128
NISysABP       18493
Na              2570
PaCO2           2566
PaO2            2573
Platelets       2377
RespRate       10151
SaO2             403
SysABP         12515
Temp            9728
TroponinI         72
TroponinT        458
Urine          20680
WBC             2268
Weight         22213
pH              2620
dtype: int64

In [36]:
# Time == 0.0 rows whose ICUType is 4.0; they identify the records of this subgroup.
validation_ICUType_4 = validation_X.loc[
    validation_X["ICUType"].eq(4.0) & validation_X["Time"].eq(0.0)
]
validation_ICUType_4_ids = validation_ICUType_4["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
ICUType_4_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(validation_ICUType_4_ids)].count()
)
ICUType_4_measurements_validation

RecordID       26928
level_1        26928
Time           26928
ALP              473
ALT              480
AST              481
Age            25624
Albumin          409
BUN             1993
Bilirubin        474
Cholesterol       30
Creatinine      1995
DiasABP        18013
FiO2            4922
GCS            12442
Gender           561
Glucose         2009
HCO3            1967
HCT             2541
HR             24613
Height         25624
ICUType          561
K               2175
Lactate         1575
MAP            17891
MechVent        4958
Mg              2060
NIDiasABP       9323
NIMAP           9187
NISysABP        9335
Na              2164
PaCO2           3191
PaO2            3183
Platelets       2000
RespRate        6404
SaO2             571
SysABP         18015
Temp            9485
TroponinI         36
TroponinT        197
Urine          20507
WBC             1877
Weight         10680
pH              3245
dtype: int64

In [37]:
# Time == 0.0 rows with Age >= 65; they identify the records of the "Age 65+" subgroup.
age_65_and_above_validation = validation_X.loc[
    validation_X["Age"].ge(65) & validation_X["Time"].eq(0.0)
]
age_65_and_above_validation_ids = age_65_and_above_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
age_65_and_above_measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(age_65_and_above_validation_ids)].count()
)
age_65_and_above_measurements_validation

RecordID       49872
level_1        49872
Time           49872
ALP              685
ALT              696
AST              699
Age            47206
Albumin          557
BUN             3439
Bilirubin        700
Cholesterol       92
Creatinine      3456
DiasABP        27386
FiO2            7563
GCS            15667
Gender          1039
Glucose         3205
HCO3            3359
HCT             4682
HR             45301
Height         47206
ICUType         1039
K               3604
Lactate         1801
MAP            27285
MechVent        7340
Mg              3414
NIDiasABP      20880
NIMAP          20620
NISysABP       20900
Na              3327
PaCO2           5676
PaO2            5668
Platelets       3628
RespRate       12398
SaO2            2106
SysABP         27389
Temp           19144
TroponinI         98
TroponinT        726
Urine          35578
WBC             3288
Weight         25973
pH              5961
dtype: int64

In [38]:
# Time == 0.0 rows with Age < 65; they identify the records of the "Age 65-" subgroup.
age_under_65_validation = validation_X.loc[
    validation_X["Age"].lt(65) & validation_X["Time"].eq(0.0)
]
age_under_65_validation_ids = age_under_65_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
# The double underscore in the name is kept: the validation summary cell reads it as spelled.
age_under_65__measurements_validation = (
    validation_X.loc[validation_X["RecordID"].isin(age_under_65_validation_ids)].count()
)
age_under_65__measurements_validation

RecordID       42192
level_1        42192
Time           42192
ALP              829
ALT              855
AST              855
Age            39416
Albumin          610
BUN             3154
Bilirubin        854
Cholesterol       55
Creatinine      3161
DiasABP        22710
FiO2            6693
GCS            13865
Gender           879
Glucose         3032
HCO3            3107
HCT             4102
HR             37682
Height         39416
ICUType          879
K               3335
Lactate         1796
MAP            22553
MechVent        6608
Mg              3093
NIDiasABP      17268
NIMAP          16957
NISysABP       17285
Na              3182
PaCO2           4775
PaO2            4775
Platelets       3184
RespRate        9619
SaO2            1437
SysABP         22714
Temp           14825
TroponinI         45
TroponinT        287
Urine          28173
WBC             2920
Weight         22290
pH              4989
dtype: int64

In [39]:
# Keep only rows where both Height and Weight are present and different from -1.
filtered_validation_X = validation_X.loc[
    lambda frame: frame[["Height", "Weight"]].notna().all(axis=1)
    & frame[["Height", "Weight"]].ne(-1).all(axis=1)
]

In [40]:
# New frame (the filtered one is left untouched) with Height rescaled from cm to meters.
filtered_validation_X_meters = filtered_validation_X.assign(
    Height=filtered_validation_X["Height"].div(100)
)
filtered_validation_X_meters["Height"]

528       1.575
549       1.575
550       1.575
551       1.575
552       1.575
          ...  
574652    1.829
574653    1.829
574654    1.829
574655    1.829
574752    1.473
Name: Height, Length: 25125, dtype: float64

In [41]:
# Alias, not a copy: the two columns added below also appear on filtered_validation_X_meters.
bmi_data_validation = filtered_validation_X_meters
# BMI = Weight / Height^2 rounded to one decimal; Height was divided by 100 (cm -> m) in the
# previous cell. Weight is presumably in kg -- TODO confirm.
bmi_data_validation["BMI"] = (
    bmi_data_validation["Weight"].div(bmi_data_validation["Height"].pow(2)).round(1)
)
# NOTE(review): BMI is classified per row, so a RecordID whose Weight changes over time can
# receive more than one class. The per-class Gender counts printed further down sum to 2173
# (including the undefined group) against 1918 records -- confirm whether that is intended.
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
528,132567,0,0.0,,,,71.0,,,,...,111.5,35.6,,,,,56.0,7.44,22.6,Peso normal
549,132567,21,21.0,,,,71.0,,,,...,110.0,37.5,,,15.0,,55.8,,22.5,Peso normal
550,132567,22,22.0,,,,71.0,,,,...,106.0,37.6,,,20.0,,55.8,,22.5,Peso normal
551,132567,23,23.0,,,,71.0,,,,...,129.0,37.7,,,30.0,,55.8,,22.5,Peso normal
552,132567,24,24.0,,,,71.0,,,,...,94.0,37.8,,,20.0,,55.8,,22.5,Peso normal


In [42]:
# RecordIDs that have at least one row with a BMI value.
classification_undefined_validation_ids = bmi_data_validation["RecordID"]
# Non-null count per column over the rows of every record NOT in that set,
# i.e. records for which no BMI class could be derived.
classification_undefined_measurements_validation = (
    validation_X.loc[~validation_X["RecordID"].isin(classification_undefined_validation_ids)].count()
)
classification_undefined_measurements_validation

RecordID       43872
level_1        43872
Time           43872
ALP              758
ALT              776
AST              780
Age            40486
Albumin          635
BUN             3075
Bilirubin        778
Cholesterol       62
Creatinine      3087
DiasABP        18031
FiO2            5983
GCS            14710
Gender           914
Glucose         3100
HCO3            3072
HCT             3762
HR             38907
Height         40486
ICUType          914
K               3399
Lactate         1432
MAP            17883
MechVent        5697
Mg              3050
NIDiasABP      22795
NIMAP          22425
NISysABP       22810
Na              3241
PaCO2           3081
PaO2            3076
Platelets       2921
RespRate       14869
SaO2             454
SysABP         18036
Temp           11900
TroponinI         66
TroponinT        515
Urine          28204
WBC             2774
Weight         23138
pH              3140
dtype: int64

In [43]:
# BMI rows labelled 'Baixo peso' (the "Low Weight" column of the summary table).
classification_low_weight_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Baixo peso')
]
classification_low_weight_validation_ids = classification_low_weight_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
classification_measurements_l_w_v = (
    validation_X.loc[validation_X["RecordID"].isin(classification_low_weight_validation_ids)].count()
)
classification_measurements_l_w_v

RecordID       1104
level_1        1104
Time           1104
ALP              20
ALT              20
AST              20
Age            1067
Albumin          15
BUN              97
Bilirubin        21
Cholesterol       1
Creatinine       97
DiasABP         830
FiO2            229
GCS             313
Gender           23
Glucose          95
HCO3             96
HCT             114
HR             1047
Height         1067
ICUType          23
K               104
Lactate          81
MAP             825
MechVent        247
Mg               96
NIDiasABP       280
NIMAP           269
NISysABP        280
Na               98
PaCO2           179
PaO2            185
Platelets        93
RespRate        136
SaO2             75
SysABP          830
Temp            520
TroponinI         2
TroponinT        34
Urine           822
WBC              85
Weight          649
pH              190
dtype: int64

In [44]:
# BMI rows labelled 'Peso normal' (the "Normal Weight" column of the summary table).
classification_normal_weight_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Peso normal')
]
classification_normal_weight_validation_ids = classification_normal_weight_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
classification_measurements_n_w_v = (
    validation_X.loc[validation_X["RecordID"].isin(classification_normal_weight_validation_ids)].count()
)
classification_measurements_n_w_v

RecordID       14496
level_1        14496
Time           14496
ALP              246
ALT              250
AST              251
Age            13947
Albumin          169
BUN             1041
Bilirubin        252
Cholesterol       27
Creatinine      1047
DiasABP         9669
FiO2            2399
GCS             4654
Gender           302
Glucose          961
HCO3            1017
HCT             1521
HR             13358
Height         13947
ICUType          302
K               1095
Lactate          638
MAP             9610
MechVent        2516
Mg              1045
NIDiasABP       4783
NIMAP           4741
NISysABP        4788
Na               990
PaCO2           2094
PaO2            2087
Platelets       1196
RespRate        2051
SaO2             834
SysABP          9669
Temp            6719
TroponinI         25
TroponinT        125
Urine          10657
WBC             1051
Weight          6753
pH              2227
dtype: int64

In [45]:
# BMI rows labelled 'Sobrepeso' (the "Overweight" column of the summary table).
classification_overweight_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Sobrepeso')
]
classification_overweight_validation_ids = classification_overweight_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
classification_measurements_o_w_v = (
    validation_X.loc[validation_X["RecordID"].isin(classification_overweight_validation_ids)].count()
)
classification_measurements_o_w_v

RecordID       20880
level_1        20880
Time           20880
ALP              310
ALT              320
AST              319
Age            19962
Albumin          221
BUN             1533
Bilirubin        316
Cholesterol       35
Creatinine      1537
DiasABP        14510
FiO2            3563
GCS             6354
Gender           435
Glucose         1318
HCO3            1470
HCT             2270
HR             18946
Height         19962
ICUType          435
K               1468
Lactate          923
MAP            14480
MechVent        3625
Mg              1496
NIDiasABP       6069
NIMAP           6009
NISysABP        6081
Na              1369
PaCO2           3438
PaO2            3428
Platelets       1744
RespRate        3102
SaO2            1517
SysABP         14512
Temp           10387
TroponinI         36
TroponinT        169
Urine          15757
WBC             1533
Weight         11586
pH              3665
dtype: int64

In [46]:
# BMI rows labelled 'Obesidade grau 1' (the "Obesity Grade 1" column of the summary table).
classification_obesity_grade1_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Obesidade grau 1')
]
classification_obesity_grade1_validation_ids = classification_obesity_grade1_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
classification_measurements_ob1_v = (
    validation_X.loc[validation_X["RecordID"].isin(classification_obesity_grade1_validation_ids)].count()
)
classification_measurements_ob1_v

RecordID       13680
level_1        13680
Time           13680
ALP              206
ALT              209
AST              208
Age            13170
Albumin          143
BUN              986
Bilirubin        209
Cholesterol       18
Creatinine       991
DiasABP         9799
FiO2            2527
GCS             4033
Gender           285
Glucose          827
HCO3             946
HCT             1495
HR             12518
Height         13170
ICUType          285
K                950
Lactate          682
MAP             9786
MechVent        2472
Mg               973
NIDiasABP       3717
NIMAP           3662
NISysABP        3720
Na               877
PaCO2           2444
PaO2            2441
Platelets       1158
RespRate        1102
SaO2            1099
SysABP          9800
Temp            6892
TroponinI         32
TroponinT        124
Urine          10642
WBC              993
Weight          7817
pH              2603
dtype: int64

In [47]:
# BMI rows labelled 'Obesidade grau 2' (the "Obesity Grade 2" column of the summary table).
classification_obesity_grade2_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Obesidade grau 2')
]
classification_obesity_grade2_validation_ids = classification_obesity_grade2_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
classification_measurements_ob2_v = (
    validation_X.loc[validation_X["RecordID"].isin(classification_obesity_grade2_validation_ids)].count()
)
classification_measurements_ob2_v

RecordID       5664
level_1        5664
Time           5664
ALP              84
ALT              85
AST              85
Age            5455
Albumin          60
BUN             404
Bilirubin        86
Cholesterol       4
Creatinine      404
DiasABP        4065
FiO2           1032
GCS            1529
Gender          118
Glucose         315
HCO3            379
HCT             630
HR             5207
Height         5455
ICUType         118
K               370
Lactate         287
MAP            4051
MechVent        957
Mg              394
NIDiasABP      1607
NIMAP          1558
NISysABP       1609
Na              346
PaCO2          1058
PaO2           1057
Platelets       471
RespRate        529
SaO2            493
SysABP         4065
Temp           2970
TroponinI         7
TroponinT        59
Urine          4330
WBC             405
Weight         3380
pH             1135
dtype: int64

In [48]:
# BMI rows labelled 'Obesidade grau 3' (the "Obesity Grade 3" column of the summary table).
classification_obesity_grade3_validation = bmi_data_validation.loc[
    bmi_data_validation["Classificacao"].eq('Obesidade grau 3')
]
classification_obesity_grade3_validation_ids = classification_obesity_grade3_validation["RecordID"]
# Non-null count per column over every row belonging to those RecordIDs.
classification_measurements_ob3_v = (
    validation_X.loc[validation_X["RecordID"].isin(classification_obesity_grade3_validation_ids)].count()
)
classification_measurements_ob3_v

RecordID       4608
level_1        4608
Time           4608
ALP              66
ALT              69
AST              69
Age            4469
Albumin          46
BUN             336
Bilirubin        68
Cholesterol       7
Creatinine      335
DiasABP        3191
FiO2            866
GCS            1318
Gender           96
Glucose         288
HCO3            317
HCT             441
HR             4292
Height         4469
ICUType          96
K               327
Lactate         234
MAP            3185
MechVent        870
Mg              326
NIDiasABP      1387
NIMAP          1364
NISysABP       1390
Na              306
PaCO2           785
PaO2            786
Platelets       333
RespRate        582
SaO2            321
SysABP         3191
Temp           2103
TroponinI         2
TroponinT        52
Urine          3523
WBC             292
Weight         2725
pH              825
dtype: int64

In [49]:
# Column labels of the validation frame; the summary table built next uses them as its row index.
df_columns = validation_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [50]:
# Validation-split summary table: one row per variable, one column per demographic
# subgroup. Each column is one of the per-subgroup non-null count Series computed above.
df_validation = pd.DataFrame(columns=df_columns)
df_validation_transpose = (
    # Transposing the empty frame yields an index of variable names and no columns;
    # every Series assigned below is aligned on that index.
    df_validation.T
    .assign(
        **{
            "Female": female_gender_measurements_validation,
            "Male": male_gender_measurements_validation,
            "Undefined Gender": undefined_gender_measurements_validation,
            "ICUType 1": ICUType_1_measurements_validation,
            "ICUType 2": ICUType_2_measurements_validation,
            "ICUType 3": ICUType_3_measurements_validation,
            "ICUType 4": ICUType_4_measurements_validation,
            "Age 65+": age_65_and_above_measurements_validation,
            "Age 65-": age_under_65__measurements_validation,
            "Undefined Classification": classification_undefined_measurements_validation,
            "Low Weight": classification_measurements_l_w_v,
            "Normal Weight": classification_measurements_n_w_v,
            "Overweight": classification_measurements_o_w_v,
            "Obesity Grade 1": classification_measurements_ob1_v,
            "Obesity Grade 2": classification_measurements_ob2_v,
            "Obesity Grade 3": classification_measurements_ob3_v,
        }
    )
    # Rows left out of the report: identifiers, the time axis, and Age, Gender,
    # Height and ICUType.
    .drop(index=["RecordID", "level_1", "Time", "Age", "Gender", "Height", "ICUType"])
)

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_validation_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,644,870,0,179,120,742,473,685,829,758,20,246,310,206,84,66
ALT,651,899,1,190,121,760,480,696,855,776,20,250,320,209,85,69
AST,654,899,1,188,121,764,481,699,855,780,20,251,319,208,85,69
Albumin,538,629,0,123,64,571,409,557,610,635,15,169,221,143,60,46
BUN,2895,3691,7,933,1152,2515,1993,3439,3154,3075,97,1041,1533,986,404,336
Bilirubin,655,899,0,187,119,774,474,700,854,778,21,252,316,209,86,68
Cholesterol,61,86,0,89,4,24,30,92,55,62,1,27,35,18,4,7
Creatinine,2910,3700,7,945,1153,2524,1995,3456,3161,3087,97,1047,1537,991,404,335
DiasABP,21584,28439,73,5281,14290,12512,18013,27386,22710,18031,830,9669,14510,9799,4065,3191
FiO2,6198,8057,1,1292,3077,4965,4922,7563,6693,5983,229,2399,3563,2527,1032,866


<h3> Test data</h3>

<h4>Loading test dataset</h4>

In [51]:
# Pull the 'test_X' entry out of physionet2012_dataset (the object returned by
# preprocess_physionet2012 in the dataset-loading cell); every test-set cell
# below reads from this frame.
test_X = physionet2012_dataset['test_X']

In [52]:
# Non-null entries per column over the whole test split; serves as the
# reference for the per-group counts computed in the following cells.
# NOTE(review): `var1` is a non-descriptive name; rename it only after checking
# that no other cell reads it.
var1 = test_X.count()
var1

RecordID       115152
level_1        115152
Time           115152
ALP              1831
ALT              1885
AST              1890
Age            108047
Albumin          1429
BUN              8313
Bilirubin        1912
Cholesterol       231
Creatinine       8358
DiasABP         62022
FiO2            18099
GCS             37151
Gender           2399
Glucose          7807
HCO3             8130
HCT             10755
HR             103491
Height         108047
ICUType          2399
K                8710
Lactate          4540
MAP             61622
MechVent        17185
Mg               8085
NIDiasABP       48495
NIMAP           47947
NISysABP        48542
Na               8124
PaCO2           13086
PaO2            13060
Platelets        8439
RespRate        27816
SaO2             4593
SysABP          62027
Temp            42107
TroponinI         258
TroponinT        1355
Urine           79440
WBC              7715
Weight          59201
pH              13698
dtype: int64

In [53]:
# Rows with Gender == 0.0 identify the records this notebook reports as female.
test_female_gender = test_X.loc[test_X["Gender"].eq(0.0)]
test_female_gender_ids = test_female_gender["RecordID"]

# Take every row belonging to those records and count the non-null entries
# of each column.
female_gender_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_female_gender_ids)].count()
)
female_gender_measurements_test

RecordID       49824
level_1        49824
Time           49824
ALP              801
ALT              830
AST              830
Age            46915
Albumin          619
BUN             3620
Bilirubin        840
Cholesterol       89
Creatinine      3626
DiasABP        25909
FiO2            7750
GCS            16189
Gender          1038
Glucose         3425
HCO3            3547
HCT             4627
HR             45027
Height         46915
ICUType         1038
K               3788
Lactate         1917
MAP            25647
MechVent        7506
Mg              3515
NIDiasABP      21942
NIMAP          21715
NISysABP       21961
Na              3557
PaCO2           5283
PaO2            5267
Platelets       3563
RespRate       13200
SaO2            1813
SysABP         25911
Temp           17635
TroponinI         99
TroponinT        490
Urine          35013
WBC             3310
Weight         26393
pH              5475
dtype: int64

In [54]:
# Rows with Gender == 1.0 identify the records this notebook reports as male.
test_male_gender = test_X.loc[test_X["Gender"].eq(1.0)]
test_male_gender_ids = test_male_gender["RecordID"]

# Take every row belonging to those records and count the non-null entries
# of each column.
male_gender_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_male_gender_ids)].count()
)
male_gender_measurements_test

RecordID       65328
level_1        65328
Time           65328
ALP             1030
ALT             1055
AST             1060
Age            61132
Albumin          810
BUN             4693
Bilirubin       1072
Cholesterol      142
Creatinine      4732
DiasABP        36113
FiO2           10349
GCS            20962
Gender          1361
Glucose         4382
HCO3            4583
HCT             6128
HR             58464
Height         61132
ICUType         1361
K               4922
Lactate         2623
MAP            35975
MechVent        9679
Mg              4570
NIDiasABP      26553
NIMAP          26232
NISysABP       26581
Na              4567
PaCO2           7803
PaO2            7793
Platelets       4876
RespRate       14616
SaO2            2780
SysABP         36116
Temp           24472
TroponinI        159
TroponinT        865
Urine          44427
WBC             4405
Weight         32808
pH              8223
dtype: int64

In [55]:
# Rows with Gender == -1.0 identify the records reported as "Undefined Gender".
test_undefined_gender = test_X.loc[test_X["Gender"].eq(-1.0)]
test_undefined_gender_ids = test_undefined_gender["RecordID"]

# Take every row belonging to those records and count the non-null entries
# of each column (all zeros when no record matches).
undefined_gender_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_undefined_gender_ids)].count()
)
undefined_gender_measurements_test

RecordID       0
level_1        0
Time           0
ALP            0
ALT            0
AST            0
Age            0
Albumin        0
BUN            0
Bilirubin      0
Cholesterol    0
Creatinine     0
DiasABP        0
FiO2           0
GCS            0
Gender         0
Glucose        0
HCO3           0
HCT            0
HR             0
Height         0
ICUType        0
K              0
Lactate        0
MAP            0
MechVent       0
Mg             0
NIDiasABP      0
NIMAP          0
NISysABP       0
Na             0
PaCO2          0
PaO2           0
Platelets      0
RespRate       0
SaO2           0
SysABP         0
Temp           0
TroponinI      0
TroponinT      0
Urine          0
WBC            0
Weight         0
pH             0
dtype: int64

In [56]:
# Rows at Time == 0.0 whose ICUType is 1.0 pick out the ICU type 1 records.
test_ICUType_1 = test_X.loc[test_X["ICUType"].eq(1.0) & test_X["Time"].eq(0.0)]
test_ICUType_1_ids = test_ICUType_1["RecordID"]

# Count the non-null entries of each column over all rows of those records.
ICUType_1_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_1_ids)].count()
)
ICUType_1_measurements_test

RecordID       18000
level_1        18000
Time           18000
ALP              300
ALT              314
AST              315
Age            16329
Albumin          212
BUN             1313
Bilirubin        317
Cholesterol      125
Creatinine      1344
DiasABP         7823
FiO2            2094
GCS             4702
Gender           375
Glucose         1264
HCO3            1273
HCT             1559
HR             15524
Height         16329
ICUType          375
K               1528
Lactate          464
MAP             7796
MechVent        1977
Mg              1300
NIDiasABP       8113
NIMAP           8073
NISysABP        8118
Na              1270
PaCO2           1548
PaO2            1544
Platelets       1316
RespRate        6003
SaO2             935
SysABP          7823
Temp            5446
TroponinI         53
TroponinT        435
Urine          10106
WBC             1185
Weight          7770
pH              1574
dtype: int64

In [57]:
# Rows at Time == 0.0 whose ICUType is 2.0 pick out the ICU type 2 records.
test_ICUType_2 = test_X.loc[test_X["ICUType"].eq(2.0) & test_X["Time"].eq(0.0)]
test_ICUType_2_ids = test_ICUType_2["RecordID"]

# Count the non-null entries of each column over all rows of those records.
ICUType_2_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_2_ids)].count()
)
ICUType_2_measurements_test

RecordID       23472
level_1        23472
Time           23472
ALP              169
ALT              179
AST              179
Age            22741
Albumin          121
BUN             1548
Bilirubin        175
Cholesterol        7
Creatinine      1547
DiasABP        18590
FiO2            3960
GCS             6251
Gender           489
Glucose         1100
HCO3            1431
HCT             2543
HR             21668
Height         22741
ICUType          489
K               1256
Lactate          810
MAP            18590
MechVent        3843
Mg              1457
NIDiasABP       5128
NIMAP           5086
NISysABP        5142
Na              1180
PaCO2           4750
PaO2            4735
Platelets       1901
RespRate        1370
SaO2            2523
SysABP         18590
Temp           13917
TroponinI         37
TroponinT         61
Urine          19872
WBC             1574
Weight         11903
pH              5231
dtype: int64

In [58]:
# Rows at Time == 0.0 whose ICUType is 3.0 pick out the ICU type 3 records.
test_ICUType_3 = test_X.loc[test_X["ICUType"].eq(3.0) & test_X["Time"].eq(0.0)]
test_ICUType_3_ids = test_ICUType_3["RecordID"]

# Count the non-null entries of each column over all rows of those records.
ICUType_3_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_3_ids)].count()
)
ICUType_3_measurements_test

RecordID       40080
level_1        40080
Time           40080
ALP              842
ALT              866
AST              870
Age            36862
Albumin          646
BUN             2975
Bilirubin        899
Cholesterol       45
Creatinine      2991
DiasABP        13270
FiO2            5830
GCS            10598
Gender           835
Glucose         2953
HCO3            2976
HCT             3504
HR             35412
Height         36862
ICUType          835
K               3195
Lactate         1427
MAP            13060
MechVent        5273
Mg              2792
NIDiasABP      23224
NIMAP          22883
NISysABP       23240
Na              3051
PaCO2           2851
PaO2            2857
Platelets       2734
RespRate       11932
SaO2             486
SysABP         13273
Temp           11033
TroponinI        115
TroponinT        547
Urine          24195
WBC             2614
Weight         24844
pH              2908
dtype: int64

In [59]:
# Rows at Time == 0.0 whose ICUType is 4.0 pick out the ICU type 4 records.
test_ICUType_4 = test_X.loc[test_X["ICUType"].eq(4.0) & test_X["Time"].eq(0.0)]
test_ICUType_4_ids = test_ICUType_4["RecordID"]

# Count the non-null entries of each column over all rows of those records.
ICUType_4_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(test_ICUType_4_ids)].count()
)
ICUType_4_measurements_test

RecordID       33600
level_1        33600
Time           33600
ALP              520
ALT              526
AST              526
Age            32115
Albumin          450
BUN             2477
Bilirubin        521
Cholesterol       54
Creatinine      2476
DiasABP        22339
FiO2            6215
GCS            15600
Gender           700
Glucose         2490
HCO3            2450
HCT             3149
HR             30887
Height         32115
ICUType          700
K               2731
Lactate         1839
MAP            22176
MechVent        6092
Mg              2536
NIDiasABP      12030
NIMAP          11905
NISysABP       12042
Na              2623
PaCO2           3937
PaO2            3924
Platelets       2488
RespRate        8511
SaO2             649
SysABP         22341
Temp           11711
TroponinI         53
TroponinT        312
Urine          25267
WBC             2342
Weight         14684
pH              3985
dtype: int64

In [60]:
# A record belongs to the "65 and above" group when its row at Time == 0.0
# carries an Age of at least 65.
age_65_and_above_test = test_X.loc[test_X["Age"].ge(65) & test_X["Time"].eq(0.0)]
age_65_and_above_test_ids = age_65_and_above_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
age_65_and_above_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(age_65_and_above_test_ids)].count()
)
age_65_and_above_measurements_test

RecordID       63696
level_1        63696
Time           63696
ALP              821
ALT              842
AST              847
Age            59787
Albumin          720
BUN             4522
Bilirubin        869
Cholesterol      142
Creatinine      4547
DiasABP        35077
FiO2           10234
GCS            19793
Gender          1327
Glucose         4190
HCO3            4405
HCT             5929
HR             57384
Height         59787
ICUType         1327
K               4702
Lactate         2530
MAP            34853
MechVent        9538
Mg              4402
NIDiasABP      26321
NIMAP          26074
NISysABP       26345
Na              4371
PaCO2           7520
PaO2            7506
Platelets       4564
RespRate       15434
SaO2            2901
SysABP         35079
Temp           24306
TroponinI        175
TroponinT        956
Urine          44814
WBC             4182
Weight         33697
pH              7862
dtype: int64

In [61]:
# A record belongs to the "under 65" group when its row at Time == 0.0
# carries an Age below 65.
age_under_65_test = test_X.loc[test_X["Age"].lt(65) & test_X["Time"].eq(0.0)]
age_under_65_test_ids = age_under_65_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
age_under_65_measurements_test = (
    test_X.loc[test_X["RecordID"].isin(age_under_65_test_ids)].count()
)
age_under_65_measurements_test

RecordID       51456
level_1        51456
Time           51456
ALP             1010
ALT             1043
AST             1043
Age            48260
Albumin          709
BUN             3791
Bilirubin       1043
Cholesterol       89
Creatinine      3811
DiasABP        26945
FiO2            7865
GCS            17358
Gender          1072
Glucose         3617
HCO3            3725
HCT             4826
HR             46107
Height         48260
ICUType         1072
K               4008
Lactate         2010
MAP            26769
MechVent        7647
Mg              3683
NIDiasABP      22174
NIMAP          21873
NISysABP       22197
Na              3753
PaCO2           5566
PaO2            5554
Platelets       3875
RespRate       12382
SaO2            1692
SysABP         26948
Temp           17801
TroponinI         83
TroponinT        399
Urine          34626
WBC             3533
Weight         25504
pH              5836
dtype: int64

In [62]:
# Keep only the rows where Height and Weight are both non-null and different
# from -1 (presumably a missing-value marker -- confirm), i.e. the rows from
# which the BMI cells below can compute a value.
filtered_test_X = test_X.loc[
    test_X["Height"].ne(-1)
    & test_X["Weight"].ne(-1)
    & test_X["Height"].notna()
    & test_X["Weight"].notna()
]

In [63]:
# New frame, independent of filtered_test_X, whose Height column is divided by
# 100 to convert it from centimeters to meters.
filtered_test_X_meters = filtered_test_X.assign(
    Height=filtered_test_X["Height"].div(100)
)
filtered_test_X_meters["Height"]

720       1.676
738       1.676
739       1.676
740       1.676
741       1.676
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 30040, dtype: float64

In [64]:
# bmi_data_test is a second name for filtered_test_X_meters (no copy is made),
# so the two columns added below also appear on filtered_test_X_meters.
bmi_data_test = filtered_test_X_meters

# BMI = Weight / Height**2 with Height in meters, rounded to one decimal.
# Weight is presumably in kilograms -- TODO confirm.
bmi_data_test["BMI"] = (
    bmi_data_test["Weight"].div(bmi_data_test["Height"].pow(2)).round(1)
)

# classify_BMI is defined outside this cell and is applied to each BMI value.
# NOTE(review): the label is assigned per row, so a record whose Weight changes
# over time can carry several labels (RecordID 132575 in the preview is both
# 'Peso normal' and 'Sobrepeso') and is then counted in more than one BMI group
# by the cells that follow.
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
720,132575,0,0.0,,,,78.0,,,,...,,,,,,12.5,63.0,7.34,22.4,Peso normal
738,132575,18,18.0,,,,78.0,,,,...,122.0,37.4,,,38.0,,72.4,,25.8,Sobrepeso
739,132575,19,19.0,,,,78.0,,,,...,121.0,37.3,,,45.0,,72.4,,25.8,Sobrepeso
740,132575,20,20.0,,,,78.0,,,,...,97.5,37.3,,,30.0,,72.4,,25.8,Sobrepeso
741,132575,21,21.0,,,,78.0,,,,...,90.0,37.2,,,20.0,,72.4,,25.8,Sobrepeso


In [65]:
# Despite its name, classification_undefined_test_ids holds the RecordIDs that
# DO have at least one classified row; the "undefined" group is every record
# that is absent from it.
classification_undefined_test_ids = bmi_data_test["RecordID"]

# Count the non-null entries of each column over the rows of records that
# have no BMI classification.
classification_undefined_measurements_test = (
    test_X.loc[~test_X["RecordID"].isin(classification_undefined_test_ids)].count()
)
classification_undefined_measurements_test

RecordID       56928
level_1        56928
Time           56928
ALP              902
ALT              935
AST              937
Age            52238
Albumin          722
BUN             3936
Bilirubin        957
Cholesterol      108
Creatinine      3967
DiasABP        22375
FiO2            7984
GCS            18972
Gender          1186
Glucose         3916
HCO3            3908
HCT             4761
HR             50083
Height         52238
ICUType         1186
K               4308
Lactate         1889
MAP            22116
MechVent        7333
Mg              3812
NIDiasABP      30127
NIMAP          29747
NISysABP       30150
Na              4071
PaCO2           4050
PaO2            4046
Platelets       3687
RespRate       18843
SaO2             696
SysABP         22377
Temp           15037
TroponinI        114
TroponinT        697
Urine          35793
WBC             3535
Weight         29161
pH              4137
dtype: int64

In [66]:
# Records with at least one row labelled 'Baixo peso' (reported as "Low Weight").
classification_low_weight_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Baixo peso")
]
classification_low_weight_test_ids = classification_low_weight_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
classification_measurements_l_w_test = (
    test_X.loc[test_X["RecordID"].isin(classification_low_weight_test_ids)].count()
)
classification_measurements_l_w_test

RecordID       1680
level_1        1680
Time           1680
ALP              26
ALT              26
AST              26
Age            1619
Albumin          22
BUN             136
Bilirubin        27
Cholesterol       3
Creatinine      136
DiasABP        1044
FiO2            275
GCS             518
Gender           35
Glucose         136
HCO3            138
HCT             174
HR             1560
Height         1619
ICUType          35
K               149
Lactate          68
MAP            1055
MechVent        305
Mg              136
NIDiasABP       630
NIMAP           630
NISysABP        634
Na              138
PaCO2           240
PaO2            241
Platelets       146
RespRate        339
SaO2             98
SysABP         1045
Temp            729
TroponinI         4
TroponinT        14
Urine          1215
WBC             129
Weight          762
pH              250
dtype: int64

In [67]:
# Records with at least one row labelled 'Peso normal' (reported as "Normal Weight").
classification_normal_weight_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Peso normal")
]
classification_normal_weight_test_ids = classification_normal_weight_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
classification_measurements_n_w_test = (
    test_X.loc[test_X["RecordID"].isin(classification_normal_weight_test_ids)].count()
)
classification_measurements_n_w_test

RecordID       19152
level_1        19152
Time           19152
ALP              312
ALT              319
AST              321
Age            18375
Albumin          242
BUN             1430
Bilirubin        326
Cholesterol       48
Creatinine      1436
DiasABP        12903
FiO2            3248
GCS             6307
Gender           399
Glucose         1295
HCO3            1375
HCT             1986
HR             17610
Height         18375
ICUType          399
K               1461
Lactate          821
MAP            12838
MechVent        3142
Mg              1443
NIDiasABP       6211
NIMAP           6137
NISysABP        6215
Na              1338
PaCO2           2845
PaO2            2834
Platelets       1615
RespRate        3144
SaO2            1211
SysABP         12903
Temp            8565
TroponinI         57
TroponinT        199
Urine          14100
WBC             1392
Weight          9994
pH              3029
dtype: int64

In [68]:
# Records with at least one row labelled 'Sobrepeso' (reported as "Overweight").
classification_overweight_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Sobrepeso")
]
classification_overweight_test_ids = classification_overweight_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
classification_measurements_o_w_test = (
    test_X.loc[test_X["RecordID"].isin(classification_overweight_test_ids)].count()
)
classification_measurements_o_w_test

RecordID       24960
level_1        24960
Time           24960
ALP              378
ALT              391
AST              390
Age            23943
Albumin          290
BUN             1831
Bilirubin        387
Cholesterol       41
Creatinine      1840
DiasABP        17946
FiO2            4232
GCS             7417
Gender           520
Glucose         1554
HCO3            1738
HCT             2630
HR             22821
Height         23943
ICUType          520
K               1761
Lactate         1077
MAP            17888
MechVent        4176
Mg              1795
NIDiasABP       7027
NIMAP           6957
NISysABP        7036
Na              1621
PaCO2           4066
PaO2            4056
Platelets       2062
RespRate        3278
SaO2            1875
SysABP         17947
Temp           12493
TroponinI         57
TroponinT        278
Urine          19039
WBC             1777
Weight         13382
pH              4383
dtype: int64

In [69]:
# Records with at least one row labelled 'Obesidade grau 1'
# (reported as "Obesity Grade 1").
classification_obesity_grade1_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Obesidade grau 1")
]
classification_obesity_grade1_test_ids = classification_obesity_grade1_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
classification_measurements_ob1_test = (
    test_X.loc[test_X["RecordID"].isin(classification_obesity_grade1_test_ids)].count()
)
classification_measurements_ob1_test

RecordID       14256
level_1        14256
Time           14256
ALP              222
ALT              227
AST              227
Age            13820
Albumin          152
BUN             1082
Bilirubin        231
Cholesterol       20
Creatinine      1081
DiasABP        10404
FiO2            2612
GCS             4348
Gender           297
Glucose          904
HCO3            1043
HCT             1551
HR             13207
Height         13820
ICUType          297
K               1024
Lactate          733
MAP            10396
MechVent        2586
Mg              1001
NIDiasABP       3894
NIMAP           3859
NISysABP        3898
Na               975
PaCO2           2562
PaO2            2554
Platelets       1165
RespRate        1469
SaO2            1048
SysABP         10404
Temp            7666
TroponinI         26
TroponinT        135
Urine          11332
WBC             1014
Weight          7722
pH              2693
dtype: int64

In [70]:
# Records with at least one row labelled 'Obesidade grau 2'
# (reported as "Obesity Grade 2").
classification_obesity_grade2_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Obesidade grau 2")
]
classification_obesity_grade2_test_ids = classification_obesity_grade2_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
classification_measurements_ob2_test = (
    test_X.loc[test_X["RecordID"].isin(classification_obesity_grade2_test_ids)].count()
)
classification_measurements_ob2_test

RecordID       6768
level_1        6768
Time           6768
ALP             114
ALT             118
AST             118
Age            6514
Albumin          81
BUN             538
Bilirubin       114
Cholesterol      10
Creatinine      542
DiasABP        4688
FiO2           1318
GCS            1925
Gender          141
Glucose         453
HCO3            520
HCT             770
HR             6195
Height         6514
ICUType         141
K               501
Lactate         392
MAP            4683
MechVent       1256
Mg              502
NIDiasABP      2057
NIMAP          2043
NISysABP       2058
Na              478
PaCO2          1247
PaO2           1243
Platelets       606
RespRate        915
SaO2            528
SysABP         4689
Temp           3472
TroponinI        17
TroponinT        66
Urine          5251
WBC             526
Weight         4068
pH             1286
dtype: int64

In [71]:
# Records with at least one row labelled 'Obesidade grau 3'
# (reported as "Obesity Grade 3").
classification_obesity_grade3_test = bmi_data_test.loc[
    bmi_data_test["Classificacao"].eq("Obesidade grau 3")
]
classification_obesity_grade3_test_ids = classification_obesity_grade3_test["RecordID"]

# Count the non-null entries of each column over all rows of those records.
classification_measurements_ob3_test = (
    test_X.loc[test_X["RecordID"].isin(classification_obesity_grade3_test_ids)].count()
)
classification_measurements_ob3_test

RecordID       5232
level_1        5232
Time           5232
ALP              67
ALT              68
AST              69
Age            5062
Albumin          48
BUN             405
Bilirubin        68
Cholesterol       7
Creatinine      406
DiasABP        3976
FiO2           1179
GCS            1579
Gender          109
Glucose         345
HCO3            386
HCT             535
HR             4859
Height         5062
ICUType         109
K               387
Lactate         350
MAP            3959
MechVent       1112
Mg              406
NIDiasABP      1293
NIMAP          1283
NISysABP       1299
Na              360
PaCO2          1094
PaO2           1093
Platelets       423
RespRate        619
SaO2            523
SysABP         3976
Temp           2754
TroponinI         2
TroponinT        62
Urine          4232
WBC             374
Weight         2928
pH             1138
dtype: int64

In [72]:
# Column labels of the test split; the summary-table cell that follows uses
# them as the row labels of the per-group table.
df_columns = test_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [73]:
# Summary table for the test split: one row per variable (labels taken from
# df_columns) and one column per demographic group, each holding that group's
# per-variable non-null counts computed in the preceding cells.
df_test = pd.DataFrame(columns=df_columns)
df_test_transpose = pd.DataFrame(
    {
        "Female": female_gender_measurements_test,
        "Male": male_gender_measurements_test,
        "Undefined Gender": undefined_gender_measurements_test,
        "ICUType 1": ICUType_1_measurements_test,
        "ICUType 2": ICUType_2_measurements_test,
        "ICUType 3": ICUType_3_measurements_test,
        "ICUType 4": ICUType_4_measurements_test,
        "Age 65+": age_65_and_above_measurements_test,
        "Age 65-": age_under_65_measurements_test,
        "Undefined Classification": classification_undefined_measurements_test,
        "Low Weight": classification_measurements_l_w_test,
        "Normal Weight": classification_measurements_n_w_test,
        "Overweight": classification_measurements_o_w_test,
        "Obesity Grade 1": classification_measurements_ob1_test,
        "Obesity Grade 2": classification_measurements_ob2_test,
        "Obesity Grade 3": classification_measurements_ob3_test,
    },
    index=df_columns,
)

# Remove the rows this table does not report: the record/time keys and the
# Age, Gender, Height and ICUType fields.
df_test_transpose = df_test_transpose.drop(
    index=["RecordID", "level_1", "Time", "Age", "Gender", "Height", "ICUType"]
)

display(
    HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Test Set</h2>")
)
df_test_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Undefined Classification,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,801,1030,0,300,169,842,520,821,1010,902,26,312,378,222,114,67
ALT,830,1055,0,314,179,866,526,842,1043,935,26,319,391,227,118,68
AST,830,1060,0,315,179,870,526,847,1043,937,26,321,390,227,118,69
Albumin,619,810,0,212,121,646,450,720,709,722,22,242,290,152,81,48
BUN,3620,4693,0,1313,1548,2975,2477,4522,3791,3936,136,1430,1831,1082,538,405
Bilirubin,840,1072,0,317,175,899,521,869,1043,957,27,326,387,231,114,68
Cholesterol,89,142,0,125,7,45,54,142,89,108,3,48,41,20,10,7
Creatinine,3626,4732,0,1344,1547,2991,2476,4547,3811,3967,136,1436,1840,1081,542,406
DiasABP,25909,36113,0,7823,18590,13270,22339,35077,26945,22375,1044,12903,17946,10404,4688,3976
FiO2,7750,10349,0,2094,3960,5830,6215,10234,7865,7984,275,3248,4232,2612,1318,1179
