In [160]:
import os
import sys
import pandas as pd
from IPython.display import display, HTML
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

<h2>Loading dataset</h2>

In [161]:
from pypots.benchpots.datasets import preprocess_physionet2012
physionet2012_dataset = preprocess_physionet2012(subset="all", rate=0.1)

2024-11-10 21:23:09 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-11-10 21:23:09 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-11-10 21:23:09 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-11-10 21:23:09 [INFO]: Loaded successfully!


<h3>Training data</h3>

<h4>Loading training dataset</h4>

In [162]:
train_X = physionet2012_dataset['train_X']

In [163]:
training_female_gender = train_X[train_X["Gender"] == 0.0]
training_female_gender_ids = training_female_gender["RecordID"]
female_gender_measurements_training = train_X[train_X["RecordID"].isin(training_female_gender_ids)]
female_gender_measurements_training = female_gender_measurements_training.count()
female_gender_measurements_training

RecordID       160464
level_1        160464
Time           160464
ALP              2552
ALT              2649
AST              2642
Age            150856
Albumin          2044
BUN             11504
Bilirubin        2676
Cholesterol       269
Creatinine      11550
DiasABP         80959
FiO2            24430
GCS             51809
Gender           3343
Glucose         11011
HCO3            11339
HCT             14653
HR             144904
Height         150856
ICUType          3343
K               12243
Lactate          6308
MAP             80525
MechVent        23863
Mg              11321
NIDiasABP       71920
NIMAP           70901
NISysABP        71972
Na              11475
PaCO2           16873
PaO2            16834
Platelets       11409
RespRate        44637
SaO2             5940
SysABP          80965
Temp            56772
TroponinI         360
TroponinT        1681
Urine          111764
WBC             10610
Weight          85873
pH              17477
dtype: int64

In [164]:
training_male_gender = train_X[train_X['Gender'] == 1.0]
training_male_gender_ids = training_male_gender["RecordID"]
male_gender_measurements_training = train_X[train_X["RecordID"].isin(training_male_gender_ids)]
male_gender_measurements_training  = male_gender_measurements_training.count()
male_gender_measurements_training 

RecordID       207312
level_1        207312
Time           207312
ALP              3431
ALT              3524
AST              3529
Age            195478
Albumin          2577
BUN             15124
Bilirubin        3553
Cholesterol       376
Creatinine      15198
DiasABP        117224
FiO2            32977
GCS             66552
Gender           4319
Glucose         14124
HCO3            14706
HCT             20202
HR             187065
Height         195478
ICUType          4319
K               15723
Lactate          8798
MAP            116467
MechVent        31266
Mg              14799
NIDiasABP       83585
NIMAP           82553
NISysABP        83653
Na              14660
PaCO2           25123
PaO2            25075
Platelets       15752
RespRate        46440
SaO2             8700
SysABP         117232
Temp            79688
TroponinI         400
TroponinT        2282
Urine          143435
WBC             14113
Weight         109791
pH              26399
dtype: int64

In [165]:
training_undefined_gender = train_X[train_X['Gender'] == -1.0]
undefined_gender_training_ids = training_undefined_gender['RecordID']
undefined_gender_measurements_training = train_X[train_X["RecordID"].isin(undefined_gender_training_ids)]
undefined_gender_measurements_training = undefined_gender_measurements_training.count()
undefined_gender_measurements_training

RecordID       432
level_1        432
Time           432
ALP              3
ALT              4
AST              4
Age            287
Albumin          2
BUN             20
Bilirubin        3
Cholesterol      0
Creatinine      20
DiasABP        196
FiO2            21
GCS             92
Gender           9
Glucose         20
HCO3            20
HCT             19
HR             274
Height         287
ICUType          9
K               21
Lactate          3
MAP            192
MechVent        28
Mg              18
NIDiasABP      122
NIMAP          122
NISysABP       122
Na              19
PaCO2           17
PaO2            17
Platelets       18
RespRate        88
SaO2             1
SysABP         196
Temp            96
TroponinI        0
TroponinT        4
Urine          177
WBC             18
Weight         176
pH              22
dtype: int64

In [166]:
training_ICUType_1 = train_X[train_X['ICUType'] == 1.0]
training_ICUType_1 = training_ICUType_1[training_ICUType_1["Time"] == 0.0]
training_ICUType_1_ids = training_ICUType_1["RecordID"]
ICUType_1_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_1_ids)]
ICUType_1_measurements_training = ICUType_1_measurements_training.count() 
ICUType_1_measurements_training

RecordID       53904
level_1        53904
Time           53904
ALP              806
ALT              856
AST              858
Age            49403
Albumin          625
BUN             3810
Bilirubin        848
Cholesterol      354
Creatinine      3873
DiasABP        22316
FiO2            5858
GCS            13786
Gender          1123
Glucose         3664
HCO3            3689
HCT             4717
HR             47038
Height         49403
ICUType         1123
K               4449
Lactate         1209
MAP            22235
MechVent        5350
Mg              3794
NIDiasABP      25818
NIMAP          25687
NISysABP       25832
Na              3675
PaCO2           4469
PaO2            4471
Platelets       3909
RespRate       19550
SaO2            2685
SysABP         22317
Temp           16155
TroponinI        167
TroponinT       1175
Urine          30484
WBC             3491
Weight         24249
pH              4562
dtype: int64

In [167]:
training_ICUType_2 = train_X[train_X['ICUType'] == 2.0]
training_ICUType_2 = training_ICUType_2[training_ICUType_2["Time"] == 0.0]
training_ICUType2_ids = training_ICUType_2["RecordID"]
ICUType_2_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType2_ids)]
ICUType_2_measurements_training = ICUType_2_measurements_training.count() 
ICUType_2_measurements_training

RecordID       76080
level_1        76080
Time           76080
ALP              532
ALT              548
AST              545
Age            73534
Albumin          356
BUN             4929
Bilirubin        533
Cholesterol       24
Creatinine      4938
DiasABP        59345
FiO2           12912
GCS            20172
Gender          1585
Glucose         3541
HCO3            4541
HCT             8302
HR             69799
Height         73534
ICUType         1585
K               4024
Lactate         2751
MAP            59468
MechVent       12611
Mg              4808
NIDiasABP      16865
NIMAP          16753
NISysABP       16891
Na              3795
PaCO2          15406
PaO2           15353
Platelets       6134
RespRate        3238
SaO2            8296
SysABP         59347
Temp           44631
TroponinI        103
TroponinT        190
Urine          64083
WBC             5070
Weight         39832
pH             16877
dtype: int64

In [168]:
training_ICUType_3 = train_X[train_X['ICUType'] == 3.0]
training_ICUType_3 = training_ICUType_3[training_ICUType_3["Time"] == 0.0]
training_ICUType_3_ids = training_ICUType_3["RecordID"]
ICUType_3_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_3_ids)]
ICUType_3_measurements_training = ICUType_3_measurements_training.count() 
ICUType_3_measurements_training

RecordID       132432
level_1        132432
Time           132432
ALP              2809
ALT              2903
AST              2906
Age            122945
Albumin          2126
BUN              9944
Bilirubin        3012
Cholesterol       136
Creatinine       9984
DiasABP         46368
FiO2            19202
GCS             35078
Gender           2759
Glucose          9936
HCO3             9965
HCT             11664
HR             118502
Height         122945
ICUType          2759
K               10787
Lactate          5067
MAP             45695
MechVent        17850
Mg               9394
NIDiasABP       75698
NIMAP           74368
NISysABP        75739
Na              10181
PaCO2            9877
PaO2             9876
Platelets        9038
RespRate        41430
SaO2             1584
SysABP          46373
Temp            37671
TroponinI         324
TroponinT        1781
Urine           80842
WBC              8635
Weight          86496
pH              10013
dtype: int64

In [169]:
training_ICUType_4 = train_X[train_X['ICUType'] == 4.0]
training_ICUType_4 = training_ICUType_4[training_ICUType_4["Time"] == 0.0]
training_ICUType_4_ids = training_ICUType_4["RecordID"]
ICUType_4_measurements_training = train_X[train_X["RecordID"].isin(training_ICUType_4_ids)]
ICUType_4_measurements_training = ICUType_4_measurements_training.count() 
ICUType_4_measurements_training 

RecordID       105792
level_1        105792
Time           105792
ALP              1839
ALT              1870
AST              1866
Age            100739
Albumin          1516
BUN              7965
Bilirubin        1839
Cholesterol       131
Creatinine       7973
DiasABP         70350
FiO2            19456
GCS             49417
Gender           2204
Glucose          8014
HCO3             7870
HCT             10191
HR              96904
Height         100739
ICUType          2204
K                8727
Lactate          6082
MAP             69786
MechVent        19346
Mg               8142
NIDiasABP       37246
NIMAP           36768
NISysABP        37285
Na               8503
PaCO2           12261
PaO2            12226
Platelets        8098
RespRate        26947
SaO2             2076
SysABP          70356
Temp            38099
TroponinI         166
TroponinT         821
Urine           79967
WBC              7545
Weight          45263
pH              12446
dtype: int64

In [170]:
age_65_and_above_training = train_X[train_X["Age"] >= 65]
age_65_and_above_training = age_65_and_above_training[age_65_and_above_training ["Time"] == 0.0]
age_65_and_above_training_ids = age_65_and_above_training ["RecordID"]
age_65_and_above_measurements_training = train_X[train_X["RecordID"].isin(age_65_and_above_training_ids)]
age_65_and_above_measurements_training = age_65_and_above_measurements_training.count()
age_65_and_above_measurements_training

RecordID       202368
level_1        202368
Time           202368
ALP              2679
ALT              2764
AST              2771
Age            191286
Albumin          2258
BUN             14320
Bilirubin        2812
Cholesterol       377
Creatinine      14399
DiasABP        110166
FiO2            31324
GCS             63104
Gender           4216
Glucose         13331
HCO3            13990
HCT             19019
HR             183711
Height         191286
ICUType          4216
K               14968
Lactate          7833
MAP            109600
MechVent        29391
Mg              14133
NIDiasABP       86177
NIMAP           85188
NISysABP        86246
Na              13883
PaCO2           23254
PaO2            23210
Platelets       14648
RespRate        51720
SaO2             9119
SysABP         110172
Temp            77863
TroponinI         527
TroponinT        2805
Urine          143699
WBC             13333
Weight         108970
pH              24320
dtype: int64

In [171]:
# Motivação para o teste: A quantidade de valores no grupo de idade que estava na tabela, era muito maior do que a quantidade de 
# valores para o grupo de gênero, o que não fazia sentido, pois, já que não temos dados faltantes dessas variáveis em ambos os grupos
# se gênero tem n ocorrências, idade deve ter n ocorrências também, e vice-versa. 

# Descoberta: Através do teste, vimos que tinha casos em que o valor do campo idade, se repete durante a série temporal do paciente, e visto isso, nós ajustamos o código para pegar apenas a primeira ocorrência de valor para cada recordID.
# Também descobrimos que, como estamos filtrando apenas os gêneros female e male e há um campo para gênero indefinido, o grupo de Age estava pegando as idades desse campo de gênero indefinido também, o que estava fazendo com que 
# a soma total de ocorrências para o grupo de Age estivesse maior do que para o grupo de Gender.

# teste2 = train_X[train_X['Age'] >= 65]
# teste2["RecordID"].value_counts()   

In [172]:
age_under_65_training  = train_X[train_X["Age"] < 65]
age_under_65_training  = age_under_65_training[age_under_65_training["Time"] == 0.0]
age_under_65_training_ids = age_under_65_training["RecordID"]
age_under_65_measurements_training = train_X[train_X["RecordID"].isin(age_under_65_training_ids)]
age_under_65_measurements_training  = age_under_65_measurements_training .count()
age_under_65_measurements_training 

RecordID       165840
level_1        165840
Time           165840
ALP              3307
ALT              3413
AST              3404
Age            155335
Albumin          2365
BUN             12328
Bilirubin        3420
Cholesterol       268
Creatinine      12369
DiasABP         88213
FiO2            26104
GCS             55349
Gender           3455
Glucose         11824
HCO3            12075
HCT             15855
HR             148532
Height         155335
ICUType          3455
K               13019
Lactate          7276
MAP             87584
MechVent        25766
Mg              12005
NIDiasABP       69450
NIMAP           68388
NISysABP        69501
Na              12271
PaCO2           18759
PaO2            18716
Platelets       12531
RespRate        39445
SaO2             5522
SysABP          88221
Temp            58693
TroponinI         233
TroponinT        1162
Urine          111677
WBC             11408
Weight          86870
pH              19578
dtype: int64

In [173]:
filtered_train_X = train_X[(train_X['Height'] != -1) & (train_X['Weight'] != -1) & (train_X['Height'].notna()) & (train_X['Weight'].notna())] 

In [174]:
def classify_BMI(BMI):
    if BMI <= 18.5:
        return "Baixo peso"
    elif BMI >= 18.6 and BMI <= 24.9:
        return "Peso normal"
    elif BMI >= 25 and BMI <= 29.9:
        return "Sobrepeso"
    elif BMI >= 30 and BMI <= 34.9:
        return "Obesidade grau 1"
    elif BMI >= 35 and BMI <= 39.9:
        return "Obesidade grau 2"
    elif BMI >= 40:
        return "Obesidade grau 3"

In [175]:
filtered_train_X_meters = filtered_train_X.copy()
filtered_train_X_meters['Height'] = filtered_train_X['Height'] / 100 # Converting Height from cm to meters
filtered_train_X_meters['Height']

144       1.803
145       1.803
146       1.803
147       1.803
148       1.803
          ...  
575321    1.727
575322    1.727
575323    1.727
575325    1.727
575327    1.727
Name: Height, Length: 101481, dtype: float64

In [176]:
bmi_data_train = filtered_train_X_meters
bmi_data_train["BMI"] = round(filtered_train_X_meters["Weight"] / (filtered_train_X_meters["Height"] ** 2), 1)
bmi_data_train["Classificacao"] = bmi_data_train["BMI"].apply(classify_BMI)
bmi_data_train.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
144,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,,11.5,84.6,,26.0,Sobrepeso
145,132543,1,1.0,,,,68.0,,,,...,,,,,,,84.6,,26.0,Sobrepeso
146,132543,2,2.0,,,,68.0,,,,...,,,,,,,84.6,,26.0,Sobrepeso
147,132543,3,3.0,,,,68.0,,,,...,,36.4,,,,,84.6,,26.0,Sobrepeso
148,132543,4,4.0,,,,68.0,,,,...,,,,,,,84.6,,26.0,Sobrepeso


In [177]:
bmi_data_train = bmi_data_train.groupby("RecordID").first().reset_index()
bmi_data_train

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
0,132543,0,0.0,105.0,12.0,15.0,68.0,4.4,23.0,0.2,...,,36.3,,,600.0,11.5,84.6,,26.0,Sobrepeso
1,132547,0,0.0,,,,64.0,,,,...,,,,,,,114.0,,35.1,Obesidade grau 2
2,132568,0,0.0,,,,66.0,,18.0,,...,,36.1,,,220.0,14.8,84.5,,34.1,Obesidade grau 1
3,132573,0,0.0,,,,77.0,,,,...,,36.9,,,120.0,,90.1,,34.1,Obesidade grau 1
4,132582,0,0.0,,,,84.0,2.6,31.0,,...,,36.3,,,200.0,5.3,82.5,,24.7,Peso normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3972,163003,0,0.0,124.0,254.0,204.0,36.0,2.6,3.0,30.3,...,,36.3,,,50.0,10.6,57.7,7.47,21.2,Peso normal
3973,163013,0,0.0,82.0,11.0,30.0,74.0,2.5,30.0,1.2,...,118.0,36.5,,0.03,40.0,9.6,68.6,7.35,29.5,Sobrepeso
3974,163016,0,0.0,,27.0,120.0,65.0,,29.0,0.4,...,101.0,38.1,,,75.0,8.0,63.6,7.37,24.8,Peso normal
3975,163029,0,0.0,,,,61.0,,,,...,,,,,,,85.0,,28.5,Sobrepeso


In [178]:
classification_low_weight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Baixo peso']
classification_low_weight_training_ids = classification_low_weight_training["RecordID"]
classification_measurements_l_w_t = train_X[train_X["RecordID"].isin(classification_low_weight_training_ids)]
classification_measurements_l_w_t = classification_measurements_l_w_t.count() 
classification_measurements_l_w_t 

RecordID       5856
level_1        5856
Time           5856
ALP              87
ALT              89
AST              89
Age            5635
Albumin          74
BUN             428
Bilirubin        91
Cholesterol      10
Creatinine      429
DiasABP        3605
FiO2            951
GCS            1918
Gender          122
Glucose         400
HCO3            427
HCT             551
HR             5429
Height         5635
ICUType         122
K               440
Lactate         303
MAP            3643
MechVent       1003
Mg              437
NIDiasABP      2169
NIMAP          2112
NISysABP       2173
Na              423
PaCO2           792
PaO2            795
Platelets       444
RespRate       1159
SaO2            338
SysABP         3606
Temp           2556
TroponinI        16
TroponinT        55
Urine          4396
WBC             400
Weight         2908
pH              833
dtype: int64

In [179]:
classification_normal_weight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Peso normal']
classification_normal_weight_training_ids = classification_normal_weight_training["RecordID"]
classification_measurements_n_w_t = train_X[train_X["RecordID"].isin(classification_normal_weight_training_ids)]
classification_measurements_n_w_t = classification_measurements_n_w_t.count() 
classification_measurements_n_w_t 

RecordID       55392
level_1        55392
Time           55392
ALP              901
ALT              926
AST              925
Age            53007
Albumin          680
BUN             4063
Bilirubin        933
Cholesterol      108
Creatinine      4073
DiasABP        35777
FiO2            9106
GCS            17702
Gender          1154
Glucose         3702
HCO3            3931
HCT             5663
HR             50707
Height         53007
ICUType         1154
K               4174
Lactate         2535
MAP            35621
MechVent        9032
Mg              4037
NIDiasABP      19036
NIMAP          18892
NISysABP       19057
Na              3848
PaCO2           7957
PaO2            7931
Platelets       4447
RespRate        9621
SaO2            3346
SysABP         35778
Temp           25022
TroponinI        139
TroponinT        524
Urine          40533
WBC             3900
Weight         28756
pH              8443
dtype: int64

In [180]:
classification_overweight_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Sobrepeso']
classification_overweight_training_ids = classification_overweight_training['RecordID']
classification_measurements_o_w_t = train_X[train_X["RecordID"].isin(classification_overweight_training_ids)]
classification_measurements_o_w_t = classification_measurements_o_w_t.count() 
classification_measurements_o_w_t

RecordID       67968
level_1        67968
Time           67968
ALP             1079
ALT             1119
AST             1115
Age            65049
Albumin          776
BUN             5022
Bilirubin       1120
Cholesterol      127
Creatinine      5047
DiasABP        45909
FiO2           11516
GCS            20764
Gender          1416
Glucose         4449
HCO3            4825
HCT             7237
HR             62090
Height         65049
ICUType         1416
K               5001
Lactate         3041
MAP            45829
MechVent       11292
Mg              4982
NIDiasABP      21670
NIMAP          21496
NISysABP       21693
Na              4579
PaCO2          10548
PaO2           10531
Platelets       5648
RespRate       10242
SaO2            4696
SysABP         45912
Temp           32380
TroponinI        159
TroponinT        616
Urine          50750
WBC             4915
Weight         35496
pH             11199
dtype: int64

In [181]:
classification_obesity_grade1_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_training_ids = classification_obesity_grade1_training["RecordID"]
classification_measurements_ob1_t = train_X[train_X["RecordID"].isin(classification_obesity_grade1_training_ids)]
classification_measurements_ob1_t  = classification_measurements_ob1_t .count() 
classification_measurements_ob1_t 

RecordID       34032
level_1        34032
Time           34032
ALP              577
ALT              595
AST              595
Age            32675
Albumin          406
BUN             2579
Bilirubin        595
Cholesterol       60
Creatinine      2581
DiasABP        23103
FiO2            6005
GCS            10431
Gender           709
Glucose         2278
HCO3            2467
HCT             3552
HR             31192
Height         32675
ICUType          709
K               2589
Lactate         1645
MAP            22947
MechVent        5956
Mg              2495
NIDiasABP      10471
NIMAP          10347
NISysABP       10479
Na              2388
PaCO2           5543
PaO2            5532
Platelets       2763
RespRate        4933
SaO2            2349
SysABP         23103
Temp           16022
TroponinI         97
TroponinT        375
Urine          25964
WBC             2414
Weight         18724
pH              5857
dtype: int64

In [182]:
classification_obesity_grade2_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_training_ids = classification_obesity_grade2_training["RecordID"]
classification_measurements_ob2_t = train_X[train_X["RecordID"].isin(classification_obesity_grade2_training_ids)]
classification_measurements_ob2_t = classification_measurements_ob2_t.count() 
classification_measurements_ob2_t 

RecordID       14352
level_1        14352
Time           14352
ALP              242
ALT              248
AST              246
Age            13716
Albumin          175
BUN             1077
Bilirubin        248
Cholesterol       32
Creatinine      1082
DiasABP         9943
FiO2            2647
GCS             4308
Gender           299
Glucose          940
HCO3            1040
HCT             1528
HR             13129
Height         13716
ICUType          299
K               1047
Lactate          749
MAP             9944
MechVent        2542
Mg              1037
NIDiasABP       4264
NIMAP           4219
NISysABP        4268
Na              1004
PaCO2           2415
PaO2            2410
Platelets       1209
RespRate        1870
SaO2            1040
SysABP          9944
Temp            7046
TroponinI         29
TroponinT        166
Urine          10815
WBC             1058
Weight          8074
pH              2523
dtype: int64

In [183]:
classification_obesity_grade3_training = bmi_data_train[bmi_data_train["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_training_ids = classification_obesity_grade3_training["RecordID"]
classification_measurements_ob3_t = train_X[train_X["RecordID"].isin(classification_obesity_grade3_training_ids)]
classification_measurements_ob3_t = classification_measurements_ob3_t.count() 
classification_measurements_ob3_t 

RecordID       13296
level_1        13296
Time           13296
ALP              252
ALT              259
AST              258
Age            12752
Albumin          175
BUN             1018
Bilirubin        260
Cholesterol       28
Creatinine      1020
DiasABP         8570
FiO2            2690
GCS             3843
Gender           277
Glucose          926
HCO3             984
HCT             1269
HR             12247
Height         12752
ICUType          277
K               1040
Lactate          810
MAP             8548
MechVent        2627
Mg               989
NIDiasABP       4396
NIMAP           4340
NISysABP        4404
Na               946
PaCO2           2204
PaO2            2207
Platelets       1001
RespRate        1955
SaO2             863
SysABP          8570
Temp            5699
TroponinI         22
TroponinT        141
Urine           9864
WBC              920
Weight          7523
pH              2278
dtype: int64

In [184]:
df_columns = train_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [185]:
df_train = pd.DataFrame(columns=df_columns)
df_train_transpose = df_train.T
df_train_transpose["Female"] = female_gender_measurements_training
df_train_transpose["Male"] = male_gender_measurements_training
df_train_transpose["Undefined Gender"] = undefined_gender_measurements_training
df_train_transpose["ICUType 1"] = ICUType_1_measurements_training 
df_train_transpose["ICUType 2"] = ICUType_2_measurements_training 
df_train_transpose["ICUType 3"] = ICUType_3_measurements_training 
df_train_transpose["ICUType 4"] = ICUType_4_measurements_training 
df_train_transpose["Age 65+"] = age_65_and_above_measurements_training
df_train_transpose["Age 65-"] = age_under_65_measurements_training 
df_train_transpose['Low Weight'] = classification_measurements_l_w_t 
df_train_transpose['Normal Weight'] = classification_measurements_n_w_t 
df_train_transpose['Overweight'] = classification_measurements_o_w_t 
df_train_transpose['Obesity Grade 1'] = classification_measurements_ob1_t 
df_train_transpose['Obesity Grade 2'] = classification_measurements_ob2_t 
df_train_transpose['Obesity Grade 3'] = classification_measurements_ob3_t 
df_train_transpose = df_train_transpose.drop('RecordID',axis=0)
df_train_transpose = df_train_transpose.drop('level_1',axis=0)
df_train_transpose = df_train_transpose.drop('Time',axis=0)
df_train_transpose = df_train_transpose.drop('Age',axis=0)
df_train_transpose = df_train_transpose.drop('Gender',axis=0)
df_train_transpose = df_train_transpose.drop('Height',axis=0)
df_train_transpose = df_train_transpose.drop('ICUType',axis=0)

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Train Set</h2>"))
df_train_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,2552,3431,3,806,532,2809,1839,2679,3307,87,901,1079,577,242,252
ALT,2649,3524,4,856,548,2903,1870,2764,3413,89,926,1119,595,248,259
AST,2642,3529,4,858,545,2906,1866,2771,3404,89,925,1115,595,246,258
Albumin,2044,2577,2,625,356,2126,1516,2258,2365,74,680,776,406,175,175
BUN,11504,15124,20,3810,4929,9944,7965,14320,12328,428,4063,5022,2579,1077,1018
Bilirubin,2676,3553,3,848,533,3012,1839,2812,3420,91,933,1120,595,248,260
Cholesterol,269,376,0,354,24,136,131,377,268,10,108,127,60,32,28
Creatinine,11550,15198,20,3873,4938,9984,7973,14399,12369,429,4073,5047,2581,1082,1020
DiasABP,80959,117224,196,22316,59345,46368,70350,110166,88213,3605,35777,45909,23103,9943,8570
FiO2,24430,32977,21,5858,12912,19202,19456,31324,26104,951,9106,11516,6005,2647,2690


<h3>Validation data</h3>

<h4>Loading validation dataset</h4>

In [186]:
validation_X = physionet2012_dataset['val_X']

In [187]:
validation_female_gender = validation_X[validation_X['Gender'] == 0.0]
validation_female_gender_ids = validation_female_gender["RecordID"]
female_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_female_gender_ids)]
female_gender_measurements_validation = female_gender_measurements_validation.count()
female_gender_measurements_validation

RecordID       40896
level_1        40896
Time           40896
ALP              697
ALT              714
AST              712
Age            38533
Albumin          505
BUN             3003
Bilirubin        719
Cholesterol       52
Creatinine      3012
DiasABP        21886
FiO2            6602
GCS            13240
Gender           852
Glucose         2871
HCO3            2950
HCT             3867
HR             37025
Height         38533
ICUType          852
K               3164
Lactate         1695
MAP            21770
MechVent        6433
Mg              2946
NIDiasABP      17671
NIMAP          17426
NISysABP       17694
Na              2966
PaCO2           4520
PaO2            4511
Platelets       2947
RespRate        9226
SaO2            1512
SysABP         21887
Temp           14583
TroponinI         78
TroponinT        434
Urine          28773
WBC             2747
Weight         22359
pH              4700
dtype: int64

In [188]:
validation_male_gender = validation_X[validation_X['Gender'] == 1.0]
validation_male_gender_ids = validation_male_gender["RecordID"]
male_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_male_gender_ids)]
male_gender_measurements_validation = male_gender_measurements_validation.count()
male_gender_measurements_validation

RecordID       51024
level_1        51024
Time           51024
ALP              857
ALT              883
AST              881
Age            47827
Albumin          673
BUN             3712
Bilirubin        900
Cholesterol       86
Creatinine      3725
DiasABP        28211
FiO2            8165
GCS            16003
Gender          1063
Glucose         3452
HCO3            3636
HCT             5048
HR             45615
Height         47827
ICUType         1063
K               3860
Lactate         2197
MAP            28065
MechVent        7594
Mg              3608
NIDiasABP      20788
NIMAP          20516
NISysABP       20808
Na              3619
PaCO2           6319
PaO2            6312
Platelets       3884
RespRate       11367
SaO2            2271
SysABP         28214
Temp           19317
TroponinI         90
TroponinT        549
Urine          34231
WBC             3506
Weight         25667
pH              6626
dtype: int64

In [189]:
validation_undefined_gender = validation_X[validation_X['Gender'] == -1.0]
validation_undefined_gender_ids = validation_undefined_gender["RecordID"]
undefined_gender_measurements_validation = validation_X[validation_X["RecordID"].isin(validation_undefined_gender_ids)]
undefined_gender_measurements_validation = undefined_gender_measurements_validation.count()
undefined_gender_measurements_validation

RecordID       144
level_1        144
Time           144
ALP              2
ALT              2
AST              2
Age            143
Albumin          3
BUN             17
Bilirubin        2
Cholesterol      0
Creatinine      17
DiasABP         94
FiO2            31
GCS             38
Gender           3
Glucose         17
HCO3            17
HCT             15
HR             139
Height         143
ICUType          3
K               17
Lactate         26
MAP             92
MechVent        23
Mg              16
NIDiasABP       55
NIMAP           55
NISysABP        55
Na              17
PaCO2           39
PaO2            39
Platelets       16
RespRate        47
SaO2             2
SysABP          94
Temp            59
TroponinI        1
TroponinT        5
Urine          106
WBC             15
Weight         124
pH              39
dtype: int64

In [190]:
validation_ICUType_1 = validation_X[validation_X['ICUType'] == 1.0]
validation_ICUType_1 = validation_ICUType_1[validation_ICUType_1['Time'] == 0.0]
validation_ICUType_1_ids = validation_ICUType_1['RecordID']
ICUType_1_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_1_ids)]
ICUType_1_measurements_validation = ICUType_1_measurements_validation.count()
ICUType_1_measurements_validation

RecordID       13920
level_1        13920
Time           13920
ALP              209
ALT              219
AST              218
Age            12772
Albumin          151
BUN             1029
Bilirubin        215
Cholesterol       73
Creatinine      1049
DiasABP         6052
FiO2            1624
GCS             3561
Gender           290
Glucose          974
HCO3             993
HCT             1229
HR             12155
Height         12772
ICUType          290
K               1196
Lactate          318
MAP             6018
MechVent        1509
Mg              1014
NIDiasABP       6487
NIMAP           6455
NISysABP        6496
Na               983
PaCO2           1184
PaO2            1185
Platelets       1009
RespRate        4391
SaO2             727
SysABP          6052
Temp            4251
TroponinI         40
TroponinT        291
Urine           7940
WBC              903
Weight          6485
pH              1208
dtype: int64

In [191]:
validation_ICUType_2 = validation_X[validation_X['ICUType'] == 2.0]
validation_ICUType_2 = validation_ICUType_2[validation_ICUType_2['Time'] == 0.0]
validation_ICUType_2_ids = validation_ICUType_2['RecordID']
ICUType_2_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_2_ids)]
ICUType_2_measurements_validation = ICUType_2_measurements_validation.count()
ICUType_2_measurements_validation

RecordID       19536
level_1        19536
Time           19536
ALP              151
ALT              157
AST              157
Age            18866
Albumin          102
BUN             1285
Bilirubin        156
Cholesterol        6
Creatinine      1288
DiasABP        15467
FiO2            3391
GCS             5212
Gender           407
Glucose          930
HCO3            1196
HCT             2207
HR             17908
Height         18866
ICUType          407
K               1061
Lactate          665
MAP            15457
MechVent        3276
Mg              1232
NIDiasABP       4102
NIMAP           4040
NISysABP        4110
Na              1007
PaCO2           4064
PaO2            4052
Platelets       1638
RespRate         805
SaO2            2153
SysABP         15467
Temp           11298
TroponinI         19
TroponinT         56
Urine          16316
WBC             1383
Weight          9826
pH              4438
dtype: int64

In [192]:
validation_ICUType_3 = validation_X[validation_X['ICUType'] == 3.0]
validation_ICUType_3 = validation_ICUType_3[validation_ICUType_3['Time'] == 0.0]
validation_ICUType_3_ids = validation_ICUType_3['RecordID']
ICUType_3_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_3_ids)]
ICUType_3_measurements_validation = ICUType_3_measurements_validation.count()
ICUType_3_measurements_validation

RecordID       33024
level_1        33024
Time           33024
ALP              748
ALT              770
AST              767
Age            30423
Albumin          603
BUN             2539
Bilirubin        802
Cholesterol       26
Creatinine      2543
DiasABP        11788
FiO2            4884
GCS             8998
Gender           688
Glucose         2538
HCO3            2550
HCT             3024
HR             29205
Height         30423
ICUType          688
K               2720
Lactate         1405
MAP            11698
MechVent        4492
Mg              2395
NIDiasABP      18648
NIMAP          18369
NISysABP       18667
Na              2605
PaCO2           2606
PaO2            2608
Platelets       2322
RespRate        9894
SaO2             398
SysABP         11791
Temp            9521
TroponinI         65
TroponinT        471
Urine          19812
WBC             2225
Weight         20393
pH              2648
dtype: int64

In [193]:
validation_ICUType_4 = validation_X[validation_X['ICUType'] == 4.0]
validation_ICUType_4 = validation_ICUType_4[validation_ICUType_4['Time'] == 0.0]
validation_ICUType_4_ids = validation_ICUType_4['RecordID']
ICUType_4_measurements_validation = validation_X[validation_X['RecordID'].isin(validation_ICUType_4_ids)]
ICUType_4_measurements_validation = ICUType_4_measurements_validation.count()
ICUType_4_measurements_validation

RecordID       25584
level_1        25584
Time           25584
ALP              448
ALT              453
AST              453
Age            24442
Albumin          325
BUN             1879
Bilirubin        448
Cholesterol       33
Creatinine      1874
DiasABP        16884
FiO2            4899
GCS            11510
Gender           533
Glucose         1898
HCO3            1864
HCT             2470
HR             23511
Height         24442
ICUType          533
K               2064
Lactate         1530
MAP            16754
MechVent        4773
Mg              1929
NIDiasABP       9277
NIMAP           9133
NISysABP        9284
Na              2007
PaCO2           3024
PaO2            3017
Platelets       1878
RespRate        5550
SaO2             507
SysABP         16885
Temp            8889
TroponinI         45
TroponinT        170
Urine          19042
WBC             1757
Weight         11446
pH              3071
dtype: int64

In [194]:
age_65_and_above_validation = validation_X[validation_X['Age'] >= 65]
age_65_and_above_validation = age_65_and_above_validation[age_65_and_above_validation['Time'] == 0.0]
age_65_and_above_validation_ids = age_65_and_above_validation['RecordID']
age_65_and_above_measurements_validation = validation_X[validation_X['RecordID'].isin(age_65_and_above_validation_ids)]
age_65_and_above_measurements_validation = age_65_and_above_measurements_validation.count()
age_65_and_above_measurements_validation

RecordID       50160
level_1        50160
Time           50160
ALP              705
ALT              718
AST              715
Age            47313
Albumin          570
BUN             3546
Bilirubin        724
Cholesterol       83
Creatinine      3556
DiasABP        27855
FiO2            8175
GCS            15663
Gender          1045
Glucose         3303
HCO3            3472
HCT             4816
HR             45394
Height         47313
ICUType         1045
K               3678
Lactate         2008
MAP            27736
MechVent        7609
Mg              3469
NIDiasABP      20802
NIMAP          20564
NISysABP       20827
Na              3428
PaCO2           5946
PaO2            5930
Platelets       3669
RespRate       11370
SaO2            2286
SysABP         27857
Temp           19306
TroponinI        125
TroponinT        644
Urine          35437
WBC             3359
Weight         26360
pH              6236
dtype: int64

In [195]:
age_under_65_validation = validation_X[validation_X['Age'] < 65]
age_under_65_validation = age_under_65_validation[age_under_65_validation['Time'] == 0.0]
age_under_65_validation_ids = age_under_65_validation["RecordID"]
age_under_65__measurements_validation = validation_X[validation_X["RecordID"].isin(age_under_65_validation_ids)]
age_under_65__measurements_validation = age_under_65__measurements_validation.count()
age_under_65__measurements_validation

RecordID       41904
level_1        41904
Time           41904
ALP              851
ALT              881
AST              880
Age            39190
Albumin          611
BUN             3186
Bilirubin        897
Cholesterol       55
Creatinine      3198
DiasABP        22336
FiO2            6623
GCS            13618
Gender           873
Glucose         3037
HCO3            3131
HCT             4114
HR             37385
Height         39190
ICUType          873
K               3363
Lactate         1910
MAP            22191
MechVent        6441
Mg              3101
NIDiasABP      17712
NIMAP          17433
NISysABP       17730
Na              3174
PaCO2           4932
PaO2            4932
Platelets       3178
RespRate        9270
SaO2            1499
SysABP         22338
Temp           14653
TroponinI         44
TroponinT        344
Urine          27673
WBC             2909
Weight         21790
pH              5129
dtype: int64

In [196]:
filtered_validation_X = validation_X[(validation_X['Height'] != -1) & (validation_X['Weight'] != -1) & (validation_X['Height'].notna()) & (validation_X['Weight'].notna())] 

In [197]:
filtered_validation_X_meters = filtered_validation_X.copy()
filtered_validation_X_meters['Height'] = filtered_validation_X['Height'] / 100 # Converting Height from cm to meters
filtered_validation_X_meters['Height']

48        1.753
67        1.753
68        1.753
69        1.753
70        1.753
          ...  
574939    1.702
574940    1.702
574941    1.702
574942    1.702
574943    1.702
Name: Height, Length: 25617, dtype: float64

In [198]:
bmi_data_validation = filtered_validation_X_meters
bmi_data_validation["BMI"] = filtered_validation_X_meters["Weight"] / (filtered_validation_X_meters["Height"] ** 2)
bmi_data_validation["Classificacao"] = bmi_data_validation["BMI"].apply(classify_BMI)
bmi_data_validation.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
48,132540,0,0.0,,,,76.0,,,,...,,,,,,,76.0,7.45,24.73146,Peso normal
67,132540,19,19.0,,,,76.0,,,,...,122.0,37.5,,,50.0,,80.6,,26.228364,Sobrepeso
68,132540,20,20.0,,,,76.0,,,,...,107.0,37.4,,,380.0,,80.6,,26.228364,Sobrepeso
69,132540,21,21.0,,,,76.0,,,,...,121.0,37.5,,,170.0,,80.6,,26.228364,Sobrepeso
70,132540,22,22.0,,,,76.0,,,,...,128.0,37.5,,,130.0,,80.6,,26.228364,Sobrepeso


In [199]:
classification_low_weight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Baixo peso']
classification_low_weight_validation_ids = classification_low_weight_validation["RecordID"]
classification_measurements_l_w_v = validation_X[validation_X["RecordID"].isin(classification_low_weight_validation_ids)]
classification_measurements_l_w_v = classification_measurements_l_w_v.count() 
classification_measurements_l_w_v

RecordID       1440
level_1        1440
Time           1440
ALP              18
ALT              17
AST              17
Age            1394
Albumin          13
BUN             102
Bilirubin        19
Cholesterol       0
Creatinine      103
DiasABP        1058
FiO2            292
GCS             456
Gender           30
Glucose          97
HCO3            100
HCT             147
HR             1334
Height         1394
ICUType          30
K               108
Lactate          67
MAP            1056
MechVent        295
Mg              101
NIDiasABP       408
NIMAP           407
NISysABP        408
Na               97
PaCO2           211
PaO2            210
Platelets       107
RespRate        138
SaO2             72
SysABP         1058
Temp            585
TroponinI         9
TroponinT        19
Urine          1019
WBC              99
Weight          845
pH              220
dtype: int64

In [200]:
classification_normal_weight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Peso normal']
classification_normal_weight_validation_ids = classification_normal_weight_validation["RecordID"]
classification_measurements_n_w_v = validation_X[validation_X["RecordID"].isin(classification_normal_weight_validation_ids)]
classification_measurements_n_w_v = classification_measurements_n_w_v.count() 
classification_measurements_n_w_v

RecordID       16032
level_1        16032
Time           16032
ALP              250
ALT              254
AST              254
Age            15452
Albumin          198
BUN             1200
Bilirubin        256
Cholesterol       21
Creatinine      1205
DiasABP        10736
FiO2            2856
GCS             5138
Gender           334
Glucose         1098
HCO3            1168
HCT             1742
HR             14800
Height         15452
ICUType          334
K               1216
Lactate          748
MAP            10692
MechVent        2795
Mg              1195
NIDiasABP       5351
NIMAP           5281
NISysABP        5358
Na              1146
PaCO2           2406
PaO2            2400
Platelets       1345
RespRate        2333
SaO2             982
SysABP         10736
Temp            7168
TroponinI         35
TroponinT        157
Urine          11696
WBC             1205
Weight          8579
pH              2563
dtype: int64

In [201]:
classification_overweight_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Sobrepeso']
classification_overweight_validation_ids = classification_overweight_validation["RecordID"]
classification_measurements_o_w_v = validation_X[validation_X["RecordID"].isin(classification_overweight_validation_ids)]
classification_measurements_o_w_v = classification_measurements_o_w_v.count() 
classification_measurements_o_w_v

RecordID       18960
level_1        18960
Time           18960
ALP              304
ALT              309
AST              309
Age            18266
Albumin          216
BUN             1421
Bilirubin        312
Cholesterol       21
Creatinine      1427
DiasABP        13713
FiO2            3368
GCS             5680
Gender           395
Glucose         1230
HCO3            1361
HCT             2089
HR             17374
Height         18266
ICUType          395
K               1375
Lactate          885
MAP            13641
MechVent        3357
Mg              1385
NIDiasABP       5381
NIMAP           5295
NISysABP        5388
Na              1282
PaCO2           3229
PaO2            3216
Platelets       1625
RespRate        2045
SaO2            1468
SysABP         13713
Temp            9508
TroponinI         38
TroponinT        140
Urine          14282
WBC             1409
Weight         10616
pH              3434
dtype: int64

In [202]:
classification_obesity_grade1_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_validation_ids = classification_obesity_grade1_validation["RecordID"]
classification_measurements_ob1_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade1_validation_ids)]
classification_measurements_ob1_v  = classification_measurements_ob1_v .count() 
classification_measurements_ob1_v 

RecordID       11808
level_1        11808
Time           11808
ALP              198
ALT              200
AST              199
Age            11390
Albumin          148
BUN              893
Bilirubin        204
Cholesterol       13
Creatinine       896
DiasABP         8737
FiO2            2275
GCS             3398
Gender           246
Glucose          747
HCO3             865
HCT             1355
HR             10859
Height         11390
ICUType          246
K                851
Lactate          679
MAP             8714
MechVent        2250
Mg               880
NIDiasABP       3159
NIMAP           3130
NISysABP        3163
Na               805
PaCO2           2336
PaO2            2334
Platelets       1033
RespRate        1234
SaO2            1027
SysABP          8737
Temp            6375
TroponinI         40
TroponinT         85
Urine           9176
WBC              886
Weight          6240
pH              2449
dtype: int64

In [203]:
classification_obesity_grade2_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_validation_ids = classification_obesity_grade2_validation["RecordID"]
classification_measurements_ob2_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade2_validation_ids)]
classification_measurements_ob2_v  = classification_measurements_ob2_v .count() 
classification_measurements_ob2_v 

RecordID       5472
level_1        5472
Time           5472
ALP              85
ALT              86
AST              86
Age            5252
Albumin          57
BUN             435
Bilirubin        88
Cholesterol       5
Creatinine      440
DiasABP        4012
FiO2           1056
GCS            1573
Gender          114
Glucose         365
HCO3            411
HCT             628
HR             5003
Height         5252
ICUType         114
K               409
Lactate         347
MAP            4008
MechVent       1023
Mg              422
NIDiasABP      1442
NIMAP          1432
NISysABP       1446
Na              374
PaCO2          1080
PaO2           1078
Platelets       480
RespRate        951
SaO2            460
SysABP         4012
Temp           2920
TroponinI        14
TroponinT        23
Urine          4150
WBC             416
Weight         3361
pH             1126
dtype: int64

In [204]:
classification_obesity_grade3_validation = bmi_data_validation[bmi_data_validation["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_validation_ids = classification_obesity_grade3_validation["RecordID"]
classification_measurements_ob3_v = validation_X[validation_X["RecordID"].isin(classification_obesity_grade3_validation_ids)]
classification_measurements_ob3_v  = classification_measurements_ob3_v .count() 
classification_measurements_ob3_v 

RecordID       4080
level_1        4080
Time           4080
ALP              59
ALT              60
AST              60
Age            3895
Albumin          41
BUN             299
Bilirubin        69
Cholesterol       3
Creatinine      298
DiasABP        2769
FiO2            841
GCS            1152
Gender           85
Glucose         253
HCO3            291
HCT             432
HR             3686
Height         3895
ICUType          85
K               288
Lactate         213
MAP            2750
MechVent        809
Mg              303
NIDiasABP      1081
NIMAP          1067
NISysABP       1083
Na              277
PaCO2           790
PaO2            791
Platelets       334
RespRate        379
SaO2            336
SysABP         2769
Temp           1889
TroponinI         9
TroponinT        19
Urine          3061
WBC             283
Weight         2378
pH              820
dtype: int64

In [205]:
df_columns = validation_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [206]:
df_validation = pd.DataFrame(columns=df_columns)
df_validation_transpose = df_validation.T
df_validation_transpose["Female"] = female_gender_measurements_validation
df_validation_transpose["Male"] = male_gender_measurements_validation
df_validation_transpose["Undefined Gender"] = undefined_gender_measurements_validation
df_validation_transpose["ICUType 1"] = ICUType_1_measurements_validation
df_validation_transpose["ICUType 2"] = ICUType_2_measurements_validation
df_validation_transpose["ICUType 3"] = ICUType_3_measurements_validation
df_validation_transpose["ICUType 4"] = ICUType_4_measurements_validation
df_validation_transpose["Age 65+"] = age_65_and_above_measurements_validation
df_validation_transpose["Age 65-"] = age_under_65__measurements_validation
df_validation_transpose['Low Weight'] = classification_measurements_l_w_v
df_validation_transpose['Normal Weight'] = classification_measurements_n_w_v
df_validation_transpose['Overweight'] = classification_measurements_o_w_v
df_validation_transpose['Obesity Grade 1'] = classification_measurements_ob1_v
df_validation_transpose['Obesity Grade 2'] = classification_measurements_ob2_v
df_validation_transpose['Obesity Grade 3'] = classification_measurements_ob3_v
df_validation_transpose = df_validation_transpose.drop('RecordID',axis=0)
df_validation_transpose = df_validation_transpose.drop('level_1',axis=0)
df_validation_transpose = df_validation_transpose.drop('Time',axis=0)
df_validation_transpose = df_validation_transpose.drop('Age',axis=0)
df_validation_transpose = df_validation_transpose.drop('Gender',axis=0)
df_validation_transpose = df_validation_transpose.drop('Height',axis=0)
df_validation_transpose = df_validation_transpose.drop('ICUType',axis=0)

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Validation Set</h2>"))
df_validation_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,697,857,2,209,151,748,448,705,851,18,250,304,198,85,59
ALT,714,883,2,219,157,770,453,718,881,17,254,309,200,86,60
AST,712,881,2,218,157,767,453,715,880,17,254,309,199,86,60
Albumin,505,673,3,151,102,603,325,570,611,13,198,216,148,57,41
BUN,3003,3712,17,1029,1285,2539,1879,3546,3186,102,1200,1421,893,435,299
Bilirubin,719,900,2,215,156,802,448,724,897,19,256,312,204,88,69
Cholesterol,52,86,0,73,6,26,33,83,55,0,21,21,13,5,3
Creatinine,3012,3725,17,1049,1288,2543,1874,3556,3198,103,1205,1427,896,440,298
DiasABP,21886,28211,94,6052,15467,11788,16884,27855,22336,1058,10736,13713,8737,4012,2769
FiO2,6602,8165,31,1624,3391,4884,4899,8175,6623,292,2856,3368,2275,1056,841


<h3> Test data</h3>

<h4>Loading test dataset</h4>

In [207]:
test_X = physionet2012_dataset['test_X']

In [208]:
test_female_gender = test_X[test_X['Gender'] == 0.0]
test_female_gender_ids = test_female_gender["RecordID"]
female_gender_measurements_test = test_X[test_X["RecordID"].isin(test_female_gender_ids)]
female_gender_measurements_test = female_gender_measurements_test.count()
female_gender_measurements_test

RecordID       51072
level_1        51072
Time           51072
ALP              853
ALT              858
AST              862
Age            48334
Albumin          665
BUN             3714
Bilirubin        878
Cholesterol       94
Creatinine      3733
DiasABP        27612
FiO2            7922
GCS            16475
Gender          1064
Glucose         3506
HCO3            3635
HCT             4701
HR             46408
Height         48334
ICUType         1064
K               3842
Lactate         2019
MAP            27401
MechVent        7761
Mg              3601
NIDiasABP      21634
NIMAP          21225
NISysABP       21654
Na              3660
PaCO2           5662
PaO2            5659
Platelets       3676
RespRate       13230
SaO2            1911
SysABP         27614
Temp           18472
TroponinI        120
TroponinT        574
Urine          35522
WBC             3413
Weight         27446
pH              5882
dtype: int64

In [209]:
test_male_gender = test_X[test_X['Gender'] == 1.0]
test_male_gender_ids = test_male_gender["RecordID"]
male_gender_measurements_test = test_X[test_X["RecordID"].isin(test_male_gender_ids)]
male_gender_measurements_test = male_gender_measurements_test.count()
male_gender_measurements_test

RecordID       64080
level_1        64080
Time           64080
ALP             1030
ALT             1061
AST             1064
Age            60298
Albumin          770
BUN             4576
Bilirubin       1062
Cholesterol      117
Creatinine      4604
DiasABP        35616
FiO2           10182
GCS            20222
Gender          1335
Glucose         4268
HCO3            4482
HCT             6174
HR             57586
Height         60298
ICUType         1335
K               4792
Lactate         2648
MAP            35507
MechVent        9753
Mg              4526
NIDiasABP      26296
NIMAP          25939
NISysABP       26329
Na              4461
PaCO2           7866
PaO2            7852
Platelets       4703
RespRate       13407
SaO2            2786
SysABP         35619
Temp           24646
TroponinI        125
TroponinT        718
Urine          44570
WBC             4320
Weight         32632
pH              8302
dtype: int64

In [210]:
test_undefined_gender = test_X[test_X['Gender'] == -1.0]
test_undefined_gender_ids = test_undefined_gender["RecordID"]
undefined_gender_measurements_test = test_X[test_X["RecordID"].isin(test_undefined_gender_ids)]
undefined_gender_measurements_test = undefined_gender_measurements_test.count()
undefined_gender_measurements_test

RecordID       0
level_1        0
Time           0
ALP            0
ALT            0
AST            0
Age            0
Albumin        0
BUN            0
Bilirubin      0
Cholesterol    0
Creatinine     0
DiasABP        0
FiO2           0
GCS            0
Gender         0
Glucose        0
HCO3           0
HCT            0
HR             0
Height         0
ICUType        0
K              0
Lactate        0
MAP            0
MechVent       0
Mg             0
NIDiasABP      0
NIMAP          0
NISysABP       0
Na             0
PaCO2          0
PaO2           0
Platelets      0
RespRate       0
SaO2           0
SysABP         0
Temp           0
TroponinI      0
TroponinT      0
Urine          0
WBC            0
Weight         0
pH             0
dtype: int64

In [211]:
test_ICUType_1 = test_X[test_X['ICUType'] == 1.0]
test_ICUType_1 = test_ICUType_1[test_ICUType_1['Time'] == 0.0]
test_ICUType_1_ids = test_ICUType_1['RecordID']
ICUType_1_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_1_ids)]
ICUType_1_measurements_test = ICUType_1_measurements_test.count()
ICUType_1_measurements_test

RecordID       16896
level_1        16896
Time           16896
ALP              244
ALT              259
AST              256
Age            15477
Albumin          183
BUN             1214
Bilirubin        247
Cholesterol      111
Creatinine      1232
DiasABP         6633
FiO2            1756
GCS             4522
Gender           352
Glucose         1152
HCO3            1167
HCT             1462
HR             14720
Height         15477
ICUType          352
K               1399
Lactate          389
MAP             6611
MechVent        1728
Mg              1208
NIDiasABP       8402
NIMAP           8351
NISysABP        8407
Na              1164
PaCO2           1343
PaO2            1341
Platelets       1167
RespRate        5931
SaO2             768
SysABP          6634
Temp            5037
TroponinI         50
TroponinT        375
Urine           9739
WBC             1069
Weight          7219
pH              1374
dtype: int64

In [212]:
test_ICUType_2 = test_X[test_X['ICUType'] == 2.0]
test_ICUType_2 = test_ICUType_2[test_ICUType_2['Time'] == 0.0]
test_ICUType_2_ids = test_ICUType_2['RecordID']
ICUType_2_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_2_ids)]
ICUType_2_measurements_test = ICUType_2_measurements_test.count()
ICUType_2_measurements_test

RecordID       25728
level_1        25728
Time           25728
ALP              174
ALT              176
AST              178
Age            24830
Albumin          107
BUN             1659
Bilirubin        177
Cholesterol        3
Creatinine      1665
DiasABP        20388
FiO2            4280
GCS             6748
Gender           536
Glucose         1201
HCO3            1556
HCT             2722
HR             23600
Height         24830
ICUType          536
K               1345
Lactate          847
MAP            20436
MechVent        4284
Mg              1613
NIDiasABP       5410
NIMAP           5355
NISysABP        5429
Na              1312
PaCO2           5118
PaO2            5106
Platelets       2053
RespRate        1102
SaO2            2707
SysABP         20389
Temp           14701
TroponinI         37
TroponinT         61
Urine          21681
WBC             1729
Weight         13256
pH              5635
dtype: int64

In [213]:
test_ICUType_3 = test_X[test_X['ICUType'] == 3.0]
test_ICUType_3 = test_ICUType_3[test_ICUType_3['Time'] == 0.0]
test_ICUType_3_ids = test_ICUType_3['RecordID']
ICUType_3_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_3_ids)]
ICUType_3_measurements_test = ICUType_3_measurements_test.count()
ICUType_3_measurements_test

RecordID       40320
level_1        40320
Time           40320
ALP              896
ALT              907
AST              909
Age            37462
Albumin          705
BUN             3029
Bilirubin        952
Cholesterol       40
Creatinine      3042
DiasABP        14679
FiO2            6142
GCS            10724
Gender           840
Glucose         3013
HCO3            3036
HCT             3547
HR             36027
Height         37462
ICUType          840
K               3283
Lactate         1532
MAP            14461
MechVent        5695
Mg              2849
NIDiasABP      22526
NIMAP          22016
NISysABP       22534
Na              3123
PaCO2           3188
PaO2            3197
Platelets       2739
RespRate       12100
SaO2             490
SysABP         14681
Temp           11759
TroponinI        111
TroponinT        572
Urine          24018
WBC             2632
Weight         26702
pH              3247
dtype: int64

In [214]:
test_ICUType_4 = test_X[test_X['ICUType'] == 4.0]
test_ICUType_4 = test_ICUType_4[test_ICUType_4['Time'] == 0.0]
test_ICUType_4_ids = test_ICUType_4['RecordID']
ICUType_4_measurements_test = test_X[test_X['RecordID'].isin(test_ICUType_4_ids)]
ICUType_4_measurements_test = ICUType_4_measurements_test.count()
ICUType_4_measurements_test

RecordID       32208
level_1        32208
Time           32208
ALP              569
ALT              577
AST              583
Age            30863
Albumin          440
BUN             2388
Bilirubin        564
Cholesterol       57
Creatinine      2398
DiasABP        21528
FiO2            5926
GCS            14703
Gender           671
Glucose         2408
HCO3            2358
HCT             3144
HR             29647
Height         30863
ICUType          671
K               2607
Lactate         1899
MAP            21400
MechVent        5807
Mg              2457
NIDiasABP      11592
NIMAP          11442
NISysABP       11613
Na              2522
PaCO2           3879
PaO2            3867
Platelets       2420
RespRate        7504
SaO2             732
SysABP         21529
Temp           11621
TroponinI         47
TroponinT        284
Urine          24654
WBC             2303
Weight         12901
pH              3928
dtype: int64

In [215]:
age_65_and_above_test= test_X[test_X['Age'] >= 65]
age_65_and_above_test = age_65_and_above_test[age_65_and_above_test['Time'] == 0.0]
age_65_and_above_test_ids = age_65_and_above_test['RecordID']
age_65_and_above_measurements_test= test_X[test_X['RecordID'].isin(age_65_and_above_test_ids)]
age_65_and_above_measurements_test = age_65_and_above_measurements_test.count()
age_65_and_above_measurements_test

RecordID       62112
level_1        62112
Time           62112
ALP              799
ALT              800
AST              804
Age            58664
Albumin          662
BUN             4336
Bilirubin        826
Cholesterol      130
Creatinine      4357
DiasABP        34503
FiO2            9866
GCS            19168
Gender          1294
Glucose         3991
HCO3            4229
HCT             5837
HR             56238
Height         58664
ICUType         1294
K               4482
Lactate         2461
MAP            34361
MechVent        9425
Mg              4265
NIDiasABP      25733
NIMAP          25420
NISysABP       25754
Na              4175
PaCO2           7554
PaO2            7551
Platelets       4395
RespRate       14143
SaO2            2912
SysABP         34506
Temp           24247
TroponinI        165
TroponinT        909
Urine          44231
WBC             4022
Weight         32659
pH              7940
dtype: int64

In [216]:
age_under_65_test = test_X[test_X['Age'] < 65]
age_under_65_test = age_under_65_test[age_under_65_test['Time'] == 0.0]
age_under_65_test_ids = age_under_65_test["RecordID"]
age_under_65_measurements_test = test_X[test_X["RecordID"].isin(age_under_65_test_ids)]
age_under_65_measurements_test = age_under_65_measurements_test.count()
age_under_65_measurements_test

RecordID       53040
level_1        53040
Time           53040
ALP             1084
ALT             1119
AST             1122
Age            49968
Albumin          773
BUN             3954
Bilirubin       1114
Cholesterol       81
Creatinine      3980
DiasABP        28725
FiO2            8238
GCS            17529
Gender          1105
Glucose         3783
HCO3            3888
HCT             5038
HR             47756
Height         49968
ICUType         1105
K               4152
Lactate         2206
MAP            28547
MechVent        8089
Mg              3862
NIDiasABP      22197
NIMAP          21744
NISysABP       22229
Na              3946
PaCO2           5974
PaO2            5960
Platelets       3984
RespRate       12494
SaO2            1785
SysABP         28727
Temp           18871
TroponinI         80
TroponinT        383
Urine          35861
WBC             3711
Weight         27419
pH              6244
dtype: int64

In [217]:
filtered_test_X = test_X[(test_X['Height'] != -1) & (test_X['Weight'] != -1) & (test_X['Height'].notna()) & (test_X['Weight'].notna())] 

In [218]:
filtered_test_X_meters = filtered_test_X.copy()
filtered_test_X_meters['Height'] = filtered_test_X['Height'] / 100 # Converting Height from cm to meters
filtered_test_X_meters['Height']

336       1.626
337       1.626
341       1.626
342       1.626
343       1.626
          ...  
574891    1.829
574892    1.829
574893    1.829
574894    1.829
575088    1.727
Name: Height, Length: 31701, dtype: float64

In [219]:
bmi_data_test = filtered_test_X_meters
bmi_data_test["BMI"] = filtered_test_X_meters["Weight"] / (filtered_test_X_meters["Height"] ** 2)
bmi_data_test["Classificacao"] = bmi_data_test["BMI"].apply(classify_BMI)
bmi_data_test.head()

Unnamed: 0,RecordID,level_1,Time,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,...,SysABP,Temp,TroponinI,TroponinT,Urine,WBC,Weight,pH,BMI,Classificacao
336,132551,0,0.0,47.0,46.0,82.0,78.0,1.9,81.0,0.3,...,102.75,38.0,3.5,,,16.1,48.4,7.4,18.306456,Baixo peso
337,132551,1,1.0,,,,78.0,,,,...,114.5,,,,120.0,,48.4,,18.306456,Baixo peso
341,132551,5,5.0,,,,78.0,,,,...,104.0,,,,130.0,,48.4,7.29,18.306456,Baixo peso
342,132551,6,6.0,,,,78.0,,67.0,,...,141.0,35.6,3.1,,60.0,20.4,48.4,7.25,18.306456,Baixo peso
343,132551,7,7.0,,,,78.0,,,,...,132.0,,,,,,48.4,,18.306456,Baixo peso


In [220]:
classification_low_weight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Baixo peso']
classification_low_weight_test_ids = classification_low_weight_test["RecordID"]
classification_measurements_l_w_test = test_X[test_X["RecordID"].isin(classification_low_weight_test_ids)]
classification_measurements_l_w_test = classification_measurements_l_w_test.count() 
classification_measurements_l_w_test

RecordID       2160
level_1        2160
Time           2160
ALP              37
ALT              38
AST              39
Age            2060
Albumin          30
BUN             179
Bilirubin        37
Cholesterol       5
Creatinine      181
DiasABP        1448
FiO2            342
GCS             582
Gender           45
Glucose         174
HCO3            173
HCT             234
HR             1984
Height         2060
ICUType          45
K               191
Lactate         142
MAP            1444
MechVent        329
Mg              169
NIDiasABP       720
NIMAP           714
NISysABP        721
Na              177
PaCO2           293
PaO2            293
Platelets       171
RespRate        310
SaO2            115
SysABP         1448
Temp            910
TroponinI        10
TroponinT        25
Urine          1444
WBC             155
Weight         1206
pH              306
dtype: int64

In [221]:
classification_normal_weight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Peso normal']
classification_normal_weight_test_ids = classification_normal_weight_test["RecordID"]
classification_measurements_n_w_test = test_X[test_X["RecordID"].isin(classification_normal_weight_test_ids)]
classification_measurements_n_w_test = classification_measurements_n_w_test.count() 
classification_measurements_n_w_test

RecordID       19200
level_1        19200
Time           19200
ALP              257
ALT              267
AST              267
Age            18468
Albumin          206
BUN             1392
Bilirubin        261
Cholesterol       35
Creatinine      1401
DiasABP        13161
FiO2            3218
GCS             6167
Gender           400
Glucose         1251
HCO3            1343
HCT             1903
HR             17661
Height         18468
ICUType          400
K               1445
Lactate          936
MAP            13101
MechVent        3296
Mg              1382
NIDiasABP       6061
NIMAP           6012
NISysABP        6064
Na              1305
PaCO2           2891
PaO2            2883
Platelets       1503
RespRate        2583
SaO2            1133
SysABP         13161
Temp            8686
TroponinI         51
TroponinT        163
Urine          14397
WBC             1332
Weight          9373
pH              3071
dtype: int64

In [222]:
classification_overweight_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Sobrepeso']
classification_overweight_test_ids = classification_overweight_test["RecordID"]
classification_measurements_o_w_test = test_X[test_X["RecordID"].isin(classification_overweight_test_ids)]
classification_measurements_o_w_test = classification_measurements_o_w_test.count() 
classification_measurements_o_w_test

RecordID       23808
level_1        23808
Time           23808
ALP              393
ALT              398
AST              400
Age            22823
Albumin          267
BUN             1762
Bilirubin        398
Cholesterol       43
Creatinine      1775
DiasABP        16961
FiO2            4077
GCS             7190
Gender           496
Glucose         1504
HCO3            1701
HCT             2518
HR             21741
Height         22823
ICUType          496
K               1672
Lactate         1055
MAP            16971
MechVent        4071
Mg              1738
NIDiasABP       6733
NIMAP           6633
NISysABP        6747
Na              1598
PaCO2           4001
PaO2            3987
Platelets       1997
RespRate        3228
SaO2            1787
SysABP         16963
Temp           11884
TroponinI         55
TroponinT        222
Urine          18394
WBC             1749
Weight         12774
pH              4315
dtype: int64

In [223]:
classification_obesity_grade1_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 1']
classification_obesity_grade1_test_ids = classification_obesity_grade1_test["RecordID"]
classification_measurements_ob1_test = test_X[test_X["RecordID"].isin(classification_obesity_grade1_test_ids)]
classification_measurements_ob1_test = classification_measurements_ob1_test .count() 
classification_measurements_ob1_test

RecordID       17424
level_1        17424
Time           17424
ALP              259
ALT              260
AST              260
Age            16877
Albumin          177
BUN             1267
Bilirubin        270
Cholesterol       26
Creatinine      1275
DiasABP        13074
FiO2            3355
GCS             5292
Gender           363
Glucose         1065
HCO3            1220
HCT             1906
HR             16117
Height         16877
ICUType          363
K               1189
Lactate          887
MAP            13027
MechVent        3255
Mg              1262
NIDiasABP       4843
NIMAP           4778
NISysABP        4852
Na              1136
PaCO2           3180
PaO2            3178
Platelets       1462
RespRate        1477
SaO2            1383
SysABP         13074
Temp            9140
TroponinI         30
TroponinT        126
Urine          13999
WBC             1301
Weight          9324
pH              3383
dtype: int64

In [224]:
classification_obesity_grade2_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 2']
classification_obesity_grade2_test_ids = classification_obesity_grade2_test["RecordID"]
classification_measurements_ob2_test = test_X[test_X["RecordID"].isin(classification_obesity_grade2_test_ids)]
classification_measurements_ob2_test = classification_measurements_ob2_test .count() 
classification_measurements_ob2_test

RecordID       7776
level_1        7776
Time           7776
ALP             141
ALT             137
AST             140
Age            7517
Albumin          90
BUN             598
Bilirubin       146
Cholesterol       8
Creatinine      603
DiasABP        5700
FiO2           1534
GCS            2121
Gender          162
Glucose         514
HCO3            581
HCT             871
HR             7126
Height         7517
ICUType         162
K               573
Lactate         463
MAP            5724
MechVent       1500
Mg              569
NIDiasABP      2011
NIMAP          1936
NISysABP       2012
Na              534
PaCO2          1533
PaO2           1530
Platelets       666
RespRate        678
SaO2            689
SysABP         5701
Temp           4168
TroponinI        12
TroponinT        77
Urine          6044
WBC             602
Weight         4653
pH             1598
dtype: int64

In [225]:
classification_obesity_grade3_test = bmi_data_test[bmi_data_test["Classificacao"] == 'Obesidade grau 3']
classification_obesity_grade3_test_ids = classification_obesity_grade3_test["RecordID"]
classification_measurements_ob3_test = test_X[test_X["RecordID"].isin(classification_obesity_grade3_test_ids)]
classification_measurements_ob3_test = classification_measurements_ob3_test .count() 
classification_measurements_ob3_test

RecordID       5424
level_1        5424
Time           5424
ALP              78
ALT              79
AST              79
Age            5239
Albumin          57
BUN             406
Bilirubin        75
Cholesterol       6
Creatinine      406
DiasABP        3892
FiO2           1109
GCS            1565
Gender          113
Glucose         364
HCO3            390
HCT             558
HR             5033
Height         5239
ICUType         113
K               402
Lactate         296
MAP            3923
MechVent       1128
Mg              420
NIDiasABP      1418
NIMAP          1388
NISysABP       1422
Na              367
PaCO2           984
PaO2            985
Platelets       431
RespRate        614
SaO2            465
SysABP         3892
Temp           2671
TroponinI         7
TroponinT        66
Urine          4237
WBC             391
Weight         3251
pH             1018
dtype: int64

In [226]:
df_columns = test_X.columns
df_columns

Index(['RecordID', 'level_1', 'Time', 'ALP', 'ALT', 'AST', 'Age', 'Albumin',
       'BUN', 'Bilirubin', 'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2',
       'GCS', 'Gender', 'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'K', 'Lactate', 'MAP', 'MechVent', 'Mg', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC', 'Weight',
       'pH'],
      dtype='object')

In [227]:
df_test = pd.DataFrame(columns=df_columns)
df_test_transpose = df_test.T
df_test_transpose["Female"] = female_gender_measurements_test
df_test_transpose["Male"] = male_gender_measurements_test
df_test_transpose["Undefined Gender"] = undefined_gender_measurements_test
df_test_transpose["ICUType 1"] = ICUType_1_measurements_test
df_test_transpose["ICUType 2"] = ICUType_2_measurements_test
df_test_transpose["ICUType 3"] = ICUType_3_measurements_test
df_test_transpose["ICUType 4"] = ICUType_4_measurements_test
df_test_transpose["Age 65+"] = age_65_and_above_measurements_test
df_test_transpose["Age 65-"] = age_under_65_measurements_test
df_test_transpose['Low Weight'] = classification_measurements_l_w_test 
df_test_transpose['Normal Weight'] = classification_measurements_n_w_test
df_test_transpose['Overweight'] = classification_measurements_o_w_test
df_test_transpose['Obesity Grade 1'] = classification_measurements_ob1_test 
df_test_transpose['Obesity Grade 2'] = classification_measurements_ob2_test
df_test_transpose['Obesity Grade 3'] = classification_measurements_ob3_test
df_test_transpose = df_test_transpose.drop('RecordID',axis=0)
df_test_transpose = df_test_transpose.drop('level_1',axis=0)
df_test_transpose = df_test_transpose.drop('Time',axis=0)
df_test_transpose = df_test_transpose.drop('Age',axis=0)
df_test_transpose = df_test_transpose.drop('Gender',axis=0)
df_test_transpose = df_test_transpose.drop('Height',axis=0)
df_test_transpose = df_test_transpose.drop('ICUType',axis=0)

#df_test_transpose.style.set_caption("Repeated Measurements per variable by demographics")

display(HTML("<h2 style='text-align: center; font-size: 24px; font-weight: bold;'>Repeated Measurements per Variable by Demographics - Test Set</h2>"))
df_test_transpose

Unnamed: 0,Female,Male,Undefined Gender,ICUType 1,ICUType 2,ICUType 3,ICUType 4,Age 65+,Age 65-,Low Weight,Normal Weight,Overweight,Obesity Grade 1,Obesity Grade 2,Obesity Grade 3
ALP,853,1030,0,244,174,896,569,799,1084,37,257,393,259,141,78
ALT,858,1061,0,259,176,907,577,800,1119,38,267,398,260,137,79
AST,862,1064,0,256,178,909,583,804,1122,39,267,400,260,140,79
Albumin,665,770,0,183,107,705,440,662,773,30,206,267,177,90,57
BUN,3714,4576,0,1214,1659,3029,2388,4336,3954,179,1392,1762,1267,598,406
Bilirubin,878,1062,0,247,177,952,564,826,1114,37,261,398,270,146,75
Cholesterol,94,117,0,111,3,40,57,130,81,5,35,43,26,8,6
Creatinine,3733,4604,0,1232,1665,3042,2398,4357,3980,181,1401,1775,1275,603,406
DiasABP,27612,35616,0,6633,20388,14679,21528,34503,28725,1448,13161,16961,13074,5700,3892
FiO2,7922,10182,0,1756,4280,6142,5926,9866,8238,342,3218,4077,3355,1534,1109
