# Inputs

In [1]:
# Time-series setting: handed to patientsToNumpy as its second argument.
hoursPerWindow = 1

# Cross-validation settings: both values are handed to Patients.split.
splitSeed = 27
splitPartCount = 5

# Preprocess

## read data

In [2]:
from utils.class_patient import Patients


# Load the patient collection through the project's Patients helper.
patients = Patients.loadPatients()
len(patients)  # displayed as the cell output

1213

## remove missing

In [3]:
# fill measures whose null represents a false value

from constants import NULLABLE_MEASURES


nullableMeasures = NULLABLE_MEASURES

# Every measure listed in NULLABLE_MEASURES gets its missing values filled with 0.
for nullableName in nullableMeasures:
    patients.fillMissingMeasureValue(nullableName, 0)

In [4]:
# remove measures with less than 80% of data

measures = patients.getMeasures()

# A measure whose count is below 80% of the number of patients is dropped;
# each dropped measure is printed together with its count.
minimumCount = len(patients) * 80 / 100

for measureName, measureCount in measures.items():
    if measureCount < minimumCount:
        patients.removeMeasures([measureName])
        print(measureName, measureCount)

pco2 917
ph 954
po2 917
albumin 406
hba1c 326
lymphocyte 446
height 415
urine-ketone 294
crp 19


In [5]:
# remove patients with less than 80% of data
# NOTE(review): the 80% threshold is not visible in this call — presumably it
# is the default inside removePatientByMissingFeatures; confirm.

patients.removePatientByMissingFeatures()
len(patients)  # displayed as the cell output: patients left after the removal

1206

In [6]:
# remove patients with positive tag in first 12 hours
# NOTE(review): the cell output is whatever removePatientAkiEarly returns —
# presumably the number of removed patients (1206 before, 1124 after); confirm.

from pandas import Timedelta


patients.removePatientAkiEarly(Timedelta(hours=12))

82

In [7]:
# Cohort summary: total size, number of patients flagged akdPositive, and
# the share of flagged patients.
totalCount = len(patients)
akiCount = sum(1 for p in patients if p.akdPositive)

print("Total ", totalCount)
print("AKI ", akiCount)
print("Ratio ", akiCount / totalCount)

Total  1124
AKI  392
Ratio  0.3487544483985765


## split patients

In [8]:
# Split the cohort with the part count and seed from the "Inputs" section.
splitedPatients = patients.split(splitPartCount, splitSeed)

len(splitedPatients[0])  # size of the first part, displayed as the cell output

225

In [9]:
# Same split call as in the previous cell, repeated here (presumably so this
# cell can be re-run on its own — confirm). The generators below read it.
splitedPatients = patients.split(splitPartCount, splitSeed)


def trainTest():
    """Yield one ``(trainPatients, testPatients)`` pair per cross-validation fold.

    Each part of the module-level ``splitedPatients`` is used once as the test
    set; all remaining parts are merged, in their original order, into a new
    ``Patients`` object that serves as the training set.
    """
    for testIndex, testPatients in enumerate(splitedPatients):
        trainPatients = Patients(patients=[])
        for partIndex, part in enumerate(splitedPatients):
            # Skip the part that is held out as the test set of this fold.
            if partIndex != testIndex:
                trainPatients += part

        yield trainPatients, testPatients


def trainValTest(valPartCount=5, valSeed=27):
    """Yield ``(trainPatients, valPatients, testPatients)`` for every fold.

    The outer train/test folds come from ``trainTest()``. The training side of
    each fold is split again; the last part of that inner split becomes the
    validation set and the remaining parts are merged back into the training set.

    Args:
        valPartCount: number of parts for the inner split, passed as the first
            argument of ``Patients.split``. Defaults to 5, the value that was
            previously hard-coded.
        valSeed: second argument of ``Patients.split`` for the inner split.
            Defaults to 27, the value that was previously hard-coded.

    Yields:
        Tuples ``(trainPatients, valPatients, testPatients)``.
    """
    for trainValPatients, testPatients in trainTest():
        # Hold out the last inner part for validation; keep the rest for training.
        *trainParts, valPatients = trainValPatients.split(valPartCount, valSeed)

        trainPatients = Patients(patients=[])
        for part in trainParts:
            trainPatients += part

        yield trainPatients, valPatients, testPatients

In [10]:
# Sanity check: print the train and test sizes of every fold.
for trainPart, testPart in trainTest():
    foldSizes = (len(trainPart.patientList), len(testPart.patientList))
    print(*foldSizes)

899 225
899 225
899 225
899 225
900 224


# LSTM

### Separate static and dynamic

In [11]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Concatenate, Masking, Conv1D, MaxPooling1D
from tensorflow.keras.optimizers import Adam


def createModel2(timeSteps, timeFeatures, staticFeatures):
    """Build and compile a two-input binary classifier.

    Args:
        timeSteps: number of time steps in the time-series input.
        timeFeatures: number of features per time step.
        staticFeatures: number of static features.

    Returns:
        A compiled Keras ``Model`` taking ``[timeSeries, static]`` inputs and
        producing a single sigmoid output.
    """
    # Time-series branch: mask time steps equal to 0.0, then a 64-unit LSTM.
    seriesInput = Input(shape=(timeSteps, timeFeatures))
    maskedSeries = Masking(mask_value=0.0)(seriesInput)
    seriesEncoding = LSTM(64)(maskedSeries)

    # Static branch: one 32-unit dense layer.
    staticInput = Input(shape=(staticFeatures,))
    staticEncoding = Dense(32, activation="relu")(staticInput)

    # Head: concatenate both branches and reduce to one probability.
    merged = Concatenate(axis=1)([seriesEncoding, staticEncoding])
    hidden = Dense(16, activation="relu")(merged)
    probability = Dense(1, activation="sigmoid")(hidden)

    model = Model(inputs=[seriesInput, staticInput], outputs=probability)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="binary_crossentropy",
        metrics=["AUC", "accuracy", "precision", "recall"],
    )

    return model

2024-06-29 08:59:04.916609: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-29 08:59:04.960508: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
%load_ext autoreload
%autoreload 2

from utils.prepare_data import normalizeData, patientsToNumpy
from constants import CATEGORICAL_MEASURES
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping



# Per-fold test metrics, filled by the cross-validation loop below.
loses = []
aucs = []
accuracies = []
precisions = []
recals = []

# Window settings shared by the train / test / validation extraction calls.
timeSeriesArgs = dict(timeSeriesOnly=True, fromHour=0, toHour=12)

for i, (trainPatients, valPatients, testPatients) in enumerate(trainValTest()):
    ################### Time series ###################
    # The training call returns the encoders, the outlier object and the column
    # list; they are passed back in for the test and validation folds
    # (presumably so those folds reuse the training-fold fit — confirm in
    # patientsToNumpy).
    npTrainX, categoryEncoder, numericEncoder, oulier, columns = patientsToNumpy(
        trainPatients,
        hoursPerWindow,
        CATEGORICAL_MEASURES,
        **timeSeriesArgs,
    )

    npTestX, *_ = patientsToNumpy(
        testPatients,
        hoursPerWindow,
        CATEGORICAL_MEASURES,
        columns,
        categoryEncoder,
        numericEncoder,
        oulier,
        **timeSeriesArgs,
    )

    npValX, *_ = patientsToNumpy(
        valPatients,
        hoursPerWindow,
        CATEGORICAL_MEASURES,
        columns,
        categoryEncoder,
        numericEncoder,
        oulier,
        **timeSeriesArgs,
    )

    # NaN becomes 0, which is also the mask value of the model's Masking layer.
    npTrainX = np.nan_to_num(npTrainX, nan=0)
    npTestX = np.nan_to_num(npTestX, nan=0)
    npValX = np.nan_to_num(npValX, nan=0)

    ################### Static ###################
    staticTrainX = trainPatients.getMeasuresBetween(measureTypes="static")
    staticTestX = testPatients.getMeasuresBetween(measureTypes="static")
    staticValX = valPatients.getMeasuresBetween(measureTypes="static")

    staticTrainX, staticTestX, staticValX = normalizeData(
        staticTrainX, staticTestX, staticValX
    )

    staticLen = len(staticTrainX.columns)

    staticTrainX = staticTrainX.to_numpy().astype(np.float32)
    staticTestX = staticTestX.to_numpy().astype(np.float32)
    staticValX = staticValX.to_numpy().astype(np.float32)  # type: ignore

    # All three static arrays get the same NaN handling. The validation array
    # was previously skipped, so a NaN there could turn val_loss into NaN and
    # defeat the early stopping that monitors it.
    staticTrainX = np.nan_to_num(staticTrainX, nan=0)
    staticTestX = np.nan_to_num(staticTestX, nan=0)
    staticValX = np.nan_to_num(staticValX, nan=0)

    ################### labels ###################
    trainY = [p.akdPositive for p in trainPatients]
    testY = [p.akdPositive for p in testPatients]
    valY = [p.akdPositive for p in valPatients]

    model = createModel2(npTrainX.shape[1], npTrainX.shape[2], staticLen)

    # Class weights: total / (2 * class count), so the rarer class weighs more.
    neg, pos = np.bincount(trainY)
    weight0 = (1 / neg) * (len(trainY)) / 2.0
    weight1 = (1 / pos) * (len(trainY)) / 2.0
    weight = {0: weight0, 1: weight1}

    early_stopping = EarlyStopping(
        monitor="val_loss", patience=10, restore_best_weights=True
    )

    trainY = np.array(trainY)
    model.fit(
        [npTrainX, staticTrainX],
        trainY,
        epochs=50,
        batch_size=32,
        validation_data=([npValX, staticValX], np.array(valY)),
        class_weight=weight,
        callbacks=[early_stopping],
    )

    # evaluate returns the loss followed by the metrics in compile order.
    testY = np.array(testY)
    loss, auc, accuracy, precison, recal = model.evaluate([npTestX, staticTestX], testY)
    loses.append(loss)
    aucs.append(auc)
    accuracies.append(accuracy)
    precisions.append(precison)
    recals.append(recal)

# Per-fold values followed by their mean and standard deviation.
print("Loses:", loses, np.mean(loses), np.std(loses))
print("AUCs:", aucs, np.mean(aucs), np.std(aucs))
print("Accuracies:", accuracies, np.mean(accuracies), np.std(accuracies))
print("Precisions:", precisions, np.mean(precisions), np.std(precisions))
print("Recals:", recals, np.mean(recals), np.std(recals))

Epoch 1/50


2024-06-29 09:01:15.378970: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-29 09:01:15.398749: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - AUC: 0.6329 - accuracy: 0.6397 - loss: 0.6733 - precision: 0.4969 - recall: 0.4724 - val_AUC: 0.7880 - val_accuracy: 0.7151 - val_loss: 0.5768 - val_precision: 0.5663 - val_recall: 0.7581
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - AUC: 0.8444 - accuracy: 0.7635 - loss: 0.5165 - precision: 0.6444 - recall: 0.7571 - val_AUC: 0.7969 - val_accuracy: 0.7318 - val_loss: 0.5539 - val_precision: 0.5897 - val_recall: 0.7419
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - AUC: 0.8488 - accuracy: 0.7881 - loss: 0.4817 - precision: 0.6529 - recall: 0.7514 - val_AUC: 0.8005 - val_accuracy: 0.7318 - val_loss: 0.5443 - val_precision: 0.5972 - val_recall: 0.6935
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - AUC: 0.8691 - accuracy: 0.8000 - loss: 0.4570 - precision: 0.6760 - recall: 0.7800 - val_AUC: 0.7951 -

In [13]:
# Build a fresh model and fit it once more, reusing the arrays, class weights
# and early-stopping callback left in the kernel by the last cross-validation fold.
model = createModel2(npTrainX.shape[1], npTrainX.shape[2], staticLen)

model.fit(
    x=[npTrainX, staticTrainX],
    y=trainY,
    validation_data=([npValX, staticValX], np.array(valY)),
    class_weight=weight,
    callbacks=[early_stopping],
    batch_size=32,
    epochs=50,
)


Epoch 1/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - AUC: 0.7129 - accuracy: 0.6032 - loss: 0.6352 - precision: 0.4598 - recall: 0.7702 - val_AUC: 0.7394 - val_accuracy: 0.7167 - val_loss: 0.5917 - val_precision: 0.5938 - val_recall: 0.6032
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - AUC: 0.8580 - accuracy: 0.7762 - loss: 0.5074 - precision: 0.6495 - recall: 0.8436 - val_AUC: 0.7527 - val_accuracy: 0.6667 - val_loss: 0.5557 - val_precision: 0.5246 - val_recall: 0.5079
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - AUC: 0.8863 - accuracy: 0.8133 - loss: 0.4397 - precision: 0.7032 - recall: 0.8156 - val_AUC: 0.7563 - val_accuracy: 0.6944 - val_loss: 0.5453 - val_precision: 0.5833 - val_recall: 0.4444
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - AUC: 0.9004 - accuracy: 0.8027 - loss: 0.4207 - precision: 0.6772 - recall: 0.8773 - val_AU

<keras.src.callbacks.history.History at 0x733953e684a0>