# Inputs

In [1]:
splitPartCount = 5
splitSeed = 27
hoursPerWindow = 1

# Preproccess

## read data

In [2]:
from utils.class_patient import Patients


patients = Patients.loadPatients()
len(patients)

1213

## remove missing

In [3]:
# fill measures whose null represent false value

from constants import NULLABLE_MEASURES


nullableMeasures = NULLABLE_MEASURES

for measureName in nullableMeasures:
    patients.fillMissingMeasureValue(measureName, 0)

In [4]:
# remove measures with less than 80% of data

measures = patients.getMeasures()

for measure, count in measures.items():
    if count < len(patients) * 80 / 100:
        patients.removeMeasures([measure])
        print(measure, count)

pco2 917
ph 954
po2 917
albumin 406
hba1c 326
lymphocyte 446
height 415
urine-ketone 294
crp 19


In [5]:
# remove patients with less than 80% of data

patients.removePatientByMissingFeatures()
len(patients)

1206

In [6]:
# # remove patients with positive tag in first 12 hours

from pandas import Timedelta


patients.removePatientAkiEarly(Timedelta(hours=12))

82

In [7]:
print("Total ", len(patients))
print("AKI ", sum([1 for p in patients if p.akdPositive]))
print("Ratio ", sum([1 for p in patients if p.akdPositive]) / len(patients))

Total  1124
AKI  392
Ratio  0.3487544483985765


## split patients

In [8]:
splitedPatients = patients.split(splitPartCount, splitSeed)

len(splitedPatients[0])

225

In [9]:
splitedPatients = patients.split(splitPartCount, splitSeed)


def trainTest():
    for i in range(splitedPatients.__len__()):
        testPatients = splitedPatients[i]

        trainPatientsList = splitedPatients[:i] + splitedPatients[i + 1 :]
        trainPatients = Patients(patients=[])
        for trainPatientsElem in trainPatientsList:
            trainPatients += trainPatientsElem

        yield trainPatients, testPatients


def trainValTest():
    for i in range(splitedPatients.__len__()):
        testPatients = splitedPatients[i]

        trainPatientsList = splitedPatients[:i] + splitedPatients[i + 1 :]
        trainPatients = Patients(patients=[])
        for trainPatientsElem in trainPatientsList:
            trainPatients += trainPatientsElem

        *trainPatients, valPatients = trainPatients.split(5, 27)
        tmpPatients = Patients(patients=[])
        for trainPatientsElem in trainPatients:
            tmpPatients += trainPatientsElem
        trainPatients = tmpPatients

        yield trainPatients, valPatients, testPatients

In [10]:
for trainPatients, testPatients in trainTest():
    print(len(trainPatients.patientList), len(testPatients.patientList))

899 225
899 225
899 225
899 225
900 224


# Transformer

### Positional Encoding

In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, LayerNormalization, Dropout
from tensorflow.keras.models import Model


def get_angles(pos, i, d_model):
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates


def positional_encoding(position, d_model):
    angle_rads = get_angles(
        np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model
    )

    # apply sin to even indices in the array; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])

    # apply cos to odd indices in the array; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    pos_encoding = angle_rads[np.newaxis, ...]

    return tf.cast(pos_encoding, dtype=tf.float32)

2024-08-14 23:08:34.194614: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-14 23:08:34.215643: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Scaled Dot-Product Attention

In [12]:
def scaled_dot_product_attention(q, k, v, mask):
    matmul_qk = tf.matmul(q, k, transpose_b=True)
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    if mask is not None:
        scaled_attention_logits += mask * -1e9

    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
    output = tf.matmul(attention_weights, v)

    return output, attention_weights

### Multi-Head Attention

In [13]:
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        self.depth = d_model // self.num_heads

        self.wq = Dense(d_model)
        self.wk = Dense(d_model)
        self.wv = Dense(d_model)

        self.dense = Dense(d_model)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]

        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)

        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        scaled_attention, attention_weights = scaled_dot_product_attention(
            q, k, v, mask
        )

        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])

        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))

        output = self.dense(concat_attention)

        return output, attention_weights

### Encoder Layer

In [14]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()

        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = tf.keras.Sequential([Dense(dff, activation="relu"), Dense(d_model)])

        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)

        return out2

### Encoder

In [15]:
class Encoder(tf.keras.layers.Layer):
    def __init__(
        self,
        num_layers,
        d_model,
        num_heads,
        dff,
        input_vocab_size,
        maximum_position_encoding,
        rate=0.1,
    ):
        super(Encoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)

        self.enc_layers = [
            EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)
        ]

        self.dropout = Dropout(rate)

    def call(self, x, training, mask):
        seq_len = tf.shape(x)[1]

        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[tf.newaxis, :seq_len, :]

        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)

        return x

### Decoder

In [16]:
# class Decoder(tf.keras.layers.Layer):
#     def __init__(
#         self,
#         num_layers,
#         d_model,
#         num_heads,
#         dff,
#         target_vocab_size,
#         maximum_position_encoding,
#         rate=0.1,
#     ):
#         super(Decoder, self).__init__()

#         self.d_model = d_model
#         self.num_layers = num_layers

#         self.embedding = Embedding(target_vocab_size, d_model)
#         self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)

#         self.dec_layers = [
#             DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)
#         ]
#         self.dropout = Dropout(rate)

#     def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
#         seq_len = tf.shape(x)[1]
#         attention_weights = {}

#         x = self.embedding(x)
#         x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
#         x += self.pos_encoding[:, :seq_len, :]

#         x = self.dropout(x, training=training)

#         for i in range(self.num_layers):
#             x, block1, block2 = self.dec_layers[i](
#                 x, enc_output, training, look_ahead_mask, padding_mask
#             )

#             attention_weights["decoder_layer{}_block1".format(i + 1)] = block1
#             attention_weights["decoder_layer{}_block2".format(i + 1)] = block2

#         return x, attention_weights

### Transformer

In [47]:
class Transformer(Model):
    def __init__(
        self,
        num_layers,
        d_model,
        num_heads,
        dff,
        input_vocab_size,
        pe_input = 1,
        rate=0.1,
    ):
        super(Transformer, self).__init__()

        target_vocab_size = 1,
        self.encoder = Encoder(
            num_layers, d_model, num_heads, dff, input_vocab_size, pe_input, rate
        )

        self.final_layer = Dense(target_vocab_size, activation="sigmoid")

    def call(self, inp, training, enc_padding_mask=None):
        enc_output = self.encoder(inp, training=training, mask=enc_padding_mask)

        final_output = self.final_layer(enc_output[:, 0, :]) # (batch_size, 1)

        return final_output

### Using transformer

In [44]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    LSTM,
    Dense,
    Dropout,
    Input,
    Concatenate,
    Masking,
    Conv1D,
    MaxPooling1D,
    BatchNormalization,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2


# def createModel2(timeSteps, timeFeatures, staticFeatures):
#     # time series layers
#     timeInputLayer = Input(shape=(timeSteps, timeFeatures))
#     maskingLayer = Masking(mask_value=0.0)(timeInputLayer)
#     # cnnLayer = Conv1D(64, 3, activation="relu", kernel_regularizer=l2(0.01))(maskingLayer)
#     # batNormCnn = BatchNormalization()(cnnLayer)
#     # poolingLayer = MaxPooling1D(2)(batNormCnn)
#     seriesLayer = LSTM(64, return_sequences=True)(maskingLayer)
#     seriesLayer2 = LSTM(64)(seriesLayer)
#     seriesDense = Dense(32, activation="relu")(seriesLayer2)


#     # static layers
#     staticInputLayer = Input(shape=(staticFeatures,))
#     staticLayer = Dense(32, activation="relu")(staticInputLayer)

#     # combine layers
#     combined = Concatenate(axis=1)([seriesDense, staticLayer])
#     dense1 = Dense(16, activation="relu")(combined)
#     dropout1 = Dropout(0.5)(dense1)
#     dense2 = Dense(1, activation="sigmoid")(dropout1)

#     model = Model(inputs=[timeInputLayer, staticInputLayer], outputs=dense2)
#     model.compile(
#         optimizer=Adam(learning_rate=0.0001),
#         loss="binary_crossentropy",
#         metrics=["AUC", "accuracy", "precision", "recall"],
#     )

#     return model


def createTransformerModel(timeSteps, features):
    model = Transformer(
        num_layers=2,
        d_model=512,
        num_heads=8,
        dff=2048,
        input_vocab_size=features,
        pe_input=timeSteps,
    )
    
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss="binary_crossentropy",
        metrics=["AUC", "accuracy", "precision", "recall"],
    )
    
    return model

In [36]:
from utils.prepare_data import normalizeData, patientsToNumpy
from constants import CATEGORICAL_MEASURES
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping


loses = []
aucs = []
accuracies = []
precisions = []
recals = []

train_loss_list = []
val_loss_list = []
models = []

for i, (trainPatients, valPatients, testPatients) in enumerate(trainValTest()):
    npTrainX, categoryEncoder, numericEncoder, oulier, columns = patientsToNumpy(
        trainPatients, 
        hoursPerWindow,
        CATEGORICAL_MEASURES,
        timeSeriesOnly=True,
        fromHour=0,
        toHour=12,
    )

    npTestX, *_ = patientsToNumpy(
        testPatients,
        hoursPerWindow,
        CATEGORICAL_MEASURES,
        columns,
        categoryEncoder,
        numericEncoder,
        oulier,
        timeSeriesOnly=True,
        fromHour=0,
        toHour=12,
    )

    npValX, *_ = patientsToNumpy(
        valPatients,
        hoursPerWindow,
        CATEGORICAL_MEASURES,
        columns,
        categoryEncoder,
        numericEncoder,
        oulier,
        timeSeriesOnly=True,
        fromHour=0,
        toHour=12,
    )

    npTrainX = np.nan_to_num(npTrainX, nan=0)
    npTestX = np.nan_to_num(npTestX, nan=0)
    npValX = np.nan_to_num(npValX, nan=0)

    ################### Static ###################
    staticTrainX = trainPatients.getMeasuresBetween(measureTypes="static")
    staticTestX = testPatients.getMeasuresBetween(measureTypes="static")
    staticValX = valPatients.getMeasuresBetween(measureTypes="static")

    staticTrainX = staticTrainX.drop(columns=["subject_id", "hadm_id", "stay_id", "akd"])
    staticTestX = staticTestX.drop(columns=["subject_id", "hadm_id", "stay_id", "akd"])
    staticValX = staticValX.drop(columns=["subject_id", "hadm_id", "stay_id", "akd"])

    staticTrainX, staticTestX, staticValX = normalizeData(
        staticTrainX, staticTestX, staticValX
    )

    staticLen = len(staticTrainX.columns)

    staticTrainX = staticTrainX.to_numpy().astype(np.float32)
    staticTestX = staticTestX.to_numpy().astype(np.float32)
    staticValX = staticValX.to_numpy().astype(np.float32) # type: ignore

    staticTrainX = np.nan_to_num(staticTrainX, nan=0)
    staticTestX = np.nan_to_num(staticTestX, nan=0)

    ################### labels ###################
    trainY = [p.akdPositive for p in trainPatients]
    testY = [p.akdPositive for p in testPatients]
    valY = [p.akdPositive for p in valPatients]

    # model = createModel2(npTrainX.shape[1], npTrainX.shape[2], staticLen)
    model = createTransformerModel(npTrainX.shape[1], npTrainX.shape[2] + staticLen)

    neg, pos = np.bincount(trainY)
    weight0 = (1 / neg) * (len(trainY)) / 2.0
    weight1 = (1 / pos) * (len(trainY)) / 2.0
    weight = {0: weight0, 1: weight1}

    early_stopping = EarlyStopping(
        monitor="val_loss", patience=50, restore_best_weights=True
    )
    
    # expand static data to be time steps as time series data
    staticTrainX = np.expand_dims(staticTrainX, axis=1)
    staticTrainX = np.repeat(staticTrainX, npTrainX.shape[1], axis=1)
    
    staticTestX = np.expand_dims(staticTestX, axis=1)
    staticTestX = np.repeat(staticTestX, npTestX.shape[1], axis=1)
    
    staticValX = np.expand_dims(staticValX, axis=1)
    staticValX = np.repeat(staticValX, npValX.shape[1], axis=1)
    
    # append to npX
    npTrainX = np.concatenate([npTrainX, staticTrainX], axis=2)
    npTestX = np.concatenate([npTestX, staticTestX], axis=2)
    npValX = np.concatenate([npValX, staticValX], axis=2)
    

    trainY = np.array(trainY)
    history = model.fit(
        npTrainX,
        trainY,
        epochs=1000,
        batch_size=32,
        validation_data=(npValX, np.array(valY)),
        class_weight=weight,
        callbacks=[early_stopping],
    )
    models.append(model)

    testY = np.array(testY)
    loss, auc, accuracy, precison, recal = model.evaluate(npTestX, testY)

    loses.append(loss)
    aucs.append(auc)
    accuracies.append(accuracy)
    precisions.append(precison)
    recals.append(recal)

    train_loss_list.append(history.history['loss'])
    val_loss_list.append(history.history['val_loss'])

    pass

print("Loses:", loses, np.mean(loses), np.std(loses))
print("AUCs:", aucs, np.mean(aucs), np.std(aucs))
print("Accuracies:", accuracies, np.mean(accuracies), np.std(accuracies))
print("Precisions:", precisions, np.mean(precisions), np.std(precisions))
print("Recals:", recals, np.mean(recals), np.std(recals))

Epoch 1/1000


ValueError: Exception encountered when calling Transformer.call().

[1mOnly input tensors may be passed as positional arguments. The following argument value should be passed as a keyword argument: True (of type <class 'bool'>)[0m

Arguments received by Transformer.call():
  • inp=tf.Tensor(shape=(None, 12, 81), dtype=float32)
  • training=True
  • enc_padding_mask=None

In [50]:
model = createTransformerModel(npTrainX.shape[1], npTrainX.shape[2] + staticLen)

history = model.fit(
    npTrainX,
    trainY,
    epochs=1000,
    batch_size=32,
    validation_data=(npValX, np.array(valY)),
    class_weight=weight,
    callbacks=[early_stopping],
)

Epoch 1/1000


ValueError: Exception encountered when calling Encoder.call().

[1mDimensions must be equal, but are 81 and 12 for '{{node transformer_12_1/encoder_12_1/add}} = AddV2[T=DT_FLOAT](transformer_12_1/encoder_12_1/mul, transformer_12_1/encoder_12_1/strided_slice_1)' with input shapes: [?,12,81,512], [1,12,512].[0m

Arguments received by Encoder.call():
  • x=tf.Tensor(shape=(None, 12, 81), dtype=float32)
  • training=True
  • mask=None

In [None]:
import matplotlib.pyplot as plt


plt.figure(figsize=(12, 6))
for i, train_loss in enumerate(train_loss_list):
    plt.plot(
        range(1, len(train_loss) + 1), train_loss, label=f"Fold {i+1} Training Loss"
    )
plt.title("Training Loss for Each Fold")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Plot the validation loss for each fold
plt.figure(figsize=(12, 6))
for i, val_loss in enumerate(val_loss_list):
    plt.plot(range(1, len(val_loss) + 1), val_loss, label=f"Fold {i+1} Validation Loss")
plt.title("Validation Loss for Each Fold")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()