In [37]:
import pandas as pd
import numpy as np

from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score

from keras_preprocessing.text import Tokenizer

import matplotlib.pyplot as plt

In [38]:
import lstm_preprocess

In [39]:
import os
import tensorflow as tf
import random
# fix the random seed for tensorflow models
os.environ['TF_DETERMINISTIC_OPS'] = '1' 
SEED = 39
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# set to use flexible GPU resources  
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)

In [40]:
print(f"Num GPUs Available: {len(tf.config.list_physical_devices('GPU'))}.")

Num GPUs Available: 0.


## Read the data

In [54]:
#data = pd.read_csv("./icd_demos_vitals.csv")
data = pd.read_csv("C:\\Users\\Maria\\Desktop\\data\\icd_demos_vitals.csv")

data.drop("Unnamed: 0", axis=1, inplace=True)

In [55]:
#either keep the patients with 24 hours of admission, or change 23 to sth smaller to include patients with less time steps
df = lstm_preprocess.pad(data, 23, 24, 0)

df

Unnamed: 0,hadm_id,subject_id,mortality,HeartRate,SysBP,DiasBP,MeanBP,RespRate,TempC,SpO2,...,18,19,20,F,M,18-25,25-45,45-65,65-89,89+
0,100061.0,11728.0,1.0,72.0,103.0,66.0,75.0,10.0,0.000000,93.0,...,0.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,100061.0,11728.0,1.0,75.0,93.0,64.0,72.0,9.0,0.000000,91.0,...,0.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,100061.0,11728.0,1.0,73.0,108.0,61.0,71.0,10.0,0.000000,94.0,...,0.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
3,100061.0,11728.0,1.0,75.0,107.0,59.0,71.0,18.0,0.000000,95.0,...,0.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4,100061.0,11728.0,1.0,79.0,118.0,86.0,91.0,16.0,0.000000,95.0,...,0.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53131,199984.0,55617.0,0.0,81.0,103.0,54.0,67.0,17.0,36.777778,94.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
53132,199984.0,55617.0,0.0,79.0,111.0,52.0,65.0,19.0,0.000000,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
53133,199984.0,55617.0,0.0,74.0,119.0,62.0,75.0,23.0,0.000000,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
53134,199984.0,55617.0,0.0,81.0,121.0,63.0,78.0,16.0,0.000000,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [56]:
#remove unnecessary columns
COLUMNS = lstm_preprocess.delete_columns(df)

COLUMNS

['HeartRate',
 'SysBP',
 'DiasBP',
 'MeanBP',
 'RespRate',
 'TempC',
 'SpO2',
 'Glucose',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 'F',
 'M',
 '18-25',
 '25-45',
 '45-65',
 '65-89',
 '89+']

In [44]:
hadm_idx = df['hadm_id'].values.reshape(-1, 24, 1)[:, 0, 0]
hadm_idx

array([100061., 100087., 100104., ..., 199976., 199981., 199984.])

In [57]:
#reshape the matrix to the appropriate format
MATRIX = df[COLUMNS+['mortality']].values
MATRIX = MATRIX.reshape(int(MATRIX.shape[0]/24),24,MATRIX.shape[1])

In [46]:

"""
bool_matrix = (~MATRIX.any(axis=2))
MATRIX[bool_matrix] = np.nan
#MATRIX = lstm_preprocess.ZScoreNormalize(MATRIX)

## restore 3D shape to boolmatrix for consistency
bool_matrix = np.isnan(MATRIX)
MATRIX[bool_matrix] = 0 
   
#permutation = np.random.permutation(MATRIX.shape[0])
#MATRIX = MATRIX[permutation]
#bool_matrix = bool_matrix[permutation]

# X_MATRIX = MATRIX[:,:,0:-1]
X_MATRIX = MATRIX[:,:,0:8] # only use the first 8 temporal features, ignoring demographic data for now
Y_MATRIX = MATRIX[:,:,-1]
sc = MinMaxScaler()
#x_bool_matrix = bool_matrix[:,:,0:-1]
#y_bool_matrix = bool_matrix[:,:,-1]
"""

In [58]:
bool_matrix = (~MATRIX.any(axis=2))
MATRIX[bool_matrix] = np.nan
#MATRIX = lstm_preprocess.ZScoreNormalize(MATRIX)

## restore 3D shape to boolmatrix for consistency
bool_matrix = np.isnan(MATRIX)
MATRIX[bool_matrix] = 0 
   
#permutation = np.random.permutation(MATRIX.shape[0])
#MATRIX = MATRIX[permutation]
#bool_matrix = bool_matrix[permutation]

#X_MATRIX = MATRIX[:,:,0:-7]
X_MATRIX = MATRIX[:,:,0:-2]
Y_MATRIX = MATRIX[:,:,-1]
#x_bool_matrix = bool_matrix[:,:,0:-1]
#y_bool_matrix = bool_matrix[:,:,-1]

In [59]:
pd.value_counts(Y_MATRIX[:, 0])

0.0    1819
1.0     395
dtype: int64

In [60]:
##################################################


tt_split = 0.7 
val_percentage = 0.8

X_TRAIN = X_MATRIX[0:int(tt_split*X_MATRIX.shape[0]),:,:]
Y_TRAIN = Y_MATRIX[0:int(tt_split*Y_MATRIX.shape[0]),:]
Y_TRAIN = Y_TRAIN[:, 0] 
Y_TRAIN = Y_TRAIN.reshape(Y_TRAIN.shape[0], 1)
#Y_TRAIN = Y_TRAIN.reshape(Y_TRAIN.shape[0], Y_TRAIN.shape[1], 1)

X_VAL = X_MATRIX[int(tt_split*X_MATRIX.shape[0]):int(val_percentage*X_MATRIX.shape[0])]
Y_VAL = Y_MATRIX[int(tt_split*Y_MATRIX.shape[0]):int(val_percentage*Y_MATRIX.shape[0])]
Y_VAL = Y_VAL[:, 0] 
Y_VAL = Y_VAL.reshape(Y_VAL.shape[0], 1)

X_TEST = X_MATRIX[int(val_percentage*X_MATRIX.shape[0])::]
Y_TEST = Y_MATRIX[int(val_percentage*X_MATRIX.shape[0])::]
Y_TEST = Y_TEST[:, 0] 
Y_TEST = Y_TEST.reshape(Y_TEST.shape[0], 1)

In [48]:
#train, validation, test split
tt_split = 0.7 
val_percentage = 0.8

train_tail_idx = int(tt_split*X_MATRIX.shape[0])
val_tail_idx = int(val_percentage*X_MATRIX.shape[0])

In [49]:
# extract the admission idx for train/validation/test, used to extract text patient data for prediction 
train_hadm_idx = hadm_idx[:train_tail_idx]
val_hadm_idx = hadm_idx[train_tail_idx:val_tail_idx]
test_hadm_idx = hadm_idx[val_tail_idx::]

In [50]:
X_TRAIN = X_MATRIX[0:train_tail_idx, :, :]
Y_TRAIN = Y_MATRIX[0:train_tail_idx, 0]
# Y_TRAIN = Y_TRAIN.reshape(Y_TRAIN.shape[0], Y_TRAIN.shape[1], 1)

X_VAL = X_MATRIX[train_tail_idx:val_tail_idx]
Y_VAL = Y_MATRIX[train_tail_idx:val_tail_idx, 0]
# Y_VAL = Y_VAL.reshape(Y_VAL.shape[0], Y_VAL.shape[1], 1)
"""
x_val_boolmat = x_bool_matrix[int(tt_split*x_bool_matrix.shape[0]):int(val_percentage*x_bool_matrix.shape[0])]
y_val_boolmat = y_bool_matrix[int(tt_split*y_bool_matrix.shape[0]):int(val_percentage*y_bool_matrix.shape[0])]
y_val_boolmat = y_val_boolmat.reshape(y_val_boolmat.shape[0],y_val_boolmat.shape[1],1)
"""
X_TEST = X_MATRIX[val_tail_idx::]
Y_TEST = Y_MATRIX[val_tail_idx::, 0]
# Y_TEST = Y_TEST.reshape(Y_TEST.shape[0], Y_TEST.shape[1], 1)
"""
x_test_boolmat = x_bool_matrix[int(val_percentage*x_bool_matrix.shape[0])::]
y_test_boolmat = y_bool_matrix[int(val_percentage*y_bool_matrix.shape[0])::]
y_test_boolmat = y_test_boolmat.reshape(y_test_boolmat.shape[0],y_test_boolmat.shape[1],1)

X_TEST[x_test_boolmat] = 0
Y_TEST[y_test_boolmat] = 0
"""

'\nx_test_boolmat = x_bool_matrix[int(val_percentage*x_bool_matrix.shape[0])::]\ny_test_boolmat = y_bool_matrix[int(val_percentage*y_bool_matrix.shape[0])::]\ny_test_boolmat = y_test_boolmat.reshape(y_test_boolmat.shape[0],y_test_boolmat.shape[1],1)\n\nX_TEST[x_test_boolmat] = 0\nY_TEST[y_test_boolmat] = 0\n'

In [61]:
no_feature_cols = X_TRAIN.shape[2]
no_feature_cols

34

In [62]:
X_TRAIN, X_VAL, X_TEST = lstm_preprocess.normalize(X_TRAIN, X_TEST, X_VAL)


Shape should be dx1:  (34,)
  train /= train_std
  val /= train_std
  test /= train_std


In [67]:
#need to reomove nan values,
np.isnan(X_TRAIN).any()


bool_matrix = np.isnan(X_VAL)
X_VAL[bool_matrix] = 0 
np.isnan(X_VAL).any()

bool_matrix = np.isnan(X_TRAIN)
X_TRAIN[bool_matrix] = 0 
np.isnan(X_TRAIN).any()

bool_matrix = np.isnan(X_TEST)
X_TEST[bool_matrix] = 0 
np.isnan(X_TEST).any()

False

## Load text data

In [68]:
text_data = pd.read_csv('C:\\Users\\Maria\\Desktop\\data\\texts.csv')
text_data.drop(['Unnamed: 0'], axis=1, inplace=True)
text_data.head()


Unnamed: 0,text,subject_id,hadm_id,mortality
0,sinus tachycardia delayed precordial r wave tr...,77067,140990.0,0
1,normal sinus rhythm poor r wave progression po...,40304,174997.0,0
2,sinus rhythm probable prior inferior myocardia...,80932,190053.0,0
3,sinus rhythm left ventricular hypertrophy seco...,92752,138578.0,0
4,sinus rhythm marked right axis deviation possi...,18268,128217.0,0


In [69]:
# X, y = text_data['text'], text_data['mortality']

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=39, stratify=y)

In [70]:
train_data = text_data[text_data['hadm_id'].isin(set(train_hadm_idx))]
val_data = text_data[text_data['hadm_id'].isin(set(val_hadm_idx))]
test_data = text_data[text_data['hadm_id'].isin(set(test_hadm_idx))]

X_train, y_train = train_data['text'],  train_data['mortality']
X_val, y_val = val_data['text'],  val_data['mortality']
X_test, y_test = test_data['text'],  test_data['mortality']

In [71]:
# find the joint hadm ids for train/test/val
join_train_hadm_idx = np.unique(train_data['hadm_id'])
join_val_hadm_idx = np.unique(val_data['hadm_id'])
join_test_hadm_idx = np.unique(test_data['hadm_id'])

In [72]:
# modify the time series data to have same hadm id as text data
X_TRAIN = X_TRAIN[np.in1d(train_hadm_idx, join_train_hadm_idx)]
Y_TRAIN = Y_TRAIN[np.in1d(train_hadm_idx, join_train_hadm_idx)]

X_VAL = X_VAL[np.in1d(val_hadm_idx, join_val_hadm_idx)]
Y_VAL = Y_VAL[np.in1d(val_hadm_idx, join_val_hadm_idx)]

X_TEST = X_TEST[np.in1d(test_hadm_idx, join_test_hadm_idx)]
Y_TEST = Y_TEST[np.in1d(test_hadm_idx, join_test_hadm_idx)]


In [73]:
X_train.shape

(1529,)

In [74]:
X_TRAIN.shape

(1529, 24, 34)

In [75]:
NUM_WORDS = 3000

# Tokenize the train text
train_text = X_train.to_numpy()

tokenizer = Tokenizer(
    num_words=NUM_WORDS, 
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', 
    lower=True,
    split=" ",
    char_level=False,
    oov_token='<unk>',
    document_count=0
)

tokenizer.fit_on_texts(train_text)
tokenizer.word_index['<pad>'] = 0
tokenizer.index_word[0] = '<pad>'

In [76]:
MAX_LEN = 1000

train_seqs = tokenizer.texts_to_sequences(train_text)
train_seqs = keras.preprocessing.sequence.pad_sequences(train_seqs, maxlen=MAX_LEN, padding='post')

train_labels = y_train.to_numpy().flatten()

valid_text = X_val.to_numpy()
valid_seqs = tokenizer.texts_to_sequences(valid_text)
valid_seqs = keras.preprocessing.sequence.pad_sequences(valid_seqs, maxlen=MAX_LEN, padding='post')

valid_labels = y_val.to_numpy().flatten()

In [77]:
train_seqs

array([[   4,   79,    8, ...,    7,  133,  198],
       [   1,  787,   10, ...,   62,  344,  784],
       [ 881, 1460,  269, ...,  720,  312,  689],
       ...,
       [  82,   91,  197, ...,  973,   60,    1],
       [1073,  935, 1424, ...,   13, 1243, 1111],
       [   6,   30, 2978, ...,  754, 1373,  153]])

In [78]:
train_seqs.shape

# (30455, 606752) without preprocessing/limiting and truncating

(1529, 1000)

## Use the composite model

In [79]:
def CompositeModel(n_timesteps, n_features, text_input_size): # n_features2=1 for text data
    # classifier 1 (for time series):
    inputs1 = keras.Input(shape=(n_timesteps, n_features))
    output1 = keras.layers.LSTM(64, activation='tanh')(inputs1)

    classifier1 = keras.Model(inputs1, output1, name="classifier1")

    # classifier 2 (for text data)
    inputs2 = keras.Input(text_input_size,)
    x = keras.layers.Embedding(input_dim=NUM_WORDS, output_dim=64, input_length=text_input_size)(inputs2)
    output2 = keras.layers.LSTM(64, activation='tanh')(x)

    classifier2 = keras.models.Model(inputs2, output2, name="classifier2")

    # final prediction
    combined = keras.layers.concatenate([classifier1.output, classifier2.output])
    # combined outputs
    x = keras.layers.Dense(2, activation="relu")(combined)
    outputs3 = keras.layers.Dense(1, activation="sigmoid")(x)

    composite_model = keras.models.Model([classifier1.input, classifier2.input], outputs3)
    
    return composite_model

In [80]:
composite_model = CompositeModel(n_timesteps=24, n_features=no_feature_cols, text_input_size=MAX_LEN)

optimizer = keras.optimizers.Adam(lr=0.0001)
composite_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

composite_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 1000)]       0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 24, 34)]     0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1000, 64)     192000      input_4[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   (None, 64)           25344       input_3[0][0]                    
____________________________________________________________________________________________

In [81]:
#TODO: plot the model structure
keras.utils.plot_model(composite_model, show_shapes=True)


('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [82]:
X_TRAIN.shape

(1529, 24, 34)

In [83]:
train_seqs.shape

(1529, 1000)

In [36]:
# Define the early stopping criteria
early_stopping_accuracy = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True) # patient = 5 or 10 doesn't guarantee find an optimal

# Train the model
# reset_seeds()
classifier_history2 = composite_model.fit([X_TRAIN, train_seqs], 
          y_train, 
          epochs=50,
          batch_size=128,
          shuffle=True, 
          verbose=True, 
          validation_data=([X_VAL, valid_seqs], y_val),
          callbacks=[early_stopping_accuracy])

Error: Pip module Unable to parse debugpy output, please log an issue with https://github.com/microsoft/vscode-jupyter is required for debugging cells. You will need to install it to debug cells.

In [86]:
y_pred = composite_model.predict([X_VAL, valid_seqs])
y_pred_classes = np.array([1 if pred > 0.5 else 0 for pred in y_pred])

# classification report
acc = accuracy_score(y_true=Y_VAL, y_pred=y_pred_classes)
print(acc)

confusion_matrix_df = pd.DataFrame(
        confusion_matrix(y_true=Y_VAL, y_pred=y_pred_classes, labels=[1, 0]),
        index=['True:pos', 'True:neg'], 
        columns=['Pred:pos', 'Pred:neg']
    )
print(confusion_matrix_df)

print(classification_report(y_true=Y_VAL, y_pred=y_pred_classes))

0.8036529680365296
          Pred:pos  Pred:neg
True:pos         0        42
True:neg         1       176
              precision    recall  f1-score   support

         0.0       0.81      0.99      0.89       177
         1.0       0.00      0.00      0.00        42

    accuracy                           0.80       219
   macro avg       0.40      0.50      0.45       219
weighted avg       0.65      0.80      0.72       219



In [None]:
results = composite_model.evaluate(X_TRAIN, Y_TRAIN, batch_size=16, verbose=0)

print("Loss: {:0.4f}".format(results[0]))

In [96]:
train_seqs

array([[   4,   79,    8, ...,    7,  133,  198],
       [   1,  787,   10, ...,   62,  344,  784],
       [ 881, 1460,  269, ...,  720,  312,  689],
       ...,
       [  82,   91,  197, ...,  973,   60,    1],
       [1073,  935, 1424, ...,   13, 1243, 1111],
       [   6,   30, 2978, ...,  754, 1373,  153]])

### Create a model with bias initi and weights on class imbalance

In [102]:
y_train

0       0
3       0
4       0
7       0
9       1
       ..
3572    0
3573    1
3576    0
3580    0
3581    1
Name: mortality, Length: 1529, dtype: int64

In [103]:
labels = pd.Series(list(y_train))

In [104]:
neg, pos = np.bincount(labels)
total = neg + pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))

Examples:
    Total: 1529
    Positive: 273 (17.85% of total)



In [170]:
# you would want to have the classifier heavily weight the few examples that are available. You can do this by passing Keras weights for each class through a parameter. These will cause the model to "pay more attention" to examples from an under-represented class.


# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))


Weight for class 0: 0.61
Weight for class 1: 2.80


In [106]:
initial_bias = np.log([pos/neg])
initial_bias

array([-1.52621555])

In [173]:
def CompositeModel(n_timesteps, n_features, text_input_size, output_bias=None): # n_features2=1 for text data
    if output_bias is not None:
        output_bias = tensorflow.keras.initializers.Constant(output_bias)
    # classifier 1 (for time series):
    inputs1 = keras.Input(shape=(n_timesteps, n_features))
    output1 = keras.layers.LSTM(32, activation='tanh')(inputs1)

    classifier1 = keras.Model(inputs1, output1, name="classifier1")

    # classifier 2 (for text data)
    inputs2 = keras.Input(text_input_size,)
    x = keras.layers.Embedding(input_dim=NUM_WORDS, output_dim=8, input_length=text_input_size)(inputs2)
    output2 = keras.layers.LSTM(32, activation='tanh')(x)

    classifier2 = keras.models.Model(inputs2, output2, name="classifier2")

    # final prediction
    combined = keras.layers.concatenate([classifier1.output, classifier2.output])
    # combined outputs
    x = keras.layers.Dense(8, activation="relu", bias_initializer = output_bias)(combined)
    outputs3 = keras.layers.Dense(1, activation="sigmoid", bias_initializer = output_bias)(x)

    composite_model = keras.models.Model([classifier1.input, classifier2.input], outputs3)
    
    return composite_model

In [191]:
def CompositeModel(n_timesteps, n_features, text_input_size, initial_bias): # n_features2=1 for text data
    # classifier 1 (for time series):
    inputs1 = keras.Input(shape=(n_timesteps, n_features))
    output1 = keras.layers.LSTM(32, activation='tanh', bias_regularizer= tf.keras.regularizers.L1L2(l1=0.01, l2=0.01))(inputs1)
#     output1 = keras.layers.BatchNormalization()(x)

    classifier1 = keras.Model(inputs1, output1, name="classifier1")

    # classifier 2 (for text data)
    inputs2 = keras.Input(text_input_size,)
    x = keras.layers.Embedding(input_dim=NUM_WORDS, output_dim=8, input_length=text_input_size)(inputs2)
    output2 = keras.layers.LSTM(32, activation='tanh')(x)
#     output2 = keras.layers.BatchNormalization()(x)

    classifier2 = keras.models.Model(inputs2, output2, name="classifier2")

    # final prediction
    combined = keras.layers.concatenate([classifier1.output, classifier2.output])
    # combined outputs
    x = keras.layers.Dense(8, activation="relu")(combined)
    outputs3 = keras.layers.Dense(1, activation="sigmoid",
                                 bias_initializer=keras.initializers.Constant(initial_bias))(x)

    composite_model = keras.models.Model([classifier1.input, classifier2.input], outputs3)
    
    optimizer = keras.optimizers.RMSprop(lr=0.001)
    composite_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    
    return composite_model

In [192]:
composite_model = CompositeModel(
    n_timesteps=24, n_features=no_feature_cols, text_input_size=MAX_LEN, initial_bias=initial_bias)

composite_model.summary()

Model: "model_14"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_31 (InputLayer)           [(None, 1000)]       0                                            
__________________________________________________________________________________________________
input_30 (InputLayer)           [(None, 24, 34)]     0                                            
__________________________________________________________________________________________________
embedding_14 (Embedding)        (None, 1000, 8)      24000       input_31[0][0]                   
__________________________________________________________________________________________________
lstm_28 (LSTM)                  (None, 32)           8576        input_30[0][0]                   
___________________________________________________________________________________________

In [193]:
# Define the early stopping criteria
early_stopping_accuracy = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True) # patient = 5 or 10 doesn't guarantee find an optimal

# Train the model
# reset_seeds()
classifier_history = composite_model.fit([X_TRAIN, train_seqs], 
          y_train, 
          epochs=50,
          batch_size=32,
          shuffle=True, 
          verbose=True, 
          validation_data=([X_VAL, valid_seqs], y_val),
#           callbacks=[early_stopping_accuracy])
          callbacks=[early_stopping_accuracy],
          class_weight=class_weight)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50


In [194]:
y_pred = composite_model.predict([X_VAL, valid_seqs])
y_pred_classes = np.array([1 if pred > 0.5 else 0 for pred in y_pred])

# classification report
acc = accuracy_score(y_true=Y_VAL, y_pred=y_pred_classes)
print(acc)
print('ROC AUC SCORE Test')
print(roc_auc_score(Y_VAL,y_pred_classes))
confusion_matrix_df = pd.DataFrame(
        confusion_matrix(y_true=Y_VAL, y_pred=y_pred_classes, labels=[1, 0]),
        index=['True:pos', 'True:neg'], 
        columns=['Pred:pos', 'Pred:neg']
    )
print(confusion_matrix_df)

print(classification_report(y_true=Y_VAL, y_pred=y_pred_classes))

0.771689497716895
ROC AUC SCORE Test
0.5137207425343019
          Pred:pos  Pred:neg
True:pos         4        38
True:neg        12       165
              precision    recall  f1-score   support

         0.0       0.81      0.93      0.87       177
         1.0       0.25      0.10      0.14        42

    accuracy                           0.77       219
   macro avg       0.53      0.51      0.50       219
weighted avg       0.70      0.77      0.73       219



In [185]:
composite_model = CompositeModel(n_timesteps=24, n_features=no_feature_cols, text_input_size=MAX_LEN)

#optimizer = keras.optimizers.Adam(lr=0.0009)
optimizer = keras.optimizers.RMSprop(lr=0.001, epsilon=1e-08)
composite_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

composite_model.summary()

TypeError: CompositeModel() missing 1 required positional argument: 'initial_bias'

In [175]:
#TODO: plot the model structure
keras.utils.plot_model(composite_model, show_shapes=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [176]:
# Define the early stopping criteria
early_stopping_accuracy = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True) # patient = 5 or 10 doesn't guarantee find an optimal

# Train the model
# reset_seeds()
classifier_history2 = composite_model.fit([X_TRAIN, train_seqs], 
          y_train, 
          epochs=50,
          batch_size=512,
          shuffle=True, 
          verbose=True, 
          validation_data=([X_VAL, valid_seqs], y_val),
          class_weight=class_weight,
          callbacks=[early_stopping_accuracy])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50


In [178]:
y_pred = composite_model.predict([X_VAL, valid_seqs])
y_pred_classes = np.array([1 if pred > 0.5 else 0 for pred in y_pred])

# classification report
acc = accuracy_score(y_true=Y_VAL, y_pred=y_pred_classes)
print(acc)
print('ROC AUC SCORE Test')
print(roc_auc_score(Y_VAL,y_pred_classes))
confusion_matrix_df = pd.DataFrame(
        confusion_matrix(y_true=Y_VAL, y_pred=y_pred_classes, labels=[1, 0]),
        index=['True:pos', 'True:neg'], 
        columns=['Pred:pos', 'Pred:neg']
    )
print(confusion_matrix_df)

print(classification_report(y_true=Y_VAL, y_pred=y_pred_classes))

0.8036529680365296
ROC AUC SCORE Test
0.5062550443906376
          Pred:pos  Pred:neg
True:pos         1        41
True:neg         2       175
              precision    recall  f1-score   support

         0.0       0.81      0.99      0.89       177
         1.0       0.33      0.02      0.04        42

    accuracy                           0.80       219
   macro avg       0.57      0.51      0.47       219
weighted avg       0.72      0.80      0.73       219

