In [2]:
from keras import regularizers
from keras.models import Model
# noinspection PyPep8Naming
from keras import backend as K
from keras.layers import Input, Softmax, Embedding, Add, Lambda, Dense

from keras_transformer.extras import ReusableEmbedding, TiedOutputEmbedding
from keras_transformer.position import TransformerCoordinateEmbedding
from keras_transformer.transformer import TransformerACT, TransformerBlock

Using TensorFlow backend.


## Preparing Data

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
from keras import layers
from keras.models import Sequential
from keras import Model
from keras.layers import Masking, Dense, GRU, LSTM, InputLayer, Bidirectional, TimeDistributed, Embedding, Activation, Input, Concatenate
from keras.optimizers import Adam
from sklearn.metrics import classification_report
from tensorflow.keras import layers

In [16]:
# Preparing data.
# Rows are grouped by PatientID; every group becomes one variable-length
# sequence (presumably one row per visit -- confirm against the CSV schema).
FEATURE_COLUMNS = slice(7, 39)  # positional columns fed to the model (32 features)
LABEL_COLUMN = -1               # the last column holds the per-row label


def _sequences_by_patient(frame, patient_ids):
    """Split `frame` into per-patient feature and label sequences.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with a ``PatientID`` column.
    patient_ids : iterable
        IDs to extract; one sequence is produced per ID, in the given order.

    Returns
    -------
    (features, labels) : tuple of lists
        ``features[i]`` is the 2-D array of FEATURE_COLUMNS for patient i and
        ``labels[i]`` is the 1-D array taken from LABEL_COLUMN, both in the
        row order of `frame`.
    """
    features, labels = [], []
    for patient_id in patient_ids:
        rows = frame[frame.PatientID == patient_id].values
        features.append(rows[:, FEATURE_COLUMNS])
        labels.append(rows[:, LABEL_COLUMN])
    return features, labels


train = pd.read_csv("../../train_1121.csv")
test = pd.read_csv("../../test_1121.csv")

PatientID = train.PatientID.unique()
PatientID_t = test.PatientID.unique()
x_train, y_train = _sequences_by_patient(train, PatientID)
x_test, y_test = _sequences_by_patient(test, PatientID_t)

special_value = 0
max_seq_len = 25

# Pad at the end AND truncate at the end, so that position t of a padded row
# always corresponds to row t of the patient's sequence. The evaluation cells
# rely on this when they slice predictions with [:len(patient)].
# NOTE(review): pad_sequences defaults to dtype='int32', so non-integer
# feature values would be silently truncated -- confirm the feature columns
# are integer-valued, or pass dtype='float32'.
pad_kwargs = dict(padding='post', truncating='post', maxlen=max_seq_len)
x_train_padded = tf.keras.preprocessing.sequence.pad_sequences(x_train, value=special_value, **pad_kwargs)
x_test_padded = tf.keras.preprocessing.sequence.pad_sequences(x_test, value=special_value, **pad_kwargs)
# Labels are padded with the default value 0, i.e. padded time steps are
# encoded as class 0 once converted to one-hot below.
y_train_padded = tf.keras.preprocessing.sequence.pad_sequences(y_train, **pad_kwargs)
y_test_padded = tf.keras.preprocessing.sequence.pad_sequences(y_test, **pad_kwargs)

# One-hot encode the two classes for the categorical cross-entropy loss.
cat_train_tags_y = keras.utils.to_categorical(y_train_padded, 2)
cat_test_tags_y = keras.utils.to_categorical(y_test_padded, 2)

In [17]:
# Sanity check: expected layout is (n_patients, max_seq_len, n_features).
x_train_padded.shape

(1946, 25, 32)

## Hyperparameters

In [18]:
# Dimensions read off the padded arrays built in the data-preparation cell.
# NOTE: `batch_size` here is the number of training sequences, not the
# mini-batch size (the fit cells pass batch_size=32 explicitly).
batch_size, max_seq_length, x_feature_size = x_train_padded.shape[:3]
y_time_steps, y_feature_size = cat_train_tags_y.shape[1:3]

# Padding value used for the feature sequences.
special_value = 0
# Declared for experimentation; not referenced by the model cells below.
hidden_size = 256

# Transformer architecture.
transformer_depth = 4
num_heads = 4
transformer_dropout = 0.1

# Regularisation weights.
l2_reg_penalty = 1e-6
confidence_penalty_weight = 0.1

## Model (Vanilla Transformer)

In [19]:
"""
A model which is almost identical to the one described by OpenAI in paper
"Improving Language Understanding by Generative Pre-Training", except
that it uses L2 regularization of the word embedding matrix,
instead of the dropout.
"""
# don't know how masking works here
panss_scores_input = Input(shape=(max_seq_length, x_feature_size), name='panss_scores')
l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                  else None)
dim_convertion_layer = TimeDistributed(Dense(2))
prediction_layer = Activation('softmax')
output_softmax_layer = Softmax(name='leadstatus_predictions')
panss_scores = panss_scores_input
for i in range(transformer_depth):
    panss_scores = (
        TransformerBlock(
            name='transformer' + str(i), num_heads=num_heads,
            residual_dropout=transformer_dropout,
            attention_dropout=transformer_dropout,
            use_masking=False,
            vanilla_wiring=True)
        (panss_scores))
lead_status_predictions = prediction_layer(
    dim_convertion_layer(panss_scores))

model = Model(inputs=[panss_scores_input], outputs=[lead_status_predictions])
model.compile(optimizer=Adam(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy']) # can change this to f1?
print(model.summary())

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
panss_scores (InputLayer)       (None, 25, 32)       0                                            
__________________________________________________________________________________________________
transformer0_self_attention (Mu (None, 25, 32)       4096        panss_scores[0][0]               
__________________________________________________________________________________________________
transformer0_dropout (Dropout)  (None, 25, 32)       0           transformer0_self_attention[0][0]
                                                                 transformer0_transition[0][0]    
__________________________________________________________________________________________________
transformer0_add (Add)          (None, 25, 32)       0           panss_scores[0][0]               
          

In [20]:
# Train on the padded sequences: 10 epochs, mini-batches of 32 sequences.
model.fit(
    x=x_train_padded,
    y=cat_train_tags_y,
    batch_size=32,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a55a9cfd0>

In [21]:
# Evaluate on the training set, scoring only the real (un-padded) time steps.
# Row i of the padded predictions lines up with y_train[i]; padding is appended
# at the end, so the first len(y_train[i]) positions are the real ones.
y_pred_padded_train = np.argmax(model.predict(x_train_padded), axis=-1)
y_pred_train = np.concatenate([
    padded_row[:len(true_labels)]
    for padded_row, true_labels in zip(y_pred_padded_train, y_train)
])
y_train_flatten = np.concatenate(y_train)

# classification_report is imported once in the notebook's import cell.
print(classification_report(y_train_flatten, y_pred_train))

              precision    recall  f1-score   support

           0       0.86      0.98      0.91     12743
           1       0.88      0.49      0.63      4133

    accuracy                           0.86     16876
   macro avg       0.87      0.74      0.77     16876
weighted avg       0.86      0.86      0.84     16876



In [22]:
# Evaluate on the held-out test set, scoring only the real (un-padded) time
# steps. Row i of the padded predictions lines up with y_test[i]; padding is
# appended at the end, so the first len(y_test[i]) positions are the real ones.
y_pred_padded = np.argmax(model.predict(x_test_padded), axis=-1)
y_pred = np.concatenate([
    padded_row[:len(true_labels)]
    for padded_row, true_labels in zip(y_pred_padded, y_test)
])
y_test_flatten = np.concatenate(y_test)

# classification_report is imported once in the notebook's import cell.
print(classification_report(y_test_flatten, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.97      0.91      3098
           1       0.83      0.45      0.59       973

    accuracy                           0.85      4071
   macro avg       0.84      0.71      0.75      4071
weighted avg       0.85      0.85      0.83      4071



## Model (Universal Transformer)

In [158]:
# Universal Transformer: a single TransformerBlock (shared weights) is applied
# `transformer_depth` times. After every application the TransformerACT layer
# (named 'adaptive_computation_time') returns the input for the next step and
# an `act_output`; the last `act_output` is used as the sequence
# representation for classification.
#
# NOTE(review): this block is built with use_masking=True whereas the vanilla
# model uses use_masking=False. The flag appears to control causal
# (look-ahead) masking in keras-transformer, not padding masking -- confirm
# which behaviour is intended for both models.
# NOTE(review): TransformerCoordinateEmbedding is imported but not applied
# before each step, so no position/step information is injected -- confirm.
panss_scores_input = Input(shape=(max_seq_length, x_feature_size), name='panss_scores')

transformer_act_layer = TransformerACT(name='adaptive_computation_time')
transformer_block = TransformerBlock(
    name='transformer', num_heads=num_heads,
    residual_dropout=transformer_dropout,
    attention_dropout=transformer_dropout,
    use_masking=True, vanilla_wiring=False)

next_step_input = panss_scores_input
# Fallback so `act_output` is defined even when transformer_depth == 0.
act_output = next_step_input

for i in range(transformer_depth):
    next_step_input = transformer_block(next_step_input)
    next_step_input, act_output = transformer_act_layer(next_step_input)

# Must be called once after the last ACT step, before the output is consumed.
transformer_act_layer.finalize()

# Project every time step onto the label classes, then normalise.
dim_convertion_layer = TimeDistributed(Dense(y_feature_size))
prediction_layer = Activation('softmax')
lead_status_predictions = prediction_layer(
    dim_convertion_layer(act_output))

model = Model(inputs=[panss_scores_input], outputs=[lead_status_predictions])
model.compile(optimizer=Adam(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy']) # can change this to f1?
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a trailing "None").
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
panss_scores (InputLayer)       (None, 25, 30)       0                                            
__________________________________________________________________________________________________
transformer_self_attention (Mul (None, 25, 30)       3600        panss_scores[0][0]               
                                                                 adaptive_computation_time[0][0]  
                                                                 adaptive_computation_time[1][0]  
__________________________________________________________________________________________________
transformer_add (Add)           (None, 25, 30)       0           panss_scores[0][0]               
                                                                 transformer_self_attention[0][0] 
          

In [159]:
# Train on the padded sequences: 10 epochs, mini-batches of 32 sequences.
model.fit(
    x=x_train_padded,
    y=cat_train_tags_y,
    batch_size=32,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1d7a58a590>

In [160]:
# Evaluate on the held-out test set, scoring only the real (un-padded) time
# steps. Row i of the padded predictions lines up with y_test[i]; padding is
# appended at the end, so the first len(y_test[i]) positions are the real ones.
y_pred_padded = np.argmax(model.predict(x_test_padded), axis=-1)
y_pred = np.concatenate([
    padded_row[:len(true_labels)]
    for padded_row, true_labels in zip(y_pred_padded, y_test)
])
y_test_flatten = np.concatenate(y_test)

# classification_report is imported once in the notebook's import cell.
print(classification_report(y_test_flatten, y_pred))

              precision    recall  f1-score   support

           0       0.82      0.94      0.88      3098
           1       0.64      0.34      0.44       973

    accuracy                           0.80      4071
   macro avg       0.73      0.64      0.66      4071
weighted avg       0.78      0.80      0.77      4071

