In [1]:
# multivariate multi-step stacked lstm example
import numpy as np
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Bidirectional
import pandas as pd
import pickle as pkl
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix, classification_report, roc_auc_score

In [2]:
nthu_train_X = pkl.load(open("../input/ddd-data-preparation-nthu/nthu_train_X", 'rb'))
nthu_train_y = pkl.load(open("../input/ddd-data-preparation-nthu/nthu_train_y", 'rb'))

nthu_test_X = pkl.load(open("../input/ddd-data-preparation-nthu/nthu_test_X", 'rb'))
nthu_test_y = pkl.load(open("../input/ddd-data-preparation-nthu/nthu_test_y", 'rb'))

In [3]:
nthu_train_y=np.array([int(nthu_train_y[i][0]/2) for i in range(len(nthu_train_y))])
nthu_test_y=np.array([int(nthu_test_y[i][0]/2) for i in range(len(nthu_test_y))])

In [4]:
n_steps_in, n_features, n_steps_out= 150, 8, 1

# define model
model = Sequential()
model.add(Bidirectional(LSTM(100, activation='tanh'), input_shape=(n_steps_in, n_features)))
model.add(Dense(n_steps_out, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')
# fit model
model.fit(nthu_train_X, np.ravel(nthu_train_y), batch_size=8, epochs=32, verbose=2)

Epoch 1/32
52620/52620 - 666s - loss: 0.3944
Epoch 2/32
52620/52620 - 666s - loss: 0.1771
Epoch 3/32
52620/52620 - 666s - loss: 0.0931
Epoch 4/32
52620/52620 - 667s - loss: 0.0589
Epoch 5/32
52620/52620 - 666s - loss: 0.0437
Epoch 6/32
52620/52620 - 664s - loss: 0.0358
Epoch 7/32
52620/52620 - 664s - loss: 0.0296
Epoch 8/32
52620/52620 - 666s - loss: 0.0261
Epoch 9/32
52620/52620 - 665s - loss: 0.0240
Epoch 10/32
52620/52620 - 666s - loss: 0.0225
Epoch 11/32
52620/52620 - 671s - loss: 0.0213
Epoch 12/32
52620/52620 - 666s - loss: 0.0203
Epoch 13/32
52620/52620 - 664s - loss: 0.0209
Epoch 14/32
52620/52620 - 666s - loss: 0.0212
Epoch 15/32
52620/52620 - 666s - loss: 0.0229
Epoch 16/32
52620/52620 - 669s - loss: 0.0197
Epoch 17/32
52620/52620 - 668s - loss: 0.0188
Epoch 18/32
52620/52620 - 667s - loss: 0.0198
Epoch 19/32
52620/52620 - 668s - loss: 0.0186
Epoch 20/32
52620/52620 - 666s - loss: 0.0228
Epoch 21/32
52620/52620 - 665s - loss: 0.0239
Epoch 22/32
52620/52620 - 663s - loss: 0.05

<tensorflow.python.keras.callbacks.History at 0x7f13602ce810>

In [5]:
# demonstrate prediction
yhat = model.predict_classes(nthu_test_X, verbose=1)
print(yhat)

[[1]
 [0]
 [1]
 ...
 [1]
 [1]
 [1]]


In [6]:
mean_squared_error(np.squeeze(yhat),nthu_test_y)

0.4015530222208675

In [7]:
def display_test_scores(test, pred):
    str_out = ""
    str_out += ("TEST SCORES\n")
    str_out += ("\n")

    #print accuracy
    accuracy = accuracy_score(test, pred)
    str_out += ("ACCURACY: {:.4f}\n".format(accuracy))
    str_out += ("\n")

    #print AUC score
    auc = roc_auc_score(test, pred)
    str_out += ("AUC: {:.4f}\n".format(auc))
    str_out += ("\n")

    #print confusion matrix
    str_out += ("CONFUSION MATRIX:\n")
    conf_mat = confusion_matrix(test, pred)
    str_out += ("{}".format(conf_mat))
    str_out += ("\n")
    str_out += ("\n")

    #print FP, FN
    str_out += ("FALSE POSITIVES:\n")
    fp = conf_mat[1][0]
    pos_labels = conf_mat[1][0]+conf_mat[1][1]
    str_out += ("{} out of {} positive labels ({:.4f}%)\n".format(fp, pos_labels,fp/pos_labels))
    str_out += ("\n")

    str_out += ("FALSE NEGATIVES:\n")
    fn = conf_mat[0][1]
    neg_labels = conf_mat[0][1]+conf_mat[0][0]
    str_out += ("{} out of {} negative labels ({:.4f}%)\n".format(fn, neg_labels, fn/neg_labels))
    str_out += ("\n")

    #print classification report
    str_out += ("PRECISION, RECALL, F1 scores:\n")
    str_out += ("{}".format(classification_report(test, pred)))
    
    false_indexes = np.where(test != pred)
    return str_out, false_indexes

In [8]:
y_pred = np.squeeze(yhat)
results, false = display_test_scores(nthu_test_y, y_pred)
print(results)
y_pred.shape

TEST SCORES

ACCURACY: 0.5984

AUC: 0.5818

CONFUSION MATRIX:
[[15307 47051]
 [ 5644 63226]]

FALSE POSITIVES:
5644 out of 68870 positive labels (0.0820%)

FALSE NEGATIVES:
47051 out of 62358 negative labels (0.7545%)

PRECISION, RECALL, F1 scores:
              precision    recall  f1-score   support

           0       0.73      0.25      0.37     62358
           1       0.57      0.92      0.71     68870

    accuracy                           0.60    131228
   macro avg       0.65      0.58      0.54    131228
weighted avg       0.65      0.60      0.55    131228



(131228,)

In [9]:
model.save("lstm_bidirectional_v2.h5")