# AD-LSTM Proof of Concept

This notebook is a proof of concept for using LSTM RNNs for anomaly detection purposes in time series, sequential data.  
In this notebook, we will:
- Create sequential input/output mapping of 0-100, e.g (1, 2, 3, ... , 100)
    - Create 'data' numpy array of 0-99
    - Create 'target' numpy array of 1-100
- Build LSTM model with Keras
- Make predictions based on our original data. Technically, the model is overfit, but it is for anomaly detection proof of concept only as we can compare the prediction to an erroneous 'dataset'. 
- Print a heatmap of potential anomalies

In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.layers.core import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

Using TensorFlow backend.


In [2]:
patient = pd.read_excel('/Users/bwi/Documents/AD-LSTM-Benchmark/testing/dummy_data.xlsx', header=None)

In [3]:
np.random.seed(7)

In [4]:
# Create data
data = patient[:-1] # all but last
target = patient[1:] # all but first

data = np.array(data, dtype=float)
target = np.array(target, dtype=float)

# Insert Erros/Anomalies into the test set
'''
test[0][5] = 66.0
test[0][10] = 2
test[0][30] = 50
test[0][35] = 10
test[0][56] = 80
test[0][89] = 100
test[0][90] = 80
test[0][67] = 70
test = np.array(test, dtype=float)
'''

'\ntest[0][5] = 66.0\ntest[0][10] = 2\ntest[0][30] = 50\ntest[0][35] = 10\ntest[0][56] = 80\ntest[0][89] = 100\ntest[0][90] = 80\ntest[0][67] = 70\ntest = np.array(test, dtype=float)\n'

In [5]:
#print(target)

In [6]:
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data)
target = scaler.fit_transform(target)
# Reshape data to input into model
data = data.reshape((1, 99, 3))
target = target.reshape((1, 99, 3))
#test = test.reshape((1, 1, 100))
#x_test = [i for i in range(100, 200)]
#x_test = np.array(x_test).reshape((1, 1, 100));
#y_test = [i for i in range(101, 201)]
#y_test=np.array(y_test).reshape(1,1,100)


In [7]:
print(data.shape)
print(target.shape)
print(data)

(1, 99, 3)
(1, 99, 3)
[[[ 0.          0.          0.        ]
  [ 0.01020408  0.5         0.5       ]
  [ 0.02040816  0.          1.        ]
  [ 0.03061224  1.          0.        ]
  [ 0.04081633  0.          0.        ]
  [ 0.05102041  0.5         1.        ]
  [ 0.06122449  0.          0.        ]
  [ 0.07142857  1.          0.5       ]
  [ 0.08163265  0.          1.        ]
  [ 0.09183673  0.5         0.        ]
  [ 0.10204082  0.          0.5       ]
  [ 0.1122449   1.          1.        ]
  [ 0.12244898  0.          0.        ]
  [ 0.13265306  0.5         0.        ]
  [ 0.14285714  0.          1.        ]
  [ 0.15306122  1.          0.        ]
  [ 0.16326531  0.          0.5       ]
  [ 0.17346939  0.5         1.        ]
  [ 0.18367347  0.          0.        ]
  [ 0.19387755  1.          0.5       ]
  [ 0.20408163  0.          1.        ]
  [ 0.21428571  0.5         0.        ]
  [ 0.2244898   0.          0.        ]
  [ 0.23469388  1.          1.        ]
  [ 0.24489796  0.

In [8]:
# Build model
model = Sequential()
model.add(LSTM(3, batch_input_shape=(3, 99, 3), return_sequences=True, stateful=True))
model.add(LSTM(3, batch_input_shape=(3, 99, 3), return_sequences=True, stateful=True))
model.add(Dense(3))
model.add(Activation('linear'))

model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
for i in range(5000):
    model.fit(data, target, epochs=1, batch_size=3, verbose=2, shuffle=False)
    model.reset_states()

ValueError: In a stateful network, you should only pass inputs with a number of samples that can be divided by the batch size. Found: 1 samples

In [None]:
predict = model.predict(data)

In [None]:
print(predict)

In [None]:
print(predict)

In [None]:
#loss_and_metrics = model.evaluate(data, target, batch_size=128)

In [None]:
#print(loss_and_metrics)

In [None]:
error = (((test - predict)**2) / 100)
error = np.round(error, decimals=2)

In [None]:
error = error[0][0]

In [None]:
error = np.reshape(error, (10, 10))

In [None]:
print(error)

In [None]:
import matplotlib.pyplot as plt
plt.imshow(error, cmap='coolwarm')
plt.colorbar(orientation='vertical')
plt.title("Potential Anomalies (MSE)")
plt.show()