In [33]:
#time series anomaly detection using RNN
 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [34]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score


In [35]:
# read data
df = pd.read_csv('../../data/m16_cpu_usage_v2_10minutes_anomaly.csv')

In [36]:
df.head()

Unnamed: 0,time,machine_id,cpu_util,anomaly
0,2017-10-03 00:00:00,m16,0.3,0
1,2017-10-03 00:10:00,m16,0.34,0
2,2017-10-03 00:20:00,m16,0.253333,0
3,2017-10-03 00:30:00,m16,0.2,0
4,2017-10-03 00:40:00,m16,0.2,0


In [37]:
# data preprocessing
df['time'] = pd.to_datetime(df['time'])
df = df.set_index('time')
df = df.sort_index()


In [38]:
df['cpu_util'] = (df['cpu_util']/100).astype(float)

In [39]:
X = df['cpu_util'].values
y = df['anomaly'].values


In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [50]:
X_train

array([0.003     , 0.0034    , 0.00253333, ..., 0.446     , 0.44808333,
       0.446     ])

In [56]:
X_train.shape

(2602,)

In [63]:
# data normalization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, 1))
X_test = scaler.transform(X_test.reshape(-1, 1))


In [64]:
# data reshape
X_train = X_train.reshape(-1, 1, 1)
X_test = X_test.reshape(-1, 1, 1)

X_train.shape

(2602, 1, 1)

In [70]:
# build model
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128, input_shape=(1, 1), return_sequences=True))
model.add(tf.keras.layers.LSTM(64))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [71]:
# train model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x27e45cb5510>

In [72]:
model.predict(X_test)



array([[0.13345379],
       [0.13655853],
       [0.13323437],
       [0.13507243],
       [0.0406661 ],
       [0.08466595],
       [0.09074222],
       [0.13873824],
       [0.10375137],
       [0.1094388 ],
       [0.14529794],
       [0.15154412],
       [0.14929688],
       [0.14470884],
       [0.14571151],
       [0.14695814],
       [0.14695814],
       [0.14303117],
       [0.14547507],
       [0.15549687],
       [0.09825583],
       [0.05502931],
       [0.07985581],
       [0.0874793 ],
       [0.08703308],
       [0.1019842 ],
       [0.13881394],
       [0.13768232],
       [0.13851142],
       [0.13343549],
       [0.08368428],
       [0.14815369],
       [0.14465003],
       [0.0230414 ],
       [0.15117782],
       [0.14815369],
       [0.1491159 ],
       [0.14459127],
       [0.14935724],
       [0.1265683 ],
       [0.14592858],
       [0.14779416],
       [0.1444346 ],
       [0.14439546],
       [0.07081529],
       [0.1442781 ],
       [0.14341985],
       [0.145

In [79]:
model.predict([[[0.1]]])



array([[0.11958906]], dtype=float32)