In [None]:
# This is the implementation of the EncDec-AD model based on the paper "LSTM-based Encoder-Decoder for Multi-sensor Anomaly Detection". And a slight variantion "TimeNet" based on the paper "TimeNet: Pre-trained deep recurrent neural network for time series classiﬁcation", which has been shown that achives better performance in the time series anomaly detection scenario. The only difference is the second model feeds constants to its decoder as input.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# Definition of the EncDec-AD model

In [7]:
class EncDecAD(object):
    def __init__(self,latent_dim,inputs):
        self.batch_num = inputs[0].get_shape().as_list()[0]
        self.input_dim = inputs[0].get_shape().as_list()[1]
        
        self.enc_cell = tf.nn.rnn_cell.LSTMCell(latent_dim)
        self.dec_cell = tf.nn.rnn_cell.LSTMCell(latent_dim)
        
        with tf.variable_scope('encoder1'):
            self.enc_outputs, self.enc_state = tf.nn.static_rnn(self.enc_cell, inputs, 
                                                         dtype=tf.float32)
        with tf.variable_scope('decoder1') as scope_dec:
            w = tf.Variable(tf.truncated_normal([latent_dim,self.input_dim],
                                               dtype=tf.float32),name="w")
            b = tf.Variable(tf.constant(0.1,shape=[self.input_dim],
                                               dtype=tf.float32),name="b")
            dec_state = self.enc_state
            dec_input = tf.zeros(tf.shape(inputs[0]),dtype=tf.float32)
            dec_outputs = []
            for time_steps in range(len(inputs)):
                if time_steps>0:
                    scope_dec.reuse_variables()
                dec_input, dec_state = self.dec_cell(dec_input,dec_state)
                dec_input = tf.matmul(dec_input,w) + b
                dec_outputs.append(dec_input)
            dec_outputs = dec_outputs[::-1]
            # convert output to the form [batch_size, time_steps, input_dim]
            self.output = tf.transpose(tf.stack(dec_outputs),[1,0,2]) 
        #Calculate loss
        self.input = tf.transpose(tf.stack(inputs),[1,0,2])
        self.loss = tf.reduce_mean(tf.square(self.input - self.output))
        
        #Optimization
        self.train = tf.train.AdamOptimizer().minimize(self.loss)

# Definition of the TimeNet model

In [21]:
class TimeNet(object):
    def __init__(self,latent_dim,inputs):
        self.batch_num = inputs[0].get_shape().as_list()[0]
        self.input_dim = inputs[0].get_shape().as_list()[1]
        
        self.enc_cell = tf.nn.rnn_cell.LSTMCell(latent_dim)
        self.dec_cell = tf.nn.rnn_cell.LSTMCell(latent_dim)
        
        with tf.variable_scope('encoder6'):
            self.enc_outputs, self.enc_state = tf.nn.static_rnn(self.enc_cell, inputs, 
                                                         dtype=tf.float32)
        with tf.variable_scope('decoder6') as scope_dec:
            w = tf.Variable(tf.truncated_normal([latent_dim,self.input_dim],
                                               dtype=tf.float32),name="w")
            b = tf.Variable(tf.constant(0.1,shape=[self.input_dim],
                                               dtype=tf.float32),name="b")
            
            dec_inputs = [tf.zeros(tf.shape(inputs[0]),dtype=tf.float32)
                                  for _ in range(len(inputs))]
            dec_outputs, dec_state = tf.nn.static_rnn(self.dec_cell, dec_inputs,
                                                     initial_state=self.enc_state,
                                                     dtype=tf.float32)
            dec_outputs = dec_outputs[::-1]
            dec_output = tf.transpose(tf.stack(dec_outputs),[1,0,2])
            w = tf.tile(tf.expand_dims(w,0),[self.batch_num,1,1])
            self.output = tf.matmul(dec_output,w)+b
            
        #Calculate loss
        self.input = tf.transpose(tf.stack(inputs),[1,0,2])
        self.loss = tf.reduce_mean(tf.square(self.input - self.output))
        
        #Optimization
        self.train = tf.train.AdamOptimizer().minimize(self.loss)

# Experiments

In [4]:
# read data 
def read():
    folder = "C:/Users/Bin/Documents/Datasets/KDD99/"
    col_name_suffix = "columns.txt"
    dataset_suffix = "kddcup.data_10_percent_corrected"
    
    with open(folder+col_name_suffix) as col_file:
        line = col_file.readline()
    columns = line.split('.')
    col_names = []
    col_types = []
    for col in columns:
        col_names.append(col.split(': ')[0].strip())
        col_types.append(col.split(': ')[1])
    col_names.append("label")
    df = pd.read_csv(folder+dataset_suffix,names=col_names)
    data = df.iloc[:,np.array(pd.Series(col_types)=="continuous")].as_matrix()   #Select only numeric features
    label = df.iloc[:,-1]

    # Scaling
    scaler = MinMaxScaler()
    scaler.fit(data)
    dataset = scaler.transform(data) 
    return dataset

In [5]:

dataset = read()

time_steps = 10
latent_dim = 10
n_epoch = 100
batch_size = 100
input_dim = dataset.shape[1]

# reshape the dataset to a list, each element with the shape [batch_size, time_steps, input_dim]
size = dataset.shape[0]//(batch_size*time_steps)
data_input = np.reshape(dataset[:size*batch_size*time_steps],(size,batch_size,time_steps,input_dim))
array = [t for t in data_input]
del dataset, data_input

# inputs placeholder
p_input = tf.placeholder(tf.float32, [batch_size, time_steps, input_dim])
p_inputs = [t for t in tf.split(p_input, time_steps, 1)]
p_inputs = [tf.squeeze(t, [1]) for t in tf.split(p_input, time_steps, 1)]


In [6]:
# EncDec-AD model

encdecad = EncDecAD(latent_dim, p_inputs)
with tf.Session() as sess1:
    sess1.run(tf.global_variables_initializer())
    batches = len(array)//batch_size
    for i in range(n_epoch):
        for j in range(batches):
            loss_val, _ = sess1.run([encdecad.loss,encdecad.train],{p_input:array[j]})
        print("Epoch %d: " % (i+1),loss_val)
    
    #example
    input_, output_, encoded_, enc_state = sess1.run([encdecad.input, encdecad.output, 
                                                    encdecad.enc_outputs, encdecad.enc_state], {p_input:array[101]})
    embedding = enc_state[0]
    print("Example: ")
    print("embedding: ",embedding)

Epoch 1:  0.220792
Epoch 2:  0.148115
Epoch 3:  0.139054
Epoch 4:  0.126811
Epoch 5:  0.116594
Epoch 6:  0.109886
Epoch 7:  0.10469
Epoch 8:  0.100545
Epoch 9:  0.0970631
Epoch 10:  0.0939343
Epoch 11:  0.0911488
Epoch 12:  0.0887058
Epoch 13:  0.0865162
Epoch 14:  0.0845019
Epoch 15:  0.0826256
Epoch 16:  0.0808598
Epoch 17:  0.0791878
Epoch 18:  0.0776027
Epoch 19:  0.0760942
Epoch 20:  0.0746492
Epoch 21:  0.0732577
Epoch 22:  0.0719138
Epoch 23:  0.0706126
Epoch 24:  0.0693487
Epoch 25:  0.0681176
Epoch 26:  0.0669148
Epoch 27:  0.0657378
Epoch 28:  0.0645837
Epoch 29:  0.0634507
Epoch 30:  0.0623367
Epoch 31:  0.0612397
Epoch 32:  0.0601579
Epoch 33:  0.0590899
Epoch 34:  0.0580344
Epoch 35:  0.0569903
Epoch 36:  0.0559569
Epoch 37:  0.0549332
Epoch 38:  0.0539187
Epoch 39:  0.0529128
Epoch 40:  0.051915
Epoch 41:  0.0509249
Epoch 42:  0.0499422
Epoch 43:  0.0489666
Epoch 44:  0.047998
Epoch 45:  0.0470361
Epoch 46:  0.046081
Epoch 47:  0.0451324
Epoch 48:  0.0441903
Epoch 49:  0.

In [22]:
# TimeNet
timenet = TimeNet(latent_dim, p_inputs)
with tf.Session() as sess2:
    sess2.run(tf.global_variables_initializer())
    batches = len(array)//batch_size
    for i in range(n_epoch):
        for j in range(batches):
            loss_val, _ = sess2.run([timenet.loss,timenet.train],{p_input:array[j]})
        print("Epoch %d: " % (i+1),loss_val)
    
   

Epoch 1:  0.110341
Epoch 2:  0.0986995
Epoch 3:  0.0903961
Epoch 4:  0.0841947
Epoch 5:  0.0793006
Epoch 6:  0.075198
Epoch 7:  0.0715471
Epoch 8:  0.0681798
Epoch 9:  0.0650505
Epoch 10:  0.0621422
Epoch 11:  0.0594265
Epoch 12:  0.0568731
Epoch 13:  0.05446
Epoch 14:  0.052171
Epoch 15:  0.0499934
Epoch 16:  0.0479174
Epoch 17:  0.0459354
Epoch 18:  0.0440397
Epoch 19:  0.0422223
Epoch 20:  0.0404768
Epoch 21:  0.0387987
Epoch 22:  0.0371844
Epoch 23:  0.035631
Epoch 24:  0.034135
Epoch 25:  0.032694
Epoch 26:  0.0313058
Epoch 27:  0.0299687
Epoch 28:  0.0286814
Epoch 29:  0.0274427
Epoch 30:  0.0262516
Epoch 31:  0.0251072
Epoch 32:  0.0240086
Epoch 33:  0.0229551
Epoch 34:  0.0219461
Epoch 35:  0.0209808
Epoch 36:  0.0200585
Epoch 37:  0.0191785
Epoch 38:  0.01834
Epoch 39:  0.0175424
Epoch 40:  0.016785
Epoch 41:  0.016067
Epoch 42:  0.015388
Epoch 43:  0.0147471
Epoch 44:  0.0141437
Epoch 45:  0.0135772
Epoch 46:  0.0130467
Epoch 47:  0.0125515
Epoch 48:  0.0120908
Epoch 49:  0.0