In [1]:
import tensorflow as tf
import numpy as np
import datetime
from sklearn.model_selection import train_test_split

In [2]:
# IDs written as the Id/Label of the 11 data columns in the CSV export cell.
node = [1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13]

_DAY_FORMAT = "%Y-%m-%d"
start = datetime.datetime.strptime("2018-08-27", _DAY_FORMAT)
end = datetime.datetime.strptime("2018-9-27", _DAY_FORMAT)

# One datetime per calendar day in [start, end); the end day itself is excluded.
n_days = (end - start).days
date = [start + datetime.timedelta(days=offset) for offset in range(n_days)]

In [3]:
def combine(data, slots=6):
    """Apply ``calculate`` to every element of ``data`` and stack the results.

    Args:
        data: Iterable of 2-D arrays (one per window/day).
        slots: Forwarded unchanged to ``calculate``.

    Returns:
        ``np.ndarray`` holding one ``calculate`` result per input element,
        in input order.
    """
    return np.array([calculate(block, slots) for block in data])

def calculate(arr, slots):
    """Aggregate consecutive groups of ``slots`` rows into one summary row each.

    The 44 input columns are treated as four groups of 11, and each group is
    reduced with a different statistic over the rows of the current group:

    * columns  0:11 -> mean
    * columns 11:22 -> median
    * columns 22:33 -> max
    * columns 33:44 -> min

    Args:
        arr: 2-D array with at least 44 columns.
        slots: Number of rows per group.  It is both the stride and the width
            of the group; the last group is shorter when the row count is not
            a multiple of ``slots``.

    Returns:
        ``np.ndarray`` of shape ``(ceil(rows / slots), 44)``; an empty array
        when ``arr`` has no rows.
    """
    n_rows = arr.shape[0]
    summary = []
    for begin in range(0, n_rows, slots):
        # The width follows ``slots``.  It used to be a literal 6, which made
        # groups overlap (or skip rows) for any other value of ``slots``.
        chunk = arr[begin:begin + slots]
        summary.append(np.hstack((
            np.mean(chunk[:, 0:11], axis=0),
            np.median(chunk[:, 11:22], axis=0),
            np.max(chunk[:, 22:33], axis=0),
            np.min(chunk[:, 33:44], axis=0),
        )))

    return np.array(summary)

In [4]:
def concatenate(date_list, node):
    """Load the per-day ``.npy`` files for the weekdays in ``date_list``.

    Saturdays and Sundays (``weekday() >= 5``) are skipped without touching
    the disk.  A loaded day is kept only if it has at least 286 rows.

    Args:
        date_list: Iterable of date/datetime objects; the first 10 characters
            of ``str(day)`` form the file name stem.
        node: Not used by this function.

    Returns:
        ``np.ndarray`` built from the list of kept day arrays.
    """
    kept = []
    for day in date_list:
        is_weekend = day.weekday() >= 5
        if not is_weekend:
            day_stamp = str(day)[:10]
            samples = np.load("./data/m4/" + day_stamp + '_4.npy')
            if samples.shape[0] >= 286:
                kept.append(samples)
    return np.array(kept)

class Data:
    """Windowed training/validation data built from the per-day arrays.

    Construction loads the day arrays through ``concatenate`` (the
    module-level ``date`` and ``node`` are passed in), cuts 42-row windows
    out of every day and reduces each window with ``combine``.

    Typical use: ``reset_index()`` once, ``shuffle_index()`` at the start of
    every epoch, then ``get_train()`` / ``get_val()`` repeatedly.
    """

    def __init__(self, time, batch_size = 32):
        """Load the data and compute the 80/20 train/validation sizes.

        Args:
            time: Spacing between the starts of consecutive windows, in the
                same unit as the ``miniute=5`` sampling step handed to
                ``data_init`` (so ``time=10`` means every second sample).
            batch_size: Number of windows returned per batch.
        """
        self.odata = concatenate(date_list=date, node=node)
        self.data = None
        # Honour the caller's ``time``; it used to be ignored in favour of a
        # hard-coded 10.
        self.data_init(miniute=5, time=time)
        self.data = combine(self.data)

        self.length = len(self.data)

        self.train_len = int(self.length * 0.8)
        self.val_len = int(self.length - self.train_len)

        self.index = None
        self.train_index = None
        self.train_pos = 0
        self.val_index = None
        self.val_pos = 0
        self.batch_size = batch_size

    def data_init(self, miniute, time):
        """Cut 42-row windows out of every day in ``self.odata``.

        Window starts run from row ``int(9 * 60 / miniute)`` up to (but not
        including) row ``286 - 42`` in steps of ``int(time / miniute)``.
        # NOTE(review): the start offset presumably corresponds to 09:00 if
        # row 0 is midnight -- confirm against the data producer.

        The result is stored in ``self.data`` with shape ``(-1, 42, 44)``.
        """
        days = self.odata
        first = int(9 * 60 / miniute)
        stride = int(time / miniute)
        windows = [days[:, begin:begin + 42, :]
                   for begin in range(first, 286 - 42, stride)]
        self.data = np.array(windows).reshape(-1, 42, 44)

    def reset_index(self):
        """Reset ``self.index`` to ``0 .. length-1`` in order."""
        self.index = np.arange(self.length)

    def shuffle_index(self):
        """Shuffle the index in place, re-split it and rewind both cursors."""
        if self.index is None:
            # Be forgiving when reset_index() was not called first; this used
            # to fail inside np.random.shuffle(None).
            self.reset_index()
        self.train_pos = 0
        self.val_pos = 0
        np.random.shuffle(self.index)
        self.train_index = self.index[:self.train_len]
        self.val_index = self.index[self.train_len:]

    def _batch(self, index, pos):
        """Return ``(inputs, labels)`` for ``batch_size`` entries of ``index`` from ``pos``."""
        picked = index[pos: pos + self.batch_size]
        windows = self.data[picked]  # integer-array indexing -> a fresh copy
        inputs = windows[:, 0:-1]
        # Targets are the first 11 columns shifted one step ahead.  They are
        # copied because abnormal_detection may rewrite its argument in place
        # and this slice overlaps ``inputs``.
        targets = windows[:, 1:, 0:11].copy()
        return inputs, abnormal_detection(targets)

    def get_train(self):
        """Return the next training batch and advance the training cursor."""
        batch = self._batch(self.train_index, self.train_pos)
        self.train_pos += self.batch_size
        return batch

    def get_val(self):
        """Return the next validation batch and advance the validation cursor."""
        batch = self._batch(self.val_index, self.val_pos)
        self.val_pos += self.batch_size
        return batch

In [5]:
# Build the data set with a 10-unit window spacing.
a = Data(10)
# Flatten every loaded day into rows of 44 columns for the statistics below.
arr = np.reshape(a.odata, (-1, 44))

In [6]:
# Per-column summary statistics over all rows of ``arr``.
# NOTE(review): ``min`` and ``max`` shadow the Python builtins for the rest of
# the notebook; the names are kept because they are module-level state.
min = arr.min(axis=0)
max = arr.max(axis=0)
mean = arr.mean(axis=0)
median = np.median(arr, axis=0)
std = arr.std(axis=0)

In [31]:
# Per-column 5th and 95th percentiles, computed in a single call.
threshold_low, threshold_high = np.percentile(arr, [5, 95], axis=0)

### Lower threshold: P(population < threshold_low) = 0.05 (5th percentile)

In [68]:
# Show the 5th-percentile thresholds of the first 11 columns.
threshold_low[:11]

array([2.06666667, 2.03333333, 2.03333333, 2.06666667, 2.03333333,
       2.06666667, 2.06666667, 2.06666667, 2.03333333, 2.06666667,
       2.03333333])

### Upper threshold: P(population > threshold_high) = 0.05 (95th percentile)

In [69]:
# Show the 95th-percentile thresholds of the first 11 columns.
threshold_high[:11]

array([56.78166667, 58.24833333, 57.47833333, 56.88166667, 56.64833333,
       57.23333333, 57.31166667, 56.67833333, 56.26      , 57.705     ,
       57.04833333])

In [70]:
def abnormal_detection(array, threshold_low=threshold_low, threshold_high=threshold_high):
    """Convert values into three-level labels using per-column thresholds.

    Labels:
        * ``0.0`` -- value is below ``threshold_low[:11]``
        * ``2.0`` -- value is above ``threshold_high[:11]``
        * ``1.0`` -- everything in between
        * ``NaN`` inputs stay ``NaN``.

    Only the first 11 threshold entries are used; they broadcast against the
    last axis of ``array``, which therefore has to be 11 wide (or
    broadcast-compatible).

    Args:
        array: Array-like of values to label.
        threshold_low: Per-column lower bounds (defaults are bound when the
            function is defined).
        threshold_high: Per-column upper bounds (same note).

    Returns:
        A new array of labels with the shape of ``array``.  The input is left
        untouched: earlier versions overwrote it in place, which silently
        modified whatever array a sliced view belonged to.
    """
    values = np.asarray(array)
    # Both masks come from the untouched values, so no sentinel numbers
    # (previously 1e9 / 1e10) are needed.
    above = values > threshold_high[:11]
    below = values < threshold_low[:11]

    is_float = np.issubdtype(values.dtype, np.floating)
    labels = np.full(values.shape, 1.0, dtype=values.dtype if is_float else np.float64)
    labels[below] = 0.0
    labels[above] = 2.0  # applied last: "above" wins if both ever match
    if is_float:
        labels[np.isnan(values)] = np.nan

    return labels

In [71]:
# Model hyper-parameters.  ``num_layers`` and ``nepoches`` are not referenced
# by any other code shown in this notebook.
num_input = 44
num_time_step = 6
# num_neuron_per_layer = 256
num_neuron_per_layer = 512
num_layers = 1
num_output = 11
learning_rate = 0.001
nepoches = 150

# Clear the default graph before building the model.
tf.reset_default_graph()
# Inputs: [batch, num_time_step, num_input]; targets: [batch, num_time_step, num_output].
X = tf.placeholder(dtype=tf.float32, shape=[None, num_time_step, num_input])
y = tf.placeholder(dtype=tf.float32, shape=[None, num_time_step, num_output])

# LSTM cell with ReLU activation, wrapped with attention (attn_length=6) and
# then projected to ``num_output`` values per time step.
# NOTE(review): the order in which these ops are created determines the
# variable names stored in the checkpoint restored later -- do not reorder.
cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_neuron_per_layer, activation=tf.nn.relu)
cell = tf.contrib.rnn.AttentionCellWrapper(cell, attn_length=6)
cell = tf.contrib.rnn.OutputProjectionWrapper(cell, num_output)

outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# Mean squared error between targets and the per-step outputs.
loss = tf.reduce_mean(tf.square(y - outputs))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Rebuild the sequential index of the data set ``a``.
a.reset_index()

In [72]:
# Evaluation days: each file is reshaped to (-1, 286, 44) and reduced with
# ``combine`` exactly like the training data.
_PRED_FILES = (
    "./data/m4/2018-10-02_4.npy",
    "./data/m4/2018-10-08_4.npy",
    "./data/m4/2018-10-11_4.npy",
)
pred_10_2, pred_10_8, pred_10_11 = [
    combine(np.load(path).reshape(-1, 286, 44)) for path in _PRED_FILES
]
pred_all = np.array([pred_10_2, pred_10_8, pred_10_11])

In [63]:
# Restore the trained model and label one prediction per sliding window.
result = []
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "./RNN_MODEL_10_att_128130")
    for day_features in pred_all:
        rows = day_features.reshape(-1, 44)
        day_labels = []
        # 25 windows of 6 consecutive rows, starting at rows 18 .. 42.
        for first in range(18, 18 + 25):
            x_batch = rows[first:first + 6].reshape(1, num_time_step, 44)
            y_pred = abnormal_detection(sess.run(outputs, feed_dict={X: x_batch}))
            # Keep only the output of the last (6th) time step.
            day_labels.append(y_pred[0][5])
        result.append(np.array(day_labels))
    np.save("result_10min_abnormal.npy", np.array(result))

INFO:tensorflow:Restoring parameters from ./RNN_MODEL_10_att_128130


In [73]:
def get_abnotmal_rate(test, pred, line):
    """Fraction of entries labelled ``line`` in ``test`` that ``pred`` also labels ``line``.

    Args:
        test: 3-D array-like of reference labels.
        pred: 3-D array-like of predicted labels, at least as large as
            ``test`` along every axis; only the region overlapping ``test``
            is compared.
        line: The label value to score.

    Returns:
        ``match / count`` as a Python float, where ``count`` is the number of
        ``test`` entries equal to ``line`` and ``match`` the number of those
        for which ``pred`` equals ``line`` too.  ``nan`` is returned when
        ``line`` does not occur in ``test`` (this used to raise
        ``ZeroDivisionError``).

    Raises:
        IndexError: if ``pred`` is smaller than ``test`` along any axis.
    """
    truth = np.asarray(test)
    guess = np.asarray(pred)[:truth.shape[0], :truth.shape[1], :truth.shape[2]]
    if guess.shape != truth.shape:
        raise IndexError("pred must cover every index of test")

    wanted = truth == line
    count = int(np.count_nonzero(wanted))
    if count == 0:
        return float('nan')
    match = int(np.count_nonzero(wanted & (guess == line)))
    return match / count

In [74]:
# Reload the evaluation days (same files and preprocessing as for prediction).
_PRED_FILES = (
    "./data/m4/2018-10-02_4.npy",
    "./data/m4/2018-10-08_4.npy",
    "./data/m4/2018-10-11_4.npy",
)
pred_10_2, pred_10_8, pred_10_11 = [
    combine(np.load(path).reshape(-1, 286, 44)) for path in _PRED_FILES
]
pred_all = np.array([pred_10_2, pred_10_8, pred_10_11])
# Ground-truth labels: rows 24 onward of the first 11 columns of each day.
# The slice is a view of ``pred_all``; labelling a copy keeps the measured
# values in ``pred_all`` intact even if abnormal_detection works in place.
abnormal_real = abnormal_detection(pred_all[:, 0, 24:, :11].copy())

In [75]:
# Model predictions, truncated to the first 24 steps of each day.
abnormal_test = np.load("./result_10min_abnormal.npy")[:, 0:24]
# Share of truly "high" (label 2) entries that the model also labels 2.
get_abnotmal_rate(test=abnormal_real, pred=abnormal_test, line=2)

0.7291666666666666

In [76]:
# Reference labels of the third evaluation day (index 2).
abnormal_real[2]

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.

In [77]:
# Predicted labels of the third evaluation day (index 2), for comparison.
abnormal_test[2]

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.

In [78]:
# Baseline predictions loaded from result_VAR.npy, labelled with the same thresholds.
result_VAR = np.load("./result_VAR.npy")
var_labels = abnormal_detection(result_VAR[:, :24, :11])
# Share of truly "high" (label 2) entries that the baseline also labels 2.
get_abnotmal_rate(test=abnormal_real, pred=var_labels, line=2)

0.3125

In [79]:
# Model predictions, truncated to the first 24 steps of each day.
abnormal_test = np.load("./result_10min_abnormal.npy")[:, 0:24]
# Share of truly "low" (label 0) entries that the model also labels 0.
get_abnotmal_rate(test=abnormal_real, pred=abnormal_test, line=0)

0.46153846153846156

In [80]:
# Baseline predictions loaded from result_VAR.npy, labelled with the same thresholds.
result_VAR = np.load("./result_VAR.npy")
var_labels = abnormal_detection(result_VAR[:, :24, :11])
# Share of truly "low" (label 0) entries that the baseline also labels 0.
get_abnotmal_rate(test=abnormal_real, pred=var_labels, line=0)

0.15384615384615385

In [81]:
input_date = '2018-11-05'
label = 'demo3'


def _write_label_rows(handle, labels, node_ids, id_suffix=''):
    """Write one dynamic-attribute CSV row per column of ``labels``.

    Each row is ``Id,Label,"<[timeset]>","<[t, value]; ...>"``.

    Args:
        handle: Writable text file object.
        labels: 2-D array indexed as ``labels[time_step][column]``.
        node_ids: Sequence giving the Id/Label of every column.
        id_suffix: Text appended to each Id/Label.
    """
    n_steps = labels.shape[0]
    # NOTE(review): the final timestamp is written as ``n_steps`` (not
    # ``n_steps - 1``), so the sequence ends ..., n-2, n.  This is kept from
    # the original output format -- confirm it is intended.
    stamps = [str(step) for step in range(n_steps - 1)] + [str(n_steps)]
    for col in range(labels.shape[1]):
        node_id = str(node_ids[col]) + id_suffix
        values = [str(labels[step][col]) for step in range(n_steps - 1)]
        values.append(str(labels[-1][col]))
        pairs = '; '.join('[' + stamp + ', ' + value + ']'
                          for stamp, value in zip(stamps, values))
        handle.write(node_id + ',' + node_id + ',"<[' + ', '.join(stamps)
                     + ']>","<' + pairs + '>"' + '\n')


# Export the first evaluation day only.  Dedicated names are used so that
# ``abnormal_test`` / ``abnormal_real`` keep all days and this cell can be
# re-run safely (rebinding them to ``[0]`` broke a second run).
day_test = abnormal_test[0]
day_real = abnormal_real[0]
with open('./' + input_date + '_node_' + label + '.csv', 'w') as f:
    f.write('Id,Label,timeset,Thresh\n')
    _write_label_rows(f, day_test, node)                 # predicted labels
    _write_label_rows(f, day_real, node, id_suffix='r')  # reference labels

In [265]:
# Inspect the shape of the predicted-label array.
abnormal_test.shape

(3, 24, 11)

In [257]:
def node_list_to_csv(node_list, input_date, path, label):
    """Write a node list as a Gephi dynamic-graph CSV file.

    The file ``path + input_date + '_node_' + label + '.csv'`` gets the header
    ``Id,Label,timeset,Population,color`` followed by one row per node key.

    Args:
        node_list: Node data handed to ``graph_to_csv_formter``, which returns
            ``(ids, time_stamps, attributes)``: a mapping from node key to a
            position, and two sequences indexed by that position.
        input_date: Date string used in the file name.
        path: Prefix of the output file; concatenated as-is, so it must end
            with a path separator when it names a directory.
        label: Suffix used in the file name.

    Returns:
        None.
    """
    node_index, time_stamp, attributes = graph_to_csv_formter(node_list)
    print(time_stamp)
    # ``with`` closes the file even if a row fails to format.
    with open(path + input_date + '_node_' + label + '.csv', 'w') as f:
        f.write('Id,Label,timeset,Population,color\n')
        for key, position in node_index.items():
            # The text after the first '_' of the key, if any, is the colour.
            key_parts = key.split('_')
            stamps = [str(stamp) for stamp in time_stamp[position]]
            # Read the final timestamp/attribute explicitly so an empty
            # series still raises IndexError instead of writing a blank row.
            last_pair = '[' + str(time_stamp[position][-1]) + ', ' \
                + str(attributes[position][len(stamps) - 1]) + ']'
            pairs = ['[' + stamps[i] + ', ' + str(attributes[position][i]) + ']'
                     for i in range(len(stamps) - 1)]
            pairs.append(last_pair)
            colour = str(key_parts[1]) if len(key_parts) > 1 else ''
            # Every row is terminated now.  Keys without a colour part used
            # to be left unterminated (no closing ']>"' and no newline),
            # which merged them into the following row.
            f.write(str(key) + ',' + str(key) + ',"<[' + ', '.join(stamps)
                    + ']>","<' + '; '.join(pairs) + '>",' + colour + '\n')