Copyright (C) Microsoft Corporation. All rights reserved.

Microsoft Corporation ("Microsoft") grants you a nonexclusive, perpetual, royalty-free right to use, copy, and modify the software code provided by us ("Software Code"). You may not sublicense the Software Code or any use of it (except to your affiliates and to vendors to perform work on your behalf) through distribution, network access, service agreement, lease, rental, or otherwise. This license does not purport to express any claim of ownership over data you may have shared with Microsoft in the creation of the Software Code. Unless applicable law gives you more rights, Microsoft reserves all other rights not expressly granted herein, whether by implication, estoppel or otherwise.

THE SOFTWARE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

In [1]:
from utils.net import *
from utils.utils import *
import os
import time
import pandas as pd
import json
import torch
import numpy as np
from msanomalydetector.util import average_filter
from competition_metric import evaluate_for_all_series

import warnings

In [2]:
warnings.filterwarnings("ignore")

# Functions

## Generate Data

In [3]:
class gen():
    """Turn a value series into labelled training windows with injected spikes.

    Attributes:
        control: accumulator that grows by a uniform random amount for every
            window in which index ``win_siz - 6`` was not selected and is
            reset to 0 when it was; once it exceeds 100 that index is forced
            into the selection.
        win_siz: number of points in each generated window.
        step: stride between consecutive window end positions.
        number: exclusive upper bound on how many points are perturbed per
            window (the count is drawn with ``np.random.randint(1, number)``).
    """

    def __init__(self, win_siz, step, nums):
        self.control = 0
        self.win_siz = win_siz
        self.step = step
        self.number = nums

    def generate_train_data(self, value, back_k=5):
        """Slide a window over *value* and perturb random points in each one.

        Args:
            value: sliceable sequence of numbers (the cell that calls this
                passes a pandas column).
            back_k: number of trailing points excluded from windowing; values
                above 5 are capped at 5.

        Returns:
            A list of ``[values, labels]`` pairs, both plain Python lists of
            length ``win_siz``; ``labels`` holds 1 at every perturbed index
            and 0 elsewhere. Empty when the series is too short to yield a
            window.
        """
        back = back_k if back_k <= 5 else 5
        total = len(value)
        forced_idx = self.win_siz - 6
        samples = []

        for end in range(self.win_siz, total - back, self.step):
            start = max(0, end - self.win_siz)
            stop = min(total - back, end)
            series = np.array(value[start:stop]).astype(np.float64)

            # Min-max scale the window into roughly [0, 3); the 1e-5 keeps the
            # division finite for a constant window.
            lo = np.min(series)
            hi = np.max(series)
            series = 3 * ((series - lo) / (hi - lo + 1e-5))

            count = np.random.randint(1, self.number)
            picks = np.random.choice(self.win_siz, count, replace=False)
            labels = np.zeros(self.win_siz, dtype=np.int64)

            # Track how long `forced_idx` has gone unselected and force it in
            # once the accumulator passes the threshold.
            if forced_idx in picks:
                self.control = 0
            else:
                self.control += np.random.random()
            if self.control > 100:
                picks[0] = forced_idx
                self.control = 0

            mean = np.mean(series)
            smoothed = average_filter(series)
            var = np.var(series)
            scale = min((1 + var), 10)
            for idx in picks:
                series[idx] += (smoothed[idx] + mean) * np.random.randn() * scale
                labels[idx] = 1

            samples.append([series.tolist(), labels.tolist()])
        return samples


def auto(dic):
    """Write the non-``None`` settings in *dic* to ``auto.json`` in the cwd.

    Args:
        dic: either a mapping of setting name to value, or an iterable of
            ``(name, value)`` pairs. Entries whose value is ``None`` are
            dropped.

    The file is overwritten on every call; values must be JSON-serialisable.
    """
    # Accept real dicts as well as the pair iterables the original required.
    pairs = dic.items() if hasattr(dic, 'items') else dic
    settings = {name: value for name, value in pairs if value is not None}
    with open(os.path.join(os.getcwd(), 'auto.json'), 'w') as f:
        json.dump(settings, f)


def get_path(dataset_path, t):
    """List the entries of ``<cwd>/<dataset_path>/<t>`` as full path strings.

    Args:
        dataset_path: dataset directory, relative to the current working
            directory.
        t: split name; only ``'train'`` and ``'test'`` are accepted.

    Returns:
        One ``'/'``-joined path per directory entry, in ``os.listdir`` order.
        For any other *t* the message ``Invalid option`` is printed and an
        empty list is returned.
    """
    if t != 'train' and t != 'test':
        print('Invalid option')
        return []
    split_dir = '/'.join((os.getcwd(), dataset_path, t))
    return ['/'.join((split_dir, entry)) for entry in os.listdir(split_dir)]

## Train

In [4]:
def auto(epoch):
    """Record *epoch* in ``auto.json`` and hand back the stored data/window.

    Args:
        epoch: value stored under the ``'epoch'`` key of ``auto.json``.

    Returns:
        ``(data, window)`` as read from the ``'data'`` and ``'window'`` keys
        of ``auto.json`` in the current working directory.

    Raises:
        KeyError: if either key is missing (the file is left untouched then).
    """
    state_file = os.getcwd() + '/auto.json'
    with open(state_file, 'r+') as src:
        state = json.load(src)
    stored = (state['data'], state['window'])
    state['epoch'] = epoch
    with open(state_file, 'w+') as dst:
        json.dump(state, dst)
    return stored

## Evaluate

In [5]:
def auto():
    """Read the stored window size and epoch from ``auto.json`` in the cwd.

    Returns:
        ``(window, epoch)`` taken from the ``'window'`` and ``'epoch'`` keys.

    Raises:
        KeyError: if either key is missing from the file.
    """
    path_auto = os.path.join(os.getcwd(), 'auto.json')
    # Read-only access: nothing is written here, so do not require write
    # permission on the file (the previous 'r+' mode did).
    with open(path_auto, 'r') as f:
        store = json.load(f)
    return store['window'], store['epoch']


def getfid(path):
    """Return the text after the last ``'/'`` in *path* (all of it if none)."""
    _, _, tail = path.rpartition('/')
    return tail


def get_score(data_source, files, thres, option):
    """Run the selected detector over every CSV in *files* and collect results.

    Besides its arguments, this function reads the module-level names
    ``model``, ``models``, ``window`` and ``net``; they must be bound before
    it is called with a non-empty *files*.

    Args:
        data_source: accepted for interface compatibility; not used here.
        files: iterable of CSV paths. Each file is read with its first column
            as the index and must provide ``timestamp``, ``value`` and
            ``is_anomaly`` columns.
        thres: forwarded unchanged to the detector callable.
        option: forwarded unchanged to the detector callable.

    Returns:
        ``(total_time, results, savedscore)`` where ``total_time`` is the
        summed wall-clock seconds spent inside the detector, ``results``
        holds ``[timestamp, label, pre, file]`` per processed file and
        ``savedscore`` holds ``[label, scores, file, timestamp]``.
    """
    total_time = 0
    results = []
    savedscore = []

    for f in files:
        tmp_data = pd.read_csv(f, index_col=0)
        in_timestamp = tmp_data['timestamp']
        in_value = tmp_data['value']
        in_label = tmp_data['is_anomaly']

        # Series shorter than one window are skipped for the sr_cnn model.
        if model == 'sr_cnn' and len(in_value) < window:
            continue

        time_start = time.time()
        timestamp, label, pre, scores = models[model](
            np.array(in_timestamp), np.array(in_value), np.array(in_label),
            window, net, option, thres)
        total_time += time.time() - time_start

        results.append([timestamp, label, pre, f])
        savedscore.append([label, scores, f, timestamp])
    return total_time, results, savedscore

# Main

#### Parser Values



In [6]:
# Settings for the data-generation step.
data_train = '../datasets/data_yahoo/'  # dataset directory (relative to the cwd); also the prefix of the generated train JSON path
window = 64 # window_nab = 64, window_yahoo = 64
step = 8 # step_nab = 8, step_yahoo = 8
seed = 54321  # passed to torch.manual_seed and np.random.seed in the training cell
num = 10  # passed to gen(...) as `nums`

In [7]:
# Settings for the training step; most are forwarded to models[model](...).
lr = 1e-6 # lr_nab = 1e-5, lr_yahoo = 1e-6
load = False  # falsy -> load_path is None; otherwise appended to the cwd to form load_path
save = 'snapshot'  # sub-directory of the cwd used to build model_path
epoch = 10
batch_size = 256
num_workers = 8
model = 'sr_cnn'  # key into the `models` dispatch dictionaries
# NOTE(review): this rebinds the name `auto`, hiding the auto(...) helper
# functions defined earlier in the file. With the flag set to True, the
# `if auto:` branches in the train/evaluate cells would try to call a bool.
auto = False

In [8]:
# Settings for the evaluation step.
data_test = '../datasets/data_yahoo'  # dataset directory; get_path reads its 'test' sub-directory
epoch = 10  # same value as already assigned with the training settings
model_path_snapshot = 'snapshot'  # directory component of the checkpoint path built in the evaluation cell
delay = 0
thres = 0.95  # forwarded to get_score as `thres`
# NOTE(review): like the earlier assignment, this rebinds `auto` and hides the
# auto(...) helper functions of the same name.
auto = False
missing_option = 'anomaly'  # forwarded to get_score as `option`

## Generate Data

In [9]:
# Build the synthetic training set: every CSV under <data_train>/train that is
# at least `window` points long is cut into perturbed windows, and all windows
# are dumped into a single JSON file.
files = get_path(data_train, 'train')

train_data_path = '/'.join((os.getcwd(), data_train)) + '_' + str(window) + '_train.json'

results = []
total_time = 0

generator = gen(window, step, num)
for f in files:
    file_read = pd.read_csv(f)
    in_timestamp = file_read['timestamp']
    in_value = file_read['value']
    in_label = []
    if len(in_value) >= window:
        # Only the window generation itself is timed, not the CSV parsing.
        time_start = time.time()
        train_data = generator.generate_train_data(in_value)
        time_end = time.time()
        total_time += time_end - time_start
        results.extend(train_data)

with open(train_data_path, 'w+') as f:
    json.dump(results, f)



## Train

In [10]:
# Training cell.
# Prerequisite (from the original author): create a folder called snapshot
# where the model will be saved (file_name.bin).

# NOTE(review): by this point `auto` has been rebound to the boolean flag
# (`auto = False`), so this branch is skipped. If the flag were True,
# `auto(epoch)` would attempt to call a bool rather than the helper function.
if auto:
    data_train, window = auto(epoch)
else:
    data_train, window = data_train, window  # no-op: keeps the configured values
# Seed both torch and numpy RNGs before training.
torch.manual_seed(seed)
np.random.seed(seed)
# Dispatch table from model name to its training entry point. `sr_cnn` is not
# defined in this file; presumably it comes from the star imports - confirm.
models = {
    'sr_cnn': sr_cnn,
}
model = model  # no-op self-assignment
root_path = os.getcwd()

# Same path expression as the one used when the training JSON was written.
train_data_path = root_path + '/' + data_train + '_' + str(window) + '_train.json'
model_path = root_path + '/' + save + '/'
#print('MODEL PATH', model_path)
# `load` is either falsy (start without a checkpoint path) or a path relative
# to the cwd.
if load:
    load_path = root_path + '/' + load
else:
    load_path = None

#print('LOAD PATH', load_path)

# Wall-clock time spent inside the training call.
total_time = 0
time_start = time.time()
models[model](train_data_path, model_path, window, lr, epoch, batch_size, num_workers,
              load_path=load_path)
time_end = time.time()
total_time += time_end - time_start
#print('time used for training:', total_time, 'seconds')



Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Anomaly(
  (layer1): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
  (layer2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (relu): ReLU(inplace=True)
)
length : 5385
epoch : 1


Iteration:  45%|████▌     | 10/22 [00:10<00:09,  1.32it/s]



Iteration: 100%|██████████| 22/22 [00:50<00:00,  2.30s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 2


Iteration:  41%|████      | 9/22 [00:11<00:11,  1.10it/s]



Iteration: 100%|██████████| 22/22 [00:51<00:00,  2.33s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 3


Iteration:  41%|████      | 9/22 [00:11<00:12,  1.04it/s]



Iteration: 100%|██████████| 22/22 [00:52<00:00,  2.36s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 4


Iteration:  41%|████      | 9/22 [00:11<00:11,  1.08it/s]



Iteration: 100%|██████████| 22/22 [00:51<00:00,  2.34s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 5


Iteration:  41%|████      | 9/22 [00:10<00:11,  1.15it/s]



Iteration: 100%|██████████| 22/22 [00:50<00:00,  2.31s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 6


Iteration:  41%|████      | 9/22 [00:11<00:11,  1.11it/s]



Iteration: 100%|██████████| 22/22 [00:51<00:00,  2.33s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 7


Iteration:  45%|████▌     | 10/22 [00:10<00:09,  1.30it/s]



Iteration: 100%|██████████| 22/22 [00:50<00:00,  2.31s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 8


Iteration:  41%|████      | 9/22 [00:10<00:11,  1.13it/s]



Iteration: 100%|██████████| 22/22 [00:51<00:00,  2.32s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 9


Iteration:  41%|████      | 9/22 [00:11<00:11,  1.12it/s]



Iteration: 100%|██████████| 22/22 [00:51<00:00,  2.33s/it]
Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

epoch : 10


Iteration:  41%|████      | 9/22 [00:13<00:13,  1.06s/it]



Iteration: 100%|██████████| 22/22 [00:53<00:00,  2.42s/it]


## Evaluate

In [11]:
# Evaluation cell: load the trained checkpoint, score every test CSV and
# compute the aggregate metrics.

# NOTE(review): `auto` is the boolean flag at this point (it was rebound by
# `auto = False`), so this branch is skipped; with the flag set to True the
# call below would try to call a bool rather than the helper function.
if auto:
    window, epoch = auto()
data_source = data_test

path_snapshot = '../'

root = os.getcwd()
# Dispatch table from model name to its evaluation entry point; get_score
# looks the callable up through the module-level `models` and `model` names.
models = {
    'sr_cnn': sr_cnn_eval,
}

# Checkpoint file name encodes the epoch count and window size.
model_path = (path_snapshot + '/' + model_path_snapshot
              + '/srcnn_retry_None_None_' + str(epoch) + '_' + str(window) + '.bin')

srcnn_model = Anomaly(window)
net = load_model(srcnn_model, model_path)

files = get_path(data_source, 'test')

total_time, results, savedscore = get_score(data_source, files, thres, missing_option)

# Honour the configured `delay` setting instead of a hard-coded 0.
total_fscore, pre, rec, TP, FP, TN, FN = evaluate_for_all_series(results, delay=delay)



loading /Users/lorenapoenaru-olaru/PycharmProjects/aiops/anomaly_detection/snapshot/srcnn_retry_None_None_10_64.bin
precision 0.6484517304189436
recall 0.4801078894133513
f1 0.5517241379310345
-------------------------------
