In [1]:
import time
# Record when this run happened: gmtime() with no argument returns the current UTC time,
# and as the cell's last expression the resulting struct_time is displayed.
time.gmtime()

time.struct_time(tm_year=2021, tm_mon=8, tm_mday=28, tm_hour=14, tm_min=56, tm_sec=38, tm_wday=5, tm_yday=240, tm_isdst=0)

In [2]:
import os,sys,inspect
# Make the parent of the notebook's working directory importable so that the
# `_code` package can be imported by the cells below.
# The working directory is used instead of inspect.getfile(inspect.currentframe()):
# newer ipykernel releases compile each cell from a file in a temporary directory,
# so the frame's file name no longer resolves inside the notebook's folder.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
# Re-running this cell must not stack duplicate entries at the front of sys.path.
if not sys.path or sys.path[0] != parentdir:
    sys.path.insert(0, parentdir)

In [3]:
import pandas as pd
import itertools, collections
import time
import numpy as np
import json

from _code import DeepLogModel
from _code import generator
from _code import seed
from _code import trie

In [4]:
### Parameters
# Seed handed to seed.seed() before any data is generated.
_seed = 0
# Name of the sub-folder (under the inputs directory) that receives this run's files.
subdir = "static-01"

# Location and base name of the simulation parameter file read by read_params().
# The directory is assembled with os.path.join so the separator matches the host OS;
# on Windows this yields the same string as the former hard-coded "..\\openstack\\..." literal.
dir_in = os.path.join("..", "openstack", "static-openstack")
filename = "openstack_val_n1"
# NOTE(review): path_params is not used by any code visible in this notebook - confirm it is still needed.
path_params = os.path.join(dir_in, filename)

In [5]:
def read_params(filename, dir_in):
    """Load simulation parameters from ``<dir_in>/sim_<filename>.json``.

    The resolved path is printed before the file is read.

    Args:
        filename: Base name of the parameter set; the file read is
            ``sim_<filename>.json``.
        dir_in: Directory that contains the parameter file.

    Returns:
        A 3-tuple ``(vocabulary, patterns, p)``: the values stored under the
        ``"vocabulary"`` and ``"p"`` keys, and the ``"patterns"`` list with
        every entry converted to a tuple.

    Raises:
        AssertionError: If the parameter file does not exist.
        KeyError: If one of the expected keys is missing from the JSON document.
    """
    pathin = os.path.join(dir_in, "sim_{}.json".format(filename))
    print(pathin)
    assert os.path.isfile(pathin), "parameter file not found: {}".format(pathin)

    # The context manager closes the handle deterministically; the previous
    # open(...).read() left that to the garbage collector.
    with open(pathin) as fin:
        data = json.load(fin)

    # JSON has no tuple type, so each pattern arrives as a list.
    patterns = [tuple(p) for p in data["patterns"]]

    return data["vocabulary"], patterns, data["p"]

# Load the vocabulary, the workflow patterns and `p` from the parameter file
# identified by `filename` / `dir_in`.
vocabulary, patterns, p = read_params(filename, dir_in)
# Number of vocabulary entries; later passed to the model and used as the upper bound for g.
vocabulary_size = len(vocabulary)

..\openstack\static-openstack\sim_openstack_val_n1.json


In [6]:
# Seed via the project's `seed` helper before any data is generated.
# NOTE(review): presumably this seeds every RNG used by `generator` and the model - confirm in _code.seed.
# (The former `subdir = subdir` self-assignment was a no-op and has been dropped.)
seed.seed(_seed)

In [7]:
# Trie statistics of the workflow patterns. The h value is pinned to 10 here
# rather than derived with trie.calc_h_value(patterns).
trie_g = trie.calc_g_value(patterns)
trie_h = 10
print("trie-g:", trie_g)
print("trie-h:", trie_h)

# Training text is generated with anomaly_ratio=0.00; the test sessions use 0.1.
# The two generator calls stay in this order (output presumably depends on the seeded RNG state).
text_train, marks_train = generator.generate_text(
    patterns,
    text_size=10000,
    anomaly_ratio=0.00,
    vocabulary=vocabulary,
)
tests = generator.generate_tests(
    patterns,
    vocabulary,
    n=1000,
    text_size=4,
    anomaly_ratio=0.1,
)

trie-g: 3
trie-h: 10


In [8]:
import json, os

def write_inputs(subdir, vocabulary_size, vocabulary, patterns, text_train, marks_train, tests):
    """Persist the generated experiment inputs as JSON under ``../inputs/<subdir>``.

    Three files are written (the directory is created if needed):

    * ``v-wf.json``  - ``vocabulary_size``, ``vocabulary`` and the patterns
      (stored under the key ``"workflows"``)
    * ``train.json`` - ``text_train`` and ``marks_train``
    * ``tests.json`` - ``tests``

    Args:
        subdir: Name of the sub-folder inside the inputs directory.
        vocabulary_size: Number of vocabulary entries.
        vocabulary: The vocabulary itself.
        patterns: Workflow patterns; tuples are serialised as JSON arrays.
        text_train: Training text.
        marks_train: Marks that accompany ``text_train``.
        tests: Mapping of test id to test data; non-string keys become strings
            in the JSON file.

    Raises:
        TypeError: If a value is not JSON serialisable.
        OSError: If the directory or a file cannot be created.
    """
    # Path is assembled from components so the separator matches the host OS
    # (on Windows the result equals the former "..\\inputs" literal).
    dir_ = os.path.join("..", "inputs", subdir)
    os.makedirs(dir_, exist_ok=True)

    payloads = {
        "v-wf.json": {
            "vocabulary_size": vocabulary_size,
            "vocabulary": vocabulary,
            "workflows": patterns,
        },
        "train.json": {"text_train": text_train, "marks_train": marks_train},
        "tests.json": {"tests": tests},
    }
    for name, payload in payloads.items():
        # `with` guarantees each file is flushed and closed before the function
        # returns; json.dump(..., open(...)) left the handles to the garbage collector.
        with open(os.path.join(dir_, name), "w") as fout:
            json.dump(payload, fout)

def read_inputs(subdir):
    """Load the experiment inputs stored as JSON under ``../inputs/<subdir>``.

    Reads ``v-wf.json``, ``train.json`` and ``tests.json`` and undoes the type
    changes introduced by JSON: each pattern becomes a tuple again, test ids
    are converted back to ``int`` and each test value to a tuple.

    Args:
        subdir: Name of the sub-folder inside the inputs directory.

    Returns:
        A 6-tuple ``(vocabulary_size, vocabulary, patterns, text_train,
        marks_train, tests)``.

    Raises:
        OSError: If one of the three files cannot be opened.
        KeyError: If an expected key is missing from a file.
        ValueError: If a test id cannot be converted to ``int``.
    """
    # Path is assembled from components so the separator matches the host OS
    # (on Windows the result equals the former "..\\inputs" literal).
    dir_ = os.path.join("..", "inputs", subdir)

    def _load(name):
        # Close every handle deterministically instead of relying on the GC.
        with open(os.path.join(dir_, name)) as fin:
            return json.load(fin)

    j1 = _load("v-wf.json")
    j2 = _load("train.json")
    j3 = _load("tests.json")

    # JSON arrays come back as lists; restore the tuple form of each pattern.
    patterns = [tuple(p) for p in j1["workflows"]]

    # JSON object keys are always strings; restore the integer test ids.
    tests = {int(k): tuple(v) for k, v in j3["tests"].items()}

    return (
        j1["vocabulary_size"],
        j1["vocabulary"],
        patterns,
        j2["text_train"],
        j2["marks_train"],
        tests,
    )

# Persist the vocabulary, patterns, training text and tests of this run so they
# can be loaded again with read_inputs(subdir).
write_inputs(subdir, vocabulary_size, vocabulary, patterns, text_train, marks_train, tests)

In [9]:
(
    ret_vocabulary_size,
    ret_vocabulary,
    ret_patterns,
    ret_text_train,
    ret_marks_train,
    ret_tests,
) = read_inputs(subdir)

# Round-trip sanity check: everything read back from disk must equal the
# in-memory objects that were written. Pairs are checked in the same order
# as before, so the first mismatch still raises AssertionError.
for restored, original in (
    (ret_vocabulary_size, vocabulary_size),
    (ret_vocabulary, vocabulary),
    (ret_patterns, patterns),
    (ret_text_train, text_train),
    (ret_marks_train, marks_train),
    (ret_tests, tests),
):
    assert restored == original


In [10]:
# Create, build and train the DeepLog model on the generated training text.
# NOTE(review): h is set to trie_h+1 and n to the vocabulary size; presumably h is the
# history window length and n the number of classes - confirm against _code.DeepLogModel.
deep_log_model = DeepLogModel.DeepLogModel(h=trie_h+1, n=vocabulary_size, vocabulary=vocabulary)
deep_log_model.build(num_lstm_layers=2, lstm_size=64)
# Last expression of the cell, so the object returned by fit() is shown as the cell output.
deep_log_model.fit(text_train,epochs=1)

<_code.DeepLogModel.HistoryLoss at 0x2a3d84c5c10>

In [11]:
# Run every test session through the model once per candidate g value
# (g = 0 .. vocabulary_size inclusive) and collect (test id, g, status) triples.
# Elapsed time is measured with time.perf_counter(): it is monotonic, so a system
# clock adjustment during this long-running loop cannot distort the reported duration.
b = time.perf_counter()
entries = list()
# `anomaly` is part of each test tuple but is not needed here.
for k, (text_test, text_marks, anomaly) in tests.items():
    for g in range(0, vocabulary_size+1):
        res = deep_log_model.monitor_session(text_test, text_marks, g=g)
        entries.append((k, g, res))
e = time.perf_counter()
print("time:", round(e-b,3), "seconds")

time: 3508.163 seconds


In [12]:
# Long format: one row per collected (test id, g, status) entry.
df = pd.DataFrame(entries, columns = ["i","g","status"])
# Wide format: one row per test id, one column per g value, cells hold the status.
# NOTE(review): aggfunc="sum" is applied to the status values; with a single entry per
# (i, g) pair this presumably leaves the value unchanged - confirm.
ddf = df.pivot_table(index="i", columns = "g", values = "status", aggfunc = "sum")
def calc(s):
    """Summarise an iterable of status labels into counts and quality metrics.

    Occurrences of the labels ``"TP"``, ``"TN"``, ``"FP"`` and ``"FN"`` in
    ``s`` are counted; any other value is ignored.

    Args:
        s: Iterable of status labels (for example one column of the pivot table).

    Returns:
        A dict with the keys ``prec``, ``rec``, ``acc``, ``f1``, ``TP``,
        ``TN``, ``FP`` and ``FN``, in that order.
    """
    tally = collections.Counter(s)
    TP, TN, FP, FN = (tally.get(label, 0) for label in ("TP", "TN", "FP", "FN"))

    # A small epsilon in every denominator keeps the ratios defined (0.0)
    # when the corresponding counts are all zero.
    eps = 1e-9
    prec = TP / (TP + FP + eps)
    rec = TP / (TP + FN + eps)
    acc = (TP + TN) / (TP + TN + FP + FN + eps)
    f1 = 2*(prec*rec)/(prec+rec+eps)

    metrics = {"prec": prec, "rec": rec, "acc": acc, "f1": f1}
    metrics.update(TP=TP, TN=TN, FP=FP, FN=FN)
    return metrics
    
# Apply calc to every column of ddf, i.e. once per g value.
e=ddf.apply(calc, axis=0)
# Turn the per-g metric dicts into a table with one row per g value; rows get a fresh
# default integer index rather than the g labels of ddf's columns.
e1=pd.DataFrame(list(e.values))

print("measure results for each g-value")
# Bare last expression so the table is rendered as the cell output.
e1

measure results for each g-value


Unnamed: 0,prec,rec,acc,f1,TP,TN,FP,FN
0,0.266,1.0,0.266,0.420221,266,0,734,0
1,0.2668,1.0,0.269,0.421219,266,3,731,0
2,0.384393,1.0,0.574,0.555324,266,308,426,0
3,1.0,1.0,1.0,1.0,266,734,0,0
4,1.0,1.0,1.0,1.0,266,734,0,0
5,1.0,0.958647,0.989,0.978887,255,734,0,11
6,1.0,0.834586,0.956,0.909836,222,734,0,44
7,1.0,0.789474,0.944,0.882353,210,734,0,56
8,1.0,0.736842,0.93,0.848485,196,734,0,70
9,1.0,0.718045,0.925,0.835886,191,734,0,75


In [13]:

def write_outputs(subdir, df, ddf, e1):
    """Write the evaluation results as CSV files into ``../inputs/<subdir>``.

    The directory is not created here and must already exist.

    Args:
        subdir: Name of the sub-folder inside the inputs directory.
        df: Long-format results, written to ``DL-results.csv``.
        ddf: Pivoted results, written to ``DL-resuls_pivot.csv``.
        e1: Per-g metrics, written to ``DL-resutls_metrics.csv``.

    Raises:
        OSError: If the target directory does not exist or is not writable.
    """
    # Assembled from components: the former "..\inputs" literal relied on the
    # invalid escape sequence "\i" (a SyntaxWarning on recent Python versions)
    # and on the Windows path separator. On Windows the resulting path is unchanged.
    dir_ = os.path.join("..", "inputs", subdir)
    df.to_csv(os.path.join(dir_, "DL-results.csv"))
    # NOTE(review): the next two file names contain typos ("resuls", "resutls"); they are
    # kept byte-for-byte because other tooling may already read these exact names.
    ddf.to_csv(os.path.join(dir_, "DL-resuls_pivot.csv"))
    e1.to_csv(os.path.join(dir_, "DL-resutls_metrics.csv"))

# Save the long-format results, the pivot table and the per-g metrics next to this run's inputs.
write_outputs(subdir, df, ddf, e1)

In [14]:
# Show the metrics row with the highest accuracy and the row with the highest F1 score.
# idxmax() returns an index *label*, which is what the label-based .loc expects;
# argmax() returns a *position* and only matched because e1 has a default 0..n-1 index.
display(e1.loc[[e1["acc"].idxmax()]])
display(e1.loc[[e1["f1"].idxmax()]])

Unnamed: 0,prec,rec,acc,f1,TP,TN,FP,FN
3,1.0,1.0,1.0,1.0,266,734,0,0


Unnamed: 0,prec,rec,acc,f1,TP,TN,FP,FN
3,1.0,1.0,1.0,1.0,266,734,0,0
