In [None]:
import time
# Show the current UTC time as this cell's output (time.gmtime returns a
# struct_time); presumably kept as a record of when the run was started.
time.gmtime()

In [None]:
import os,sys,inspect
# Put the directory above the notebook on sys.path so that the ``_code``
# package imported in the next cell can be found (presumably it lives there).
#
# The notebook's directory is taken from the working directory instead of
# inspect.getfile(inspect.currentframe()): inside a Jupyter kernel the frame's
# file name is synthetic and, depending on the kernel version, may point into
# a temporary directory, so it is not a reliable way to locate the notebook.
# The relative "..\\inputs" paths used further down already assume that the
# kernel runs from the notebook's own directory.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

In [None]:
import pandas as pd
import itertools, collections
import time
import numpy as np

from _code import DeepLogModel
from _code import generator
from _code import seed
from _code import trie

In [None]:
### Parameters
# NOTE(review): the brace-delimited fields below look like str.format
# placeholders (positional argument 0 indexed by key) that are substituted
# before the notebook is executed -- TODO confirm against the generating script.
# Keep literal curly braces out of the comments in this cell for that reason.
#
# _seed            value handed to seed.seed in the following cell
# subdir           sub-directory name used for this run's input and result files
# vocabulary_size  forwarded to generator.make_vocabulary and to the model
# num_patterns     forwarded to generator.generate_patterns
_seed={0[seed]}
subdir = "{0[subdir]}"
vocabulary_size = {0[vocabulary_size]}
num_patterns = {0[num_patterns]}

In [None]:
# Initialise the project's seeding helper with the run's seed; presumably this
# seeds the random sources used by the generator and the model.
seed.seed(_seed)

In [None]:
# Build the synthetic data set step by step: vocabulary, then workflow
# patterns, then the training text and the test sessions.  The calls are kept
# in their original order; presumably they draw from the seeded random state.
vocabulary = generator.make_vocabulary(vocabulary_size=vocabulary_size)
patterns = generator.generate_patterns(
    num_patterns=num_patterns,
    vocabulary=vocabulary,
    min_pattern_size=3,
    max_pattern_size=7,
)

# g and h values of the pattern set as computed by the trie helpers; trie_h is
# reused later when the model is constructed.
trie_g = trie.calc_g_value(patterns)
trie_h = trie.calc_h_value(patterns)
print("trie-g:", trie_g)
print("trie-h:", trie_h)

# Training text is generated with anomaly_ratio=0.00, the test sessions with 0.1.
text_train, marks_train = generator.generate_text(
    patterns,
    text_size=50000,
    anomaly_ratio=0.00,
    vocabulary=vocabulary,
)
tests = generator.generate_tests(
    patterns,
    vocabulary,
    n=1000,
    text_size=4,
    anomaly_ratio=0.1,
)

In [None]:
import json, os

def write_inputs(subdir, vocabulary_size, vocabulary, patterns, text_train, marks_train, tests):
    """Write the generated data set as three JSON files into the run's input directory.

    The directory is created if it does not exist yet.  Files written:

    * ``v-wf.json``  -- keys ``vocabulary_size``, ``vocabulary`` and ``workflows``
      (the latter holds *patterns*)
    * ``train.json`` -- keys ``text_train`` and ``marks_train``
    * ``tests.json`` -- key ``tests``

    Args:
        subdir: name of the run's sub-directory below the inputs directory.
        vocabulary_size: stored under ``vocabulary_size``.
        vocabulary: stored under ``vocabulary``.
        patterns: stored under ``workflows``.
        text_train: stored under ``text_train``.
        marks_train: stored under ``marks_train``.
        tests: stored under ``tests``.

    Raises:
        OSError: if the directory or one of the files cannot be created.
        TypeError: if a value cannot be serialised by ``json.dump``.
    """
    # NOTE(review): the literal uses a Windows path separator; on POSIX systems a
    # backslash is an ordinary file-name character.  Kept unchanged because
    # read_inputs and write_outputs build the very same path.
    dir_ = os.path.join("..\\inputs", subdir)
    os.makedirs(dir_, exist_ok=True)

    payloads = (
        ("v-wf.json", {
            "vocabulary_size": vocabulary_size,
            "vocabulary": vocabulary,
            "workflows": patterns,
        }),
        ("train.json", {"text_train": text_train, "marks_train": marks_train}),
        ("tests.json", {"tests": tests}),
    )
    for file_name, payload in payloads:
        # The context manager closes (and thereby flushes) the file before the
        # next one is opened, instead of relying on garbage collection.
        with open(os.path.join(dir_, file_name), "w") as fh:
            json.dump(payload, fh)

def read_inputs(subdir):
    """Load the three JSON files of a run and restore the Python types lost in JSON.

    Reads ``v-wf.json``, ``train.json`` and ``tests.json`` from the run's input
    directory.  JSON has neither tuples nor integer object keys, so the
    workflow patterns are converted back to tuples and the ``tests`` mapping
    gets integer keys and tuple values again.

    Args:
        subdir: name of the run's sub-directory below the inputs directory.

    Returns:
        A tuple ``(vocabulary_size, vocabulary, patterns, text_train,
        marks_train, tests)`` where ``patterns`` is a list of tuples and
        ``tests`` maps ``int`` keys to tuples.

    Raises:
        OSError: if one of the files cannot be opened.
        KeyError: if an expected key is missing from a file.
    """
    # NOTE(review): Windows-style separator, see write_inputs which builds the
    # same path; kept unchanged so both functions keep agreeing.
    dir_ = os.path.join("..\\inputs", subdir)

    def _load(file_name):
        # Close each handle as soon as its content has been parsed.
        with open(os.path.join(dir_, file_name)) as fh:
            return json.load(fh)

    j1 = _load("v-wf.json")
    j2 = _load("train.json")
    j3 = _load("tests.json")

    vocabulary_size = j1["vocabulary_size"]
    vocabulary = j1["vocabulary"]
    patterns = [tuple(p) for p in j1["workflows"]]

    text_train = j2["text_train"]
    marks_train = j2["marks_train"]

    # JSON object keys are always strings; turn them back into ints.
    tests = {int(k): tuple(v) for k, v in j3["tests"].items()}

    return vocabulary_size, vocabulary, patterns, text_train, marks_train, tests

# Save the generated data set to disk; the next cell reads it back and checks it.
write_inputs(
    subdir=subdir,
    vocabulary_size=vocabulary_size,
    vocabulary=vocabulary,
    patterns=patterns,
    text_train=text_train,
    marks_train=marks_train,
    tests=tests,
)

In [None]:
# Round-trip check: reload what was just written and verify that every piece
# compares equal to the in-memory original (including the tuple / int-key
# conversions done by read_inputs).
(
    ret_vocabulary_size,
    ret_vocabulary,
    ret_patterns,
    ret_text_train,
    ret_marks_train,
    ret_tests,
) = read_inputs(subdir)

assert ret_vocabulary_size == vocabulary_size
assert ret_vocabulary == vocabulary
assert ret_patterns == patterns
assert ret_text_train == text_train
assert ret_marks_train == marks_train
assert ret_tests == tests


In [None]:
# Construct the DeepLog model with h set to one more than the trie h value,
# build it with two LSTM layers of size 64 and fit it for one epoch on the
# training text.
deep_log_model = DeepLogModel.DeepLogModel(
    h=trie_h + 1,
    n=vocabulary_size,
    vocabulary=vocabulary,
)
deep_log_model.build(num_lstm_layers=2, lstm_size=64)
deep_log_model.fit(text_train, epochs=1)

In [None]:
# Run the monitor on every test session for every g from 0 up to and including
# vocabulary_size, collecting one (test id, g, result) tuple per run, and
# report the wall-clock time of the whole sweep.
b = time.time()
entries = []
for k, (text_test, text_marks, anomaly) in tests.items():
    for g in range(vocabulary_size + 1):
        res = deep_log_model.monitor_session(text_test, text_marks, g=g)
        entries.append((k, g, res))
e = time.time()
print("time:", round(e - b, 3), "seconds")

In [None]:
# Long format: one row per (test id, g) pair with the monitor's result.
df = pd.DataFrame(entries, columns=["i", "g", "status"])

# Wide format: test ids as rows, g values as columns.  Every (i, g) pair is
# produced exactly once by the evaluation loop, so the "sum" aggregation just
# passes the single status value through.
ddf = df.pivot_table(
    index="i",
    columns="g",
    values="status",
    aggfunc="sum",
)
def calc(s):
    """Compute classification metrics from an iterable of outcome labels.

    Counts the labels ``"TP"``, ``"TN"``, ``"FP"`` and ``"FN"`` in *s* (any
    other value is ignored) and derives precision, recall, accuracy and F1.
    A small epsilon is added to every denominator so that empty or one-sided
    inputs yield 0.0 instead of raising ``ZeroDivisionError``.

    Args:
        s: iterable of hashable labels, e.g. one column of the pivot table.

    Returns:
        dict with the keys ``prec``, ``rec``, ``acc``, ``f1``, ``TP``, ``TN``,
        ``FP`` and ``FN`` (in that order).
    """
    eps = 1e-9
    counts = collections.Counter(s)
    # A Counter reports 0 for labels that never occurred.
    TP, TN, FP, FN = (counts[label] for label in ("TP", "TN", "FP", "FN"))

    prec = TP / (TP + FP + eps)
    rec = TP / (TP + FN + eps)
    acc = (TP + TN) / (TP + TN + FP + FN + eps)
    f1 = 2*(prec*rec)/(prec+rec+eps)

    return dict(prec=prec, rec=rec, acc=acc, f1=f1, TP=TP, TN=TN, FP=FP, FN=FN)
    
# Evaluate calc on every column of the pivot table (one column per g) and
# stack the resulting metric dicts into a table with one row per column.
e = ddf.apply(calc, axis=0)
e1 = pd.DataFrame([metrics for metrics in e.values])

print("measure results for each g-value")
e1

In [None]:

def write_outputs(subdir, df, ddf, e1):
    """Save the three result tables as CSV files in the run's input directory.

    The directory is created if it does not exist yet, so the function also
    works when it is called before the input files have been written.

    Args:
        subdir: name of the run's sub-directory below the inputs directory.
        df: object with a ``to_csv`` method, written to ``DL-results.csv``.
        ddf: object with a ``to_csv`` method, written to ``DL-resuls_pivot.csv``.
        e1: object with a ``to_csv`` method, written to ``DL-resutls_metrics.csv``.

    Raises:
        OSError: if the directory or one of the files cannot be created.
    """
    # The backslash is escaped explicitly: an unescaped backslash followed by
    # "i" is an invalid escape sequence that newer Python versions warn about.
    # The resulting string is the same one write_inputs and read_inputs use.
    out_dir = os.path.join("..\\inputs", subdir)
    os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): "resuls" / "resutls" look like typos of "results"; the file
    # names are kept byte-for-byte because other tooling may read them.
    df.to_csv(os.path.join(out_dir, "DL-results.csv"))
    ddf.to_csv(os.path.join(out_dir, "DL-resuls_pivot.csv"))
    e1.to_csv(os.path.join(out_dir, "DL-resutls_metrics.csv"))

# Persist the raw results, the pivot table and the metrics table for this run.
write_outputs(
    subdir=subdir,
    df=df,
    ddf=ddf,
    e1=e1,
)

In [None]:
# Show the metrics row with the highest accuracy and the one with the highest
# F1 score.  idxmax returns the index *label* of the maximum, which is what
# the label-based .loc expects; argmax returns a position and only picks the
# same row as long as the index is the default 0..n-1 range.
display(e1.loc[[e1["acc"].idxmax()]])
display(e1.loc[[e1["f1"].idxmax()]])