In [1]:
import time
time.gmtime()

time.struct_time(tm_year=2021, tm_mon=9, tm_mday=21, tm_hour=13, tm_min=24, tm_sec=30, tm_wday=1, tm_yday=264, tm_isdst=0)

In [2]:
import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir) 

In [3]:
import pandas as pd
import itertools
import collections

from _code import DeepLogModel
from _code import generator
from _code import trie
from _code import seed

In [4]:
seed.seed(0)

In [5]:
vocabulary_size = 20
num_patterns = 20
vocabulary = generator.make_vocabulary(vocabulary_size=vocabulary_size)
patterns = generator.generate_patterns(num_patterns=num_patterns, vocabulary=vocabulary, min_pattern_size=3, max_pattern_size=7)

trie_g = trie.calc_g_value(patterns)
trie_h = trie.calc_h_value(patterns)
print("trie-g:", trie_g)
print("trie-h:", trie_h)

trie-g: 14
trie-h: 3


In [6]:
text_train, marks_train = generator.generate_text(patterns, text_size=50000, anomaly_ratio=0.00, vocabulary=vocabulary) 

In [7]:
deep_log_model = DeepLogModel.DeepLogModel(h=trie_h+1, n=vocabulary_size, vocabulary=vocabulary)
deep_log_model.build(num_lstm_layers=2, lstm_size=64)
deep_log_model.fit(text_train,epochs=1)

<_code.DeepLogModel.HistoryLoss at 0x1fa1a40acd0>

## README
1. After the training we evaluates the network over tests, to see the current best performnace.
2. We use feedback to train the network for a new workflow with different contexts
3. We evaluate the updated network again over the previous tests.
4. The decline in performance results is actually the degree of forgetfulness of the network.

In [8]:
tests_legit = generator.generate_tests(patterns, vocabulary, n=1000, text_size = 4, anomaly_ratio=0.05)

In [9]:
b = time.time()
entries = list()
for k, (text_test, text_marks, anomaly) in tests_legit.items():
    for g in range(0, vocabulary_size+1):
#     for g in [trie_g-2, trie_g-1,trie_g,trie_g+1,trie_g+2]:
        res = deep_log_model.monitor_session(text_test, text_marks, g=g)
        entry = (k,g,res)
        entries.append(entry)
e = time.time()
print("time:", round(e-b,3), "seconds")

time: 1388.201 seconds


In [10]:
df1 = pd.DataFrame(entries, columns = ["i","g","status"])
ddf1 = df1.pivot_table(index="i", columns = "g", values = "status", aggfunc = "sum")
def calc(s):
    c = dict(collections.Counter(s))
    TP = c.get("TP", 0)
    TN = c.get("TN", 0)
    FP = c.get("FP", 0)
    FN = c.get("FN", 0)
    eps = 1e-9
    
    prec = TP / (TP + FP + eps)
    rec = TP / (TP + FN + eps)
    acc = (TP + TN) / (TP + TN + FP + FN + eps)
    f1 = 2*(prec*rec)/(prec+rec+eps)
    
    return {"prec" : prec, "rec" : rec, "acc" : acc, "f1" : f1, "TP" : TP, "TN" : TN, "FP" : FP, "FN" : FN}
    
e1=ddf1.apply(calc, axis=0)
e1=pd.DataFrame(list(e1.values))

print("measure results for each g-value")
e1

measure results for each g-value


Unnamed: 0,prec,rec,acc,f1,TP,TN,FP,FN
0,0.148,1.0,0.148,0.25784,148,0,852,0
1,0.148148,1.0,0.149,0.258065,148,1,851,0
2,0.15056,1.0,0.165,0.261715,148,17,835,0
3,0.157279,1.0,0.207,0.271809,148,59,793,0
4,0.174323,1.0,0.299,0.296891,148,151,701,0
5,0.182382,0.993243,0.34,0.308176,147,193,659,1
6,0.19016,0.966216,0.386,0.317778,143,243,609,5
7,0.195157,0.925676,0.424,0.322353,137,287,565,11
8,0.207752,0.905405,0.475,0.337957,134,341,511,14
9,0.222798,0.871622,0.531,0.354883,129,402,450,19


In [11]:
best_g = e1["f1"].argmax()
print("best-g", best_g)

best-g 14


In [12]:
new_workflows = generator.generate_patterns(num_patterns=1, vocabulary=vocabulary, min_pattern_size=3, max_pattern_size=7)
new_workflow = new_workflows[0]
assert new_workflow not in patterns



"""
Here we have to set the g for the feedback process.
We can either use the empricial best g for the original data, or the theoretical g for the updated data (trie_g2).
The theoretical g is typically higher than (or equal to) best_g and give worse results. 
So, to the benefit of DeepLog results, we are using the previous best_g for the feedback process.
DeepLog don't describe this part specifically.
"""

trie_g2 = trie.calc_g_value(patterns + [new_workflow])
print("trie-g2", trie_g2)
# if trie_g2 > best_g:
#     best_g = trie_g2 # 

trie-g2 14


In [13]:
new_marks = [1]*len(new_workflow)
new = dict()
for i in range(30): # this new workflow will typically appear in many different contexts
    before_text, before_marks = generator.generate_text(patterns, text_size=2, anomaly_ratio=0.00, vocabulary=vocabulary)
    after_text, after_marks = generator.generate_text(patterns, text_size=1, anomaly_ratio=0.00, vocabulary=vocabulary)
    new_text = before_text + list(new_workflow) + after_text
    new_marks = before_text + list(new_workflow) + after_text
    new[i] = (new_text, new_marks)

B = list()
for i, (new_text, new_marks) in new.items():
    B.append(deep_log_model.monitor_session(new_text, new_marks, g=best_g))
print("before", B)

num_feedbacks = 0

A1 = list()
for i, (new_text, new_marks) in new.items():
    res = deep_log_model.monitor_session(new_text, new_marks, g=best_g)
    if res == "FP":
        deep_log_model.train_feedback(new_text, new_marks, g = best_g)
        num_feedbacks += 1
    deep_log_model.train_feedback(new_text, new_marks, g = best_g)
    A1.append(deep_log_model.monitor_session(new_text, new_marks, g=best_g))
print("after", A1)

A = list()
for i, (new_text, new_marks) in new.items():
    A.append(deep_log_model.monitor_session(new_text, new_marks, g=best_g))

print("after all", A)

before ['FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP', 'FP']
after ['TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN']
after all ['TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'FP', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'FP', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN']


In [14]:
entries = list()
for k, (text_test, text_marks, anomaly) in tests_legit.items():
    for g in range(0, vocabulary_size+1):
        res = deep_log_model.monitor_session(text_test, text_marks, g=g)
        entry = (k,g,res)
        entries.append(entry)
# entries

In [15]:
df2 = pd.DataFrame(entries, columns = ["i","g","status"])
ddf2 = df2.pivot_table(index="i", columns = "g", values = "status", aggfunc = "sum")
def calc(s):
    c = dict(collections.Counter(s))
    TP = c.get("TP", 0)
    TN = c.get("TN", 0)
    FP = c.get("FP", 0)
    FN = c.get("FN", 0)
    eps = 1e-9
    
    prec = TP / (TP + FP + eps)
    rec = TP / (TP + FN + eps)
    acc = (TP + TN) / (TP + TN + FP + FN + eps)
    f1 = 2*(prec*rec)/(prec+rec+eps)
    
    return {"prec" : prec, "rec" : rec, "acc" : acc, "f1" : f1, "TP" : TP, "TN" : TN, "FP" : FP, "FN" : FN}
    
e=ddf2.apply(calc, axis=0)
e2=pd.DataFrame(list(e.values))

print("measure results for each g-value")
e2

measure results for each g-value


Unnamed: 0,prec,rec,acc,f1,TP,TN,FP,FN
0,0.148,1.0,0.148,0.25784,148,0,852,0
1,0.148148,1.0,0.149,0.258065,148,1,851,0
2,0.150101,1.0,0.162,0.261023,148,14,838,0
3,0.156448,1.0,0.202,0.270567,148,54,798,0
4,0.173913,1.0,0.297,0.296296,148,149,703,0
5,0.18398,0.993243,0.347,0.310454,147,200,652,1
6,0.192612,0.986486,0.386,0.322296,146,240,612,2
7,0.199721,0.966216,0.422,0.331019,143,279,573,5
8,0.204615,0.898649,0.468,0.333333,133,335,517,15
9,0.218371,0.851351,0.527,0.347586,126,401,451,22


In [16]:
e1.columns = ["1-"+c for c in e1.columns]
e2.columns = ["2-"+c for c in e2.columns]

In [17]:
df_1_2 = pd.concat([e1,e2],axis=1)
df_1_2["diff-f1"] = df_1_2["2-f1"] -  df_1_2["1-f1"]

In [18]:
df_1_2

Unnamed: 0,1-prec,1-rec,1-acc,1-f1,1-TP,1-TN,1-FP,1-FN,2-prec,2-rec,2-acc,2-f1,2-TP,2-TN,2-FP,2-FN,diff-f1
0,0.148,1.0,0.148,0.25784,148,0,852,0,0.148,1.0,0.148,0.25784,148,0,852,0,0.0
1,0.148148,1.0,0.149,0.258065,148,1,851,0,0.148148,1.0,0.149,0.258065,148,1,851,0,0.0
2,0.15056,1.0,0.165,0.261715,148,17,835,0,0.150101,1.0,0.162,0.261023,148,14,838,0,-0.000692
3,0.157279,1.0,0.207,0.271809,148,59,793,0,0.156448,1.0,0.202,0.270567,148,54,798,0,-0.001242
4,0.174323,1.0,0.299,0.296891,148,151,701,0,0.173913,1.0,0.297,0.296296,148,149,703,0,-0.000594
5,0.182382,0.993243,0.34,0.308176,147,193,659,1,0.18398,0.993243,0.347,0.310454,147,200,652,1,0.002278
6,0.19016,0.966216,0.386,0.317778,143,243,609,5,0.192612,0.986486,0.386,0.322296,146,240,612,2,0.004518
7,0.195157,0.925676,0.424,0.322353,137,287,565,11,0.199721,0.966216,0.422,0.331019,143,279,573,5,0.008666
8,0.207752,0.905405,0.475,0.337957,134,341,511,14,0.204615,0.898649,0.468,0.333333,133,335,517,15,-0.004624
9,0.222798,0.871622,0.531,0.354883,129,402,450,19,0.218371,0.851351,0.527,0.347586,126,401,451,22,-0.007297


In [19]:
best_g_new = df_1_2["2-f1"].argmax()
display(df_1_2.loc[[best_g_new]])
print("num_feedbacks:", num_feedbacks)

Unnamed: 0,1-prec,1-rec,1-acc,1-f1,1-TP,1-TN,1-FP,1-FN,2-prec,2-rec,2-acc,2-f1,2-TP,2-TN,2-FP,2-FN,diff-f1
14,1.0,0.472973,0.922,0.642202,70,852,0,78,0.957746,0.459459,0.917,0.621005,68,849,3,80,-0.021197


num_feedbacks: 3
