In [1]:
import pandas as pd
from timeit import default_timer as timer
from tqdm import tqdm

from vmft_lad.BaseDetector import BaseDetector
from heap_providers.SubsequenceMaxHeap import SubsequenceMaxHeap
from log_inference_providers.BartLarge.BartLargeZeroShotLogInference import BartLargeZeroShotLogInference


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Detector settings ------------------------------------------------------
window_size = 4
probationary_period = 150
anomaly_threshold = 0.2
subsequence_match_threshold = 0.15

# Dict that is handed to the inference provider as its `bart_cache` argument.
bart_cache = dict()

# --- Input data -------------------------------------------------------------
# Load the CSV (first row is the header) and pull the "value" column out as a
# plain Python list.
dataset_df = pd.read_csv("./test_data/benign.csv", header=0)
data = dataset_df.loc[:, "value"].to_list()

In [3]:
# Build the detector from its two providers and the settings defined in the
# configuration cell.
max_heap_provider = SubsequenceMaxHeap()
inference_provider = BartLargeZeroShotLogInference(bart_cache=bart_cache)

detector_settings = {
    "inferenceProvider": inference_provider,
    "maxHeapProvider": max_heap_provider,
    "data": data,
    "windowSize": window_size,
    "probationaryPeriod": probationary_period,
    "subsequenceMatchThreshold": subsequence_match_threshold,
    "anomalyThreshold": anomaly_threshold,
}
model = BaseDetector(**detector_settings)

n_records = len(data)
# Every record gets label 0 (presumably because the input file is the benign
# dataset - confirm before reusing this cell with other data).
labels = [0 for _ in range(n_records)]
anomaly_scores, record_handle_times = [], []

print("Starting detection...")
# Feed the records to the detector one index at a time, timing each call.
# Wrap the range in tqdm(...) if a progress bar is wanted.
for record_idx in range(n_records):
    started_at = timer()
    anomaly_scores.append(model.handleRecord(record_idx))
    elapsed = timer() - started_at
    record_handle_times.append(elapsed)
print("Finished detection.")

# Result frame: a copy of the input with one extra column per collected series.
model_out_df = dataset_df.assign(
    anomaly_score=anomaly_scores,
    label=labels,
    record_handle_time=record_handle_times,
)

Starting detection...
Anomaly detected at window  150  with score  0.2668776371308017
Log templates: 
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
TP_FOUND:  [ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
tap1: Gained <:*:>
tap2: Gained <:*:>
Anomaly detected at window  210  with score  0.20048115477145148
Log templates: 
Accepted password for <:*:> from <:IP:> port <:NUM:> ssh2
Reached target <:*:> <:*:>
Reached target <:*:> <:*:> <:*:>
Listening on <:*:> <:*:> <:*:> <:*:> daemon.
Anomaly detected at window  236  with score  0.26476793248945146
Log templates: 
tap1: Gained <:*:>
TP_FOUND:  [ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
tap2: Gained <:*:>
[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> 

In [4]:
# Mean of the per-record timings, scaled by 1000 for display in milliseconds.
mean_handle_time = model_out_df["record_handle_time"].mean()
avg_time = mean_handle_time * 1000
print(f"Average record handle time: {avg_time} ms")

Average record handle time: 8.16176368098167 ms


In [5]:
# Dump the dict that was passed to the inference provider as `bart_cache`,
# to inspect what it holds after the detection run.
print(str(bart_cache))

{'[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state': 'unsure', '[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.': 'fault', 'tap1: Gained <:*:>': 'unsure', 'tap2: Gained <:*:>': 'unsure', 'Accepted password for <:*:> from <:IP:> port <:NUM:> ssh2': 'normal', 'Reached target <:*:> <:*:>': 'normal', 'Reached target <:*:> <:*:> <:*:>': 'normal', 'Listening on <:*:> <:*:> <:*:> <:*:> daemon.': 'unsure', '[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> link becomes ready': 'unsure', 'pam <:*:> authentication failure; <:*:> uid=<:NUM:> euid=<:NUM:> <:*:> <:*:> <:*:> <:*:>': 'fault', 'Failed password for <:*:> from <:IP:> port <:NUM:> ssh2': 'fault', 'message repeated <:NUM:> times: [ Failed password for <:*:> from <:IP:> port <:NUM:> ssh2]': 'fault', 'Connection <:*:> by <:*:> user <:*:> <:IP:> port <:NUM:> [preauth]': 'unsure', 'PAM <:NUM:> more 

In [6]:
import plotly.graph_objects as go

# Plot the raw "value" series, the anomaly score and a shaded band marking the
# probationary period in one figure.
#
# `probationary_period` is deliberately NOT re-declared here: it is reused from
# the configuration cell so the shaded band always matches the value the
# detector was constructed with.

# Height of the shaded band: the largest value in the plotted series.
scaling_factor = model_out_df["value"].max()

# The band spans time steps 0..probationary_period inclusive. `y` is derived
# from `x` so both sequences always have the same number of points.
training_x = list(range(probationary_period + 1))
training_y = [scaling_factor] * len(training_x)

fig = go.Figure()
fig.add_trace(go.Scatter(y=model_out_df["value"], name='Log key',
              line=go.scatter.Line(color='rgba(0,0,190, 0.2)')))
fig.add_trace(go.Scatter(y=model_out_df["anomaly_score"], name='Anomaly score',
            line_color='rgb(255, 0, 0)'))

# mode='none' hides the line and markers; only the fill down to y=0 is drawn.
fig.add_trace(go.Scatter(x=training_x, y=training_y,
                         name='Training region', fill='tozeroy', mode='none',
                         line_color='rgba(0, 0, 0, 0.4)',
                         fillcolor='rgba(0, 0, 0, 0.4)'
                         ))

fig.update_yaxes(title_text="Log key")
fig.update_xaxes(title_text="Time step")

fig.show()