In [1]:
from typing import List
import pandas as pd
from timeit import default_timer as timer
from tqdm import tqdm
import plotly.graph_objects as go

from vmft_lad.BaseDetector import BaseDetector
from heap_providers.SubsequenceMaxHeap import SubsequenceMaxHeap
from log_inference_providers.NullLogInferenceProvider import NullLogInferenceProvider
from log_inference_providers.BartLarge.FastBartLargeZeroShotLogInference import FastBartLargeZeroShotLogInference
from log_inference_providers.Falcon7B.Falcon7BFewShotLogInference import FastFalcon7BFewShotLogInference
from log_inference_providers.GPT3.FastGPT3FewShotLogInference import FastGPT3FewShotLogInference
from log_inference_providers.FakeLogInferenceProvider import FakeLogInferenceProvider
from log_inference_providers.EmertonMonarch7B.FastEmertonMonarch7BFewShotLogInference import FastEmertonMonarch7BFewShotLogInference

In [2]:
# Detector hyper-parameters. test_vmft_lad forwards each of them to BaseDetector.
window_size = 4                    # forwarded as windowSize
anomaly_threshold = 0.15           # forwarded as anomalyThreshold
subsequence_match_threshold = 0.1  # forwarded as subsequenceMatchThreshold
# Forwarded as probationaryPeriod; test_vmft_lad also uses it to size the
# shaded "Training region" band in the plot.
probationary_period = 150


# Registry of the available log-inference back ends, keyed by a short name.
inference_providers = dict(
    Null=NullLogInferenceProvider,
    Fake=FakeLogInferenceProvider,
    BartLarge=FastBartLargeZeroShotLogInference,
    Falcon7B=FastFalcon7BFewShotLogInference,
    GPT3=FastGPT3FewShotLogInference,
    EmertonMonarch7B=FastEmertonMonarch7BFewShotLogInference,
)

# Select a back end by name and instantiate it once (no constructor arguments);
# the resulting object is shared by every test_vmft_lad call in this notebook.
inference_provider_name = "EmertonMonarch7B"
inference_provider = inference_providers[inference_provider_name]()

In [3]:
def test_vmft_lad(dataset_df: pd.DataFrame):
    """Run the VMFT-LAD detector over a dataset and plot its anomaly scores.

    A ``BaseDetector`` is built from the module-level configuration
    (``inference_provider``, ``window_size``, ``probationary_period``,
    ``subsequence_match_threshold``, ``anomaly_threshold``) and a fresh
    ``SubsequenceMaxHeap``. ``handleRecord(i)`` is then called once per record
    index, each call is timed, the average handling time is printed, and a
    plotly figure is shown.

    Args:
        dataset_df: Frame with a ``"value"`` column. That column is handed to
            the detector as a plain list and drawn as the "Log key" trace.
            The frame itself is not modified; results go into a copy.

    Returns:
        None. The function only prints progress/timing and displays a figure.
    """
    data = dataset_df["value"].tolist()
    max_heap_provider = SubsequenceMaxHeap()
    model = BaseDetector(
        inferenceProvider=inference_provider,
        maxHeapProvider=max_heap_provider,
        data=data,
        windowSize=window_size,
        probationaryPeriod=probationary_period,
        subsequenceMatchThreshold=subsequence_match_threshold,
        anomalyThreshold=anomaly_threshold)

    model_out_df = dataset_df.copy()
    anomaly_scores = []
    labels = [0]*len(data)  # placeholder column: every record is labelled 0
    record_handle_times = []
    print("Starting detection...")
    for i in tqdm(range(len(data))):
        # Time only the detector call itself, not the list bookkeeping.
        start = timer()
        anomaly_scores.append(model.handleRecord(i))
        end = timer()
        record_handle_times.append(end - start)

    print("Finished detection.")

    # Add the anomaly scores to the dataframe
    model_out_df["anomaly_score"] = anomaly_scores
    model_out_df["label"] = labels
    model_out_df["record_handle_time"] = record_handle_times

    # Timer values are in seconds; report milliseconds.
    avg_time = model_out_df['record_handle_time'].mean() * 1000
    print(f"Average record handle time: {avg_time} ms")

    # Stretch the scores by the largest log-key value so both traces can share
    # one y-axis (presumably the raw scores lie in [0, 1] - TODO confirm).
    scaling_factor = model_out_df["value"].max()
    scaled_scores = [score * scaling_factor for score in anomaly_scores]

    fig = go.Figure()
    fig.add_trace(go.Scatter(y=model_out_df["value"], name='Log key',
                line=go.scatter.Line(color='rgba(0,0,190, 0.2)')))
    fig.add_trace(go.Scatter(y=scaled_scores, name='Anomaly score',
                line_color='rgb(11, 132, 165)'))

    # Shaded band over the probationary (training) records. x and y must have
    # the same number of points; the original passed probationary_period + 1
    # x values against probationary_period y values. The band is also clamped
    # so it never extends past the end of a short dataset.
    training_len = min(probationary_period, len(data))
    fig.add_trace(go.Scatter(x=list(range(training_len)),
                            y=[scaling_factor] * training_len,
                            name='Training region', fill='tozeroy', mode='none',
                            line_color='rgba(0, 0, 0, 0.4)',
                            fillcolor='rgba(0, 0, 0, 0.4)'
                            ))

    fig.update_yaxes(title_text="Log key")
    fig.update_xaxes(title_text="Time step")

    fig.show()

In [4]:
# Load ./test_data/benign.csv (first row is the header) and run the detector on it.
benign_df = pd.read_csv(
    "./test_data/benign.csv",
    header=0,
)
test_vmft_lad(dataset_df=benign_df)

Starting detection...


 11%|█▏        | 748/6520 [00:00<00:00, 7432.59it/s]

Anomaly detected at window  150  with score  0.2668776371308017
Log templates: 
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
tap1: Gained <:*:>
tap2: Gained <:*:>
Anomaly detected at window  160  with score  0.1506849315068493
Log templates: 
<:*:> Lost carrier
Received disconnect from <:IP:> port <:NUM:>:<:NUM:>: disconnected by user
Disconnected from user <:*:> <:IP:> port <:NUM:>
Session <:NUM:> logged out. Waiting for processes to exit.
Anomaly detected at window  175  with score  0.1634304207119741
Log templates: 
<:*:> Succeeded.
<:*:> Succeeded.
Reached target <:*:> <:*:> <:*:>
<:*:> Succeeded.
Anomaly detected at window  180  with score  0.16978066612510154
Log templates: 
<:*:> Succeeded.
Closed GnuPG cryptographic agent and passphrase cache (access for web browsers).
<:*:> Succeeded.
<:*

 23%|██▎       | 1492/6520 [00:00<00:01, 4237.36it/s]

 with score  0.5437158469945356
Log templates: 
TP_FOUND:  rtnl: received neighbor for link '<:NUM:>' we don't know about, ignoring.
[<:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[<:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[<:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
Anomaly detected at window  1251  with score  0.2
Log templates: 
[<:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[<:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[<:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
[<:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
Anomaly detected at window  1252  with score  0.2
Log templates: 
[<:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[<:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
[<:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
[<:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  1253  with score  0.5375335120643432
Log templates: 
[<:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
[<:NUM

 38%|███▊      | 2480/6520 [00:00<00:01, 3869.60it/s]

Anomaly detected at window  2003  with score  0.16167664670658682
Log templates: 
<:*:> Link UP
tap1: Gained <:*:>
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> link becomes ready
Anomaly detected at window  2004  with score  0.3211629479377958
Log templates: 
tap1: Gained <:*:>
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> link becomes ready
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  2005  with score  0.16534740545294635
Log templates: 
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/la

 44%|████▍     | 2899/6520 [00:00<00:01, 3212.06it/s]

[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> link becomes ready
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  2649  with score  0.16534740545294635
Log templates: 
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> link becomes ready
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  2681  with score  0.21446179129005752
Log templates: 
[ <:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
TP_FOUND:  <:*:> Link DOWN
TP_FOUND:  rtnl: received neighbor for link '<:NUM:>' we don't know about, ignoring.
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  2700  with score  0.1916376306620209
Log templates: 
TP_FOUND:  CR0=<:HEX:>
TP_FOUND:  CR

 59%|█████▉    | 3868/6520 [00:00<00:00, 3942.04it/s]

Anomaly detected at window  3410  with score  0.2668776371308017
Log templates: 
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
tap1: Gained <:*:>
tap2: Gained <:*:>
Anomaly detected at window  3420  with score  0.1506849315068493
Log templates: 
<:*:> Lost carrier
Received disconnect from <:IP:> port <:NUM:>:<:NUM:>: disconnected by user
Disconnected from user <:*:> <:IP:> port <:NUM:>
Session <:NUM:> logged out. Waiting for processes to exit.
Anomaly detected at window  3435  with score  0.1634304207119741
Log templates: 
<:*:> Succeeded.
<:*:> Succeeded.
Reached target <:*:> <:*:> <:*:>
<:*:> Succeeded.
Anomaly detected at window  3440  with score  0.16978066612510154
Log templates: 
<:*:> Succeeded.
Closed GnuPG cryptographic agent and passphrase cache (access for web browsers).
<:*:> Succeeded.

 71%|███████▏  | 4653/6520 [00:01<00:00, 3176.04it/s]

Anomaly detected at window  4294  with score  0.15489989462592202
Log templates: 
<:*:> Succeeded.
<:*:> Succeeded.
Removed slice system-modprobe.slice.
TP_FOUND:  action 'action-<:NUM:>-builtin:omfwd' resumed (module 'builtin:omfwd') [v8.<:NUM:>.<:NUM:> try https://www.rsyslog.com/e/<:NUM:> ]
Anomaly detected at window  4295  with score  0.1819021237303786
Log templates: 
<:*:> Succeeded.
Removed slice system-modprobe.slice.
TP_FOUND:  action 'action-<:NUM:>-builtin:omfwd' resumed (module 'builtin:omfwd') [v8.<:NUM:>.<:NUM:> try https://www.rsyslog.com/e/<:NUM:> ]
Listening on GnuPG cryptographic agent (ssh-agent emulation).
Anomaly detected at window  4300  with score  0.15335463258785942
Log templates: 
Listening on D-Bus User Message Bus Socket.
New session <:NUM:> of user <:*:>
Reached target <:*:> <:*:>
Reached target <:*:> <:*:> <:*:>
Anomaly detected at window  4301  with score  0.18018967334035826
Log templates: 
New session <:NUM:> of user <:*:>
Reached target <:*:> <:*:>
Rea

 95%|█████████▍| 6185/6520 [00:01<00:00, 4366.14it/s]

Anomaly detected at window  5144  with score  0.1570964247020585
Log templates: 
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
TP_FOUND:  CR0=<:HEX:>
TP_FOUND:  CR0=<:HEX:>
TP_FOUND:  CR0=<:HEX:>
Anomaly detected at window  5146  with score  0.18466898954703834
Log templates: 
TP_FOUND:  CR0=<:HEX:>
TP_FOUND:  CR0=<:HEX:>
TP_FOUND:  CR0=<:HEX:>
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  5147  with score  0.16809933142311365
Log templates: 
TP_FOUND:  CR0=<:HEX:>
TP_FOUND:  CR0=<:HEX:>
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
TP_FOUND:  ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
Anomaly detected at window  5152  with score  0.18915343915343916
Log templates: 
tap2: Gained <:*:>
TP_FOUND:  unknown command <:NUM:>
[ <:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  5153  with score  0.1692876965772433

100%|██████████| 6520/6520 [00:01<00:00, 3865.30it/s]


Anomaly detected at window  6198  with score  0.16167664670658682
Log templates: 
<:*:> Link UP
tap1: Gained <:*:>
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> link becomes ready
Anomaly detected at window  6199  with score  0.3211629479377958
Log templates: 
tap1: Gained <:*:>
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.
[ <:NUM:>.<:NUM:>] IPv6: ADDRCONF(NETDEV CHANGE): <:*:> link becomes ready
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  6200  with score  0.16534740545294635
Log templates: 
[ <:NUM:>.<:NUM:>] L1TF CPU bug present and SMT on, data leak possible. See CVE-<:NUM:>-<:NUM:> and https://www.kernel.org/doc/html/la

In [5]:
# Load ./test_data/benign2.csv (first row is the header) and run the detector on it.
# Note: this rebinds benign_df, replacing the frame loaded from benign.csv.
benign_df = pd.read_csv(
    "./test_data/benign2.csv",
    header=0,
)
test_vmft_lad(dataset_df=benign_df)

Starting detection...


100%|██████████| 554/554 [00:00<00:00, 6660.51it/s]

Anomaly detected at window  218  with score  0.16176470588235295
Log templates: 
tap1: Gained <:*:>
[ <:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
TP_FOUND:  rtnl: received neighbor for link '<:NUM:>' we don't know about, ignoring.
TP_FOUND:  rtnl: received neighbor for link '<:NUM:>' we don't know about, ignoring.
Anomaly detected at window  241  with score  0.16583333333333333
Log templates: 
TP_FOUND:  <:*:> Link DOWN
<:*:> Lost carrier
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>.<:NUM:>] EXT4-fs <:*:> mounted filesystem with ordered data mode. Opts: (null)
Anomaly detected at window  258  with score  0.17291414752116083
Log templates: 
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
TP_FOUND:  <:*:> Link DOWN
TP_FOUND:  rtnl: received neighbor for link '<:NUM:>' we don't know about, ignoring.
Anomaly detected at window  309  with score  0.15134803921568626
Log templates: 
<:*:> Lost carrier
T




In [6]:
# Load ./test_data/hdd.csv (first row is the header) and run the detector on it.
hdd_df = pd.read_csv(
    "./test_data/hdd.csv",
    header=0,
)
test_vmft_lad(dataset_df=hdd_df)


Starting detection...


  0%|          | 0/1141 [00:00<?, ?it/s]

Anomaly detected at window  168  with score  0.19211102994886778
Log templates: 
<:*:> Succeeded.
<:*:> Succeeded.
Closed GnuPG cryptographic agent and passphrase cache (access for web browsers).
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  169  with score  0.19759036144578312
Log templates: 
<:*:> Succeeded.
Closed GnuPG cryptographic agent and passphrase cache (access for web browsers).
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
Anomaly detected at window  171  with score  0.17391304347826086
Log templates: 
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
TP_FOUND:  rtnl: received neighbor for link '<:NUM:>' we don't know about, ignoring.
TP_FOUND:  rtnl: received neighbor for link '<:NUM:>' we don't know about, ignoring.
Anomaly detected at window  172  with score  0.22036474164133737
Log templates: 
[ <:NUM:>.<:NUM:>]

 59%|█████▊    | 670/1141 [00:00<00:00, 6633.53it/s]

 [ <:NUM:>.<:NUM:>] Buffer I/O error on dev <:*:> logical block <:NUM:>, async page read
Anomaly detected at window  637  with score  0.1714123006833713
Log templates: 
TP_FOUND:  [ <:NUM:>.<:NUM:>] Buffer I/O error on dev <:*:> logical block <:NUM:>, async page read
TP_FOUND:  [ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] <:*:> <:*:> <:*:> <:*:> <:*:> <:*:> <:*:>
TP_FOUND:  [ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] <:*:> <:*:> <:*:> <:*:> <:*:> <:*:>
TP_FOUND:  [ <:NUM:>.<:NUM:>] blk update request: critical medium error, dev sdb, sector <:NUM:> op <:HEX:>:(READ) flags <:HEX:> phys seg <:NUM:> prio class <:NUM:>
Anomaly detected at window  638  with score  0.17027334851936218
Log templates: 
TP_FOUND:  [ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] <:*:> <:*:> <:*:> <:*:> <:*:> <:*:> <:*:>
TP_FOUND:  [ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] <:*:> <:*:> <:*:> <:*:> <:*:> <:*:>
TP_FOUND:  [ <:NUM:>.<:NUM:>] blk update r

100%|██████████| 1141/1141 [00:00<00:00, 3984.16it/s]

TP_FOUND:  [ <:NUM:>.<:NUM:>] blk update request: critical medium error, dev sdb, sector <:NUM:> op <:HEX:>:(READ) flags <:HEX:> phys seg <:NUM:> prio class <:NUM:>
[ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] tag#<:NUM:> CDB: Read(<:NUM:>) <:NUM:> <:NUM:> <:NUM:> <:NUM:> <:NUM:> <:*:> <:NUM:> <:NUM:> <:NUM:> <:NUM:>
TP_FOUND:  [ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] <:*:> <:*:> <:*:> <:*:> <:*:> <:*:>
TP_FOUND:  [ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] <:*:> <:*:> <:*:> <:*:> <:*:> <:*:> <:*:>
Anomaly detected at window  1076  with score  0.16343919442292795
Log templates: 
TP_FOUND:  [ <:NUM:>.<:NUM:>] sd <:NUM:>:<:NUM:>:<:NUM:>:<:NUM:>: [sdb] <:*:> <:*:> <:*:> <:*:> <:*:> <:*:> <:*:>
[ <:NUM:>.<:NUM:>] device <:*:> <:*:> promiscuous mode
TP_FOUND:  <:*:> Link DOWN
<:*:> Lost carrier
Anomaly detected at window  1080  with score  0.17311827956989248
Log templates: 
[ <:NUM:>.<:NUM:>] br0: port <:*:> entered <:*:> state
[ <:NUM:>


