# Models for P2P-0.3-1 dataset

Two models:
1. Normal model without LTN
2. Model with LTN



In [1]:
import arrow
import socket
from sqlalchemy.orm import Session
from tqdm.notebook import tqdm
import time
# time.clock was removed from the standard library in Python 3.8. Alias it to
# time.time so that code still calling time.clock() does not raise
# AttributeError — presumably needed by a dependency imported below;
# TODO confirm which one and drop this shim once it is updated.
time.clock = time.time

from april import Evaluator
from april.anomalydetection import *
from april.database import EventLog
from april.database import Model
from april.database import get_engine
from april.dataset import Dataset
from april.fs import DATE_FORMAT
from april.fs import get_event_log_files

In [2]:
import tensorflow as tf
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it up front.
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
if physical_devices:
    print("GPU found")
    try:
        # The memory-growth setting has to be the same on every visible GPU,
        # so configure all of them rather than only the first one.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be changed before the GPUs are initialized;
        # re-running this cell later in the session ends up here.
        print(f"Could not set memory growth: {err}")
    else:
        print("Memory growth set")
else:
    print("No GPU found")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU found
Memory growth set


In [3]:
def fit_and_save(dataset_name, ad, ad_kwargs=None, fit_kwargs=None):
    """Train an anomaly detector on one dataset, save it, and record it in the database.

    Parameters
    ----------
    dataset_name : str
        Name handed to ``Dataset`` and ``EventLog.get_id_by_name``; it is also
        the prefix of the saved model's file name.
    ad : callable
        Anomaly detector class (or factory). It is instantiated as
        ``ad(**ad_kwargs)``.
    ad_kwargs : dict, optional
        Keyword arguments for the detector constructor. Defaults to ``{}``.
    fit_kwargs : dict, optional
        Extra keyword arguments forwarded to ``fit``. Defaults to ``{}``.

    Returns
    -------
    None

    Notes
    -----
    The stored ``training_duration`` is measured from just before the dataset
    is loaded until just after the model file has been saved, so it includes
    loading and saving, not only ``fit``.
    """
    if ad_kwargs is None:
        ad_kwargs = {}
    if fit_kwargs is None:
        fit_kwargs = {}

    # Start of the timed section; also used as the timestamp in the file name.
    start_time = arrow.now()

    dataset = Dataset(dataset_name)

    # Keep the instance under its own name instead of rebinding the `ad` argument.
    detector = ad(**ad_kwargs)

    # Train and save
    detector.fit(dataset, **fit_kwargs)
    file_name = f'{dataset_name}_{detector.abbreviation}_{start_time.format(DATE_FORMAT)}'
    model_file = detector.save(file_name)

    # End of the timed section
    end_time = arrow.now()

    # Cache result
    Evaluator(model_file.str_path).cache_result()

    # Training time in seconds
    training_time = (end_time - start_time).total_seconds()

    # Write to database. The session is closed in `finally` so that a failing
    # lookup, add or commit does not leave a connection checked out.
    session = Session(get_engine())
    try:
        session.add(Model(creation_date=end_time.datetime,
                          algorithm=detector.name,
                          training_duration=training_time,
                          file_name=model_file.file,
                          training_event_log_id=EventLog.get_id_by_name(dataset_name),
                          training_host=socket.gethostname(),
                          hyperparameters=str(dict(**ad_kwargs, **fit_kwargs))))
        session.commit()
    finally:
        session.close()

    if isinstance(detector, NNAnomalyDetector):
        # Imported lazily so that non-neural detectors do not need keras here.
        from keras.backend import clear_session
        clear_session()

In [4]:
# Substrings a dataset name must contain to be kept (one match per list is enough).
# select_datasets = ["paper", "p2p", "small", "medium"]
select_datasets = ["p2p"]
select_attributes = ["-1"]

# Start from every event log file whose `p` attribute equals 0.3, sorted by name,
# then keep the names that match both substring lists.
datasets = sorted(e.name for e in get_event_log_files() if e.p == 0.3)
datasets = [
    d for d in datasets
    if any(s in d for s in select_datasets)
    and any(s in d for s in select_attributes)
]

# The first remaining name is the dataset the rest of the notebook works on.
dataset_name = datasets[0]
print(datasets)


['p2p-0.3-1']


In [None]:
# from april.anomalydetection.ltnencoder import LTNDAEP2P

In [None]:

# ldp =  LTNDAEP2P()

In [7]:
import arrow
import socket
from sqlalchemy.orm import Session
from tqdm.notebook import tqdm

from april import Evaluator
from april.anomalydetection import *
from april.database import EventLog
from april.database import Model
from april.database import get_engine
from april.dataset import Dataset
from april.fs import DATE_FORMAT
from april.fs import get_event_log_files

In [8]:

# Load the selected dataset so its feature arrays can be inspected in the cells below.
dataset = Dataset(dataset_name)


In [9]:
# Grab the three feature representations exposed by the dataset and report their sizes.
flat_onehot_features_2d = dataset.flat_onehot_features_2d
flat_features = dataset.flat_features
features  = dataset.features
# Note: despite the "shape" label, this line prints len(features), not a shape.
print(f"features.shape: {len(features)}")
print(f"flat_features.shape: {flat_features.shape}")
print(f"flat_onehot_features_2d.shape: {flat_onehot_features_2d.shape}")


features.shape: 2
flat_features.shape: (5000, 16, 2)
flat_onehot_features_2d.shape: (5000, 2688)


In [10]:
# Show the first row of each of the two entries in `features`
# (presumably one array per event attribute — TODO confirm).
print(features[0][0])
print(features[1][0])


[27.  7. 11.  4.  5. 24.  9. 10.  8. 26.  0.  0.  0.  0.  0.  0.]
[141.  32.  51.  27. 121.  13.  49. 123.  40. 140.   0.   0.   0.   0.
   0.   0.]


In [11]:
# Show the first entry of `flat_features` and the container type it is stored in.
print(flat_features[0])
print(type(flat_features))

[[ 27. 141.]
 [  7.  32.]
 [ 11.  51.]
 [  4.  27.]
 [  5. 121.]
 [ 24.  13.]
 [  9.  49.]
 [ 10. 123.]
 [  8.  40.]
 [ 26. 140.]
 [  0.   0.]
 [  0.   0.]
 [  0.   0.]
 [  0.   0.]
 [  0.   0.]
 [  0.   0.]]
<class 'numpy.ndarray'>


In [12]:
# Show the first row of the 2-D one-hot feature matrix.
print(flat_onehot_features_2d[0])

[0. 0. 0. ... 0. 0. 0.]


In [16]:
# Shape of the 2-D one-hot feature matrix (same value as printed when the feature views were loaded).
print(flat_onehot_features_2d.shape)


(5000, 2688)


In [17]:
# Detector configurations to train: each entry supplies the keyword arguments
# that are forwarded to fit_and_save.
ads = [
    {"ad": DAE, "fit_kwargs": {"epochs": 60, "batch_size": 500}},
]

# Train every configuration on every selected dataset, with a progress bar per configuration.
for ad in ads:
    for d in tqdm(datasets, leave=True, position=1):
        fit_and_save(d, **ad)

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
d:\binet\.out\models\p2p-0.3-1_dae_20250325-182516.756109.keras


In [18]:
# Re-create the dataset object from the first selected name
# (the same name that `dataset_name` was assigned from `datasets[0]`).
dataset = Dataset(datasets[0])

In [19]:
# Inspect the dataset object: its repr, then every instance attribute.
# NOTE(review): vars() dumps whole arrays into the output; printing only the
# attribute names or shapes would keep the notebook readable.
print(dataset)
print(vars(dataset))

<april.dataset.Dataset object at 0x00000151DCAD01C0>
{'dataset_name': 'p2p-0.3-1', 'go_backwards': False, 'pad_mode': 'post', 'attribute_types': [<AttributeType.CATEGORICAL: 0>, <AttributeType.CATEGORICAL: 0>], 'attribute_keys': ['name', 'user'], 'classes': array([[[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       ...,

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]]], dtype=int64), 'labels': array(['normal', 'normal', 'normal', ..