# fit methods

In [1]:
import arrow
import socket
from sqlalchemy.orm import Session
from tqdm import tqdm

from april import Evaluator
from april.anomalydetection import *
from april.database import EventLog
from april.database import Model
from april.database import get_engine
from april.dataset import Dataset
from april.fs import DATE_FORMAT
from april.fs import get_event_log_files



In [2]:
def fit_and_save(dataset_name, ad, ad_kwargs=None, fit_kwargs=None):
    if ad_kwargs is None:
        ad_kwargs = {}
    if fit_kwargs is None:
        fit_kwargs = {}

    # Save start time
    start_time = arrow.now()

    # Dataset
    dataset = Dataset(dataset_name)

    # AD
    ad = ad(**ad_kwargs)

    # Train and save
    ad.fit(dataset, **fit_kwargs)
    file_name = f'{dataset_name}_{ad.abbreviation}_{start_time.format(DATE_FORMAT)}'
    model_file = ad.save(file_name)

    # Save end time
    end_time = arrow.now()

    # Cache result
    #print(model_file.str_path)
    Evaluator(model_file.str_path).cache_result()

    # Calculate training time in seconds
    training_time = (end_time - start_time).total_seconds()

    # Write to database
    engine = get_engine()
    session = Session(engine)

    session.add(Model(creation_date=end_time.datetime,
                      algorithm=ad.name,
                      training_duration=training_time,
                      file_name=model_file.file,
                      training_event_log_id=EventLog.get_id_by_name(dataset_name),
                      training_host=socket.gethostname(),
                      hyperparameters=str(dict(**ad_kwargs, **fit_kwargs))))
    print('**********', EventLog.get_id_by_name(dataset_name))
    session.commit()
    session.close()
    
    

    if isinstance(ad, NNAnomalyDetector):
        from keras.backend import clear_session
        clear_session()

In [3]:
anomaly_raio = 0.3 # 0.05 0.1 0.2 0.3
# datasets = sorted([e.name for e in get_event_log_files() if e.p == 0.01])
# 获取的是json文件，而不是csv文件
# datasets = sorted([e.name for e in get_event_log_files() if 'bpic1301' in e.name  and e.p == 0.1])[0:1]
# datasets = sorted([e.name for e in get_event_log_files() if '13' in e.name  and e.p == 0.2])
# datasets = sorted([e.name for e in get_event_log_files() if ('Helpdesk' in e.name or 'SepsisCases' in e.name)  and e.p == 0.05])
datasets = sorted([e.name for e in get_event_log_files() if  e.p == anomaly_raio])
# datasets = sorted([e.name for e in get_event_log_files() if 'BPIC13ClosedProblems' in e.name  and e.p == 0.1])
print(datasets)

3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.0 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.05 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.1 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.2 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.3 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.0 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.05 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.1 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.2 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.3 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13OpenProblems 0.0 1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13OpenProblems 0.05 1
3 aprit-EventLogFile-init:self.model, self

In [4]:
# ads = [
#     dict(ad=OneClassSVM),
#     dict(ad=NaiveAnomalyDetector),
#     dict(ad=SamplingAnomalyDetector),
#     dict(ad=DAE, fit_kwargs=dict(epochs=100, batch_size=500)),
#     dict(ad=BINetv1, fit_kwargs=dict(epochs=50, batch_size=500)) # epochs=20,batch_size=500
# ]

# ads = [
#     dict(ad=TStidePlus, ad_kwargs=dict(k=2)),
#     dict(ad=NaivePlusAnomalyDetector),
#     dict(ad=LikelihoodPlusAnomalyDetector)
# ]

ads = [
    dict(ad=TStidePlus, ad_kwargs=dict(k=2)),
    dict(ad=NaivePlusAnomalyDetector),
    dict(ad=LikelihoodPlusAnomalyDetector),
    dict(ad=OneClassSVM),
    dict(ad=NaiveAnomalyDetector),
    dict(ad=SamplingAnomalyDetector),
    dict(ad=DAE, fit_kwargs=dict(epochs=50, batch_size=500)),
    dict(ad=BINetv1, fit_kwargs=dict(epochs=20, batch_size=500))
]


In [5]:
for ad in ads:
    [fit_and_save(d, **ad) for d in tqdm(datasets, desc=ad['ad'].name)]

t-STIDE+:   0%|                                                                                  | 0/9 [00:00<?, ?it/s]

1 Dataset-init:self.dataset_name= BPIC13ClosedProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13ClosedProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13ClosedProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13ClosedProblems-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13ClosedProblems-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_from_event_log
include_attributes is None= True
april-dataset-_from_event_log:include_attributes= ['name', 'Activity']
9 april-dataset-EventLog-_from_event_log:case_lens= [7 8 7 ... 4 4 4]
1

t-STIDE+:  11%|████████▏                                                                 | 1/9 [00:00<00:04,  1.90it/s]

15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC13Incidents-0.3-1
2 Dataset-load:self.dataset_name= BPIC13Incidents-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13Incidents-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13Incidents-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13Incidents-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_from_event_log
include_attributes is None= True
april-dataset-_from_event_log:include_attributes= ['name', 'Activity']
9 april-dataset-Eve

t-STIDE+:  22%|████████████████▍                                                         | 2/9 [00:07<00:28,  4.08s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC13OpenProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13OpenProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13OpenProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13OpenProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13OpenProblems-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13OpenProblems-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_from_event_log
include_attribut

t-STIDE+:  33%|████████████████████████▋                                                 | 3/9 [00:07<00:13,  2.29s/it]

april-evaluator-result
april-evaluator-dataset
april-evaluator-datase self._dataset is None
1 Dataset-init:self.dataset_name= BPIC13OpenProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13OpenProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13OpenProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13OpenProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13OpenProblems-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13OpenProblems-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_referenc

t-STIDE+:  44%|████████████████████████████████▉                                         | 4/9 [00:09<00:12,  2.42s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20International-0.3-1
2 Dataset-load:self.dataset_name= BPIC20International-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20International 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20International-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20International-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20International-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_from_event_log
include_at

t-STIDE+:  56%|█████████████████████████████████████████                                 | 5/9 [00:11<00:09,  2.31s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20PermitLog-0.3-1
2 Dataset-load:self.dataset_name= BPIC20PermitLog-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20PermitLog 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20PermitLog-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20PermitLog-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20PermitLog-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_from_event_log
include_attributes is None= True
a

t-STIDE+:  67%|█████████████████████████████████████████████████▎                        | 6/9 [00:17<00:09,  3.26s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
2 Dataset-load:self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20PrepaidTravelCost 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20PrepaidTravelCost-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20PrepaidTravelCost-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_f

t-STIDE+:  78%|█████████████████████████████████████████████████████████▌                | 7/9 [00:17<00:04,  2.36s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20RequestForPayment-0.3-1
2 Dataset-load:self.dataset_name= BPIC20RequestForPayment-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20RequestForPayment 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20RequestForPayment-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20RequestForPayment-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20RequestForPayment-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_f

t-STIDE+:  89%|█████████████████████████████████████████████████████████████████▊        | 8/9 [00:19<00:02,  2.09s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= Helpdesk-0.3-1
2 Dataset-load:self.dataset_name= Helpdesk-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= Helpdesk 0.3 1
4 Dataset-load:self.self.dataset_name= Helpdesk-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\Helpdesk-0.3-1.pkl.gz
6 el_file.path.exists:el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\Helpdesk-0.3-1.json.gz
8 april-dataset-EventLog-from_event_log
9 april-dataset-EventLog-_from_event_log
include_attributes is None= True
april-dataset-_from_event_log:include_attri

t-STIDE+: 100%|██████████████████████████████████████████████████████████████████████████| 9/9 [00:19<00:00,  2.22s/it]


april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None


Naive+:   0%|                                                                                    | 0/9 [00:00<?, ?it/s]

1 Dataset-init:self.dataset_name= BPIC13ClosedProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13ClosedProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13ClosedProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13ClosedProblems-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13ClosedProblems-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13ClosedProblems-0.3-1.pkl.gz
april-ev

Naive+:  22%|████████████████▉                                                           | 2/9 [00:00<00:00,  8.58it/s]

15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC13OpenProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13OpenProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13OpenProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13OpenProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13OpenProblems-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13OpenProblems-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detect

Naive+:  44%|█████████████████████████████████▊                                          | 4/9 [00:00<00:00, 10.76it/s]

15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20International-0.3-1
2 Dataset-load:self.dataset_name= BPIC20International-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20International 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20International-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20International-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20International-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly 

Naive+:  67%|██████████████████████████████████████████████████▋                         | 6/9 [00:00<00:00,  8.09it/s]

april-dataset-binary_targets
april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
2 Dataset-load:self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20PrepaidTravelCost 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20PrepaidTravelCost-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20PrepaidTravelCost-0.3-1.json.gz
5 april-fs-EventLogFile-cache_

Naive+: 100%|████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 10.00it/s]


april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= Helpdesk-0.3-1
2 Dataset-load:self.dataset_name= Helpdesk-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= Helpdesk 0.3 1
4 Dataset-load:self.self.dataset_name= Helpdesk-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\Helpdesk-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\Helpdesk-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five

Likelihood+:   0%|                                                                               | 0/9 [00:00<?, ?it/s]

1 Dataset-init:self.dataset_name= BPIC13ClosedProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13ClosedProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13ClosedProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13ClosedProblems-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13ClosedProblems-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13ClosedProblems-0.3-1.pkl.gz
april-da

Likelihood+:  11%|███████▉                                                               | 1/9 [00:00<00:03,  2.09it/s]

april-dataset-flat_features
april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC13Incidents-0.3-1
2 Dataset-load:self.dataset_name= BPIC13Incidents-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13Incidents-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13Incidents-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13Incidents-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Projec

Likelihood+:  22%|███████████████▊                                                       | 2/9 [00:07<00:30,  4.38s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC13OpenProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13OpenProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13OpenProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13OpenProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13OpenProblems-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13OpenProblems-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business

Likelihood+:  33%|███████████████████████▋                                               | 3/9 [00:07<00:14,  2.46s/it]

april-dataset-flat_features
april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20Domestic-0.3-1
2 Dataset-load:self.dataset_name= BPIC20Domestic-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20Domestic 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20Domestic-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20Domestic-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20Domestic-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Busi

Likelihood+:  44%|███████████████████████████████▌                                       | 4/9 [00:10<00:12,  2.46s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20International-0.3-1
2 Dataset-load:self.dataset_name= BPIC20International-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20International 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20International-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20International-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20International-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Bu

Likelihood+:  56%|███████████████████████████████████████▍                               | 5/9 [00:12<00:08,  2.24s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20PermitLog-0.3-1
2 Dataset-load:self.dataset_name= BPIC20PermitLog-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20PermitLog 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20PermitLog-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20PermitLog-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20PermitLog-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly D

Likelihood+:  67%|███████████████████████████████████████████████▎                       | 6/9 [00:17<00:09,  3.25s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
2 Dataset-load:self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20PrepaidTravelCost 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20PrepaidTravelCost-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20PrepaidTravelCost-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20PrepaidTravelCost-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jup

Likelihood+:  78%|███████████████████████████████████████████████████████▏               | 7/9 [00:17<00:04,  2.36s/it]

april-dataset-flat_features
april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= BPIC20RequestForPayment-0.3-1
2 Dataset-load:self.dataset_name= BPIC20RequestForPayment-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC20RequestForPayment 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC20RequestForPayment-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC20RequestForPayment-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC20RequestForPayment-0.3-1.json.gz
5 april-fs-EventLogFile-cache_f

Likelihood+:  89%|███████████████████████████████████████████████████████████████        | 8/9 [00:19<00:02,  2.04s/it]

april-evaluator-result
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
15 april-database-table-EventLog-get_id_by_name:eventlog_id= None
********** None
1 Dataset-init:self.dataset_name= Helpdesk-0.3-1
2 Dataset-load:self.dataset_name= Helpdesk-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= Helpdesk 0.3 1
4 Dataset-load:self.self.dataset_name= Helpdesk-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\Helpdesk-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\Helpdesk-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five

Likelihood+: 100%|███████████████████████████████████████████████████████████████████████| 9/9 [00:19<00:00,  2.22s/it]







# evaluate

In [6]:
import itertools
from multiprocessing.pool import Pool

import pandas as pd
from sklearn import metrics
from sqlalchemy.orm import Session
from tqdm import tqdm

from april.anomalydetection import BINet
from april.anomalydetection.utils import label_collapse
from april.database import Evaluation
from april.database import Model
from april.database import get_engine
from april.enums import Base
from april.enums import Heuristic
from april.enums import Strategy
from april.evaluator import Evaluator
from april.fs import get_model_files
from april.fs import PLOT_DIR

In [7]:
heuristics = [h for h in Heuristic.keys() if h not in [Heuristic.DEFAULT, Heuristic.MANUAL, Heuristic.RATIO,
                                                       Heuristic.MEDIAN, Heuristic.MEAN]]
params = [(Base.SCORES, Heuristic.DEFAULT, Strategy.SINGLE), *itertools.product([Base.SCORES], heuristics, Strategy.keys())]


In [8]:
def _evaluate(params):
    e, base, heuristic, strategy = params

    session = Session(get_engine())
    model = session.query(Model).filter_by(file_name=e.model_file.name).first()
    session.close()

    # Generate evaluation frames, e表示evaluator  e = Evaluator(model_name)
    y_pred = e.binarizer.binarize(base=base, heuristic=heuristic, strategy=strategy, go_backwards=False)
    y_true = e.binarizer.get_targets()

    evaluations = []
    e.dataset.attribute_keys = ["name"]
    for axis in [0, 1, 2]:
        for i, attribute_name in enumerate(e.dataset.attribute_keys):
            if attribute_name == "name":
                def get_evaluation(label, precision, recall, f1):
                    return Evaluation(model_id=model.id, file_name=model.file_name,
                                      label=label, perspective=perspective, attribute_name=attribute_name,
                                      axis=axis, base=base, heuristic=heuristic, strategy=strategy,
                                      precision=precision, recall=recall, f1=f1)

                perspective = 'Control Flow' if i == 0 else 'Data'
                if i > 0  and not e.ad_.supports_attributes:
                    evaluations.append(get_evaluation('Normal', 0.0, 0.0, 0.0))
                    evaluations.append(get_evaluation('Anomaly', 0.0, 0.0, 0.0))
                else:
                    yp = label_collapse(y_pred[:, :, i:i + 1], axis=axis).compressed()
                    yt = label_collapse(y_true[:, :, i:i + 1], axis=axis).compressed()
                    p, r, f, _ = metrics.precision_recall_fscore_support(yt, yp, labels=[0, 1])
                    evaluations.append(get_evaluation('Normal', p[0], r[0], f[0]))
                    evaluations.append(get_evaluation('Anomaly', p[1], r[1], f[1]))
    return evaluations

def evaluate(model_name):
    e = Evaluator(model_name)

    _params = []
    for base, heuristic, strategy in params:
        if e.dataset.num_attributes == 1 and strategy in [Strategy.ATTRIBUTE, Strategy.POSITION_ATTRIBUTE]:
            continue
        if isinstance(e.ad_, BINet) and e.ad_.version == 0:
            print('isinstance(e.ad_, BINet)')
            continue
        if heuristic is not None and heuristic not in e.ad_.supported_heuristics:
            continue
        if strategy is not None and strategy not in e.ad_.supported_strategies:
            continue
        if base is not None and base not in e.ad_.supported_bases:
            continue
        _params.append([e, base, heuristic, strategy])
    print('_params=', _params)
    return [_e for p in _params for _e in _evaluate(p)]

In [9]:
models = sorted([m.name for m in get_model_files() ])
models

['BPIC13ClosedProblems-0.3-1_likelihood+_20220103-131343.224173',
 'BPIC13ClosedProblems-0.3-1_naive+_20220103-131342.322662',
 'BPIC13ClosedProblems-0.3-1_t-stide+_20220103-131322.375434',
 'BPIC13Incidents-0.3-1_likelihood+_20220103-131343.703085',
 'BPIC13Incidents-0.3-1_naive+_20220103-131342.358511',
 'BPIC13Incidents-0.3-1_t-stide+_20220103-131322.901635',
 'BPIC13OpenProblems-0.3-1_likelihood+_20220103-131350.809440',
 'BPIC13OpenProblems-0.3-1_naive+_20220103-131342.555707',
 'BPIC13OpenProblems-0.3-1_t-stide+_20220103-131329.469127',
 'BPIC20Domestic-0.3-1_likelihood+_20220103-131350.988241',
 'BPIC20Domestic-0.3-1_naive+_20220103-131342.578784',
 'BPIC20Domestic-0.3-1_t-stide+_20220103-131329.639629',
 'BPIC20International-0.3-1_likelihood+_20220103-131353.438045',
 'BPIC20International-0.3-1_naive+_20220103-131342.708440',
 'BPIC20International-0.3-1_t-stide+_20220103-131332.240546',
 'BPIC20PermitLog-0.3-1_likelihood+_20220103-131355.298750',
 'BPIC20PermitLog-0.3-1_naive+_

In [10]:
# models = models[20:]
# models = [models[index] for index in range(len(models)) if 'Helpdesk' in models[index]]
# models

In [11]:
evaluations = []
for i in range(len(models)):
    print('-----------------------', models[i], '--------------------------')
    e= evaluate(models[i])
    evaluations.append(e)



----------------------- BPIC13ClosedProblems-0.3-1_likelihood+_20220103-131343.224173 --------------------------
april-evaluator-dataset
april-evaluator-datase self._dataset is None
1 Dataset-init:self.dataset_name= BPIC13ClosedProblems-0.3-1
2 Dataset-load:self.dataset_name= BPIC13ClosedProblems-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13ClosedProblems 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13ClosedProblems-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13ClosedProblems-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13ClosedProblems-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter No

april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (1487, 37, 2)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (1487, 37, 2) , m.shape= (1487, 37, 2)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (1487, 37, 2)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (1487, 37, 2) , m.shape= (1487, 37, 2)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (1487, 37, 2)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (1487, 37, 2) , m.shape= (1487, 37, 2)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-

april-evaluator-dataset
april-evaluator-dataset
----------------------- BPIC13Incidents-0.3-1_likelihood+_20220103-131343.703085 --------------------------
april-evaluator-dataset
april-evaluator-datase self._dataset is None
1 Dataset-init:self.dataset_name= BPIC13Incidents-0.3-1
2 Dataset-load:self.dataset_name= BPIC13Incidents-0.3-1
3 aprit-EventLogFile-init:self.model, self.p, self.id= BPIC13Incidents 0.3 1
4 Dataset-load:self.self.dataset_name= BPIC13Incidents-0.3-1
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\.cache\eventlogs\BPIC13Incidents-0.3-1.pkl.gz
6 el_file.cache_file.exists():el_file.path= E:\Jupyter Notebook\Jupyter Notebook Project\Business Process Anomaly Detection\review_experiments_all\binet_five_test_from_reference_extra_methods\.out\eventlogs\BPIC13Incidents-0.3-1.json.gz
5 april-fs-EventLogFile-cache_file: E:\Jupyter Notebo

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7554, 125, 2)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7554, 125, 2) , m.shape= (7554, 125, 2)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7554, 125, 2)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7554, 125, 2) , m.shape= (7554, 125, 2)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7554, 125, 2)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7554, 125, 2) , m.shape= (7554, 125, 2)
april-evaluator-result
april-evaluator-dataset
april-ev

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7554, 125, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7554, 125, 1) , m.shape= (7554, 125, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7554, 125, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7554, 125, 1) , m.shape= (7554, 125, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
----------------------- BPIC13OpenProblems-0.3-1_likelihood+_20220103-131350.809440 --------------------------
april-evaluator-dataset
april-evaluator-datase self._dataset is None
1 Dataset-init:self.dataset_name= BPIC13OpenProblems-0.3-1
2 Dataset-lo

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (819, 24, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (819, 24, 1) , m.shape= (819, 24, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (819, 24, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (819, 24, 1) , m.shape= (819, 24, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (819, 24, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (819, 24, 1) , m.shape= (819, 24, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
ap

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (10500, 26, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (10500, 26, 1) , m.shape= (10500, 26, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (10500, 26, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (10500, 26, 1) , m.shape= (10500, 26, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (10500, 26, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (10500, 26, 1) , m.shape= (10500, 26, 1)
april-evaluator-result
april-evaluator-dataset
april-ev

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (10500, 26, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (10500, 26, 1) , m.shape= (10500, 26, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (10500, 26, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (10500, 26, 1) , m.shape= (10500, 26, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (10500, 26, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (10500, 26, 1) , m.shape= (10500, 26, 1)
april-evaluator-result
april-evaluator-dataset
april-ev

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6449, 30, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6449, 30, 1) , m.shape= (6449, 30, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6449, 30, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6449, 30, 1) , m.shape= (6449, 30, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6449, 30, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6449, 30, 1) , m.shape= (6449, 30, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-d

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6449, 30, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6449, 30, 1) , m.shape= (6449, 30, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6449, 30, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6449, 30, 1) , m.shape= (6449, 30, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6449, 30, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6449, 30, 1) , m.shape= (6449, 30, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-d

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7065, 92, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7065, 92, 1) , m.shape= (7065, 92, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7065, 92, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7065, 92, 1) , m.shape= (7065, 92, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (7065, 92, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7065, 92, 1) , m.shape= (7065, 92, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-d

april-evaluator-result
april-evaluator-binarizer
april-evaluator-result
april-evaluator-dataset
april-dataset-mask
april-dataset-mask self._mask is None
april-dataset-mask self.pad_mode= post
april-evaluator-dataset
april-dataset-flat_features
april-evaluator-dataset
april-dataset-binary_targets
**************a.shape= (7065,)
1 len(a.shape)= 1
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7065,) , m.shape= (7065,)
**************a.shape= (7065, 92)
2 len(a.shape)= 2
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7065, 92) , m.shape= (7065, 92)
**************a.shape= (7065, 92, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7065, 92, 1) , m.shape= (7065, 92, 1)
**************a.shape= (7065, 92, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (7065, 92, 1) , m.shape= (7065, 92, 1)
april-evaluator-result
april-evaluator-dataset
apr

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (2099, 23, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (2099, 23, 1) , m.shape= (2099, 23, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (2099, 23, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (2099, 23, 1) , m.shape= (2099, 23, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (2099, 23, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (2099, 23, 1) , m.shape= (2099, 23, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-d

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (2099, 23, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (2099, 23, 1) , m.shape= (2099, 23, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (2099, 23, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (2099, 23, 1) , m.shape= (2099, 23, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (2099, 23, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (2099, 23, 1) , m.shape= (2099, 23, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-d

april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6886, 21, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6886, 21, 1) , m.shape= (6886, 21, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6886, 21, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6886, 21, 1) , m.shape= (6886, 21, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6886, 21, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6886, 21, 1) , m.shape= (6886, 21, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6886, 21, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6886, 21, 1) , m.shape= (6886, 21, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6886, 21, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6886, 21, 1) , m.shape= (6886, 21, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (6886, 21, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (6886, 21, 1) , m.shape= (6886, 21, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-d

april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (4580, 17, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (4580, 17, 1) , m.shape= (4580, 17, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (4580, 17, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (4580, 17, 1) , m.shape= (4580, 17, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (4580, 17, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (4580, 17, 1) , m.shape= (4580, 17, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-

april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (4580, 17, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (4580, 17, 1) , m.shape= (4580, 17, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (4580, 17, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (4580, 17, 1) , m.shape= (4580, 17, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-dataset
april-evaluator-result
**************a.shape= (4580, 17, 1)
? len(a.shape)= 3
type(a)= <class 'numpy.ndarray'> ,type(m)= <class 'numpy.ndarray'>
a.shape= (4580, 17, 1) , m.shape= (4580, 17, 1)
april-evaluator-result
april-evaluator-dataset
april-evaluator-d

In [12]:
# Write to database
session = Session(get_engine())
for e in evaluations:
    session.bulk_save_objects(e)
    session.commit()
session.close()

In [13]:
out_dir = PLOT_DIR / 'isj-2019'
eval_file = out_dir / 'eval.pkl'

session = Session(get_engine())
evaluations = session.query(Evaluation).all()
rows = []
for ev in tqdm(evaluations):
    m = ev.model
    el = ev.model.training_event_log
    rows.append([m.file_name, m.creation_date, m.hyperparameters, m.training_duration, m.training_host, m.algorithm,
                 m.file_name.split('-')[0], m.file_name, m.creation_date, m.hyperparameters,
                 ev.axis, ev.base, ev.heuristic, ev.strategy, ev.label, ev.attribute_name, ev.perspective, 
                 ev.precision, ev.recall, ev.f1])
session.close()
columns = ['file_name', 'date', 'hyperparameters', 'training_duration', 'training_host', 'ad',
           'dataset_name', 'process_model', 'noise', 'dataset_id',
           'axis', 'base', 'heuristic', 'strategy', 'label', 'attribute_name', 'perspective', 'precision', 'recall', 'f1']

evaluation = pd.DataFrame(rows, columns=columns)
evaluation.to_pickle(eval_file)

100%|███████████████████████████████████████████████████████████████████████████| 1620/1620 [00:00<00:00, 42746.79it/s]


In [14]:
evaluation

Unnamed: 0,file_name,date,hyperparameters,training_duration,training_host,ad,dataset_name,process_model,noise,dataset_id,axis,base,heuristic,strategy,label,attribute_name,perspective,precision,recall,f1
0,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},0.082779,LAPTOP-OTT2IGBT,Likelihood+,BPIC13ClosedProblems,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},0,scores,best,single,Normal,name,Control Flow,0.892000,0.967042,0.928007
1,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},0.082779,LAPTOP-OTT2IGBT,Likelihood+,BPIC13ClosedProblems,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},0,scores,best,single,Anomaly,name,Control Flow,0.839662,0.595808,0.697023
2,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},0.082779,LAPTOP-OTT2IGBT,Likelihood+,BPIC13ClosedProblems,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},1,scores,best,single,Normal,name,Control Flow,0.960942,0.992645,0.976536
3,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},0.082779,LAPTOP-OTT2IGBT,Likelihood+,BPIC13ClosedProblems,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},1,scores,best,single,Anomaly,name,Control Flow,0.758007,0.363481,0.491349
4,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},0.082779,LAPTOP-OTT2IGBT,Likelihood+,BPIC13ClosedProblems,BPIC13ClosedProblems-0.3-1_likelihood+_2022010...,2022-01-03 13:13:43.306952,{},2,scores,best,single,Normal,name,Control Flow,0.960942,0.992645,0.976536
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},0.551669,LAPTOP-OTT2IGBT,t-STIDE+,Helpdesk,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},0,scores,stable_right,position,Anomaly,name,Control Flow,0.309389,1.000000,0.472570
1616,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},0.551669,LAPTOP-OTT2IGBT,t-STIDE+,Helpdesk,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},1,scores,stable_right,position,Normal,name,Control Flow,0.902940,0.838496,0.869526
1617,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},0.551669,LAPTOP-OTT2IGBT,t-STIDE+,Helpdesk,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},1,scores,stable_right,position,Anomaly,name,Control Flow,0.009871,0.017551,0.012636
1618,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},0.551669,LAPTOP-OTT2IGBT,t-STIDE+,Helpdesk,Helpdesk-0.3-1_t-stide+_20220103-131341.493298...,2022-01-03 13:13:42.044967,{'k': 2},2,scores,stable_right,position,Normal,name,Control Flow,0.902940,0.838496,0.869526


In [15]:
evaluation.to_csv('./evaluation.csv')

# save detect results

In [16]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sqlalchemy.orm import Session
import scikit_posthocs as sp

from april.database import get_engine
from april.fs import PLOT_DIR
from april.utils import microsoft_colors, prettify_dataframe, cd_plot, get_cd
from april.enums import Base, Strategy, Heuristic

In [17]:
sns.set_style('white')
pd.set_option('display.max_rows', 50)

out_dir = PLOT_DIR / 'isj-2019'
eval_file = out_dir / 'eval.pkl'

synth_datasets = ['small', 'medium', 'large', 'huge', 'wide']
bpic_datasets = ['bpic12', 'bpic13', 'bpic15', 'bpic17', 'bpic19','smalllog', 'largelog']
anonymous_datasets = ['real']
datasets = synth_datasets + bpic_datasets + anonymous_datasets
dataset_types = ['Synthetic', 'Real-life']

binet_ads = ["BINetv1"]
nn_ads = ["DAE"] + binet_ads
# d_ads = ["Naive", "Sampling","DAE","BINetv1", "OC-SVM"]
d_ads = ["Naive", "Sampling","DAE","BINetv1", "OC-SVM", "Naive+", "t-STIDE+", "Likelihood+"]
# d_ads = ["Naive+", "t-STIDE+", "Likelihood+"]
ads = nn_ads + d_ads

heuristics = [r'$best$', r'$default$', r'$elbow_\downarrow$', r'$elbow_\uparrow$',
              r'$lp_\leftarrow$', r'$lp_\leftrightarrow$', r'$lp_\rightarrow$']


evaluation = pd.read_pickle(eval_file)

evaluation = evaluation.query(f'ad in {ads} and label == "Anomaly"')

evaluation['perspective-label'] = evaluation['perspective'] + '-' + evaluation['label']
evaluation['attribute_name-label'] = evaluation['attribute_name'] + '-' + evaluation['label']
evaluation['dataset_type'] = 'Synthetic'
evaluation.loc[evaluation['process_model'].str.contains('bpic'), 'dataset_type'] = 'Real-life'
evaluation.loc[evaluation['process_model'].str.contains('real'), 'dataset_type'] = 'Real-life'

_filtered_evaluation = evaluation.query(f'ad in {nn_ads} and (strategy == "{Strategy.ATTRIBUTE}"'
                                       f' or (strategy == "{Strategy.SINGLE}" and process_model == "bpic12")'
                                       f' or (strategy == "{Strategy.SINGLE}" and ad == "Naive+")) or ad in {d_ads}')

filtered_evaluation2 = _filtered_evaluation.query(f'heuristic == "{Heuristic.DEFAULT}"'
                                                 f' or (heuristic == "{Heuristic.LP_MEAN}" and ad != "DAE")'
                                                 f' or (heuristic == "{Heuristic.ELBOW_UP}" and ad == "DAE")')
filtered_evaluation = filtered_evaluation2.query('strategy == "single"')  # added

df = filtered_evaluation.query('axis in [0, 2]')

df = prettify_dataframe(df)
df = df.groupby(['axis', 'process_model', 'dataset_name', 'ad', 'file_name', 
                 'perspective'])['precision', 'recall', 'f1'].mean().reset_index()
df = df.groupby(['axis', 'dataset_name', 'ad'])['precision', 'recall', 'f1'].mean().reset_index()
df = pd.pivot_table(df, index=['axis', 'ad'], columns=['dataset_name'], values=['precision', 'recall', 'f1'])
df = df.fillna(0)
df = df.stack(1).reset_index()
df = df[df['axis'] == 'Case']

df.reset_index(drop=True, inplace=True)
df.to_csv(str(out_dir / 'table_'+ anomaly_raio + '.csv'), index=False)

