You will also have received the `evaluation.xlsx` file as part of the download. This is the code to create your own evaluation.

In [1]:
import editdistance
import h5py
import numpy as np
import pandas as pd
from tqdm import tqdm

from deepalign import Dataset
from deepalign import fs
from deepalign.utils import gather

In [2]:
def get_alignmnets(file):
    """Load the stored alignment results that belong to one result file.

    Opens ``file.result_file`` as a read-only HDF5 file and copies the
    ``alignments``, ``beams`` and ``costs`` datasets into NumPy arrays.

    Args:
        file: Object exposing a ``result_file`` attribute that points to the
            HDF5 file to read.

    Returns:
        A 3-tuple ``(alignments, beams, costs)`` of NumPy arrays, in that order.
    """
    dataset_keys = ('alignments', 'beams', 'costs')
    # Copy every dataset into memory while the HDF5 handle is still open.
    with h5py.File(file.result_file, 'r') as h5:
        loaded = tuple(np.array(h5[key]) for key in dataset_keys)
    return loaded

For each case, we check whether any of the top-`k` beams matches the ground truth exactly, and from that we calculate the accuracy. This gives us five values: top-1, top-2, and so on up to top-5. Top-3 accuracy, for example, indicates that at least one of the top-3 beams is correct.

Optimality is only calculated for alignments that match the ground truth. An alignment is optimal if its cost equals the optimal cost from the ground truth.

The error is only calculated for alignments that do not match the ground truth. We use the `editdistance` package to calculate the distance between the corrected case and the ground truth case (without the empty moves).

In [3]:
# Build one evaluation row per (result file, label, k) and export the table to Excel.
#
# Result files are sorted by event log name so that consecutive files sharing an event log
# can reuse the Dataset that is already loaded (see the check at the top of the loop).
results = sorted([fs.AlignerFile(f) for f in fs.RESULT_DIR.glob('*')], key=lambda f: f.event_log_name)

# Despite the name, `frames` collects plain row lists; they become a single DataFrame after the loop.
frames = []
dataset = None
for file in tqdm(results):
    # Only (re)load the dataset when the event log changes between consecutive result files.
    if dataset is None or dataset.dataset_name != file.event_log_name:
        dataset = Dataset(file.event_log_name)

    alignments, x, costs = get_alignmnets(file)

    # presumably the ground-truth (corrected) cases, one row per case — TODO confirm against Dataset
    y = dataset.correct_features[0]
    # Right-pad axis 1 with zeros (np.pad's default constant mode) up to the length of x's last axis,
    # so the element-wise comparison below lines up.
    # NOTE(review): np.pad raises if x's last axis is shorter than y's — confirm that cannot happen.
    y = np.pad(y, ((0, 0), (0, x.shape[-1] - y.shape[1])))

    # A beam matches when every position along the last axis equals the ground truth;
    # y[:, None] broadcasts each ground-truth row against all beams of the same case.
    match = np.all(x == y[:, None], -1)

    correct = match
    incorrect = ~match
    # Per beam: does its cost equal the per-case reference cost in dataset.alignments[1]?
    optimal_costs = costs == dataset.alignments[1][:, None]
    # "Optimal" requires both an exact match and a matching cost.
    optimal_alignment = np.logical_and(optimal_costs, correct)

    # Edit distance between each ground-truth row and each of that case's beams.
    # NOTE(review): this runs on the zero-padded rows; the accompanying text says the distance is
    # taken "without the empty moves" — confirm the padding does not distort the distance.
    distances = np.array([[editdistance.distance(a, _b) for _b in b] for a, b in zip(y, x)])

    for label in dataset.unique_text_labels:
        label_type = 'Normal' if label == 'Normal' else 'Anomalous'
        dataset_type = 'Synthetic'
        # Row indices of the cases carrying the current label.
        indices = np.where(dataset.text_labels == label)[0]
        # k runs from 1 to the size of axis 1 of `alignments` (presumably the number of beams — TODO confirm).
        for k in range(1, alignments.shape[1] + 1, 1):
            # Top-k hit: at least one of the first k beams matches the ground truth.
            cor = correct[indices][:, :k].any(-1)
            inc = ~cor
            # Among the top-k hits only: is any of the first k beams both correct and cost-optimal?
            opt = optimal_alignment[indices][cor][:, :k].any(-1)
            # For cases without a top-k hit, argmax over the boolean row yields the first incorrect beam.
            # All k beams are incorrect in those rows, so this is always index 0 (the top-1 beam).
            # `gather` presumably picks the distance at that index per row — verify in deepalign.utils.
            dist = gather(distances[indices][inc], incorrect[indices][:, :k].argmax(-1)[inc][:, None])

            # NOTE: opt.mean() / dist.mean() evaluate to NaN (with a NumPy RuntimeWarning) when there
            # are no correct / no incorrect cases for this label and k.
            # NOTE: `evaluation` is a row list here; the same name is rebound to the DataFrame below.
            evaluation = [
                file.name, file.event_log_name, file.ad, file.model, file.p, file.id, k, label, label_type,
                dataset_type,
                cor.sum(), inc.sum(), opt.sum(), cor.mean(), opt.mean(), dist.mean()
            ]

            frames.append(evaluation)

# Column names, in the same order as the values in each row list built above.
columns = ['file_name', 'dataset', 'algorithm', 'process_model', 'p', 'dataset_id', 'k', 'label', 'binary_label',
           'dataset_type', 'num_correct', 'num_incorrect', 'num_optimal', 'accuracy', 'optimality', 'error']

evaluation = pd.DataFrame(frames, columns=columns)

# Write the table to evaluation.xlsx inside fs.OUT_DIR.
evaluation.to_excel(str(fs.OUT_DIR / 'evaluation.xlsx'))

  0%|          | 0/7 [00:00<?, ?it/s]
  0%|          | 0/5000 [00:00<?, ?it/s][A
  1%|          | 57/5000 [00:00<00:08, 561.49it/s][A
  2%|▏         | 119/5000 [00:00<00:08, 577.81it/s][A
  3%|▎         | 174/5000 [00:00<00:08, 563.94it/s][A
  5%|▍         | 230/5000 [00:00<00:08, 562.75it/s][A
  6%|▌         | 289/5000 [00:00<00:08, 567.37it/s][A
  7%|▋         | 347/5000 [00:00<00:08, 569.42it/s][A
  8%|▊         | 399/5000 [00:00<00:08, 550.97it/s][A
  9%|▉         | 458/5000 [00:00<00:08, 562.12it/s][A
 10%|█         | 519/5000 [00:00<00:08, 558.29it/s][A
 12%|█▏        | 577/5000 [00:01<00:07, 562.99it/s][A
 13%|█▎        | 641/5000 [00:01<00:07, 582.48it/s][A
 14%|█▍        | 699/5000 [00:01<00:07, 578.25it/s][A
 15%|█▌        | 759/5000 [00:01<00:07, 582.91it/s][A
 16%|█▋        | 817/5000 [00:01<00:07, 576.82it/s][A
 18%|█▊        | 875/5000 [00:01<00:07, 561.00it/s][A
 19%|█▊        | 932/5000 [00:01<00:07, 544.30it/s][A
 20%|█▉        | 992/5000 [00:01<00:07,