In [1]:
# Required on some Windows machines: ask TensorFlow to grow its GPU memory
# allocation on demand. The key must match the variable name exactly (no
# surrounding whitespace), otherwise the setting is silently ignored.
import os
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

import h5py
from tqdm.notebook import tqdm

from deepalign import Dataset
from deepalign import fs
from deepalign.alignments import ALIGNERS
from deepalign.alignments.confnet import ConfNet

To speed up the evaluation, we cache all results. The cache files are included in the download of the GitHub release. If you want to run your own experiments, the code below creates the cache file for every model that does not have one yet.

In [2]:
def get_aligner(model_file, dataset):
    """Build the aligner named by ``model_file.ad`` and load its saved model.

    If ``model_file.ad`` does not contain ``'confnet'``, the class registered
    under ``ALIGNERS[model_file.ad]`` is instantiated without arguments and
    ``load`` receives only the model path.

    Otherwise the registry key is ``model_file.ad`` with its last two
    characters removed (presumably a per-variant suffix -- TODO confirm). The
    aligner is constructed from ``dataset`` together with the attribute flags
    of ``model_file`` and ``align_steps=10``, and ``load`` receives the model
    path and ``dataset``.

    :param model_file: object providing ``ad``, ``name``,
        ``use_case_attributes`` and ``use_event_attributes``.
    :param dataset: dataset handed to confnet aligners; unused for all others.
    :return: the aligner after ``load`` has been called on it.
    """
    ad = model_file.ad

    # Guard clause: every non-confnet aligner takes the simple path.
    if 'confnet' not in ad:
        plain_aligner = ALIGNERS[ad]()
        plain_aligner.load(str(fs.MODEL_DIR / model_file.name))
        return plain_aligner

    confnet_cls = ALIGNERS[ad[:-2]]
    confnet_aligner = confnet_cls(
        dataset,
        use_case_attributes=model_file.use_case_attributes,
        use_event_attributes=model_file.use_event_attributes,
        align_steps=10,
    )
    saved_model_path = fs.MODEL_DIR / model_file.name
    confnet_aligner.load(str(saved_model_path), dataset)
    return confnet_aligner

In [3]:
# NOTE(review): `synthetic` is not referenced in the visible part of this cell;
# kept because other cells may rely on it.
synthetic = ['paper', 'p2p', 'small', 'medium', 'huge', 'wide']

# Collapse aligner file names that differ only by a `_forward` / `_backward`
# marker into a single, sorted list of model names.
models = sorted({f.name.replace('_forward', '').replace('_backward', '')
                 for f in fs.get_aligner_files()})

# Only process models that do not have a cached result file yet.
# NOTE(review): the filter uses fs.ModelFile while the loop uses fs.AlignerFile;
# presumably both yield the same `.name` -- confirm.
models = [m for m in models if not (fs.RESULT_DIR / (fs.ModelFile(m).name + '.h5')).exists()]

for model in tqdm(models):
    model_file = fs.AlignerFile(model)
    dataset = Dataset(model_file.event_log_name,
                      use_case_attributes=model_file.use_case_attributes,
                      use_event_attributes=model_file.use_event_attributes)
    aligner = get_aligner(model_file, dataset)

    if isinstance(aligner, ConfNet):
        alignments, beams, costs = aligner.batch_align(dataset, batch_size=5000)
    else:
        try:
            alignments, beams, costs = aligner.align(dataset)
        except Exception as e:
            # Best effort: a failing aligner must not abort the remaining
            # models, but report which one was skipped.
            print('Skipping', model_file.name, '-', e)
            continue

    # Write to a temporary file first and move it into place afterwards. A
    # failed or interrupted write must never leave a partial `.h5` behind,
    # because the filter above treats any existing result file as a valid
    # cache entry and would skip the model on the next run.
    result_path = str(fs.RESULT_DIR / (model_file.name + '.h5'))
    partial_path = result_path + '.tmp'
    try:
        with h5py.File(partial_path, 'w') as file:
            for key, data in (('alignments', alignments),
                              ('beams', beams),
                              ('costs', costs)):
                file.create_dataset(key, data=data, compression="gzip", compression_opts=9)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt); the file is
        # already closed here because the `with` block has exited.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    os.replace(partial_path, result_path)

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

Step 1 → 0.9140007495880127s (25000, 27) finished=3618
Step 2 ← 0.677009105682373s (25000, 27) finished=3618
Step 3 → 0.6100156307220459s (25000, 27) finished=3769
Step 4 ← 0.5740082263946533s (25000, 27) finished=4277
Step 5 → 0.3769993782043457s (25000, 27) finished=4782
Step 6 ← 0.1271042823791504s (25000, 27) finished=4805
Step 7 → 0.11500000953674316s (25000, 27) finished=4832
Step 8 ← 0.11099529266357422s (25000, 27) finished=4833
Step 9 → 0.10400032997131348s (25000, 27) finished=4833
Step 10 ← 0.11799955368041992s (25000, 27) finished=5000
Step 1 → 2.067323923110962s (25000, 27) finished=3797
Step 2 ← 1.2885076999664307s (25000, 27) finished=3797
Step 3 → 0.994004487991333s (25000, 27) finished=3901
Step 4 ← 0.9129984378814697s (25000, 27) finished=4388
Step 5 → 0.5460047721862793s (25000, 27) finished=4781
Step 6 ← 0.1810016632080078s (25000, 27) finished=4820
Step 7 → 0.2239995002746582s (25000, 27) finished=4831
Step 8 ← 0.1741173267364502s (25000, 27) finished=4841
Step 9 →

  return np.array(self.cases)[indices]
100%|██████████| 610/610 [00:11<00:00, 53.97it/s]
100%|██████████| 610/610 [00:11<00:00, 51.85it/s]
p2p-0.3-1: 100%|██████████| 610/610 [00:11<00:00, 52.20it/s]
