# Learning of Process Representations Using Recurrent Neural Networks

In [11]:
import numpy as np
import pandas as pd

from replearn.eventlog import EventLog

from replearn.embedding_predict import EmbeddingPredict
from replearn.autoencoder import AutoencoderRepresentation
from replearn.doc2vec import Doc2VecRepresentation

from replearn.clustering import Clustering
from replearn.evaluation import Evaluation

## Load event log

In [14]:
# Location of the event log on disk
event_log_path = '../logs/multi-perspective_02'
file_name = 'p2p_500_10_20_5_2_1-0.3-1.json.gz'

# Attribute selection handed to EventLog
case_attributes = None  # auto-detect attributes
event_attributes = ['concept:name', 'user']  # use activity name and user
true_cluster_label = 'cluster'

# Create the EventLog wrapper, read the file, then run its preprocessing step.
# NOTE(review): the meaning of the second positional argument to load() (False)
# is not visible in this notebook - confirm against EventLog.load.
event_log = EventLog(
    file_name,
    case_attributes=case_attributes,
    event_attributes=event_attributes,
    true_cluster_label=true_cluster_label,
)
event_log.load(f'{event_log_path}/{file_name}', False)
event_log.preprocess()

## Representation Learning

In [15]:
# Hyperparameters shared by the cells below.

# width used for both the embedding and the recurrent layer
vector_size = 32

# training schedule passed to predictor.fit()
n_batch_size = 64
n_epochs = 10

# number of clusters requested from the clustering and evaluation steps
n_clusters = 5

### LSTM / GRU

In [16]:
# Create the EmbeddingPredict wrapper around the loaded event log.
predictor = EmbeddingPredict(event_log)

# The embedding and the recurrent layer use the same width (vector_size).
# The recurrent size is passed through `gru_dim` even though rnn='LSTM' is selected.
predictor.build_model(
    embedding_dim=vector_size,
    gru_dim=vector_size,
    rnn='LSTM',
)

# Train with the notebook-level hyperparameters; verbose=True enables progress output.
predictor.fit(
    epochs=n_epochs,
    batch_size=n_batch_size,
    verbose=True,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# predict() yields three values; feature_vector is the one handed to the
# clustering step further down.
(
    pred_model,
    feature_vector,
    embedding_vector,
) = predictor.predict()

### Clustering

In [7]:
# Cluster the learned feature vectors into n_clusters groups.
# NOTE(review): 'agglomerative' and 'cosine' are presumably the clustering
# algorithm and the distance metric - confirm against Clustering.cluster.
cluster_analysis = Clustering(event_log)
cluster_analysis.cluster(
    feature_vector,
    'agglomerative',
    n_clusters,
    'cosine',
)

# The result is indexed positionally (0, 1, 2) when the scores are printed.
cluster_result = cluster_analysis.evaluate()

In [8]:
# Report the three clustering scores; `!s` formats each value with str().
print(f'Adjusted Rand Index: {cluster_result[0]!s}')
print(f'Normalized Mutual Information: {cluster_result[1]!s}')
print(f'F1-BCubed: {cluster_result[2]!s}')

Adjusted Rand Index: 0.9908098820521286
Normalized Mutual Information: 0.9813041914061419
F1-BCubed: 0.9920901626047526


In [9]:
# Evaluate the predicted cluster assignment against the event log.
evaluation_a = Evaluation(event_log)

# NOTE(review): this reads the private attribute `_pred_labels` of the
# Clustering object directly - confirm whether a public accessor exists.
# Kept as a bare final expression so the returned value is shown as cell output.
evaluation_a.evaluate_clusters(
    n_clusters,
    cluster_analysis._pred_labels,
)



replaying log with TBR, completed variants ::   0%|          | 0/292 [00:00<?, ?it/s]



replaying log with TBR, completed variants ::   0%|          | 0/1245 [00:00<?, ?it/s]



replaying log with TBR, completed variants ::   0%|          | 0/205 [00:00<?, ?it/s]

replaying log with TBR, completed variants ::   0%|          | 0/887 [00:00<?, ?it/s]

replaying log with TBR, completed variants ::   0%|          | 0/12 [00:00<?, ?it/s]

replaying log with TBR, completed variants ::   0%|          | 0/63 [00:00<?, ?it/s]

replaying log with TBR, completed variants ::   0%|          | 0/126 [00:00<?, ?it/s]

replaying log with TBR, completed variants ::   0%|          | 0/526 [00:00<?, ?it/s]

(0.8248620900710927, 0.8022950730482481, 0.5464288440736238)

In [19]:
import gzip
import json

# Read the raw (compressed) bytes of another event log file from the logs folder.
with gzip.open('../logs/multi-perspective_02/huge_500_10_20_5_2_1-0.1-1.json.gz', "r") as f:
    data = f.read()

# Decode the bytes as UTF-8, parse them as JSON, and pretty-print the document
# with sorted keys. Parsing only needs the bytes already read, so it runs
# after the file has been closed.
j = json.loads(data.decode('utf-8'))
print(json.dumps(j, indent=4, sort_keys=True))


{
    "attributes": {
        "generation_parameters": {
            "activity_dependency_p": 0.25,
            "anomalies": [
                {
                    "anomaly": "SkipSequence",
                    "parameters": {
                        "activities": [
                            "Activity A",
                            "Activity AA",
                            "Activity AB",
                            "Activity AC",
                            "Activity AD",
                            "Activity AE",
                            "Activity AF",
                            "Activity AG",
                            "Activity AH",
                            "Activity AI",
                            "Activity AJ",
                            "Activity AK",
                            "Activity AL",
                            "Activity AM",
                            "Activity AN",
                            "Activity AO",
                            "Activity AP",
  