# Learning of Process Representations Using Recurrent Neural Networks

In [16]:
import numpy as np
import pandas as pd

from replearn.eventlog import EventLog

from replearn.embedding_predict import EmbeddingPredict
from replearn.autoencoder import AutoencoderRepresentation
from replearn.doc2vec import Doc2VecRepresentation

from replearn.clustering import Clustering

## Load event log

In [17]:
# Where the event log lives: directory and file name
event_log_path = '../logs/multi-perspective_02'
file_name = 'huge_500_10_20_5_1_1-0.0-1.json.gz'

# Attribute selection handed to EventLog
true_cluster_label = 'cluster'
event_attributes = ['concept:name', 'user']  # activity name and user
case_attributes = None  # None -> auto-detect attributes

# Create the EventLog wrapper, read the file, then preprocess it
event_log = EventLog(
    file_name,
    case_attributes=case_attributes,
    event_attributes=event_attributes,
    true_cluster_label=true_cluster_label,
)
# NOTE(review): the meaning of the second positional argument (False) is not
# visible from this notebook - confirm against EventLog.load
event_log.load('/'.join((event_log_path, file_name)), False)
event_log.preprocess()

## Representation Learning

In [18]:
# Hyperparameters shared by the representation-learning cells
vector_size = 32  # passed to build_model(vector_size=...)
n_clusters = 5    # passed to Clustering.cluster as the number of clusters

# Training settings: n_epochs is passed to build_model(epochs=...);
# n_batch_size is not referenced by the doc2vec cells - presumably used by
# other models, TODO confirm
n_epochs, n_batch_size = 10, 64

### Trace2vec

In [19]:
# Trace2vec: train a doc2vec model with neither case nor event attributes appended
doc2vec = Doc2VecRepresentation(event_log)
doc2vec.build_model(
    append_case_attr=False,
    append_event_attr=False,
    vector_size=vector_size,
    concat=True,
    epochs=n_epochs,
)
doc2vec.fit()

In [20]:
# Ask the fitted doc2vec model for the feature vector (predict is run with epochs=50)
feature_vector = doc2vec.predict(
    epochs=50,
)

In [21]:
# Cluster the feature vector and score the result
cluster_analysis = Clustering(event_log)
cluster_analysis.cluster(
    feature_vector,
    'agglomerative',  # presumably the clustering algorithm - confirm against Clustering.cluster
    n_clusters,
    'cosine',  # presumably the distance metric - confirm against Clustering.cluster
)

# evaluate() returns an indexable result; entries 0-2 are reported as
# ARI, NMI and F1-BCubed by the printing cell
cluster_result = cluster_analysis.evaluate()

In [22]:
# Report the three clustering metrics held in cluster_result[0..2]
print(f'Adjusted Rand Index: {cluster_result[0]}')
print(f'Normalized Mutual Information: {cluster_result[1]}')
print(f'F1-BCubed: {cluster_result[2]}')

Adjusted Rand Index: 0.4357940866180324
Normalized Mutual Information: 0.6104346623757338
F1-BCubed: 0.6369988968145126


### Case2vec (event)

In [23]:
# Case2vec (event): train a doc2vec model with event attributes appended,
# case attributes left out
doc2vec = Doc2VecRepresentation(event_log)
doc2vec.build_model(
    append_case_attr=False,
    append_event_attr=True,
    vector_size=vector_size,
    concat=True,
    epochs=n_epochs,
)
doc2vec.fit()

In [24]:
# Ask the fitted doc2vec model for the feature vector (predict is run with epochs=50)
feature_vector = doc2vec.predict(
    epochs=50,
)

In [25]:
# Cluster the feature vector and score the result
cluster_analysis = Clustering(event_log)
cluster_analysis.cluster(
    feature_vector,
    'agglomerative',  # presumably the clustering algorithm - confirm against Clustering.cluster
    n_clusters,
    'cosine',  # presumably the distance metric - confirm against Clustering.cluster
)

# evaluate() returns an indexable result; entries 0-2 are reported as
# ARI, NMI and F1-BCubed by the printing cell
cluster_result = cluster_analysis.evaluate()

In [26]:
# Report the three clustering metrics held in cluster_result[0..2]
print(f'Adjusted Rand Index: {cluster_result[0]}')
print(f'Normalized Mutual Information: {cluster_result[1]}')
print(f'F1-BCubed: {cluster_result[2]}')

Adjusted Rand Index: 0.36276534639450136
Normalized Mutual Information: 0.5619908040738674
F1-BCubed: 0.6046124507488921


### Case2vec (event+case)

In [27]:
# Case2vec (event+case): train a doc2vec model with both case and event
# attributes appended
doc2vec = Doc2VecRepresentation(event_log)
doc2vec.build_model(
    append_case_attr=True,
    append_event_attr=True,
    vector_size=vector_size,
    concat=True,
    epochs=n_epochs,
)
doc2vec.fit()

In [28]:
# Ask the fitted doc2vec model for the feature vector (predict is run with epochs=50)
feature_vector = doc2vec.predict(
    epochs=50,
)

In [29]:
# Cluster the feature vector and score the result
cluster_analysis = Clustering(event_log)
cluster_analysis.cluster(
    feature_vector,
    'agglomerative',  # presumably the clustering algorithm - confirm against Clustering.cluster
    n_clusters,
    'cosine',  # presumably the distance metric - confirm against Clustering.cluster
)

# evaluate() returns an indexable result; entries 0-2 are reported as
# ARI, NMI and F1-BCubed by the printing cell
cluster_result = cluster_analysis.evaluate()

In [30]:
# Report the three clustering metrics held in cluster_result[0..2]
print(f'Adjusted Rand Index: {cluster_result[0]}')
print(f'Normalized Mutual Information: {cluster_result[1]}')
print(f'F1-BCubed: {cluster_result[2]}')

Adjusted Rand Index: 0.09703430402454258
Normalized Mutual Information: 0.37298003589492845
F1-BCubed: 0.5621685665235079
