# BPIC2019 event log: trial calculations

In [None]:
import numpy as np
import pandas as pd

from replearn.eventlog import EventLog

from replearn.embedding_predict import EmbeddingPredict
from replearn.autoencoder import AutoencoderRepresentation
from replearn.doc2vec import Doc2VecRepresentation

from replearn.clustering import Clustering
import random
from replearn.evaluation import Evaluation
import pm4py

## Load event log

In [None]:
# Location of the BPIC2019 log on disk.
event_log_path = '../logs/BPIC2019'
file_name = 'BPI_Challenge_2019.xes'

# Attribute selection handed to EventLog:
#   - case attributes: None (auto-detect attributes)
#   - event attributes: activity name and user
#   - 'Item Type' is passed as the true cluster label
true_cluster_label = 'Item Type'
event_attributes = ['concept:name', 'org:resource']
case_attributes = None

# Build the EventLog wrapper with the configuration above.
event_log = EventLog(
    file_name,
    case_attributes=case_attributes,
    event_attributes=event_attributes,
    true_cluster_label=true_cluster_label,
)

# Read the XES file from <event_log_path>/<file_name>.
# NOTE(review): the meaning of the positional False is defined by
# EventLog.load and is not visible here -- confirm against replearn.
event_log.load(f'{event_log_path}/{file_name}', False)



In [None]:
# Keep a reference to the log object held by EventLog (private attribute
# `_event_log`) so that later cells can filter from this unfiltered log.
# This is a plain reference, not a copy.
backup_log = event_log._event_log

In [None]:
# Replace the wrapped log with a filtered version of the backup:
# retain=False drops every trace whose "Item Type" trace attribute matches
# one of the given values, so all "Standard" traces are removed.
# pm4py documents `values` as a collection of strings, so the single value
# is wrapped in a list instead of being passed as a bare string.
event_log._event_log = pm4py.filter_trace_attribute_values(
    backup_log,
    "Item Type",
    ["Standard"],
    retain=False,
)

In [None]:
# Run EventLog's preprocessing on the currently wrapped log (at this point
# `_event_log` has been replaced by the filtered log).
# NOTE(review): what preprocess() computes is defined in replearn.eventlog
# and is not visible here.
event_log.preprocess()

## Representation Learning LSTM/GRU

In [None]:
# Training epochs, training batch size, and number of clusters requested
# from the clustering step.
n_epochs, n_batch_size, n_clusters = 5, 64, 5

# Single size reused for both `embedding_dim` and `gru_dim` when the
# model is built.
vector_size = 8

In [None]:
# Build the predictor on the preprocessed event log, using the LSTM variant
# with the same size for the embedding and the recurrent layer.
predictor = EmbeddingPredict(event_log)
predictor.build_model(
    embedding_dim=vector_size,
    gru_dim=vector_size,
    rnn='LSTM',
)

# Train with the configured epochs and batch size, with verbose output.
predictor.fit(
    epochs=n_epochs,
    batch_size=n_batch_size,
    verbose=True,
)

# predict() yields three values; `feature_vector` is the one used for
# clustering further down.
pred_model, feature_vector, embedding_vector = predictor.predict()

### Clustering

In [None]:
# Cluster the learned feature vectors into `n_clusters` groups with the
# 'agglomerative' method; 'cosine' is passed as the fourth argument
# (presumably the distance metric -- confirm against replearn.clustering).
cluster_analysis = Clustering(event_log)
cluster_analysis.cluster(
    feature_vector,
    'agglomerative',
    n_clusters,
    'cosine',
)

# Keep the evaluation output; its element at index 2 is reported later
# under the key 'avgF1-BCubed'.
cluster_result = cluster_analysis.evaluate()

In [None]:
# Evaluate the clusters using the labels predicted by the clustering step.
# `_pred_labels` is a private attribute of Clustering, read here directly.
evaluation_a = Evaluation(event_log)
results = evaluation_a.evaluate_clusters(
    n_clusters,
    cluster_analysis._pred_labels,
)

In [None]:
# Summary of the run: the first three entries of `results` plus the
# clustering score stored at index 2 of `cluster_result`.
print({
    'avgFitness': results[0],
    'avgPrecision': results[1],
    'avgSimp': results[2],
    'avgF1-BCubed': cluster_result[2],
})

In [None]:
# Display the value at index 2 of the clustering evaluation result (the
# same value printed under 'avgF1-BCubed') as the cell output.
cluster_result[2]