### Import Libraries

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import pm4py

Importing Data

In [3]:
log = pm4py.read_xes('BPI_Challenge_2012.xes')
df = pm4py.convert_to_dataframe(log)
df.head()

Exception: File does not exist

In [None]:
df[df['case:concept:name']==173688]

Splitting the Data Set

In [None]:
# splitting the data into train and test
propotion = 0.2
train, test = train_test_split(df, test_size=propotion, random_state=42, shuffle=True)

Training the Model

In [None]:
df.columns

In [None]:
# Create a random forest Classifier. By convention, clf means 'Classifier'
clf = RandomForestClassifier(n_jobs=2, random_state=0)

# specifying the predictor columns and the target column
predictor_columns = ['position', 'concept:name', 'case:concept:name', 'lifecycle:transition', 'case:AMOUNT_REQ']
target_column = 'next_case:concept:name'

In [None]:
# Implementing one-hot encoding on columns
X = pd.get_dummies(train[predictor_columns])
y = train[target_column]
X

In [None]:
# Train the Classifier to take the training features and learn how they relate
# to the training y
clf.fit(X, y)

Testing the Model

In [None]:
# evaluating the model
X_test = pd.get_dummies(test[predictor_columns])
y_test = test[target_column]
y_pred = clf.predict(X_test)

Result

In [None]:
# result
accuracy = accuracy_score(y_test, y_pred)
# printing f1 score
f1 = f1_score(y_test, y_pred, average='weighted')
# printing precision score
precision = precision_score(y_test, y_pred, average='weighted')
# printing recall score
recall = recall_score(y_test, y_pred, average='weighted')

f1, precision, recall, accuracy
f'f1: {f1}, precision: {precision}, recall: {recall}, accuracy: {accuracy}'

In [None]:
# ploting the confusion matrix with seaborn
import seaborn as sns
import matplotlib.pyplot as plt

conf_mat = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(20,20))
sns.heatmap(conf_mat, annot=True, fmt='d',
            xticklabels=clf.classes_, yticklabels=clf.classes_)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()