In [1]:
from pm4py.objects.log.importer.xes import importer as xes_importer

In [2]:
# Directory (relative to the notebook's working directory) that holds the event logs.
filepath = "data/"

# Import the "Domestic Declarations" XES file into an in-memory pm4py event log.
log = xes_importer.apply(filepath + 'DomesticDeclarations.xes')




### Example "Domestic Declarations" dataset from the BPI Challenge 2020:
* contains events pertaining to two years of travel expense claims
* contains 10,500 cases, 56,437 events
* process flow: 
    * after submission by the employee, the request is sent for approval to the travel administration
    * if approved, the request is then forwarded to the budget owner and after that to the supervisor
    * if the budget owner and supervisor are the same person, then only one of these steps is taken
    * in some cases, the director also needs to approve the request
    * the process finishes with either the trip taking place or a payment being requested and paid.
* for domestic trips, no prior permission is needed, i.e. an employee can undertake these trips and ask for reimbursement of the costs afterwards
* to get the costs of a trip reimbursed, a claim is filed. This can be done as soon as costs are actually paid (for example for flights or conference registration fees), or within two months after the trip (for example hotel and food costs, which are usually paid on the spot)

In [3]:
# Show the first trace and its first event to see how the log is structured.
print("first trace of the log: \n", log[0], "\n")
print("first event of the first trace: \n", log[0][0], "\n")

first trace of the log: 
 {'attributes': {'id': 'declaration 86791', 'concept:name': 'declaration 86791', 'BudgetNumber': 'budget 86566', 'Amount': 26.85120450862128, 'DeclarationNumber': 'declaration number 86792'}, 'events': [{'id': 'st_step 86794_0', 'concept:name': 'Declaration SUBMITTED by EMPLOYEE', 'time:timestamp': datetime.datetime(2017, 1, 9, 9, 49, 50, tzinfo=datetime.timezone(datetime.timedelta(0, 3600))), 'org:role': 'EMPLOYEE', 'org:resource': 'STAFF MEMBER'}, '..', {'id': 'dd_declaration 86791_20', 'concept:name': 'Payment Handled', 'time:timestamp': datetime.datetime(2017, 1, 12, 17, 31, 22, tzinfo=datetime.timezone(datetime.timedelta(0, 3600))), 'org:role': 'UNDEFINED', 'org:resource': 'SYSTEM'}]} 

first event of the first trace: 
 {'id': 'st_step 86794_0', 'concept:name': 'Declaration SUBMITTED by EMPLOYEE', 'time:timestamp': datetime.datetime(2017, 1, 9, 9, 49, 50, tzinfo=datetime.timezone(datetime.timedelta(0, 3600))), 'org:role': 'EMPLOYEE', 'org:resource': 'STAFF

In [4]:
# Check which container type the XES importer returned.
type(log)

pm4py.objects.log.log.EventLog

Event Log (objects.log.log.EventLog): 
Represents a sequence of sequences of events. The concept of an event log is the more traditional view on event data, i.e., executions of a process are captured in traces of events.

### Preprocessing Stage:
* extract the activity name (the `concept:name` attribute) of every event in each trace

In [7]:
# The "concept:name" attribute of the first event in the first trace.
log[0][0]["concept:name"]

'Declaration SUBMITTED by EMPLOYEE'

In [29]:
# Tally how often each "concept:name" value occurs over all events of all traces.
concept_dict = {}

for trace in log:
    for event in trace:
        name = event["concept:name"]
        # A name not seen before starts from 0, so its first occurrence yields 1.
        concept_dict[name] = concept_dict.get(name, 0) + 1

In [30]:
# Number of distinct "concept:name" values found in the log.
print(len(concept_dict))

17


In [31]:
# Occurrence count for each "concept:name" value.
print(concept_dict)

{'Payment Handled': 10044, 'Declaration APPROVED by PRE_APPROVER': 685, 'Declaration FOR_APPROVAL by ADMINISTRATION': 1, 'Declaration REJECTED by ADMINISTRATION': 952, 'Declaration FOR_APPROVAL by PRE_APPROVER': 1, 'Declaration REJECTED by SUPERVISOR': 293, 'Declaration REJECTED by PRE_APPROVER': 86, 'Declaration FINAL_APPROVED by SUPERVISOR': 10131, 'Declaration REJECTED by EMPLOYEE': 1365, 'Declaration REJECTED by BUDGET OWNER': 59, 'Declaration SAVED by EMPLOYEE': 135, 'Declaration SUBMITTED by EMPLOYEE': 11531, 'Declaration FOR_APPROVAL by SUPERVISOR': 1, 'Request Payment': 10040, 'Declaration REJECTED by MISSING': 91, 'Declaration APPROVED by ADMINISTRATION': 8202, 'Declaration APPROVED by BUDGET OWNER': 2820}


In [32]:
# One inner list per trace, holding the "concept:name" of each of its events
# in the order the events appear in the trace.
trace_activities_id = [
    [event["concept:name"] for event in trace]
    for trace in log
]

In [35]:
# Preview the "concept:name" sequences of the first five traces.
trace_activities_id[:5]

[['Declaration SUBMITTED by EMPLOYEE',
  'Declaration FINAL_APPROVED by SUPERVISOR',
  'Request Payment',
  'Payment Handled'],
 ['Declaration SUBMITTED by EMPLOYEE',
  'Declaration APPROVED by PRE_APPROVER',
  'Declaration FINAL_APPROVED by SUPERVISOR',
  'Request Payment',
  'Payment Handled'],
 ['Declaration SUBMITTED by EMPLOYEE',
  'Declaration APPROVED by PRE_APPROVER',
  'Declaration FINAL_APPROVED by SUPERVISOR',
  'Request Payment',
  'Payment Handled'],
 ['Declaration SUBMITTED by EMPLOYEE',
  'Declaration FINAL_APPROVED by SUPERVISOR',
  'Request Payment',
  'Payment Handled'],
 ['Declaration SUBMITTED by EMPLOYEE',
  'Declaration FINAL_APPROVED by SUPERVISOR',
  'Request Payment',
  'Payment Handled']]