# Test Avatar Input Generation

In [1]:
import warnings
# Ignore every warning raised from here on (no category or module filter is given).
warnings.filterwarnings('ignore')

In [4]:
from ocpa.objects.log.importer.ocel import factory as ocel_import_factory
from ocpa.objects.log.importer.csv import factory as ocel_import_factory_csv

In [5]:
def writeVariantToFile(file, lst):
    """Write each entry of ``lst`` to ``file`` as one space-separated line.

    Every element of an entry is converted with ``str`` and has all spaces
    removed, so a space in the output always separates two elements.
    The file is opened in ``'w'`` mode, i.e. existing content is replaced.

    Args:
        file: Path of the text file to write.
        lst: Iterable of entries; each entry is an iterable of elements.
    """
    with open(file, 'w') as outfile:
        for entry in lst:
            tokens = (str(ev).replace(" ", "") for ev in entry)
            # strip() drops the stray separator left by an empty first/last token.
            outfile.write(" ".join(tokens).strip() + "\n")

# O2C Log

### Standard Petri Net

As a first step, we load the OCEL log into the notebook and generate the object-centric Petri net.

In [6]:
# Location of the order-process OCEL log (relative path, resolved against the working directory).
filename = "../src/data/jsonocel/order_process.jsonocel"
# Import the log; later cells read ocel.log.log and ocel.process_execution_mappings from the result.
ocel = ocel_import_factory.apply(filename)


In [8]:
# Display the imported event log table.
ocel.log.log

Unnamed: 0_level_0,event_id,event_activity,event_timestamp,event_start_timestamp,order,item,delivery
event_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,Place Order,2022-10-20 16:56:19.864004,2022-10-20 16:56:19.864004,[order1],"[item2, item3]",[]
1,1,Confirm Order,2022-10-20 16:56:20.864004,2022-10-20 16:56:20.864004,[order1],"[item2, item3]",[]
2,2,Pick Item,2022-10-20 17:01:51.864004,2022-10-20 17:01:51.864004,[],[item3],[]
3,3,Pick Item,2022-10-20 17:01:51.864004,2022-10-20 17:01:51.864004,[],[item2],[]
4,4,Pay Order,2022-10-20 17:01:51.864004,2022-10-20 17:01:51.864004,[order1],[],[]
...,...,...,...,...,...,...,...
2386,2386,Start Route,2022-12-17 23:00:15.871959,2022-12-17 23:00:15.871959,[],"[item199, item196]",[delivery64]
2387,2387,End Route,2022-12-18 04:01:04.871959,2022-12-18 04:01:04.871959,[],"[item199, item196]",[delivery64]
2388,2388,Fuel Car,2022-12-18 06:59:45.871959,2022-12-18 06:59:45.871959,[],[],[delivery193]
2389,2389,Start Route,2022-12-18 07:25:24.871959,2022-12-18 07:25:24.871959,[],"[item259, item265]",[delivery193]


In [9]:
# The process execution mappings hold lists of length one, so unwrap each list
# into a plain {event: process execution} dictionary from which the case can be derived.
mapping_dict = {
    event: executions[0]
    for event, executions in ocel.process_execution_mappings.items()
}
# Add the process execution (case) number of every event as a new column of the
# log, looked up through the log index.
ocel.log.log['event_execution'] = ocel.log.log.index.map(mapping_dict)

In [10]:
# Display the log again, now including the event_execution column.
ocel.log.log

Unnamed: 0_level_0,event_id,event_activity,event_timestamp,event_start_timestamp,order,item,delivery,event_execution
event_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0,Place Order,2022-10-20 16:56:19.864004,2022-10-20 16:56:19.864004,[order1],"[item2, item3]",[],33
1,1,Confirm Order,2022-10-20 16:56:20.864004,2022-10-20 16:56:20.864004,[order1],"[item2, item3]",[],33
2,2,Pick Item,2022-10-20 17:01:51.864004,2022-10-20 17:01:51.864004,[],[item3],[],33
3,3,Pick Item,2022-10-20 17:01:51.864004,2022-10-20 17:01:51.864004,[],[item2],[],33
4,4,Pay Order,2022-10-20 17:01:51.864004,2022-10-20 17:01:51.864004,[order1],[],[],33
...,...,...,...,...,...,...,...,...
2386,2386,Start Route,2022-12-17 23:00:15.871959,2022-12-17 23:00:15.871959,[],"[item199, item196]",[delivery64],0
2387,2387,End Route,2022-12-18 04:01:04.871959,2022-12-18 04:01:04.871959,[],"[item199, item196]",[delivery64],0
2388,2388,Fuel Car,2022-12-18 06:59:45.871959,2022-12-18 06:59:45.871959,[],[],[delivery193],0
2389,2389,Start Route,2022-12-18 07:25:24.871959,2022-12-18 07:25:24.871959,[],"[item259, item265]",[delivery193],0


In [11]:
# Group the log by process execution (case) so it can be iterated case by case,
# with the events of each case in timestamp order.
# A stable sort is requested explicitly: several events share the same timestamp,
# and the default quicksort does not guarantee their relative order.
grouped_df = (
    ocel.log.log
    .sort_values('event_timestamp', kind='mergesort')
    .groupby('event_execution')
)

In [20]:
# Order the events by process execution first and by timestamp within each execution.
sorted_log = ocel.log.log.sort_values(
    by=['event_execution', 'event_timestamp'],
)

In [21]:
# Display the log ordered by process execution and timestamp.
sorted_log

Unnamed: 0_level_0,event_id,event_activity,event_timestamp,event_start_timestamp,order,item,delivery,event_execution
event_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5,5,Place Order,2022-10-20 17:06:18.864004,2022-10-20 17:06:18.864004,[order2],"[item4, item5]",[],0
6,6,Confirm Order,2022-10-20 17:06:19.864004,2022-10-20 17:06:19.864004,[order2],"[item4, item5]",[],0
7,7,Item out of stock,2022-10-20 17:11:46.864004,2022-10-20 17:11:46.864004,[],[item5],[],0
8,8,Pick Item,2022-10-20 17:11:47.864004,2022-10-20 17:11:47.864004,[],[item4],[],0
9,9,Pay Order,2022-10-20 17:11:47.864004,2022-10-20 17:11:47.864004,[order2],[],[],0
...,...,...,...,...,...,...,...,...
1220,1220,Pick Item,2022-10-21 23:47:47.867991,2022-10-21 23:47:47.867991,[],[item373],[],47
1240,1240,Load Cargo,2022-10-22 00:18:31.867991,2022-10-22 00:18:31.867991,[],"[item373, item372]",[delivery139],47
1838,1838,Fuel Car,2022-10-29 13:17:48.870968,2022-10-29 13:17:48.870968,[],[],[delivery139],47
1840,1840,Start Route,2022-10-29 13:43:07.870968,2022-10-29 13:43:07.870968,[],"[item373, item372]",[delivery139],47


In [16]:
# Names of the log columns that hold the objects of each object type.
object_types = [
    "order",
    "item",
    "delivery",
]

In [17]:
# Start the column selection with the activity name of each event.
columns_to_consider = list(('event_activity',))

In [18]:
# Append the object-type columns. Names that are already in the list are skipped,
# so re-running this cell does not add duplicate columns.
columns_to_consider.extend(
    [col for col in object_types if col not in columns_to_consider]
)

In [19]:
# Show the complete list of columns used for the selection below.
columns_to_consider

['event_activity', 'order', 'item', 'delivery']

In [22]:
# Restrict the sorted log to the activity column and the object-type columns.
sorted_log.loc[:, columns_to_consider]

Unnamed: 0_level_0,event_activity,order,item,delivery
event_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,Place Order,[order2],"[item4, item5]",[]
6,Confirm Order,[order2],"[item4, item5]",[]
7,Item out of stock,[],[item5],[]
8,Pick Item,[],[item4],[]
9,Pay Order,[order2],[],[]
...,...,...,...,...
1220,Pick Item,[],[item373],[]
1240,Load Cargo,[],"[item373, item372]",[delivery139]
1838,Fuel Car,[],[],[delivery139]
1840,Start Route,[],"[item373, item372]",[delivery139]


In [23]:
# Convert the selected columns into a list of lists: one inner list per log row,
# holding the activity followed by the objects of each object type.
selected_rows = sorted_log.loc[:, columns_to_consider].values
input_sentences = [list(row) for row in selected_rows]

print(input_sentences)

[['Place Order', ['order2'], ['item4', 'item5'], []], ['Confirm Order', ['order2'], ['item4', 'item5'], []], ['Item out of stock', [], ['item5'], []], ['Pick Item', [], ['item4'], []], ['Pay Order', ['order2'], [], []], ['Reorder Item', [], ['item5'], []], ['Place Order', ['order5'], ['item11', 'item10'], []], ['Confirm Order', ['order5'], ['item11', 'item10'], []], ['Pay Order', ['order5'], [], []], ['Pick Item', [], ['item10'], []], ['Pick Item', [], ['item11'], []], ['Place Order', ['order6'], ['item12', 'item13'], []], ['Confirm Order', ['order6'], ['item12', 'item13'], []], ['Item out of stock', [], ['item13'], []], ['Pick Item', [], ['item12'], []], ['Payment Reminder', ['order6'], [], []], ['Reorder Item', [], ['item13'], []], ['Place Order', ['order7'], ['item14', 'item15'], []], ['Confirm Order', ['order7'], ['item14', 'item15'], []], ['Item out of stock', [], ['item14'], []], ['Reorder Item', [], ['item14'], []], ['Pay Order', ['order7'], [], []], ['Pick Item', [], ['item15']