# Test Avatar Input Generation

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from ocpa.objects.log.importer.ocel import factory as ocel_import_factory
from ocpa.algo.discovery.ocpn import algorithm as ocpn_discovery_factory
from src.utils import get_happy_path_log, create_flower_model, generate_variant_model, sample_traces
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers

In [3]:
filename = "../src/data/jsonocel/order_process.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})
log = sample_traces(ocel, ocpn, 1000)

Check the arcs: 100%|██████████| 46/46 [00:00<00:00, 45461.35it/s]
Generate the traces: 100%|██████████| 1000/1000 [00:00<00:00, 4894.69it/s]


In [4]:
# def writeToFile(file, lst):
#     with open(file, 'w') as outfile:
#         traces = set()
#         for entry in lst:
#             trace = ' '.join(entry)
#             traces.add(trace)
#         for trace in traces:
#             outfile.write(trace)
#             outfile.write('\n')

In [5]:
# f_out = "../src/data/playout/traces_order_process.txt"

In [6]:
# writeToFile(f_out, log)

In [7]:
# def readVariantFile(f_name, unique=False):
#     """
#     Reads Variant File

#     :param f_name: filename
#     :param unique: True or False
#     :return:
#     """
#     traces = []
#     with open(f_name) as file:
#         file_contents = file.read()
#         file_contents = file_contents.split("\n")
#         for row in file_contents:
#             if unique:
#                 if row not in traces:
#                     traces.append(row)
#             else:
#                 traces.append(row)

#     f_traces = []
#     for trace in traces:
#         f_trace = []
#         t = trace.split(" ")
#         for i in t:
#             if i != "" and "<" not in i:
#                 f_trace.append(str(i))
#         if len(f_trace) > 0:
#             f_traces.append(f_trace)

#     return f_traces

In [8]:
# gen = readVariantFile(f_out)

In [9]:
# gen

[['Place',
  'Order',
  'Fuel',
  'Car',
  'Confirm',
  'Order',
  'Pick',
  'Item',
  'Payment',
  'Reminder',
  'Pay',
  'Order',
  'Item',
  'out',
  'of',
  'stock',
  'Reorder',
  'Item',
  'Pick',
  'Item',
  'Load',
  'Cargo',
  'Start',
  'Route',
  'End',
  'Route'],
 ['Place',
  'Order',
  'Fuel',
  'Car',
  'Confirm',
  'Order',
  'Pick',
  'Item',
  'Payment',
  'Reminder',
  'Item',
  'out',
  'of',
  'stock',
  'Reorder',
  'Item',
  'Pick',
  'Item',
  'Payment',
  'Reminder',
  'Pay',
  'Order',
  'Load',
  'Cargo',
  'Start',
  'Route',
  'End',
  'Route'],
 ['Fuel',
  'Car',
  'Place',
  'Order',
  'Confirm',
  'Order',
  'Payment',
  'Reminder',
  'Item',
  'out',
  'of',
  'stock',
  'Reorder',
  'Item',
  'Pay',
  'Order',
  'Pick',
  'Item',
  'Load',
  'Cargo',
  'Start',
  'Route',
  'End',
  'Route'],
 ['Place',
  'Order',
  'Confirm',
  'Order',
  'Fuel',
  'Car',
  'Pay',
  'Order',
  'Item',
  'out',
  'of',
  'stock',
  'Pick',
  'Item',
  'Reorder',
  'Ite

In [15]:
log

[['Place Order',
  'Fuel Car',
  'Confirm Order',
  'Pick Item',
  'Pay Order',
  'Load Cargo',
  'Start Route',
  'Item out of stock',
  'Reorder Item',
  'End Route'],
 ['Place Order',
  'Fuel Car',
  'Confirm Order',
  'Payment Reminder',
  'Pick Item',
  'Pay Order',
  'Item out of stock',
  'Load Cargo',
  'Reorder Item',
  'Pick Item',
  'Load Cargo',
  'Start Route',
  'End Route'],
 ['Place Order',
  'Fuel Car',
  'Confirm Order',
  'Payment Reminder',
  'Item out of stock',
  'Payment Reminder',
  'Payment Reminder',
  'Pay Order',
  'Reorder Item',
  'Pick Item',
  'Load Cargo',
  'Start Route',
  'End Route'],
 ['Place Order',
  'Fuel Car',
  'Confirm Order',
  'Item out of stock',
  'Pay Order',
  'Pick Item',
  'Load Cargo',
  'Start Route',
  'End Route'],
 ['Place Order',
  'Fuel Car',
  'Confirm Order',
  'Item out of stock',
  'Pay Order',
  'Reorder Item',
  'Pick Item',
  'Load Cargo',
  'Start Route',
  'End Route'],
 ['Fuel Car',
  'Place Order',
  'Confirm Order',

In [10]:
# def find_num_sentences(system):
#     file = f"../src/data/playout/{system}.txt"
#     return str(sum(1 for _ in open(file)))

In [11]:
# find_num_sentences('traces_order_process')

'723'

In [17]:
def get_longest_sentence(sentences):
    longest_sentence = max(sentences, key=lambda x: len(x))
    return len(longest_sentence)

def get_vocabulary(sentences):
    vocabulary = set()
    for sentence in sentences:
        vocabulary.update(sentence)
    return vocabulary

In [18]:
max_seq_length = get_longest_sentence(log)

In [19]:
vocab = get_vocabulary(log)

In [20]:
max_seq_length

22

In [21]:
vocab

{'Confirm Order',
 'End Route',
 'Fuel Car',
 'Item out of stock',
 'Load Cargo',
 'Pay Order',
 'Payment Reminder',
 'Pick Item',
 'Place Order',
 'Reorder Item',
 'Start Route'}

In [22]:
vocab_size = len(vocab)

In [23]:
vocab_size

11

In [46]:
# Define the VAE architecture
latent_dim = 32  # Dimensionality of the latent space

# Tokenize the sentences and convert them to sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(log)
vocab_size = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(log)
max_seq_length = max(len(seq) for seq in log)
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

In [47]:
padded_sequences

array([[5, 6, 7, ..., 0, 0, 0],
       [5, 6, 7, ..., 0, 0, 0],
       [5, 6, 7, ..., 0, 0, 0],
       ...,
       [5, 7, 1, ..., 0, 0, 0],
       [6, 5, 7, ..., 0, 0, 0],
       [6, 5, 7, ..., 0, 0, 0]])

In [53]:
# Encoder
encoder_inputs = layers.Input(shape=(max_seq_length,))
x = layers.Embedding(input_dim=vocab_size, output_dim=64)(encoder_inputs)
x = layers.GRU(64, return_sequences=True)(x)
x = layers.GRU(32)(x)
mean = layers.Dense(latent_dim)(x)
log_var = layers.Dense(latent_dim)(x)

# Sampling layer
z = layers.Lambda(lambda args: args[0] + tf.exp(args[1] / 2) * tf.random.normal(shape=tf.shape(args[0])))([mean, log_var])

# Decoder
decoder_inputs = layers.Input(shape=(latent_dim,))
x = layers.Dense(32)(decoder_inputs)
x = layers.RepeatVector(max_seq_length)(x)
x = layers.GRU(32, return_sequences=True)(x)
decoder_outputs = layers.TimeDistributed(layers.Dense(vocab_size, activation='softmax'))(x)


In [54]:
# Define the VAE model
vae = tf.keras.Model(encoder_inputs, decoder_outputs)

ValueError: Graph disconnected: cannot obtain value for tensor KerasTensor(type_spec=TensorSpec(shape=(None, 32), dtype=tf.float32, name='input_11'), name='input_11', description="created by layer 'input_11'") at layer "dense_16". The following previous layers were accessed without issue: []

In [None]:
# Define the loss function
def vae_loss(inputs, outputs):
    reconstruction_loss = tf.keras.losses.sparse_categorical_crossentropy(inputs, outputs)
    reconstruction_loss *= max_seq_length  # Scale the loss by the sequence length
    kl_loss = -0.5 * tf.reduce_mean(1 + encoder_outputs[:, latent_dim:] - tf.square(encoder_outputs[:, :latent_dim]) - tf.exp(encoder_outputs[:, latent_dim:]))
    return reconstruction_loss + kl_loss

vae.compile(optimizer='adam', loss=vae_loss)

# Train the VAE
vae.fit(padded_sequences, padded_sequences, batch_size=batch_size, epochs=num_epochs)

# Generate sentences from the latent space
def generate_sentences_from_latent_space(model, latent_dim, num_sentences=5):
    random_latent_vectors = tf.random.normal(shape=(num_sentences, latent_dim))
    generated_sequences = model.predict(random_latent_vectors)
    generated_sentences = tokenizer.sequences_to_texts(generated_sequences)
    for sentence in generated_sentences:
        print("Generated sentence:", sentence)

generate_sentences_from_latent_space(vae, latent_dim)

In [9]:
happy_path__ocel = get_happy_path_log(filename)
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})
log_happy = sample_traces(ocel, happy_path_ocpn, 1000)

Check the arcs: 100%|██████████| 26/26 [00:00<?, ?it/s]
Generate the traces: 100%|██████████| 1000/1000 [00:00<00:00, 28474.76it/s]


In [10]:
ots = ["order","item","delivery"]
flower_ocpn = create_flower_model(filename,ots)
log_flower = sample_traces(ocel, flower_ocpn, 1000)

Check the arcs: 100%|██████████| 32/32 [00:00<00:00, 33197.56it/s]
Generate the traces: 100%|██████████| 1000/1000 [00:01<00:00, 725.67it/s]


In [3]:
filename = "../src/data/jsonocel/order_process.jsonocel"
ots = ["order","item","delivery"]
ocel = ocel_import_factory.apply(filename)
variant_ocpn = generate_variant_model(ocel,save_path_logs='../src/data/csv/order_process_variants/order_variant',object_types = ots )
log_variant = sample_traces(ocel, variant_ocpn, 20)

Generating Variant Models: 100%|██████████| 12/12 [00:02<00:00,  5.59it/s]
Processing Variant Nets: 100%|██████████| 12/12 [00:00<00:00, 5927.65it/s]


#########Start generating Object-Centric Petri Net#########
#########Finished generating Object-Centric Petri Net#########


Check the arcs: 100%|██████████| 378/378 [00:00<00:00, 14471.04it/s]
Generate the traces: 100%|██████████| 20/20 [00:00<00:00, 435.28it/s]


In [4]:
log_variant

[['Place Order_11',
  'Confirm Order_11',
  'Pick Item_11',
  'Payment Reminder_11',
  'Load Cargo_11',
  'Fuel Car_11',
  'Start Route_11',
  'Payment Reminder_11',
  'Payment Reminder_11',
  'End Route_11',
  'Pay Order_11'],
 ['Place Order_11',
  'Confirm Order_11',
  'Pick Item_11',
  'Payment Reminder_11',
  'Load Cargo_11',
  'Payment Reminder_11',
  'Payment Reminder_11',
  'Pay Order_11',
  'Fuel Car_11',
  'Start Route_11',
  'End Route_11'],
 ['Place Order_0',
  'Confirm Order_0',
  'Pick Item_0',
  'Pay Order_0',
  'Load Cargo_0',
  'Fuel Car_0',
  'Start Route_0',
  'End Route_0'],
 ['Place Order_0',
  'Confirm Order_0',
  'Pick Item_0',
  'Pay Order_0',
  'Load Cargo_0',
  'Fuel Car_0',
  'Start Route_0',
  'End Route_0'],
 ['Place Order_1',
  'Confirm Order_1',
  'Item out of stock_1',
  'Pay Order_1',
  'Reorder Item_1',
  'Pick Item_1',
  'Load Cargo_1',
  'Fuel Car_1',
  'Start Route_1',
  'End Route_1'],
 ['Place Order_8',
  'Confirm Order_8',
  'Pay Order_8',
  'Fuel

# O2C Log

### Standard Petri Net

In a first step, we generate a sample from the object-centric petri net.

In [14]:
filename = "../src/data/jsonocel/order_process.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})
log = sample_traces(ocel, ocpn, 1000)

Check the arcs: 100%|██████████| 46/46 [00:00<00:00, 88503.66it/s]
Generate the traces: 100%|██████████| 1000/1000 [00:00<00:00, 25439.76it/s]


In [15]:
happy_path__ocel = get_happy_path_log(filename)
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})
log_happy = sample_traces(ocel, happy_path_ocpn, 1000)

Check the arcs: 100%|██████████| 26/26 [00:00<00:00, 33056.05it/s]
Generate the traces: 100%|██████████| 1000/1000 [00:00<00:00, 37635.30it/s]


In [16]:
ots = ["order","item","delivery"]
flower_ocpn = create_flower_model(filename,ots)
log_flower = sample_traces(ocel, flower_ocpn, 1000)

Check the arcs: 100%|██████████| 32/32 [00:00<00:00, 32009.95it/s]
Generate the traces: 100%|██████████| 1000/1000 [00:01<00:00, 666.01it/s]


In [17]:
filename = "../src/data/jsonocel/order_process.jsonocel"
ots = ["order","item","delivery"]
ocel = ocel_import_factory.apply(filename)
variant_ocpn = generate_variant_model(ocel,save_path_logs='../src/data/csv/order_process_variants/order_variant',object_types = ots,save_path_visuals=f"../reports/figures/order_variant_total.svg" )
log_variant = sample_traces(ocel, variant_ocpn, 20)

Generating Variant Models: 100%|██████████| 12/12 [00:01<00:00,  7.01it/s]
Processing Variant Nets: 100%|██████████| 12/12 [00:00<00:00, 11941.08it/s]


#########Start generating Object-Centric Petri Net#########
#########Finished generating Object-Centric Petri Net#########


Check the arcs: 100%|██████████| 378/378 [00:00<00:00, 28846.24it/s]
Generate the traces: 100%|██████████| 20/20 [00:00<00:00, 959.94it/s]
