# VAE Approach

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
from ocpa.objects.log.importer.ocel import factory as ocel_import_factory
from ocpa.algo.discovery.ocpn import algorithm as ocpn_discovery_factory
from src.utils import get_happy_path_log, create_flower_model, generate_variant_model, sample_traces, process_log
from ocpa.objects.log.importer.csv import factory as ocel_import_factory_csv
from models.VAE_measure import get_text_data, decode_sequence, create_lstm_vae, VAE_generalization, create_VAE_input
from tqdm import tqdm
import numpy as np

# Order Process

In [8]:
filename = "../src/data/jsonocel/order_process.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})

In [9]:
train_log = create_VAE_input(ocel,'../src/data/VAE_input/order_process.txt')

In [10]:
timesteps_max, enc_tokens, characters, char2id, id2char, x, x_decoder = get_text_data(num_samples=10000,
                                                                                      data_path='../src/data/VAE_input/order_process.txt')

print(x.shape, "Creating model...")

Number of samples: 48
Number of unique input tokens: 21
Max sequence length for inputs: 3988
(48, 3988, 21) Creating model...


In [None]:
input_dim, timesteps = x.shape[-1], x.shape[-2]
batch_size, latent_dim = 1, 191
intermediate_dim, epochs = 353, 20

vae, enc, gen, stepper = create_lstm_vae(input_dim,
                                         batch_size=batch_size,
                                         intermediate_dim=intermediate_dim,
                                         latent_dim=latent_dim,
                                        )
print("Training model...")

vae.fit([x, x_decoder], x, epochs=epochs, verbose=1)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 21)]   0           []                               
                                                                                                  
 lstm (LSTM)                    (None, 353)          529500      ['input_1[0][0]']                
                                                                                                  
 dense (Dense)                  (None, 191)          67614       ['lstm[0][0]']                   
                                                                                                  
 dense_1 (Dense)                (None, 191)          67614       ['lstm[0][0]']                   
                                                                                              

In [None]:
print("Fitted, predicting...")
#rearrange the input data and get the max amount of characters
input_data = [' '.join(inner_list) for inner_list in train_log]
max_length = max(len(string) for string in input_data)

def decode(s):
    return decode_sequence(s, gen, stepper, input_dim, char2id, id2char, max_length)

log = []

for _ in tqdm(range(500), desc="Sample Traces"):

    id_from = np.random.randint(0, x.shape[0] - 1)

    m_from, std_from = enc.predict([[x[id_from]]])

    seq_from = np.random.normal(size=(latent_dim,))
    seq_from = m_from + std_from * seq_from

    #print(decode(seq_from))
    log.append([decode(seq_from)])

In [None]:
log

In [None]:
df_log = process_log(log, ocel, ocpn, '../src/data/VAE_generated/order_process_original_sampled.csv')

In [None]:
df_log

In [None]:
object_types = ["order","item","delivery"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_gen = ocel_import_factory_csv.apply(file_path='../src/data/VAE_generated/order_process_original_sampled.csv', parameters=parameters)

# OCPN Model

In [None]:
generalization = VAE_generalization(ocel_gen, ocpn)

# Happy Path Order

In [None]:
happy_path__ocel = get_happy_path_log(filename)

In [None]:
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})

In [None]:
happy_path__ocel = get_happy_path_log(filename)
generalization = VAE_generalization(ocel_gen, happy_path_ocpn)

# Flower Model Order

In [None]:
filename = "../src/data/jsonocel/order_process.jsonocel"
ots = ["order","item","delivery"]
flower_ocpn = create_flower_model(filename,ots)

In [None]:
generalization = VAE_generalization(ocel_gen, flower_ocpn)

# Variant OCPN

In [None]:
filename = "../src/data/jsonocel/order_process.jsonocel"
ots = ["order","item","delivery"]
ocel = ocel_import_factory.apply(filename)
variant_ocpn = generate_variant_model(ocel,save_path_logs='../src/data/csv/order_process_variants/order_process_variant',object_types = ots,save_path_visuals=f"../reports/figures/order_variant_total.svg" )

In [None]:
for transition in variant_ocpn.transitions:
    split_string = transition.name.split("_")
    transition.name = split_string[0]

In [None]:
generalization = VAE_generalization(ocel_gen, variant_ocpn)

# P2P Process

In [23]:
filename = "../src/data/jsonocel/p2p-normal.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})

Check the arcs: 100%|██████████| 40/40 [00:00<00:00, 20720.29it/s]
Generate the traces: 100%|██████████| 10000/10000 [00:01<00:00, 8630.07it/s]


In [12]:
train_log = create_VAE_input(ocel,'../src/data/VAE_input/p2p_process.txt')

In [14]:
timesteps_max, enc_tokens, characters, char2id, id2char, x, x_decoder = get_text_data(num_samples=10000,
                                                                                      data_path='../src/data/VAE_input/p2p_process.txt')

print(x.shape, "Creating model...")

Number of samples: 10000
Number of unique input tokens: 15
Max sequence length for inputs: 24
(10000, 24, 15) Creating model...


In [15]:
input_dim, timesteps = x.shape[-1], x.shape[-2]
batch_size, latent_dim = 1, 191
intermediate_dim, epochs = 353, 20

vae, enc, gen, stepper = create_lstm_vae(input_dim,
                                         batch_size=batch_size,
                                         intermediate_dim=intermediate_dim,
                                         latent_dim=latent_dim,
                                        )
print("Training model...")

vae.fit([x, x_decoder], x, epochs=epochs, verbose=1)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 15)]   0           []                               
                                                                                                  
 lstm (LSTM)                    (None, 353)          521028      ['input_1[0][0]']                
                                                                                                  
 dense (Dense)                  (None, 191)          67614       ['lstm[0][0]']                   
                                                                                                  
 dense_1 (Dense)                (None, 191)          67614       ['lstm[0][0]']                   
                                                                                              

<keras.callbacks.History at 0x26fac903c40>

In [16]:
print("Fitted, predicting...")
#rearrange the input data and get the max amount of characters
input_data = [' '.join(inner_list) for inner_list in train_log]
max_length = max(len(string) for string in input_data)

def decode(s):
    return decode_sequence(s, gen, stepper, input_dim, char2id, id2char, max_length)

log = []

for _ in tqdm(range(500), desc="Sample Traces"):

    id_from = np.random.randint(0, x.shape[0] - 1)

    m_from, std_from = enc.predict([[x[id_from]]])

    seq_from = np.random.normal(size=(latent_dim,))
    seq_from = m_from + std_from * seq_from

    #print(decode(seq_from))
    log.append([decode(seq_from)])

Fitted, predicting...


Sample Traces: 100%|██████████| 500/500 [00:20<00:00, 24.46it/s]


In [17]:
log

[['create purchase requisition create purchase order receive goods issue goods receipt verify material receive invoice clear invoice plan goods issue goods issue '],
 ['create purchase requisition create purchase order receive goods issue goods receipt plan goods issue receive invoice clear invoice verify material goods issue '],
 ['create purchase requisition create purchase order receive goods issue goods receipt receive invoice verify material clear invoice plan goods issue goods issue '],
 ['create purchase requisition create purchase order receive goods issue goods receipt receive invoice plan goods issue verify material goods issue clear invoice '],
 ['create purchase requisition create purchase order receive goods issue goods receipt plan goods issue verify material receive invoice clear invoice goods issue '],
 ['create purchase requisition create purchase order receive goods issue goods receipt plan goods issue verify material receive invoice clear invoice goods issue '],
 ['c

In [18]:
df_log = process_log(log, ocel, ocpn, '../src/data/VAE_generated/p2p_process_original_sampled.csv')

In [19]:
df_log

Unnamed: 0,event_id,event_activity,event_execution,event_timestamp,INVOICE,GDSRCPT,PURCHREQ,MATERIAL,PURCHORD
0,0,Create Purchase Requisition,1,2022-01-01 13:18:50.341361,[],[],[PURCHREQ1],[MATERIAL1],[]
1,1,Create Purchase Order,1,2022-01-01 13:19:50.341361,[],[],[PURCHREQ1],[MATERIAL1],[PURCHORD1]
2,2,Receive Goods,1,2022-01-01 13:20:50.341361,[],[GDSRCPT1],[],[MATERIAL1],[PURCHORD1]
3,3,Issue Goods Receipt,1,2022-01-01 13:21:50.341361,[],[GDSRCPT1],[],[MATERIAL1],[PURCHORD1]
4,4,Verify Material,1,2022-01-01 13:22:50.341361,[],[],[],[MATERIAL1],[]
...,...,...,...,...,...,...,...,...,...
4495,4495,Plan Goods Issue,500,2022-02-15 08:53:29.737267,[],[],[],[MATERIAL500],[]
4496,4496,Receive Invoice,500,2022-02-15 08:54:29.737267,[INVOICE500],[],[],[],[PURCHORD500]
4497,4497,Verify Material,500,2022-02-15 08:55:29.737267,[],[],[],[MATERIAL500],[]
4498,4498,Clear Invoice,500,2022-02-15 08:56:29.737267,[INVOICE500],[GDSRCPT500],[],[],[PURCHORD500]


In [24]:
object_types = ["PURCHORD","INVOICE","PURCHREQ","MATERIAL","GDSRCPT"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_gen = ocel_import_factory_csv.apply(file_path='../src/data/VAE_generated/p2p_process_original_sampled.csv', parameters=parameters)

# OCPN Model

In [21]:
generalization = VAE_generalization(ocel_gen, ocpn)

Precision of IM-discovered net:  0.8519
Fitness of IM-discovered net:  1.0
VAE Generalization= 0.92


# Happy Path Order

In [22]:
happy_path__ocel = get_happy_path_log(filename)

In [23]:
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})

In [24]:
happy_path__ocel = get_happy_path_log(filename)
generalization = VAE_generalization(ocel_gen, happy_path_ocpn)

Precision of IM-discovered net:  0.8721
Fitness of IM-discovered net:  0.8129
VAE Generalization= 0.8414


# Flower Model Order

In [25]:
filename = "../src/data/jsonocel/p2p-normal.jsonocel"
ots = ["PURCHORD","INVOICE","PURCHREQ","MATERIAL","GDSRCPT"]
flower_ocpn = create_flower_model(filename,ots)

In [26]:
generalization = VAE_generalization(ocel_gen, flower_ocpn)

Precision of IM-discovered net:  0.1699
Fitness of IM-discovered net:  1.0
VAE Generalization= 0.2905


# Variant OCPN

In [20]:
filename = "../src/data/jsonocel/p2p-normal.jsonocel"
ots = ["PURCHORD","INVOICE","PURCHREQ","MATERIAL","GDSRCPT"]
ocel = ocel_import_factory.apply(filename)
variant_ocpn = generate_variant_model(ocel,save_path_logs='../src/data/csv/p2p-normal_variants/p2p-normal_variant',object_types = ots ,save_path_visuals=f"../reports/figures/p2p_variant_total.svg" )

Generating Variant Models: 100%|██████████| 20/20 [00:05<00:00,  3.37it/s]
Processing Variant Nets: 100%|██████████| 20/20 [00:00<00:00, 2531.19it/s]


#########Start generating Object-Centric Petri Net#########
#########Finished generating Object-Centric Petri Net#########


In [21]:
for transition in variant_ocpn.transitions:
    split_string = transition.name.split("_")
    transition.name = split_string[0]

In [22]:
generalization = VAE_generalization(ocel_gen, variant_ocpn)

KeyboardInterrupt: 

# BPI Challenge

In [27]:
filename = "../src/data/jsonocel/BPI2017-Final.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})

In [28]:
train_log = create_VAE_input(ocel,'../src/data/VAE_input/BPI_process.txt')

In [29]:
train_log

['Create application Accept Create offer Create offer Cancel offer Send (mail and online) Complete Call Create offer Cancel offer Create offer Send (mail and online) Return Validate Create offer Send (mail and online) Create offer Send (mail and online) Create offer Send (mail and online) Return Call incomplete files Validate Validate Call incomplete files Call incomplete files Validate Call incomplete files Create offer Send (mail and online) Create offer Send (online) Validate Accept offer Pending Cancel offer Cancel offer Cancel offer Cancel offer Cancel offer Cancel offer',
 'Create application Submit Complete Accept Create offer Create offer Send (mail and online) Send (mail and online) Call Cancel offer Cancel offer Create offer Create offer Create offer Create offer Cancel offer Cancel offer Create offer Create offer Send (mail and online) Send (mail and online) Validate Call incomplete files Validate Call incomplete files Create offer Send (mail and online) Create offer Send (o

In [32]:
timesteps_max, enc_tokens, characters, char2id, id2char, x, x_decoder = get_text_data(num_samples=10000,
                                                                                      data_path='../src/data/VAE_input/BPI_process.txt')

print(x.shape, "Creating model...")

Number of samples: 10000
Number of unique input tokens: 33
Max sequence length for inputs: 109
(10000, 109, 33) Creating model...


In [33]:
input_dim, timesteps = x.shape[-1], x.shape[-2]
batch_size, latent_dim = 1, 191
intermediate_dim, epochs = 353, 20

vae, enc, gen, stepper = create_lstm_vae(input_dim,
                                         batch_size=batch_size,
                                         intermediate_dim=intermediate_dim,
                                         latent_dim=latent_dim,
                                        )
print("Training model...")

vae.fit([x, x_decoder], x, epochs=epochs, verbose=1)

Model: "model_8"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_11 (InputLayer)          [(None, None, 33)]   0           []                               
                                                                                                  
 lstm_4 (LSTM)                  (None, 353)          546444      ['input_11[0][0]']               
                                                                                                  
 dense_8 (Dense)                (None, 191)          67614       ['lstm_4[0][0]']                 
                                                                                                  
 dense_9 (Dense)                (None, 191)          67614       ['lstm_4[0][0]']                 
                                                                                            

<keras.callbacks.History at 0x270fa6025b0>

In [34]:
print("Fitted, predicting...")
#rearrange the input data and get the max amount of characters
input_data = [' '.join(inner_list) for inner_list in train_log]
max_length = max(len(string) for string in input_data)

def decode(s):
    return decode_sequence(s, gen, stepper, input_dim, char2id, id2char, max_length)

log = []

for _ in tqdm(range(500), desc="Sample Traces"):

    id_from = np.random.randint(0, x.shape[0] - 1)

    m_from, std_from = enc.predict([[x[id_from]]])

    seq_from = np.random.normal(size=(latent_dim,))
    seq_from = m_from + std_from * seq_from

    #print(decode(seq_from))
    log.append([decode(seq_from)])

Fitted, predicting...


Sample Traces: 100%|██████████| 500/500 [00:38<00:00, 12.88it/s]


In [35]:
log

[['create application submit accept create offer send ( mail and online ) complete call return validate call incomplete files validate call incomplete files validate call incomplete files accept offer pending <end> '],
 ['create application submit accept create offer send ( mail and online ) complete call return validate call incomplete files validate call incomplete files validate call incomplete files accept offer pending <end> '],
 ['create application submit accept create offer send ( mail and online ) complete call return validate call incomplete files validate call incomplete files validate call incomplete files accept offer pending <end> '],
 ['create application submit accept create offer send ( mail and online ) complete call return validate call incomplete files validate call incomplete files validate call incomplete files accept offer pending <end> '],
 ['create application submit accept create offer send ( mail and online ) complete call return validate call incomplete file

In [36]:
df_log = process_log(log, ocel, ocpn, '../src/data/VAE_generated/BPI_process_sampled.csv')

In [37]:
df_log

Unnamed: 0,event_id,event_activity,event_execution,event_timestamp,offer,application
0,0,Create application,1,2022-01-01 10:06:21.404384,[],[application1]
1,1,Submit,1,2022-01-01 10:07:21.404384,[],[application1]
2,2,Accept,1,2022-01-01 10:08:21.404384,[],[application1]
3,3,Create offer,1,2022-01-01 10:09:21.404384,[offer1],[application1]
4,4,Complete,1,2022-01-01 10:10:21.404384,[],[application1]
...,...,...,...,...,...,...
7495,7495,Call,500,2022-03-17 10:25:23.695751,[offer500],[application500]
7496,7496,Validate,500,2022-03-17 10:26:23.695751,[],[application500]
7497,7497,Call,500,2022-03-17 10:27:23.695751,[offer500],[application500]
7498,7498,Accept,500,2022-03-17 10:28:23.695751,[],[application500]


In [38]:
object_types = ["application","offer"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_gen = ocel_import_factory_csv.apply(file_path='../src/data/VAE_generated/BPI_process_sampled.csv', parameters=parameters)

# OCPN Model

In [39]:
generalization = VAE_generalization(ocel_gen, ocpn)

Precision of IM-discovered net:  0.1886
Fitness of IM-discovered net:  0.8667
VAE Generalization= 0.3097


# Happy Path

In [40]:
happy_path__ocel = get_happy_path_log(filename)

In [41]:
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})

In [42]:
happy_path__ocel = get_happy_path_log(filename)
generalization = VAE_generalization(ocel_gen, happy_path_ocpn)

Precision of IM-discovered net:  0.9
Fitness of IM-discovered net:  0.3333
VAE Generalization= 0.4865


# Flower Model 

In [43]:
filename = "../src/data/jsonocel/BPI2017-Final.jsonocel"
ots = ["application","offer"]
flower_ocpn = create_flower_model(filename,ots)

In [44]:
generalization = VAE_generalization(ocel_gen, flower_ocpn)

Precision of IM-discovered net:  0.0626
Fitness of IM-discovered net:  1.0
VAE Generalization= 0.1179


# DS3 Log

In [3]:
filename = "../src/data/jsonocel/DS3.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})
#train_log = sample_traces(ocel, ocpn, 10000, save_path='../src/data/playout/ocpn_data_p2p.txt')


In [4]:
train_log = create_VAE_input(ocel,'../src/data/VAE_input/DS3.txt')

In [5]:
train_log

['Resolved Closed Incident New Incident New Incident Awaiting User Info Awaiting User Info Awaiting User Info Resolved Closed Incident Closed Incident Resolved Active Active Active Active Closed Incident Resolved Resolved Resolved Active New Incident Active New Incident New Incident New Incident Active New Incident Awaiting User Info Awaiting User Info Resolved Closed Incident Awaiting User Info Closed Incident Resolved Awaiting User Info Active Awaiting User Info Awaiting User Info Awaiting User Info New Incident New Incident Active Resolved Closed Incident New Incident Resolved Closed Incident Resolved Closed Incident New Incident New Incident New Incident New Incident New Incident New Incident New Incident Resolved Closed Incident Active Closed Incident Resolved Active Active Active Active Resolved Active Active New Incident Closed Incident New Incident New Incident Closed Incident Resolved New Incident New Incident New Incident Closed Incident New Incident New Incident Active Resol

In [6]:
timesteps_max, enc_tokens, characters, char2id, id2char, x, x_decoder = get_text_data(num_samples=10000,
                                                                                      data_path='../src/data/VAE_input/DS3.txt')

print(x.shape, "Creating model...")

Number of samples: 4825
Number of unique input tokens: 13
Max sequence length for inputs: 452
(4825, 452, 13) Creating model...


In [7]:
input_dim, timesteps = x.shape[-1], x.shape[-2]
batch_size, latent_dim = 1, 191
intermediate_dim, epochs = 353, 20

vae, enc, gen, stepper = create_lstm_vae(input_dim,
                                         batch_size=batch_size,
                                         intermediate_dim=intermediate_dim,
                                         latent_dim=latent_dim,
                                        )
print("Training model...")

vae.fit([x, x_decoder], x, epochs=epochs, verbose=1)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 13)]   0           []                               
                                                                                                  
 lstm (LSTM)                    (None, 353)          518204      ['input_1[0][0]']                
                                                                                                  
 dense (Dense)                  (None, 191)          67614       ['lstm[0][0]']                   
                                                                                                  
 dense_1 (Dense)                (None, 191)          67614       ['lstm[0][0]']                   
                                                                                              

<keras.callbacks.History at 0x1d43813e640>

In [8]:
print("Fitted, predicting...")
#rearrange the input data and get the max amount of characters
input_data = [' '.join(inner_list) for inner_list in train_log]
max_length = max(len(string) for string in input_data)

def decode(s):
    return decode_sequence(s, gen, stepper, input_dim, char2id, id2char, max_length)

log = []

for _ in tqdm(range(500), desc="Sample Traces"):

    id_from = np.random.randint(0, x.shape[0] - 1)

    m_from, std_from = enc.predict([[x[id_from]]])

    seq_from = np.random.normal(size=(latent_dim,))
    seq_from = m_from + std_from * seq_from

    #print(decode(seq_from))
    log.append([decode(seq_from)])

Fitted, predicting...


Sample Traces: 100%|██████████| 500/500 [01:06<00:00,  7.50it/s]


In [9]:
log

[['new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident resolved closed incident <end> '],
 ['new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident resolved closed incident <end> '],
 ['new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident resolved closed incident <end> '],
 ['new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident resolved closed incident <end> '],
 ['new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident new incident 

In [10]:
df_log = process_log(log, ocel, ocpn, '../src/data/VAE_generated/DS3_process_sampled.csv')

In [11]:
df_log

Unnamed: 0,event_id,event_activity,event_execution,event_timestamp,customer,incident
0,0,New Incident,1,2022-01-01 11:14:57.653267,[customer1],[incident1]
1,1,New Incident,1,2022-01-01 11:15:57.653267,[customer1],[incident1]
2,2,New Incident,1,2022-01-01 11:16:57.653267,[customer1],[incident1]
3,3,New Incident,1,2022-01-01 11:17:57.653267,[customer1],[incident1]
4,4,New Incident,1,2022-01-01 11:18:57.653267,[customer1],[incident1]
...,...,...,...,...,...,...
7495,7495,New Incident,500,2022-03-17 14:53:10.622774,[customer500],[incident500]
7496,7496,New Incident,500,2022-03-17 14:54:10.622774,[customer500],[incident500]
7497,7497,New Incident,500,2022-03-17 14:55:10.622774,[customer500],[incident500]
7498,7498,Resolved,500,2022-03-17 14:56:10.622774,[],[incident500]


In [12]:
object_types = ["incident","customer"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_gen = ocel_import_factory_csv.apply(file_path='../src/data/VAE_generated/DS3_process_sampled.csv', parameters=parameters)

# OCPN Model

In [13]:
generalization = VAE_generalization(ocel_gen, ocpn)

Precision of IM-discovered net:  0.1262
Fitness of IM-discovered net:  1.0
VAE Generalization= 0.2241


# Happy Path Order

In [14]:
happy_path__ocel = get_happy_path_log(filename)

In [15]:
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})

In [16]:
happy_path__ocel = get_happy_path_log(filename)
generalization = VAE_generalization(ocel_gen, happy_path_ocpn)

Precision of IM-discovered net:  0
Fitness of IM-discovered net:  0.0


ZeroDivisionError: float division by zero

# Flower Model Order

In [17]:
filename = "../src/data/jsonocel/DS3.jsonocel"
ots = ["incident","customer"]
flower_ocpn = create_flower_model(filename,ots)

In [18]:
generalization = VAE_generalization(ocel_gen, flower_ocpn)

Precision of IM-discovered net:  0.125
Fitness of IM-discovered net:  1.0
VAE Generalization= 0.2222


# DS4 Log

In [19]:
filename = "../src/data/jsonocel/DS4.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})
#train_log = sample_traces(ocel, ocpn, 10000, save_path='../src/data/playout/ocpn_data_p2p.txt')


In [28]:
group_sizes = ocel.log.log.groupby('event_execution').size()
largest_groups = group_sizes.nlargest(2).index

filtered_df =ocel.log.log[~ocel.log.log['event_execution'].isin(largest_groups)]

In [30]:
filtered_df.to_csv("../src/data/jsonocel/DS4_filtered.csv")

In [3]:
object_types =  ["Payment application","Control summary","Entitlement application","Geo parcel document","Inspection","Reference alignment"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_filt = ocel_import_factory_csv.apply(file_path='../src/data/jsonocel/DS4_filtered.csv', parameters=parameters)

In [4]:
train_log = create_VAE_input(ocel_filt,'../src/data/VAE_input/DS4.txt')

In [5]:
train_log

['Payment application mail income Payment application mail valid Geo parcel document initialize Geo parcel document begin editing Geo parcel document create Control summary initialize Control summary begin editing Control summary finish editing Geo parcel document finish pre-check Geo parcel document save Geo parcel document finish pre-check Geo parcel document finish editing Geo parcel document finish editing Geo parcel document finish editing Geo parcel document save Inspection initialize Inspection plan Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Geo parcel document finish editing Inspection finish preparations Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Inspection save Geo parcel document begin editing Geo parcel document save Geo parcel document save Inspection save Geo parcel documen

In [6]:
timesteps_max, enc_tokens, characters, char2id, id2char, x, x_decoder = get_text_data(num_samples=2000,
                                                                                      data_path='../src/data/VAE_input/DS4.txt')

print(x.shape, "Creating model...")

Number of samples: 2000
Number of unique input tokens: 47
Max sequence length for inputs: 2608
(2000, 2608, 47) Creating model...


In [None]:
input_dim, timesteps = x.shape[-1], x.shape[-2]
batch_size, latent_dim = 1, 191
intermediate_dim, epochs = 353, 8

vae, enc, gen, stepper = create_lstm_vae(input_dim,
                                         batch_size=batch_size,
                                         intermediate_dim=intermediate_dim,
                                         latent_dim=latent_dim,
                                        )
print("Training model...")

vae.fit([x, x_decoder], x, epochs=epochs, verbose=1)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 47)]   0           []                               
                                                                                                  
 lstm (LSTM)                    (None, 353)          566212      ['input_1[0][0]']                
                                                                                                  
 dense (Dense)                  (None, 191)          67614       ['lstm[0][0]']                   
                                                                                                  
 dense_1 (Dense)                (None, 191)          67614       ['lstm[0][0]']                   
                                                                                              

In [None]:
print("Fitted, predicting...")
#rearrange the input data and get the max amount of characters
input_data = [' '.join(inner_list) for inner_list in train_log]
max_length = max(len(string) for string in input_data)

def decode(s):
    return decode_sequence(s, gen, stepper, input_dim, char2id, id2char, max_length)

log = []

for _ in tqdm(range(500), desc="Sample Traces"):

    id_from = np.random.randint(0, x.shape[0] - 1)

    m_from, std_from = enc.predict([[x[id_from]]])

    seq_from = np.random.normal(size=(latent_dim,))
    seq_from = m_from + std_from * seq_from

    #print(decode(seq_from))
    log.append([decode(seq_from)])

In [None]:
log

In [None]:
df_log = process_log(log, ocel, ocpn, '../src/data/VAE_generated/DS4_process_sampled.csv')

In [None]:
df_log

In [None]:
object_types =  ["Payment application","Control summary","Entitlement application","Geo parcel document","Inspection","Reference alignment"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_gen = ocel_import_factory_csv.apply(file_path='../src/data/VAE_generated/DS4_process_sampled.csv', parameters=parameters)

# OCPN Model

In [None]:
generalization = VAE_generalization(ocel_gen, ocpn)

# Happy Path Order

In [None]:
happy_path__ocel = get_happy_path_log(filename)

In [None]:
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})

In [None]:
happy_path__ocel = get_happy_path_log(filename)
generalization = VAE_generalization(ocel_gen, happy_path_ocpn)

# Flower Model Order

In [None]:
filename = "../src/data/jsonocel/DS4.jsonocel"
ots =  ["Payment application","Control summary","Entitlement application","Geo parcel document","Inspection","Reference alignment"]
flower_ocpn = create_flower_model(filename,ots)

In [None]:
generalization = VAE_generalization(ocel_gen, flower_ocpn)