In [None]:
from paho.mqtt import client as mqtt_client
import logging, time
import json
import pm4py
from tqdm import tqdm
import pandas as pd
import random
import os

In [None]:
# Summary of the planned swaps: for every probability key, print the key, the
# number of planned swaps and a separator line.
# `prob_swaps` is assigned in a later cell of this notebook, so that cell has
# to be executed before this one.
for prob_key, planned_swaps in prob_swaps.items():
    print(prob_key, len(planned_swaps), "--", sep="\n")

In [None]:
# Connection settings for the MQTT broker.
broker, port = "localhost", 1883
# First segment of every topic the notebook publishes to.
topic = "cominds"
# Identifier this publisher presents to the broker.
client_id = "mqtt-publisher"

def connect_mqtt():
    """Create the MQTT publisher client and connect it to the broker.

    Reads the module-level ``broker``, ``port`` and ``client_id`` settings.

    Returns:
        The ``mqtt_client.Client`` instance, after ``connect()`` has been
        called on it.
    """

    def on_connect(client, userdata, flags, rc):
        # A return code of 0 is reported as success, anything else as failure.
        if rc == 0:
            print("Connected to MQTT Broker!")
        else:
            # print() does not apply %-formatting to extra arguments, so the
            # return code has to be interpolated explicitly.
            print("Failed to connect, return code %d\n" % rc)

    # Set Connecting Client ID
    # NOTE(review): paho-mqtt 2.x expects a CallbackAPIVersion as the first
    # Client argument -- confirm which version this notebook runs against.
    client = mqtt_client.Client(client_id)
    # client.username_pw_set(username, password)
    client.on_connect = on_connect
    client.connect(broker, port)
    return client

# Module-level publisher client used by the cells below.
client = connect_mqtt()

# Reconnect settings read by on_disconnect.
FIRST_RECONNECT_DELAY = 1  # seconds slept before the first reconnect attempt
RECONNECT_RATE = 2  # factor the delay is multiplied by after a failed attempt
MAX_RECONNECT_COUNT = 12  # failed attempts after which reconnecting stops
MAX_RECONNECT_DELAY = 60  # upper bound for the delay, in seconds

def on_disconnect(client, userdata, rc):
    """Try to re-establish the connection after the client was disconnected.

    The function sleeps between attempts, so it blocks its caller while it is
    retrying. The delay starts at FIRST_RECONNECT_DELAY seconds, is multiplied
    by RECONNECT_RATE after every failed attempt and never exceeds
    MAX_RECONNECT_DELAY. After MAX_RECONNECT_COUNT failed attempts it gives up.

    Args:
        client: Object whose ``reconnect()`` method is called for each attempt.
        userdata: Unused.
        rc: Disconnect result code; 0 is treated as a requested disconnect.
    """
    logging.info("Disconnected with result code: %s", rc)
    if rc == 0:
        # A clean, requested disconnect must not be undone by reconnecting.
        return

    reconnect_count, reconnect_delay = 0, FIRST_RECONNECT_DELAY
    while reconnect_count < MAX_RECONNECT_COUNT:
        logging.info("Reconnecting in %d seconds...", reconnect_delay)
        time.sleep(reconnect_delay)

        try:
            client.reconnect()
            logging.info("Reconnected successfully!")
            return
        except Exception as err:
            # Any failure of this attempt is logged and followed by a retry.
            logging.error("%s. Reconnect failed. Retrying...", err)

        # Grow the delay for the next attempt, up to the configured maximum.
        reconnect_delay *= RECONNECT_RATE
        reconnect_delay = min(reconnect_delay, MAX_RECONNECT_DELAY)
        reconnect_count += 1
    # Giving up is a failure, so report it at error level (info messages are
    # hidden by the default logging configuration).
    logging.error("Reconnect failed after %s attempts. Exiting...", reconnect_count)

# Install the reconnect handler on the module-level client.
client.on_disconnect = on_disconnect


In [None]:
# Key: swap probability in percent, as a zero-padded string.
# Value: largest offset (in rows) considered when picking a swap partner.
prob_maxdist = {
    "01": 1,
    "05": 5,
    "10": 7,
    "20": 10,
    "50": 20,
    "99": 99,
}


In [None]:
# Folder holding one sub-folder per dataset, and the dataset to stream.
path = "logs"
# dataset = "bpi2012"
# dataset = "bpi2017"
dataset = "bpi2020_travelpermits"

# Keep only the case id, the activity name and the timestamp. The explicit
# copy makes the column assignment below operate on an independent frame
# instead of a slice of the imported one (avoids SettingWithCopyWarning).
log = pm4py.read_xes(os.path.join(path, dataset, f"{dataset}_100traces.xes.gz"))
log = log[["case:concept:name", "concept:name", "time:timestamp"]].copy()
# Timestamps are rendered as strings so they can be placed in a JSON payload.
log["time:timestamp"] = log["time:timestamp"].dt.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
# Later cells do arithmetic on index labels and pass them to .iloc, which is
# only valid when the index is a plain 0..n-1 range -- enforce that here.
log = log.reset_index(drop=True)

In [None]:
# Plan, for every probability level, which rows of the log trade places.
# prob_swaps maps a probability key to {row index: row index it swaps with};
# both rows of a pair always belong to the same trace.
# NOTE(review): the `random` module is not seeded in the visible cells, so
# every run draws a different plan.

# The rows that make up each trace do not depend on the probability level, so
# they are looked up once instead of once per level.
case_column = log["case:concept:name"]
trace_indexes = [log[case_column == case_id].index for case_id in pd.unique(case_column)]

prob_swaps = {}
for pr, max_dist in prob_maxdist.items():
    prob = int(pr) / 100
    index_swaps = {}
    for idx in trace_indexes:
        swapped_indexes = set()
        # Offsets are drawn from 1..max_dist, limited by the trace length.
        max_offset = min(max_dist, len(idx))
        for i in idx:
            # A row takes part in at most one swap.
            if i in swapped_indexes:
                continue
            if random.random() >= prob:
                continue
            # Draw signed offsets until one lands on a not yet swapped row of
            # the same trace; give up after max_dist*5 + 1 draws.
            for _ in range(max_dist * 5 + 1):
                if random.random() < 0.5:
                    new_spot = -random.randint(1, max_offset)
                else:
                    new_spot = random.randint(1, max_offset)
                target = i + new_spot
                if target in idx and target not in swapped_indexes:
                    index_swaps[i] = target
                    swapped_indexes.add(i)
                    swapped_indexes.add(target)
                    break
    prob_swaps[pr] = index_swaps

In [None]:
def swap_rows(df, swap_dict):
    """Exchange the contents of pairs of rows in ``df``.

    The pairs are processed one after the other in the iteration order of
    ``swap_dict``; row labels stay where they are, only the values move.

    Args:
        df: DataFrame to modify. It is changed in place.
        swap_dict: Maps a row position to the row position it trades places
            with (both are used with ``.iloc``).

    Returns:
        The same ``df`` object that was passed in.
    """
    for first_pos, second_pos in swap_dict.items():
        # Keep a copy of the first row before it is overwritten.
        first_row = df.iloc[first_pos].copy()
        df.iloc[first_pos] = df.iloc[second_pos]
        df.iloc[second_pos] = first_row
    return df

# Collect every variant of the log under its probability key; "00" is the
# log without any swaps. Each swapped variant is also written to
# <path>/<dataset>/<key>.csv, and the log without swaps to
# <path>/<dataset>/<dataset>.csv.
ooo_logs = {"00": log}
for prob_key, planned_swaps in prob_swaps.items():
    # Swap on a copy so the original log stays untouched.
    log_swapped = swap_rows(log.copy(), planned_swaps)
    log_swapped.to_csv(os.path.join(path, dataset, f"{prob_key}.csv"))
    ooo_logs[prob_key] = log_swapped

log.to_csv(os.path.join(path, dataset, f"{dataset}.csv"))

In [None]:
# "output" folder next to the parent of the working directory. Built with
# os.path.join so the separator matches the platform (a literal backslash is
# only a path separator on Windows).
output_path = os.path.join(os.pardir, "output")

In [None]:
# Stream every log variant to the broker, one MQTT message per event.
# Topic layout: <topic>/swap_<probability key>_<trace id>/<activity>.
PUBLISH_BATCH_SIZE = 51  # messages sent between two pauses
PUBLISH_PAUSE_SECONDS = 100 / 1000  # length of the pause after each batch

for k, v in ooo_logs.items():
    rejected = 0
    # Plain tuples in column order: (trace id, activity, timestamp).
    rows = v.itertuples(index=False, name=None)
    for sent, r in enumerate(tqdm(rows, total=len(v), desc=f"swap_{k}"), start=1):
        trace = r[0]
        activity = r[1]
        timestamp = str(r[2])
        payload = {"event":{"concept:name":activity,"time:timestamp":timestamp},"trace":{"concept:name":trace}}
        info = client.publish(f"{topic}/swap_{k}_{trace}/{activity}", json.dumps(payload), qos=0)
        # publish() reports problems through its return value; count them so
        # that dropped messages do not go unnoticed.
        if info.rc != mqtt_client.MQTT_ERR_SUCCESS:
            rejected += 1
        # Throttle: pause briefly after every full batch of messages.
        if sent % PUBLISH_BATCH_SIZE == 0:
            time.sleep(PUBLISH_PAUSE_SECONDS)
    if rejected:
        logging.warning("swap_%s: %d of %d messages were not accepted for publishing", k, rejected, len(v))
