In [2]:
import pandas as pd
import networkx as nx
import os
import configparser
import itertools
import json

In [3]:
# Load the rule definition from `rules.ini` (section [RULES]).
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; fail loudly here instead of hitting an opaque
# KeyError on config['RULES'] further down.
if not config.read('rules.ini'):
    raise FileNotFoundError("Configuration file 'rules.ini' could not be read")

rules_section = config['RULES']
variation_spec = rules_section['variation']

# `variation` keeps the first character of the configured value and
# `prop_variation` the remainder parsed as a float.
settings = {
    'path': rules_section['path'],
    'variation': variation_spec[0],
    'prop_variation': float(variation_spec[1:]),
}

In [4]:
# Folder that contains the `parameters/original_log.csv` file read below.
path = r'C:\DanielBaron\GenerativeLSTM\output_files\20230417_3E8ABBDA_765B_4740_A305_D925CF5DE07C'
org_log_path = os.path.join(path, 'parameters', 'original_log.csv')

# Read the original log, parse both timestamp columns and rank the events of
# every case by start time.
# NOTE(review): rank() uses its default tie handling (average), so events of a
# case that share a start time get a fractional rank that astype(int)
# truncates -- confirm this is the intended numbering.
df_org = pd.read_csv(org_log_path).assign(
    start_timestamp=lambda log: pd.to_datetime(log['start_timestamp']),
    end_timestamp=lambda log: pd.to_datetime(log['end_timestamp']),
    rank=lambda log: log.groupby('caseid')['start_timestamp'].rank().astype(int),
)

In [5]:
# Lookup from task name to its `ac_index` value, taken row by row from the
# original log (for a repeated task the last row wins).
ac_index = {task: index for task, index in zip(df_org['task'], df_org['ac_index'])}

In [6]:
# Folder that contains the `parameters/generated_log.csv` file read below.
path = r'C:\DanielBaron\GenerativeLSTM\output_files\20230417_3E8ABBDA_765B_4740_A305_D925CF5DE07C'
gen_log_path = os.path.join(path, 'parameters', 'generated_log.csv')

# Read the generated log, parse both timestamp columns and rank the events of
# every case by start time.
# NOTE(review): rank() uses its default tie handling (average), so events of a
# case that share a start time get a fractional rank that astype(int)
# truncates -- confirm this is the intended numbering.
df_gen = pd.read_csv(gen_log_path).assign(
    start_timestamp=lambda log: pd.to_datetime(log['start_timestamp']),
    end_timestamp=lambda log: pd.to_datetime(log['end_timestamp']),
    rank=lambda log: log.groupby('caseid')['start_timestamp'].rank().astype(int),
)

In [7]:
# The configured rule is a sequence of steps separated by '>>'. Every step is
# JSON-encoded and is either a single value or a list of alternatives.
rule_steps = [json.loads(step.strip()) for step in settings['path'].split('>>')]

# Expand the alternatives into every concrete path (one choice per step).
act_paths = list(itertools.product(
    *[step if isinstance(step, list) else [step] for step in rule_steps]
))

# Translate each path to activity indexes. Every step is translated, so rules
# with more than two steps are no longer silently cut down to their first two
# activities (two-step rules produce exactly the same pairs as before).
act_paths_idx = [tuple(ac_index[task] for task in act_path) for act_path in act_paths]

## Add edges

In [11]:
class GenerateStats:
    """Count the cases of an event log that satisfy a set of activity paths.

    For every case a directed graph is built whose nodes are the activity
    indexes of the tasks in the case and whose edges link each task to the
    task(s) that come next in start-time order. A case is positive when every
    configured path is a simple path of that graph.

    Parameters
    ----------
    log : pandas.DataFrame
        Event log with at least the columns ``caseid``, ``task``,
        ``start_timestamp`` and ``end_timestamp``.
    ac_index : dict
        Mapping from task name to activity index.
    ac_paths : iterable
        Paths to look for, each one a sequence of activity indexes.
    """

    def __init__(self, log, ac_index, ac_paths) -> None:
        # Work on a private copy so the caller's DataFrame is left untouched.
        self.log = log.copy()
        self.log['start_timestamp'] = pd.to_datetime(self.log['start_timestamp'])
        self.log['end_timestamp'] = pd.to_datetime(self.log['end_timestamp'])
        # Dense ranks keep the numbering consecutive when several events of a
        # case share a start time (1, 1, 2 instead of the truncated average
        # 1, 1, 3), so tied events remain connected to the next step.
        self.log['rank'] = (
            self.log.groupby('caseid')['start_timestamp']
            .rank(method='dense')
            .astype(int)
        )
        self.ac_index = ac_index
        self.ac_paths = ac_paths

    def evaluate_condition(self, df_case):
        """Return True when every path in ``self.ac_paths`` occurs in the case.

        Parameters
        ----------
        df_case : pandas.DataFrame
            Events of a single case; must contain the columns ``task`` and
            ``rank``.

        Returns
        -------
        bool
            True if all configured paths are simple paths of the case graph
            (vacuously True when no path is configured), False otherwise.
        """
        df_case = df_case.sort_values(by='rank')

        G = nx.DiGraph()
        G.add_nodes_from(self.ac_index[task] for task in df_case['task'].drop_duplicates())

        if df_case['rank'].is_unique:
            # Strictly sequential case: link every task to its successor.
            tasks = list(df_case['task'])
            order = [(self.ac_index[a], self.ac_index[b]) for a, b in zip(tasks[:-1], tasks[1:])]
        else:
            # Some events share a rank: link every task of one rank level to
            # every task of the next level. Pairing consecutive *existing*
            # levels also copes with rank values that are not consecutive.
            rank_groups = [list(tasks) for _, tasks in df_case.groupby('rank')['task']]
            order = []
            for c_task, n_task in zip(rank_groups[:-1], rank_groups[1:]):
                order += [
                    (self.ac_index[a], self.ac_index[b])
                    for a, b in itertools.product(c_task, n_task)
                ]

        G.add_edges_from(order)
        # Use the paths handed to the constructor rather than a notebook global.
        return all(nx.is_simple_path(G, act_path) for act_path in self.ac_paths)

    def get_stats(self):
        """Return ``(pos_cases, total_cases)`` for the whole log.

        ``pos_cases`` is the number of cases for which
        ``evaluate_condition`` is true and ``total_cases`` the number of
        distinct ``caseid`` values.
        """
        total_cases = len(self.log['caseid'].drop_duplicates())
        # A single groupby pass replaces re-filtering the full log once per
        # case. Rows with a missing caseid form no group; they could never
        # match a path before either, so the positive count is unchanged.
        pos_cases = sum(
            1
            for _, df_case in self.log.groupby('caseid', sort=False)
            if self.evaluate_condition(df_case)
        )

        return pos_cases, total_cases

In [12]:
# Statistics helper for the original log, using the activity-index lookup and
# the indexed rule paths built in the cells above.
gs = GenerateStats(log=df_org, ac_index=ac_index, ac_paths=act_paths_idx)