# Experiment: Understanding Tasks (Localization, Characterization, and Explanation) on CPN_Logs_Characterization_Ext - (Ostovar - Robust)

## Lib Imports and configurations

In [31]:
# %matplotlib notebook
# %matplotlib inline 
%load_ext autoreload
%autoreload 2


import sys
import os
import glob
import io

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pm4py
import ruptures as rpt
from ruptures.metrics import precision_recall, meantime
import scipy.stats as ss
from sklearn.model_selection import ParameterGrid

from tqdm.notebook import tqdm_notebook
import time
from matplotlib.backends.backend_pdf import PdfPages
from joblib import Parallel, delayed

sys.path.append("../Codes/")
import TMPD_utils
import TMPD_class

import warnings
warnings.filterwarnings("ignore")

import gc
gc.enable()

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_colwidth', 150)
# pd.set_option('display.float_format', lambda x: f'{x:,.3f}')
pd.options.display.float_format = '{:.4f}'.format
np.set_printoptions(threshold=sys.maxsize)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Function to run the class TMPD pipeline in parallel

In [32]:
def run_pipeline_TMPD(kwargs, display=False, return_bool=False):

    try:

        ### Loading event log
        # Extracting event log
        event_log = TMPD_utils.parse_mxml(kwargs['log_path'], gzip=True)

        # This is a specifc step for this event log that remove an activity that shows where the drift happens
        event_log = event_log[event_log['WorkflowModelElement'] != 'DRIFT_PO']

        # Save original event index
        event_log = event_log.reset_index(names='original_index') 

        # Create an id based on the order of the event in the raw event log
        event_log["Event_order_id"] = event_log.index


        ### Step 1 - Instantiating class and setting event log
        print('step 1')
        # Iniatializing the TMPD_class
        TMPD_instance = TMPD_class.TMPD(scenario='offline')

        # Setting the transition log
        TMPD_instance.set_transition_log(event_log, case_id = kwargs['case_id'], activity_key = kwargs['activity_key']
                                         , timestamp_key = kwargs['timestamp_key'], timestamp_format=kwargs['timestamp_format'], other_columns_keys=kwargs['other_columns_keys'])

        # Executing the transition log
        TMPD_instance.run_transition_log()

        # Showing the transition log created
        # TMPD_instance.get_transition_log().head(15)

        
        ### Step 2 - Window Strategy
        print('step 2')
        # Setting the window strategy parameters
        TMPD_instance.set_windowing_strategy(window_size_mode = kwargs['window_size_mode'], window_size = kwargs['window_size'], window_ref_mode = kwargs['window_ref_mode']
                                                , overlap = kwargs['overlap'], sliding_step = kwargs['sliding_step'])

        # Executing the window strategy indexation
        TMPD_instance.run_windowing_strategy()

        # Showing the windows indexes
        # dict(list(TMPD_instance.get_windowing_strategy().items())[:15])


        ### Step 3 - Process Representation (using Transition Matrix)
        print('step 3')
        # Setting the Transition Matrix (TM) process representation
        TMPD_instance.set_process_representation(threshold_anomaly = kwargs['threshold_anomaly']
                                            , control_flow_features = kwargs['control_flow_features']
                                            , time_features = kwargs['time_features']
                                            , resource_features = kwargs['resource_features']
                                            , data_features = kwargs['data_features'])

        # Executing the process_representation using all dataset just for an example
        # TMPD_instance.run_process_representation(TMPD_instance.transition_log)

        # Showing the process representation created
        # TMPD_instance.get_process_representation().head(15)


        ### Step 4 - Change Representation
        print('step 4')
        # Setting Change Representation
        TMPD_instance.set_change_representation(kwargs['change_features_strategy_dict'])

        # Executing the Change Representation using the window strategy
        TMPD_instance.run_change_representation()

        # Showing the Change Representation created
        # TMPD_instance.get_change_representation().head(15)


        ### Step 5 - Detection Task
        # print('step 5')
        # # Setting Detection Task
        # TMPD_instance.set_detection_task(kwargs['detection_task_strategy_dict'])

        # # Executing the Detection Task
        # TMPD_instance.run_detection_task()

        # # Getting Detection Task Results
        # detection_task_results = TMPD_instance.get_detection_task()


        ### Step 6a - Localization Task
        print('step 6a')
        # Setting Localization Task
        TMPD_instance.set_localization_task(reference_window_index=kwargs['reference_window_index'], detection_window_index=kwargs['detection_window_index']
                                            , pvalue_threshold=kwargs['pvalue_threshold'], effect_prop_threshold=kwargs['effect_prop_threshold'], effect_count_threshold=kwargs['effect_count_threshold'], pseudo_count=kwargs['pseudo_count'])

        # Executing Localization Task
        TMPD_instance.run_localization_task()

        # Showing Localization Task Results
        changed_transitions, change_informations, reference_bpmn_text, detection_bpmn_text = TMPD_instance.get_localization_task(show_localization_dfg=False, show_original_dfg=False, show_original_bpmn=False)

        # Getting the distinct activities in the Localization Result
        localization_distinct_activities = set()
        for key, value in change_informations.items():
            if isinstance(value, list): # Check if the value is a list
                # Skip the list if it contains only "None"
                if len(value) == 1 and value[0] == "None":
                    continue
                for item in value:
                    if isinstance(item, tuple): # If the items are tuples, extend the set with the tuple items
                        localization_distinct_activities.update(item)
                    else: # If the items are not tuples (i.e., strings), add them directly to the set
                        localization_distinct_activities.add(item)
        localization_result = list(localization_distinct_activities)


        ### Step 6b - Characterization Task
        print('step 6b')
        # Setting Characterization Task
        TMPD_instance.set_characterization_task(llm_company = kwargs['llm_company'], llm_model=kwargs['llm_model'], api_key_path=kwargs['api_key_path'], llm_instructions_path=kwargs['llm_instructions_path'])

        # Executing Characterization Task
        TMPD_instance.run_characterization_task()

        # Showing Characterization Task Results
        # change_patterns_llm_classification, change_patterns_llm_response = TMPD_instance.get_characterization_task()

        ### Defining Grounding truth
        change_pattern_ground_truth = kwargs['log_path'].split("\\")[-3]

        change_activities_ground_truth = {
            'ConditionalMove': {
                'change_pattern_name': 'Conditional-Move',
                'characterization_activities': ['Prepare_acceptance_pack'],
                'localization_activities': ['Send_acceptance_pack', 'Check_if_home_insurance_quote_is_requested', 'Prepare_acceptance_pack', 'Assess_eligibility', 'Send_home_insurance_quote', 'Verify_repayment_agreement']
            },
            'ConditionalRemoval': {
                'change_pattern_name': 'Conditional-Switch',
                'characterization_activities': ['Prepare_acceptance_pack', 'Check_if_home_insurance_quote_is_requested', 'Verify_repayment_agreement'],
                'localization_activities': ['Send_acceptance_pack', 'Cancel_application', 'Check_if_home_insurance_quote_is_requested', 'Approve_application', 'Prepare_acceptance_pack', 'Assess_eligibility', 'Send_home_insurance_quote', 'Verify_repayment_agreement']
            },
            'ConditionalToSequence': {
                'change_pattern_name': 'Conditional-To-Sequence',
                'characterization_activities': ['Send_home_insurance_quote', 'Send_acceptance_pack'],
                'localization_activities': ['Check_if_home_insurance_quote_is_requested', 'Send_home_insurance_quote', 'Send_acceptance_pack', 'Verify_repayment_agreement']
            },
            'Frequency': {
                'change_pattern_name': 'Frequency',
                'characterization_activities': ['Check_if_home_insurance_quote_is_requested', 'Send_acceptance_pack', 'Send_home_insurance_quote'],
                'localization_activities': ['Check_if_home_insurance_quote_is_requested', 'Send_home_insurance_quote', 'Send_acceptance_pack', 'Verify_repayment_agreement']
            },
            'Loop': {
                'change_pattern_name': 'Loop',
                'characterization_activities': ['Assess_loan_risk', 'Appraise_property', 'Assess_eligibility', 'Check_credit_history'],
                'localization_activities': ['Assess_loan_risk', 'Appraise_property', 'Assess_eligibility', 'Check_credit_history', 'Reject_application', 'Prepare_acceptance_pack']
            },
            'ParallelMove': {
                'change_pattern_name': 'Parallel-Move',
                'characterization_activities': ['Prepare_acceptance_pack', 'Send_home_insurance_quote'],
                'localization_activities': ['Send_acceptance_pack', 'Check_if_home_insurance_quote_is_requested', 'Prepare_acceptance_pack', 'Assess_eligibility', 'Send_home_insurance_quote', 'Verify_repayment_agreement']
            },
            'ParallelRemoval': {
                'change_pattern_name': 'Parallel-Switch',
                'characterization_activities': ['Prepare_acceptance_pack', 'Send_home_insurance_quote'],
                'localization_activities': ['Send_acceptance_pack', 'Check_if_home_insurance_quote_is_requested', 'Prepare_acceptance_pack', 'Assess_eligibility', 'Send_home_insurance_quote', 'Verify_repayment_agreement']
            },
            'ParallelToSequence': {
                'change_pattern_name': 'Parallel-To-Sequence',
                'characterization_activities': ['Appraise_property', 'Check_credit_history', 'Assess_loan_risk'],
                'localization_activities': ['Assess_loan_risk', 'Check__application__form_completeness', 'Appraise_property', 'Assess_eligibility', 'Check_credit_history']
            },
            'SerialMove': {
                'change_pattern_name': 'Serial-Move',
                'characterization_activities': ['Assess_eligibility'],
                'localization_activities': ['Assess_eligibility', 'Reject_application', 'Prepare_acceptance_pack', 'Assess_loan_risk', 'Appraise_property']
            },
            'SerialRemoval': {
                'change_pattern_name': 'Serial-Switch',
                'characterization_activities': ['Assess_eligibility'],
                'localization_activities': ['Assess_eligibility', 'Reject_application', 'Prepare_acceptance_pack', 'Assess_loan_risk', 'Appraise_property']
            },
            'Skip': {
                'change_pattern_name': 'Skip',
                'characterization_activities': ['Check_if_home_insurance_quote_is_requested', 'Prepare_acceptance_pack'],
                'localization_activities': ['Send_acceptance_pack', 'Check_if_home_insurance_quote_is_requested', 'Prepare_acceptance_pack', 'Assess_eligibility', 'Send_home_insurance_quote']
            },
            'Substitute': {
                'change_pattern_name': 'Replace',
                'characterization_activities': ['Verify_repayment_agreement', 'Replaced_Activity'],
                'localization_activities': ['Cancel_application', 'Send_acceptance_pack', 'Replaced_Activity', 'Approve_application', 'Send_home_insurance_quote', 'Verify_repayment_agreement']
            },
            'Swap': {
                'change_pattern_name': 'Swap',
                'characterization_activities': ['Prepare_acceptance_pack', 'Check_if_home_insurance_quote_is_requested', 'Verify_repayment_agreement'],
                'localization_activities': ['Send_acceptance_pack', 'Cancel_application', 'Check_if_home_insurance_quote_is_requested', 'Approve_application', 'Prepare_acceptance_pack', 'Assess_eligibility', 'Send_home_insurance_quote', 'Verify_repayment_agreement']
            } 
        }

        localization_ground_truth = change_activities_ground_truth[change_pattern_ground_truth]['localization_activities']
        characterization_ground_truth = {'concept_drift' : ['Yes'], 'change_pattern' : [change_activities_ground_truth[change_pattern_ground_truth]['change_pattern_name']], 'activities' : change_activities_ground_truth[change_pattern_ground_truth]['characterization_activities']}


        ### Validation metrics
        print('Validation metrics')

        ## Localization
        # Set comparison for 'Activities'
        precision, recall, f1_score = TMPD_utils.list_match_metrics(localization_ground_truth, localization_result)

        localization_task_validation_results = ({
            'localization_activities_precision': precision,
            'localization_activities_recall': recall,
            'localization_activities_f1_score': f1_score
        })

        ## Characterization
        characterization_result = TMPD_instance.get_characterization_task()[0]

        # Initialize a results dictionary
        characterization_task_validation_results = {
            'characterization_concept_drift_match': None,
            'characterization_change_pattern_match_f1_score': None,
            'characterization_change_pattern_match_precision': None,
            'characterization_change_pattern_match_recall': None,
            'characterization_activities_precision': None,
            'characterization_activities_recall': None,
            'characterization_activities_f1_score': None
        }

        # Check matches
        try:
            characterization_task_validation_results['characterization_concept_drift_match'] = TMPD_utils.list_match_metrics(characterization_ground_truth['concept_drift'], characterization_result['concept_drift'])[2]
        except:
            characterization_task_validation_results['characterization_concept_drift_match'] = -1

        try:
            precision, recall, f1_score = TMPD_utils.list_match_metrics(characterization_ground_truth['change_pattern'], characterization_result['change_pattern'])
            characterization_task_validation_results.update({
                'characterization_change_pattern_match_f1_score': f1_score,
                'characterization_change_pattern_match_precision': precision,
                'characterization_change_pattern_match_recall': recall
            })

        except:
            characterization_task_validation_results.update({
                'characterization_change_pattern_match_f1_score': -1,
                'characterization_change_pattern_match_precision': -1,
                'characterization_change_pattern_match_recall': -1
            })

        # Set comparison for 'Activities'
        precision, recall, f1_score = TMPD_utils.list_match_metrics(characterization_ground_truth['activities'], characterization_result['activities'])

        characterization_task_validation_results.update({
            'characterization_activities_precision': precision,
            'characterization_activities_recall': recall,
            'characterization_activities_f1_score': f1_score
        })


        understanding_tasks_results = pd.DataFrame([{**localization_task_validation_results, **characterization_task_validation_results}])

        understanding_tasks_results['localization_activities'] = ', '.join(localization_result) 
        understanding_tasks_results['characterization_change_pattern'] = ', '.join(TMPD_instance.get_characterization_task()[0]['change_pattern'])
        understanding_tasks_results['characterization_activities'] = ', '.join(TMPD_instance.get_characterization_task()[0]['activities']) 
        understanding_tasks_results['localization_changes'] = [TMPD_instance.get_localization_task(show_localization_dfg=False, show_original_dfg=False, show_original_bpmn=False)[1]]
        understanding_tasks_results['reference_bpmn_text'] = TMPD_instance.get_localization_task(show_localization_dfg=False, show_original_dfg=False, show_original_bpmn=False)[2]
        understanding_tasks_results['detection_bpmn_text'] = TMPD_instance.get_localization_task(show_localization_dfg=False, show_original_dfg=False, show_original_bpmn=False)[3]
        understanding_tasks_results['change_patterns_llm_response'] = TMPD_instance.get_characterization_task()[1]
        


        ### Add informations to final result
        print('Adding informations')
        understanding_tasks_results['log_path'] = kwargs['log_path']
        understanding_tasks_results['log_size'] = kwargs['log_path'].split("\\")[-5] 
        understanding_tasks_results['mix_type'] = kwargs['log_path'].split("\\")[-4]
        understanding_tasks_results['change_pattern'] = kwargs['log_path'].split("\\")[-3]
        understanding_tasks_results['noise_size'] = kwargs['log_path'].split("\\")[-1].split(".")[0].split("_")[-1] if kwargs['log_path'].split("\\")[-1].split(".")[0].split("_")[-1].isdigit() else 0
        understanding_tasks_results['window_size_mode'] = kwargs['window_size_mode']
        understanding_tasks_results['window_size'] = kwargs['window_size']
        understanding_tasks_results['window_ref_mode'] = kwargs['window_ref_mode']
        understanding_tasks_results['overlap'] = kwargs['overlap']
        understanding_tasks_results['sliding_step'] = kwargs['sliding_step']

        understanding_tasks_results['reference_window_index'] = kwargs['reference_window_index']
        understanding_tasks_results['detection_window_index'] = kwargs['detection_window_index']
        understanding_tasks_results['pvalue_threshold'] = kwargs['pvalue_threshold']
        understanding_tasks_results['effect_prop_threshold'] = kwargs['effect_prop_threshold']
        understanding_tasks_results['effect_count_threshold'] = kwargs['effect_count_threshold']
        understanding_tasks_results['pseudo_count'] = kwargs['pseudo_count']
        understanding_tasks_results['llm_company'] = kwargs['llm_company']
        understanding_tasks_results['llm_model'] = kwargs['llm_model']
        understanding_tasks_results['llm_instructions_path'] = kwargs['llm_instructions_path']

    

    except Exception as e:
        understanding_tasks_results = pd.DataFrame(data={'Error': [e]})
        understanding_tasks_results['log_path'] = kwargs['log_path']
        understanding_tasks_results['log_size'] = kwargs['log_path'].split("\\")[-5] 
        understanding_tasks_results['mix_type'] = kwargs['log_path'].split("\\")[-4]
        understanding_tasks_results['change_pattern'] = kwargs['log_path'].split("\\")[-3]
        understanding_tasks_results['noise_size'] = kwargs['log_path'].split("\\")[-1].split(".")[0].split("_")[-1] if kwargs['log_path'].split("\\")[-1].split(".")[0].split("_")[-1].isdigit() else 0
        understanding_tasks_results['window_size_mode'] = kwargs['window_size_mode']
        understanding_tasks_results['window_size'] = kwargs['window_size']
        understanding_tasks_results['window_ref_mode'] = kwargs['window_ref_mode']
        understanding_tasks_results['overlap'] = kwargs['overlap']
        understanding_tasks_results['sliding_step'] = kwargs['sliding_step']

        understanding_tasks_results['reference_window_index'] = kwargs['reference_window_index']
        understanding_tasks_results['detection_window_index'] = kwargs['detection_window_index']
        understanding_tasks_results['pvalue_threshold'] = kwargs['pvalue_threshold']
        understanding_tasks_results['effect_prop_threshold'] = kwargs['effect_prop_threshold']
        understanding_tasks_results['effect_count_threshold'] = kwargs['effect_count_threshold']
        understanding_tasks_results['pseudo_count'] = kwargs['pseudo_count']
        understanding_tasks_results['llm_company'] = kwargs['llm_company']
        understanding_tasks_results['llm_model'] = kwargs['llm_model']
        understanding_tasks_results['llm_instructions_path'] = kwargs['llm_instructions_path']
    
    # Deleting class instance
    try:
        del TMPD_instance
    except:
        pass

    # Returning detection results or save to file
    if return_bool:
        return understanding_tasks_results
    else:
        understanding_tasks_results.to_pickle("Results/Understanding_CPN_Logs_Characterization_Ext_files/"+ str(kwargs['id']) + ".pkl")

## Loading CPN_Logs_Characterization_Ext - (Ostovar - Robust) event logs

In [33]:
# Mapping all event_logs paths
logs_path = glob.glob("../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange/*/*/*/*/*.mxml.gz")

# Temp filter
logs_path = [x for x in logs_path if "_2" not in x and "_5" not in x and "Size1" in x and "Atomic" in x]

# Showing mapped paths
print("How many logs? ", len(logs_path))
pd.DataFrame(logs_path)

How many logs?  13


Unnamed: 0,0
0,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ConditionalMove\output\ConditionalMove.mxml.gz
1,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ConditionalRemoval\output\ConditionalRemoval.mxml.gz
2,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ConditionalToSequence\output\ConditionalToSequence.mxml.gz
3,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\Frequency\output\Frequency.mxml.gz
4,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\Loop\output\Loop.mxml.gz
5,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ParallelMove\output\ParallelMove.mxml.gz
6,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ParallelRemoval\output\ParallelRemoval.mxml.gz
7,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ParallelToSequence\output\ParallelToSequence.mxml.gz
8,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\SerialMove\output\SerialMove.mxml.gz
9,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\SerialRemoval\output\SerialRemoval.mxml.gz


<!-- ![Alt text](../Images/Concept_drift_firstcycle_steps_eng.png "General steps") -->

## Experiment impacts of the parameters

### Define the Parameters GridSearch 

In [34]:
TMPD_ParameterGrid_experiment = ParameterGrid(
    [{
        # Step 1 - Instantiating class and setting event log
        'case_id' : ['CaseId']
        , 'activity_key' : ['Activity']
        , 'timestamp_key' : ['timestamp']
        , 'timestamp_format' : [None]
        , 'other_columns_keys' : [[]]

        # Step 2 - Setting Window Strategy
        , 'window_size_mode' : ['Fixed']
        , 'window_size' : [8000]
        , 'window_ref_mode' : ['Fixed'] #, 'Sliding'
        , 'overlap' : [True]
        , 'sliding_step' : [2000]

        # Step 3 - Setting Process Representation (using Transition Matrix)
        , 'threshold_anomaly': [0.005]
        , 'control_flow_features': [{'frequency', 'probability'}]
        , 'time_features': [{}] #'avg_time':'timestamp', 'time_std':'timestamp'
        , 'resource_features': [{}]
        , 'data_features': [{}]

        # Step 4 - Setting Change Representation
        , 'change_features_strategy_dict' : [{
            'delta_matrix_strategy': 
                {
                    'frequency_delta' : {'process_feature':'frequency', 'method':'aggregation', 'agg_function' : 'sum'}
                    , 'frequency_delta_percentage' : {'process_feature':'frequency', 'method':'percentage'}
                    # , 'prob_freq_delta_weight' : {'process_feature':'probability', 'method':'aggregation_weight', 'agg_function' : 'sum', 'weight_feature' : 'frequency'}
                }
            ,'statistic_test_strategy' : 
                {
                    # 'frequency_gtest_pvalue' : {'process_feature':'frequency', 'method':'g_test', 'contingency_matrix_sum_value' : '5', 'remove_zeros':'True'}
                    # , 'frequency_cramersv' : {'process_feature':'frequency', 'method':'cramers_v', 'contingency_matrix_sum_value' : '5', 'remove_zeros':'True'}
                }
        }]
        
        # Step 5 - Setting Detection Task
        , 'detection_task_strategy_dict' :  [{
            'time_series_strategy': 
                {
                    # 'cpd_frequency_delta3' : {'change_features':['frequency_delta'], 'method':'cpd_pelt', 'smooth' : '3'}
                    # 'cpd_prob_freq_delta3' : {'change_features':['prob_freq_delta_weight'], 'method':'cpd_pelt', 'smooth' : '3'}
            #         , 'cpd_cramersv_frequency3' : {'change_features':['frequency_cramersv'], 'method':'cpd_pelt', 'smooth' : '3'} 
                }
            ,'threshold_strategy' : 
                {
                    # 'gtest_frequency3' : {'change_features':['frequency_gtest_pvalue'], 'method':'comparison_operator', 'operator' : 'le', 'threshold_value' : '0.025', 'smooth' : '3'}
                    'fixed_frequency_delta_percentage3' : {'change_features':['frequency_delta_percentage'], 'method':'comparison_operator', 'operator' : 'ge', 'threshold_value' : '0.05', 'smooth' : '3'}
                    # , 'fixed_cramersv_frequency3' : {'change_features':['frequency_cramersv'], 'method':'comparison_operator', 'operator' : 'ge', 'threshold_value' : '0.05', 'smooth' : '3'}
                }
        }]

        # Step 6a - Localization Task
        , 'reference_window_index': [0]
        , 'detection_window_index': [14]
        , 'pvalue_threshold': [0.05]
        , 'effect_prop_threshold': [0.2]
        , 'effect_count_threshold': [0.02]
        , 'pseudo_count': [5]

        # Step 6b - Characterization Task
        , 'llm_company' : ['openai'] # 'google', 'openai'
        , 'llm_model': ["gpt-4o"] # "gpt-4o", "gemini-pro", "gpt-4-turbo", "gpt-4"
        , 'api_key_path' : ['../Temp/openai_api_key.txt'] # '../Temp/google_api_key.txt', '../Temp/openai_api_key.txt'
        , 'llm_instructions_path': ['../Codes/LLM_Instructions/instructions_v8.yaml']

    }]
)

### Combine parameters GridSearch with all event logs

In [35]:
TMPD_ParameterGrid_logs_experiment = []
id=0
for param_grid in TMPD_ParameterGrid_experiment:
    for log_path in logs_path:
        param_grid_aux = param_grid.copy()
        param_grid_aux['log_path'] = log_path 
        param_grid_aux['id'] = id
        TMPD_ParameterGrid_logs_experiment.append(param_grid_aux)
        id = id+1
len(TMPD_ParameterGrid_logs_experiment)


13

### Execute all experiments in parallel

In [36]:
TMPD_logs_results_experiment = Parallel(n_jobs=4)(delayed(run_pipeline_TMPD)(TMPD_Parameters, display=False, return_bool=True) for TMPD_Parameters in tqdm_notebook(TMPD_ParameterGrid_logs_experiment))
TMPD_logs_results_experiment_df = pd.concat(TMPD_logs_results_experiment, axis=0, ignore_index=True)

  0%|          | 0/13 [00:00<?, ?it/s]

### Analyse results

In [37]:
TMPD_logs_results_experiment_df

Unnamed: 0,localization_activities_precision,localization_activities_recall,localization_activities_f1_score,characterization_concept_drift_match,characterization_change_pattern_match_f1_score,characterization_change_pattern_match_precision,characterization_change_pattern_match_recall,characterization_activities_precision,characterization_activities_recall,characterization_activities_f1_score,localization_activities,characterization_change_pattern,characterization_activities,localization_changes,reference_bpmn_text,detection_bpmn_text,change_patterns_llm_response,log_path,log_size,mix_type,change_pattern,noise_size,window_size_mode,window_size,window_ref_mode,overlap,sliding_step,reference_window_index,detection_window_index,pvalue_threshold,effect_prop_threshold,effect_count_threshold,pseudo_count,llm_company,llm_model,llm_instructions_path
0,0.0,0.0,0,1.0,1.0,1.0,1.0,0.0,0.0,0,"p, n, o, t, u",Conditional-Move,"p, n","{'Transitions with variations in frequency': [('n', 'o'), ('o', 'p'), ('p', 'u'), ('t', 'n'), ('n', 'u'), ('o', 'n'), ('t', 'o')], 'Transitions wi...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'g', 'h', 'f' ) ) ), 'i', Loop( Conditional( 'j', 'k' ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'g', 'h', 'f' ) ) ), 'i', Loop( Conditional( 'j', 'k' ...",### BPMN Diagrams Comparison Analysis ###\n\n#### BPMN before the concept drift (reference window): ####\n- Sequence(\n Conditional(\n S...,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ConditionalMove\output\ConditionalMove.mxml.gz,Size1,Atomic,ConditionalMove,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
1,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0,0,"y, z, re",Serial-Switch,re,"{'Transitions with variations in frequency': [('re', 'z'), ('y', 're'), ('y', 'z')], 'Transitions with variations in probability': [('y', 're'), (...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'f', 'g', 'h' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'f', 'g', 'h' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...","### BPMN Diagrams Comparison Analysis ###\n\n#### Reference Window BPMN Diagram: ####\n```\nSequence(\n Conditional(\n Sequence('a', Par...",../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ConditionalRemoval\output\ConditionalRemoval.mxml.gz,Size1,Atomic,ConditionalRemoval,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
2,0.0,0.0,0,1.0,1.0,1.0,1.0,0.0,0.0,0,"i, o, p, f, u, q",Conditional-To-Sequence,"p, q","{'Transitions with variations in probability': [('f', 'i'), ('o', 'p'), ('o', 'q'), ('p', 'u'), ('p', 'q')], 'Transitions with variations in frequ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'e', 'd' ) ), Sequence( 'b', Parallel( 'h', 'f', 'g' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'e', 'd' ) ), Sequence( 'b', Parallel( 'h', 'f', 'g' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...",### BPMN Diagrams Comparison Analysis ###\n\n#### BPMN before the concept drift (reference window): ####\n```\nSequence(\n Conditional(\n ...,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ConditionalToSequence\output\ConditionalToSequence.mxml.gz,Size1,Atomic,ConditionalToSequence,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
3,0.0,0.0,0,1.0,1.0,1.0,1.0,0.0,0.0,0,"g, u, q, i, o, p",Frequency,"g, i, o, p, q, u","{'Transitions with variations in probability': [('g', 'i'), ('o', 'p'), ('o', 'q')], 'Transitions with variations in frequency': [('o', 'p'), ('o'...","Sequence( Conditional( Sequence( 'b', Parallel( 'g', 'f', 'h' ) ), Sequence( 'a', Parallel( 'd', 'e', 'c' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...","Sequence( Conditional( Sequence( 'b', Parallel( 'g', 'f', 'h' ) ), Sequence( 'a', Parallel( 'd', 'e', 'c' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...",### BPMN Diagrams Comparison Analysis ###\n\n#### BPMN before the concept drift (reference window): ####\n```\nSequence(\n Conditional(\n ...,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\Frequency\output\Frequency.mxml.gz,Size1,Atomic,Frequency,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
4,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0,0,"l, k, s",Serial-Move,k,"{'Transitions with variations in probability': [('k', 'k'), ('k', 'l'), ('k', 's')], 'Transitions with variations in frequency': [('k', 'k')], 'Ne...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'e', 'd' ) ), Sequence( 'b', Parallel( 'h', 'f', 'g' ) ) ), 'i', Loop( 'k', tau ), Parallel( ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'e', 'd' ) ), Sequence( 'b', Parallel( 'h', 'f', 'g' ) ) ), 'i', 'k', Parallel( 's', Conditio...","### BPMN Diagrams Comparison Analysis ###\n\n#### Reference Window BPMN Diagram: ####\n```\nSequence(\n Conditional(\n Sequence('a', Par...",../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\Loop\output\Loop.mxml.gz,Size1,Atomic,Loop,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
5,0.0,0.0,0,1.0,1.0,1.0,1.0,0.0,0.0,0,"g, l, r, p, m, n, f, c, o, t, u, e, b",Parallel-Move,"n, p","{'Transitions with variations in frequency': [('m', 'r'), ('n', 'o'), ('o', 'p'), ('p', 'u'), ('t', 'n'), ('n', 'p'), ('n', 'u'), ('o', 'n'), ('p'...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'g', 'h', 'f' ) ) ), 'i', Loop( Conditional( 'j', 'k' ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'g', 'h', 'f' ) ) ), 'i', Loop( Conditional( 'j', 'k' ...","### BPMN Diagrams Comparison Analysis ###\n\n#### Reference Window BPMN Diagram: ####\n```\nSequence(\n Conditional(\n Sequence('a', Par...",../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ParallelMove\output\ParallelMove.mxml.gz,Size1,Atomic,ParallelMove,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
6,0.0,0.0,0,1.0,1.0,1.0,1.0,0.0,0.0,0,"u, d, t, c, m, a, z1, z, l, re, y, v",Parallel-Switch,re,"{'Transitions with variations in probability': [('a', 'd'), ('l', 'm'), ('l', 't'), ('m', 'l'), ('m', 't'), ('re', 'z'), ('re', 'z1'), ('v', 'u'),...","Sequence( Conditional( Sequence( 'b', Parallel( 'g', 'f', 'h' ) ), Sequence( 'a', Parallel( 'd', Loop( Conditional( 'e', 'c' ), tau ) ) ) ), 'i', ...","Sequence( Conditional( Sequence( 'b', Parallel( 'g', 'f', 'h' ) ), Sequence( 'a', Parallel( 'd', 'e', 'c' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...",### BPMN Diagrams Comparison Analysis ###\n\n#### Reference Window BPMN Diagram: ####\n```\nSequence(\n Conditional(\n Sequence(\n ...,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ParallelRemoval\output\ParallelRemoval.mxml.gz,Size1,Atomic,ParallelRemoval,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
7,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0,0,"d, p, e, j, i, n, b, k, f, t, o",Parallel-Move,"o, n","{'Transitions with variations in frequency': [('k', 'j'), ('n', 'o'), ('o', 'p'), ('t', 'n'), ('n', 'p'), ('o', 'n'), ('t', 'o')], 'Transitions wi...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'f', 'g', 'h' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'f', 'g', 'h' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...","### BPMN Diagrams Comparison Analysis ###\n\n#### Reference Window BPMN Diagram: ####\n```\nSequence(\n Conditional(\n Sequence('a', Par...",../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\ParallelToSequence\output\ParallelToSequence.mxml.gz,Size1,Atomic,ParallelToSequence,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
8,0.0,0.0,0,1.0,1.0,1.0,1.0,0.0,0.0,0,"x, y, n, i, o, p, f, u, t",Serial-Move,n,"{'Transitions with variations in probability': [('f', 'i'), ('n', 'o'), ('p', 'u'), ('t', 'n'), ('x', 'u'), ('x', 'y'), ('n', 'u'), ('p', 'n'), ('...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'e', 'd' ) ), Sequence( 'b', Parallel( 'h', 'f', 'g' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'e', 'd' ) ), Sequence( 'b', Parallel( 'h', 'f', 'g' ) ) ), 'i', Loop( Conditional( 'k', 'j' ...",### BPMN Diagrams Comparison Analysis ###\n\n#### BPMN before the concept drift (reference window): ####\n- Sequence(\n Conditional(\n S...,../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\SerialMove\output\SerialMove.mxml.gz,Size1,Atomic,SerialMove,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml
9,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0,0,"y, l, r, re, z",Parallel-Switch,"l, m","{'Transitions with variations in frequency': [('re', 'z'), ('y', 're'), ('l', 'r'), ('y', 'z')], 'Transitions with variations in probability': [('...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'g', 'h', 'f' ) ) ), 'i', Loop( Conditional( 'j', 'k' ...","Sequence( Conditional( Sequence( 'a', Parallel( 'c', 'd', 'e' ) ), Sequence( 'b', Parallel( 'g', 'h', 'f' ) ) ), 'i', Loop( Conditional( 'j', 'k' ...","### BPMN Diagrams Comparison Analysis ###\n\n#### Reference Window BPMN Diagram: ####\n```\nSequence(\n Conditional(\n Sequence('a', Par...",../Input/Synthetic/CPN_Logs (Ostovar - Robust)/FragmentChange\Size1\Atomic\SerialRemoval\output\SerialRemoval.mxml.gz,Size1,Atomic,SerialRemoval,0,Fixed,8000,Fixed,True,2000,0,14,0.05,0.2,0.02,5,openai,gpt-4o,../Codes/LLM_Instructions/instructions_v8.yaml


In [38]:
TMPD_logs_results_experiment_df['characterization_change_pattern_match_f1_score'].mean()
#77%

0.6923076923076923

In [39]:
TMPD_logs_results_experiment_df.to_excel('Results/CPN_Logs_Characterization_Ext_Ostovar_Robust_Understanding_Task_Single_v8c.xlsx')

In [40]:
# with PdfPages('Results/gtest_frequency3_experiment_Business_Process_Drift.pdf') as pdf:
#     for index, result in TMPD_logs_results_experiment_df.iterrows(): 
#         fig, ax = plt.subplots(figsize=(15,3))
#         plt.axis('off')
#         ax.imshow(result['display'])
#         pdf.savefig(bbox_inches='tight')
#         plt.show()