## *import libraries*

In [1]:
import functions as f

import os
import re
import numpy as np
import pandas as pd
from tabulate import tabulate
from scipy.spatial.distance import cdist

import pm4py
from pm4py.algo.evaluation import algorithm
from pm4py.visualization.petri_net import visualizer as pn_visualizer
from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
from pm4py.objects.petri_net.importer.variants import pnml as pnml_importer
from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments

import warnings
warnings.filterwarnings('ignore')

## *Parameter Setting*

In [2]:
# log_number = 6
# noise_prob = 0.2

# if noise_prob == 0:
#     log_path = f"Transformed_Logs_and_Results/Our/Transformed_Log_Without_Noise/transform_log{log_number}.xes"
#     routines_file_path = f"Transformed_Logs_and_Results/Simon/Transformed_Log_Without_Noise/Discovered_Routines/simon_noisy_transform_log{log_number}_.txt"
# else:
#     log_path = f"Transformed_Logs_and_Results/Our/Transformed_Log_With_Noise_{noise_prob}/noisy_transform_log{log_number}.xes"
#     routines_file_path = f"Transformed_Logs_and_Results/Simon/Transformed_Log_With_Noise_{noise_prob}/Discovered_Routines/simon_noisy_transform_log{log_number}_.txt"

# cpn_ground_truth_dir = f"GT_Models/log{log_number}/"

# key_value = 1


## *Read Log and Discovered Routines*

In [3]:
def UI_read_log(log_path):
    """Load an event log and list the distinct activity names it contains.

    Args:
        log_path: Path forwarded unchanged to ``f.read_log`` (``f`` is the
            project-local ``functions`` module imported at the top).

    Returns:
        tuple: ``(log, reference_set)``. ``log`` is whatever ``f.read_log``
        returns (it must support ``log['concept:name'].unique()``);
        ``reference_set`` is the list of unique ``concept:name`` values.
    """
    event_log = f.read_log(log_path)

    # The distinct activity labels form the reference vocabulary used for encoding.
    activity_names = event_log['concept:name'].unique()

    return event_log, list(activity_names)

In [4]:
def read_routines(routines_path):
    """Read discovered routines from a whitespace-separated text file.

    Every line is split on whitespace; the last token (the supporting number)
    is dropped and the remaining tokens form one routine.

    Lines that yield no activity at all (blank lines, or lines that hold only
    the trailing token) are skipped, so they do not turn into empty routines
    that would later be encoded as all-zero pattern vectors.

    Args:
        routines_path: Path of the text file to read.

    Returns:
        list[list[str]]: One list of activity tokens per non-empty routine,
        in file order.
    """
    all_routines = []
    with open(routines_path, "r") as file:
        for line in file:
            # str.split() with no argument already ignores leading/trailing whitespace.
            parts = line.split()

            # Everything except the trailing supporting number.
            routine = parts[:-1]
            if not routine:
                continue
            all_routines.append(routine)
    return all_routines

## *Prepare encoding of discovered routines*

In [5]:
def get_discover_routines_encoding(all_routines, reference_set):
    """Name each discovered routine and encode it as an activity-count vector.

    Routines are labelled ``pattern1``, ``pattern2``, ... in input order.

    Args:
        all_routines: Sequence of routines, each a list of activity names.
        reference_set: Ordered activity names; position ``j`` of every vector
            counts how often ``reference_set[j]`` occurs in the routine.

    Returns:
        tuple: ``(routine_vectors, routines)`` — two dicts keyed by the pattern
        label, mapping to the count vector and to the original activity list
        respectively.
    """
    routines = {}
    routine_vectors = {}

    for position, activities in enumerate(all_routines, start=1):
        label = f"pattern{position}"
        # The sequence is stored as-is (duplicates and order are kept).
        routines[label] = activities
        # Frequency encoding: occurrences of each reference activity.
        routine_vectors[label] = [activities.count(name) for name in reference_set]

    return routine_vectors, routines

## *Prepare encoding of traces*

In [6]:
def freq_encoding(session_log):
    """Count, per session, how often each activity occurs.

    Each event is one-hot encoded on ``concept:name`` and the indicator
    columns are summed per group. Groups are identified by
    ``case:concept:name`` and ``Session``, plus ``lifecycle:transition`` when
    that column is present.

    The indicators are created directly as integers, so the deprecated
    ``DataFrame.applymap`` pass (which left 0/1 values unchanged) is not
    needed and the summed counts are integer columns.

    Args:
        session_log: DataFrame with at least the columns ``concept:name``,
            ``case:concept:name`` and ``Session``.

    Returns:
        pandas.DataFrame: One row per group, holding the grouping columns
        followed by one count column per distinct activity.
    """
    activity_indicators = pd.get_dummies(session_log['concept:name'], dtype=int)

    group_columns = ['case:concept:name', 'Session']
    if 'lifecycle:transition' in session_log.columns:
        # Same key order as before: case, lifecycle, session.
        group_columns.insert(1, 'lifecycle:transition')

    df_encoded = pd.concat([session_log[group_columns], activity_indicators], axis=1)
    df_grouped = df_encoded.groupby(group_columns).sum().reset_index()

    return df_grouped


def assign_labels_by_distance(encoded_log, encoded_vectors, reference_set):
    """Label every encoded row with its nearest routine vector (Euclidean).

    Distances are computed with ``scipy.spatial.distance.cdist``, which
    returns the (rows x routines) matrix directly instead of materialising a
    (rows x routines x activities) broadcast intermediate.

    Args:
        encoded_log: DataFrame containing one numeric column per name in
            ``reference_set``. It is modified in place: a ``pattern_label``
            column is added (or overwritten).
        encoded_vectors: Mapping ``label -> vector``; each vector must have
            one entry per name in ``reference_set``, in the same order.
        reference_set: Ordered column names used as vector dimensions.

    Returns:
        pandas.DataFrame: The same ``encoded_log`` object, now carrying the
        ``pattern_label`` column. On equal distances the label that comes
        first in ``encoded_vectors`` is chosen (``numpy.argmin`` semantics).

    Raises:
        ValueError: If ``encoded_vectors`` is empty, since no label could be
            assigned.
    """
    if not encoded_vectors:
        raise ValueError("encoded_vectors is empty: there is no routine to assign traces to")

    vector_labels = list(encoded_vectors.keys())
    vector_values = np.array(list(encoded_vectors.values()), dtype=float)

    # Restrict and order the log columns exactly like the vector dimensions.
    enc_log = encoded_log[reference_set].astype(float)
    distances = cdist(enc_log.values, vector_values, metric='euclidean')

    # Index of the closest routine vector for every row.
    min_distance_indices = np.argmin(distances, axis=1)

    encoded_log['pattern_label'] = [vector_labels[idx] for idx in min_distance_indices]
    return encoded_log

## *Assign Patterns to Traces*

In [7]:
def assign_pattern_to_traces(session_log, cluster_log):
    """Attach the ``pattern_label`` of each session to its individual events.

    The event-level ``session_log`` is inner-joined with the session-level
    ``cluster_log`` on ``case:concept:name`` and ``Session`` (and also on
    ``lifecycle:transition`` when ``session_log`` has that column). The result
    is ordered by case, session and timestamp.

    Args:
        session_log: Event-level DataFrame with ``case:concept:name``,
            ``Session``, ``time:timestamp`` and ``concept:name``.
        cluster_log: Session-level DataFrame with the join keys and a
            ``pattern_label`` column.

    Returns:
        pandas.DataFrame: Columns ``case:concept:name``, ``time:timestamp``,
        ``concept:name`` and ``pattern_label``.
    """
    label_column = 'pattern_label'

    join_keys = ['case:concept:name', 'Session']
    if 'lifecycle:transition' in session_log.columns:
        join_keys.append('lifecycle:transition')

    joined = session_log.merge(cluster_log, on=join_keys, how='inner')

    # Keep only keys, event attributes and the label before ordering.
    trimmed = joined[join_keys + ['time:timestamp', 'concept:name', label_column]]
    ordered = trimmed.sort_values(by=['case:concept:name', 'Session', 'time:timestamp'])

    return ordered[['case:concept:name', 'time:timestamp', 'concept:name', label_column]]

## *Evaluation Functions*

In [8]:
# Extract transitions (activities) from both models to calculate Jaccard Coefficient
def extract_transitions(net):
    """Return the set of labels of all labelled transitions in ``net``.

    Transitions whose ``label`` is ``None`` are left out.
    """
    labels = set()
    for transition in net.transitions:
        if transition.label is None:
            continue
        labels.add(transition.label)
    return labels


def get_fScore(fitness, precision):
    """Return the harmonic mean of ``fitness`` and ``precision``.

    Returns ``0`` when the two values sum to zero, which avoids dividing by
    zero.
    """
    denominator = fitness + precision
    if denominator == 0:
        return 0
    return (2 * fitness * precision) / denominator


def get_JC(net, im, fm, routine_label):
    """Jaccard coefficient between a discovered routine and a ground-truth net.

    The routine's activities are looked up in the module-level ``routines``
    mapping (it is not a parameter), so that mapping must be defined before
    this function is called.

    Args:
        net: Petri net whose labelled transitions form the ground-truth set.
        im: Initial marking (accepted for signature symmetry; not used here).
        fm: Final marking (accepted for signature symmetry; not used here).
        routine_label: Key into the module-level ``routines`` mapping.

    Returns:
        float: ``|intersection| / |union|`` of the two activity sets, or
        ``0.0`` when both sets are empty (previously a ZeroDivisionError).
    """
    gt_routine_activities = extract_transitions(net)
    routine_activities = set(routines[routine_label])

    union = routine_activities | gt_routine_activities
    if not union:
        # Nothing to compare on either side: report no overlap instead of dividing by zero.
        return 0.0

    intersection = routine_activities & gt_routine_activities
    return len(intersection) / len(union)


def token_base_evaluation(log, net, im, fm):
    """Compute support and coverage from a token-based replay of ``log``.

    Args:
        log: Event log handed to ``token_replay.apply``; ``len(log)`` is used
            as the number of traces.
        net: Petri net; ``len(net.transitions)`` is the support denominator.
        im: Initial marking.
        fm: Final marking.

    Returns:
        tuple: ``(support, coverage)`` where ``support`` is the number of
        distinct non-``None`` entries found in the replay results'
        ``activated_transitions`` divided by the number of net transitions,
        and ``coverage`` is the share of traces whose ``activated_transitions``
        is non-empty.
    """
    replay_results = token_replay.apply(log, net, im, fm)

    # Distinct transitions fired in at least one replayed trace.
    fired = {
        transition
        for result in replay_results
        for transition in result['activated_transitions']
        if transition is not None
    }
    support = len(fired) / len(net.transitions)

    # Traces in which at least one transition fired.
    covered = sum(1 for result in replay_results if result['activated_transitions'])
    coverage = covered / len(log)

    return support, coverage
    

def alignment_base_evaluation(log, net, im, fm):
    """Compute support and coverage from alignments of ``log`` on ``net``.

    An alignment step counts as a match when its two components are equal and
    the second one is not ``None``.
    NOTE(review): with pm4py's default alignment output this presumably
    corresponds to a synchronous move on a labelled transition — confirm.

    Args:
        log: Event log handed to ``alignments.apply_log``; ``len(log)`` is
            used as the number of traces.
        net: Petri net; ``len(net.transitions)`` is the support denominator.
        im: Initial marking.
        fm: Final marking.

    Returns:
        tuple: ``(support, coverage)`` where ``support`` is the number of
        distinct matched second components divided by the number of net
        transitions, and ``coverage`` is the share of traces with at least one
        matching step.
    """
    alignment_results = alignments.apply_log(log, net, im, fm)

    def _is_match(step):
        # Both sides agree and refer to something (not None).
        return step[0] == step[1] and step[1] is not None

    matched = set()
    for result in alignment_results:
        matched.update(step[1] for step in result['alignment'] if _is_match(step))
    support = len(matched) / len(net.transitions)

    covered = sum(
        1 for result in alignment_results
        if any(_is_match(step) for step in result['alignment'])
    )
    coverage = covered / len(log)

    return support, coverage


def evaluate_routines(log, net, im, fm, token_base, routine_label):
    """Score the traces assigned to one routine against a ground-truth net.

    The routine's activities are read from the module-level ``routines``
    mapping (it is not a parameter).

    Args:
        log: Traces assigned to the routine, in a form accepted by
            ``pm4py.convert_to_event_log``.
        net: Ground-truth Petri net.
        im: Initial marking of ``net``.
        fm: Final marking of ``net``.
        token_base: When truthy, support/coverage come from
            ``token_base_evaluation``; otherwise from
            ``alignment_base_evaluation``.
        routine_label: Key into the module-level ``routines`` mapping.

    Returns:
        list: ``[routine_label, number of traces, routine length, fitness,
        precision, generalization, simplicity, f_score, coverage, support,
        jaccard]``. Fitness, precision, generalization and simplicity are
        rounded to 3 decimals; the F-score is derived from the rounded
        fitness and precision.
    """
    gt_routine_activities = extract_transitions(net)
    pattern_activities = set(routines[routine_label])

    # Jaccard coefficient (intersection over union); 0.0 when both sets are empty.
    union = pattern_activities | gt_routine_activities
    if union:
        jc = len(pattern_activities & gt_routine_activities) / len(union)
    else:
        jc = 0.0

    log = pm4py.convert_to_event_log(log)
    if token_base:
        support, coverage = token_base_evaluation(log, net, im, fm)
    else:
        support, coverage = alignment_base_evaluation(log, net, im, fm)

    # Fitness, precision, generalization and simplicity from pm4py's evaluation.
    q_o = algorithm.apply(log, net, im, fm)
    fitness = round(q_o['fitness']['average_trace_fitness'], 3)
    prec = round(q_o['precision'], 3)
    gen = round(q_o['generalization'], 3)
    simp = round(q_o['simplicity'], 3)
    f_score = get_fScore(fitness, prec)

    return [routine_label, len(log), len(routines[routine_label]), fitness, prec, gen, simp, f_score, coverage, support, jc]

## *Evaluate Discovered Routines Against Ground-Truth Models (trace penalisation currently disabled)*

In [9]:
# # Iterate over traces and process activities
# def penalize_trace(trace, pattern_activities):
#     trace['concept:name'] = trace['concept:name'].apply(
#         lambda x: f"extra_{x}" if x not in pattern_activities else x
#     )
#     return trace

def routines_evaluate(activity_log, routine_vectors, routines):
    """Score every discovered routine against its best-matching ground-truth net.

    For each pattern label the ground-truth model with the highest Jaccard
    coefficient is selected. If traces were assigned to the pattern they are
    evaluated against that model with ``evaluate_routines`` (alignment-based);
    otherwise a row of zeros carrying only the best Jaccard value is stored.

    This function relies on module-level state:
      * ``cpn_ground_truth_dir`` — directory holding the ground-truth models;
      * ``result_dic`` — dict that receives one row per pattern under the
        integer keys 1, 2, ...;
      * ``get_JC`` / ``evaluate_routines`` read the module-level ``routines``
        mapping, so the ``routines`` argument should be that same mapping.

    Compared with the earlier version, the models are listed in sorted order
    (ties between equally good models are resolved identically on every
    platform), each model is imported once for the Jaccard scan instead of
    once per pattern, directory entries that are not files are ignored, and
    an empty model directory is reported explicitly.

    Args:
        activity_log: DataFrame with a ``pattern_label`` column.
        routine_vectors: Mapping whose keys are the pattern labels to score.
        routines: Mapping ``pattern label -> list of activities``.

    Returns:
        dict: The module-level ``result_dic`` (also updated in place).

    Raises:
        ValueError: If ``cpn_ground_truth_dir`` contains no files.
    """
    if not routine_vectors:
        return result_dic

    model_files = sorted(
        file_name
        for file_name in os.listdir(cpn_ground_truth_dir)
        if os.path.isfile(os.path.join(cpn_ground_truth_dir, file_name))
    )
    if not model_files:
        raise ValueError(f"No ground-truth model files found in {cpn_ground_truth_dir!r}")

    # Import every ground-truth model once; reused for the Jaccard scan of all patterns.
    gt_models = {
        file_name: pnml_importer.import_net(os.path.join(cpn_ground_truth_dir, file_name))
        for file_name in model_files
    }

    key_value = 1
    for pattern_label in routine_vectors.keys():
        JC_scores = {
            file_name: get_JC(gt_net, gt_im, gt_fm, routine_label=pattern_label)
            for file_name, (gt_net, gt_im, gt_fm) in gt_models.items()
        }
        # First model (in sorted order) reaching the highest Jaccard coefficient.
        max_key = max(JC_scores, key=JC_scores.get)
        max_value = JC_scores[max_key]

        fitted_traces = activity_log[activity_log['pattern_label'] == pattern_label]
        if len(fitted_traces):
            # A fresh import keeps the evaluation independent of the cached nets.
            cpn_ground_truth_path = os.path.join(cpn_ground_truth_dir, max_key)
            gt_net, gt_im, gt_fm = pnml_importer.import_net(cpn_ground_truth_path)
            scores = evaluate_routines(fitted_traces, gt_net, gt_im, gt_fm, token_base=False, routine_label=pattern_label)
        else:
            # No trace was assigned to this pattern: only the Jaccard value is meaningful.
            scores = [pattern_label, 0, len(routines[pattern_label]), 0, 0, 0, 0, 0, 0, 0, max_value]

        result_dic[key_value] = scores
        key_value += 1

    return result_dic

## *Summary of Evaluation Scores*

In [10]:
# # Initialize a list for the average values
# average_values = ["Average"]

# # Calculate the sum of Traces for weighted calculations and add as a separate metric
# total_traces = sum(result_dic[key][1] for key in result_dic if isinstance(key, int))
# average_values.append(total_traces)  # Sum of Traces (instead of an average)

# # Calculate averages and weighted means for each metric column
# for i in range(2, len(result_dic[1])):  # Start from index 2 to skip Routine and Traces
#     values = [result_dic[key][i] for key in result_dic if isinstance(key, int)]
    
#     if i in [9, 10, 11]:  # For Coverage, Support, and JC indices
#         # Calculate weighted mean
#         weighted_mean = sum(result_dic[key][i] * result_dic[key][1] for key in result_dic if isinstance(key, int)) / total_traces
#         average_values.append(weighted_mean)
#     else:
#         # For other metrics, calculate a simple average
#         avg = sum(values) / len(values)
#         average_values.append(avg)

# # Find the next available numeric key
# next_key = max([key for key in result_dic if isinstance(key, int)]) + 1

# # Add the new row to result_dic with the calculated averages
# result_dic[next_key] = average_values

In [11]:
# # Initialize two lists for average values
# simple_average_values = ["Simple Average"]
# weighted_average_values = ["Weighted Average"]

# # Calculate the sum of Traces for weighted calculations
# total_traces = sum(result_dic[key][1] for key in result_dic if isinstance(key, int))

# # Append the total traces to both average rows
# simple_average_values.append(total_traces)  # Sum of Traces for simple average
# weighted_average_values.append(total_traces)  # Sum of Traces for weighted average

# # Calculate averages for each metric column
# for i in range(2, len(result_dic[1])):  # Start from index 2 to skip Routine and Traces
#     values = [result_dic[key][i] for key in result_dic if isinstance(key, int)]

#     if i == 2:
#         # Special case: Simple average for trace length (index 2)
#         simple_avg = sum(values) / len(values)
#         weighted_avg = simple_avg  # Use the same for weighted average
#     else:
#         # Simple average for other metrics
#         simple_avg = sum(values) / len(values)
        
#         # Weighted average for other metrics
#         weighted_avg = sum(result_dic[key][i] * result_dic[key][1] for key in result_dic if isinstance(key, int)) / total_traces

#     # Append the averages to the respective lists
#     simple_average_values.append(simple_avg)
#     weighted_average_values.append(weighted_avg)

# # Find the next available numeric keys
# next_key_simple = max([key for key in result_dic if isinstance(key, int)]) + 1
# next_key_weighted = next_key_simple + 1

# # Add the new rows to result_dic
# result_dic[next_key_simple] = simple_average_values
# result_dic[next_key_weighted] = weighted_average_values


In [12]:
# result_df = pd.DataFrame(result_dic)
# result_df = result_df.set_index('Metrics')
# # displaying the DataFrame
# print(tabulate(result_df.T, headers = 'keys', tablefmt = 'psql'))
# # result_df.T

In [13]:
# ✅ Function to calculate F-score given fitness and precision
def calculate_f_score(fitness, precision):
    """Return the harmonic mean of ``fitness`` and ``precision`` (0 if they sum to zero)."""
    total = fitness + precision
    return 0 if total == 0 else 2 * (fitness * precision) / total

# ✅ Function to calculate simple and weighted averages for a single result dictionary
def UI_logs_summary(result_dic):
    """Append simple and trace-weighted average rows to one result dictionary.

    Only entries with integer keys are treated as routine rows. Each row is
    laid out as ``[label, traces, trace length, fitness, precision,
    generalization, simplicity, f-score, coverage, support, JC]``.

    For every metric from index 2 onwards the plain mean and the mean weighted
    by the row's trace count (index 1) are computed and rounded to 3 decimals.
    Position 1 of both average rows holds the total number of traces. The
    F-score entry (index 7) is then recomputed from the averaged fitness and
    precision rather than averaged directly.

    ``result_dic`` is modified in place: the two average rows are stored under
    the next two free integer keys. Calling this twice on the same dictionary
    therefore averages over the previously added rows as well.

    Args:
        result_dic: Mapping with integer keys for routine rows (other keys,
            such as the ``'Metrics'`` header, are ignored).

    Returns:
        tuple: ``(result_dic, simple_average_values)``.

    Raises:
        ValueError: If ``result_dic`` holds no integer-keyed row.
    """
    int_keys = [key for key in result_dic if isinstance(key, int)]
    if not int_keys:
        raise ValueError("result_dic contains no routine rows to summarise")
    rows = [result_dic[key] for key in int_keys]

    total_traces = sum(row[1] for row in rows)
    simple_average_values = ["Simple Average", total_traces]
    weighted_average_values = ["Weighted Average", total_traces]

    # Index 2 ("Trace Length") is the first averaged column.
    for i in range(2, len(rows[0])):
        simple_avg = sum(row[i] for row in rows) / len(rows)
        if total_traces:
            weighted_avg = sum(row[i] * row[1] for row in rows) / total_traces
        else:
            # No trace was assigned to any routine, so every weight is zero.
            weighted_avg = 0.0

        simple_average_values.append(round(simple_avg, 3))
        weighted_average_values.append(round(weighted_avg, 3))

    # Recompute the F-score (index 7) from averaged fitness (3) and precision (4).
    simple_average_values[7] = round(
        calculate_f_score(simple_average_values[3], simple_average_values[4]), 3)
    weighted_average_values[7] = round(
        calculate_f_score(weighted_average_values[3], weighted_average_values[4]), 3)

    next_key_simple = max(int_keys) + 1
    result_dic[next_key_simple] = simple_average_values
    result_dic[next_key_simple + 1] = weighted_average_values

    return result_dic, simple_average_values  # Return simple average row for aggregation

# 🚀 Function to handle cross-validation and final aggregation
def cross_validation_summary(cross_val_results):
    """Summarise per-iteration results and aggregate them across iterations.

    ``UI_logs_summary`` is called on every result dictionary (which adds the
    average rows to that dictionary in place); its simple-average row, without
    the leading label, becomes the row ``"CV <i>"`` of the iteration table.
    The final table holds the plain column means and the means weighted by
    each iteration's ``Traces`` value. Both tables are printed with
    ``tabulate``.

    Args:
        cross_val_results: Iterable of result dictionaries, one per iteration.

    Returns:
        tuple: ``(iter_df, final_df)``. When ``cross_val_results`` is empty,
        ``iter_df`` has no rows and ``final_df`` contains only NaN.
    """
    iter_columns = ["Traces", "Trace Length", "Fitness", "Precision", "Gen",
                    "Simp", "F-Score", "Coverage", "Support", "JC"]
    summary_index = ["Simple Average", "Weighted Average"]

    # 📊 Collect simple averages from each iteration
    iter_results = {}
    for i, result_dic in enumerate(cross_val_results, start=1):
        _, simple_avg_row = UI_logs_summary(result_dic)
        iter_results[f"CV {i}"] = simple_avg_row[1:]  # Exclude 'Simple Average' label

    if not iter_results:
        # Every iteration failed upstream: nothing can be averaged.
        print("\nNo cross-validation results available; returning empty summary.")
        empty_iter_df = pd.DataFrame(columns=iter_columns, dtype=float)
        empty_final_df = pd.DataFrame(float("nan"), index=summary_index, columns=iter_columns)
        return empty_iter_df, empty_final_df

    # 🔎 Convert iteration results into DataFrame (one row per iteration)
    iter_df = pd.DataFrame(iter_results, index=iter_columns).T

    print("\n✅ Cross-Validation Iteration Results:")
    print(tabulate(iter_df, headers='keys', tablefmt='psql'))

    # 🧮 Final Simple and Weighted Averages
    final_simple_avg = iter_df.mean().tolist()
    total_traces = iter_df["Traces"].sum()
    if total_traces:
        weighted_avg = [(iter_df[col] * iter_df["Traces"]).sum() / total_traces for col in iter_df.columns]
    else:
        # All weights are zero, so the weighted mean is undefined.
        weighted_avg = [float("nan")] * len(iter_df.columns)

    # 📢 Final Aggregated Results
    final_df = pd.DataFrame(
        [final_simple_avg, weighted_avg],
        index=summary_index,
        columns=iter_columns
    )

    print("\n🚀 Final Aggregated Results:")
    print(tabulate(final_df, headers='keys', tablefmt='outline'))

    return iter_df, final_df

## *main*

In [24]:
# Noise level of the transformed logs to evaluate; it selects the input/output folders.
noise_prob = 0.1

for log_number in range(3, 6):
    cross_val_results = []
    for variant in range(1, 11):
        try:
            log_path = f"Transformed_Logs_and_Results/Our/Transformed_Log_With_Noise_{noise_prob}/log{log_number}/noisy_transform_log{log_number}_{variant}.xes"
            routines_file_path = f"Transformed_Logs_and_Results/Simon/Transformed_Log_With_Noise_{noise_prob}/log{log_number}/simon_noisy_transform_log{log_number}_{variant}_.txt"
            # NOTE: `cpn_ground_truth_dir`, `result_dic` and `routines` are read as
            # module-level names by routines_evaluate / get_JC / evaluate_routines,
            # so they must keep exactly these names.
            cpn_ground_truth_dir = f"GT_Models/log{log_number}/"

            result_dic = {'Metrics':['Routine', "Traces", "Length", 'Fitness','Precision','Generalization','Simplicity', 'F_Score', 'coverage', 'Support', 'JC'],}

            log, reference_set = UI_read_log(log_path)
            all_routines = read_routines(routines_file_path)
            routine_vectors, routines = get_discover_routines_encoding(all_routines=all_routines, reference_set=reference_set)

            session_log = f.create_session(log)
            encoded_log = freq_encoding(session_log)

            # Assign labels
            encoded_log = assign_labels_by_distance(encoded_log, routine_vectors, reference_set)
            activity_log = assign_pattern_to_traces(session_log, encoded_log)

            # Fills the module-level `result_dic` in place.
            routines_evaluate(activity_log, routine_vectors, routines)
            cross_val_results.append(result_dic)
        except Exception as e:
            # Best effort: report the failing variant and carry on with the next one.
            print(f"Error encountered for Log{log_number}, Variant {variant}: {e}")
            continue  # Continue with the next variant

    if not cross_val_results:
        # Every variant failed, so there is nothing to aggregate or write for this log.
        print(f"No variant of Log{log_number} could be evaluated; skipping summary.")
        continue

    # Run cross-validation summary
    iteration_results, final_averages = cross_validation_summary(cross_val_results)
    results_df = pd.concat([iteration_results, final_averages])

    outputfile = f"Transformed_Logs_and_Results/Simon/Transformed_Log_With_Noise_{noise_prob}/Results"
    os.makedirs(outputfile, exist_ok=True)
    # NOTE(review): "reuslts" looks like a typo of "results"; the name is kept as-is
    # because other tooling may already read files with this name.
    output_file_name = f"log{log_number}_reuslts.csv"
    results_df.to_csv(os.path.join(outputfile, output_file_name))

parsing log, completed traces :: 100%|████████████████████████████████████████████| 1000/1000 [00:01<00:00, 557.08it/s]
aligning log, completed variants :: 100%|████████████████████████████████████████████████| 9/9 [00:00<00:00, 94.64it/s]
replaying log with TBR, completed variants :: 100%|█████████████████████████████████████| 9/9 [00:00<00:00, 902.50it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 78/78 [00:00<00:00, 1403.97it/s]
aligning log, completed variants :: 100%|███████████████████████████████████████████████| 3/3 [00:00<00:00, 104.99it/s]
replaying log with TBR, completed variants :: 100%|████████████████████████████████████| 3/3 [00:00<00:00, 1001.35it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 20/20 [00:00<00:00, 1600.91it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 11/11 [00:00<00:00, 2205.00it/s]
aligning log, completed variants :: 100%


✅ Cross-Validation Iteration Results:
+-------+----------+----------------+-----------+-------------+-------+--------+-----------+------------+-----------+-------+
|       |   Traces |   Trace Length |   Fitness |   Precision |   Gen |   Simp |   F-Score |   Coverage |   Support |    JC |
|-------+----------+----------------+-----------+-------------+-------+--------+-----------+------------+-----------+-------|
| CV 1  |     1000 |          8.859 |     0.172 |       0.158 | 0.092 |  0.212 |     0.165 |      0.212 |     0.187 | 0.567 |
| CV 2  |     1000 |          9.162 |     0.122 |       0.116 | 0.076 |  0.147 |     0.119 |      0.147 |     0.133 | 0.579 |
| CV 3  |     1000 |          9.377 |     0.124 |       0.118 | 0.066 |  0.154 |     0.121 |      0.154 |     0.14  | 0.565 |
| CV 4  |     1000 |          8.974 |     0.143 |       0.137 | 0.073 |  0.175 |     0.14  |      0.175 |     0.156 | 0.551 |
| CV 5  |     1000 |          9.796 |     0.147 |       0.138 | 0.08  |  0.18  

parsing log, completed traces :: 100%|██████████████████████████████████████████████| 100/100 [00:00<00:00, 606.00it/s]
aligning log, completed variants :: 100%|█████████████████████████████████████████████| 36/36 [00:00<00:00, 140.37it/s]
replaying log with TBR, completed variants :: 100%|███████████████████████████████████| 36/36 [00:00<00:00, 964.19it/s]
replaying log with TBR, completed variants :: 100%|████████████████████████████████| 276/276 [00:00<00:00, 1893.19it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 12/12 [00:00<00:00, 1673.26it/s]
aligning log, completed variants :: 100%|███████████████████████████████████████████████| 4/4 [00:00<00:00, 148.55it/s]
replaying log with TBR, completed variants :: 100%|█████████████████████████████████████| 4/4 [00:00<00:00, 668.65it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 45/45 [00:00<00:00, 2370.41it/s]
aligning log, completed variants :: 100%


✅ Cross-Validation Iteration Results:
+-------+----------+----------------+-----------+-------------+-------+--------+-----------+------------+-----------+-------+
|       |   Traces |   Trace Length |   Fitness |   Precision |   Gen |   Simp |   F-Score |   Coverage |   Support |    JC |
|-------+----------+----------------+-----------+-------------+-------+--------+-----------+------------+-----------+-------|
| CV 1  |      100 |          8.638 |     0.122 |       0.122 | 0.056 |  0.138 |     0.122 |      0.138 |     0.121 | 0.52  |
| CV 2  |      100 |          8.05  |     0.133 |       0.124 | 0.059 |  0.15  |     0.128 |      0.15  |     0.133 | 0.5   |
| CV 3  |      100 |         11.527 |     0.077 |       0.084 | 0.043 |  0.091 |     0.08  |      0.091 |     0.083 | 0.56  |
| CV 4  |      100 |         10.107 |     0.123 |       0.126 | 0.066 |  0.143 |     0.124 |      0.143 |     0.125 | 0.562 |
| CV 5  |      100 |          7.889 |     0.14  |       0.144 | 0.068 |  0.159 

parsing log, completed traces :: 100%|████████████████████████████████████████████| 1000/1000 [00:01<00:00, 892.10it/s]
replaying log with TBR, completed variants :: 100%|████████████████████████████████████| 5/5 [00:00<00:00, 4983.73it/s]
aligning log, completed variants :: 100%|█████████████████████████████████████████████| 14/14 [00:00<00:00, 268.87it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 14/14 [00:00<00:00, 2006.16it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 65/65 [00:00<00:00, 1074.91it/s]
aligning log, completed variants :: 100%|█████████████████████████████████████████████| 16/16 [00:00<00:00, 249.47it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 16/16 [00:00<00:00, 1458.63it/s]
replaying log with TBR, completed variants :: 100%|██████████████████████████████████| 74/74 [00:00<00:00, 3372.51it/s]
aligning log, completed variants :: 100%


✅ Cross-Validation Iteration Results:
+-------+----------+----------------+-----------+-------------+-------+--------+-----------+------------+-----------+-------+
|       |   Traces |   Trace Length |   Fitness |   Precision |   Gen |   Simp |   F-Score |   Coverage |   Support |    JC |
|-------+----------+----------------+-----------+-------------+-------+--------+-----------+------------+-----------+-------|
| CV 1  |     1000 |          6.329 |     0.256 |       0.241 | 0.126 |  0.301 |     0.248 |      0.301 |     0.271 | 0.648 |
| CV 2  |     1000 |          6.39  |     0.276 |       0.256 | 0.133 |  0.323 |     0.266 |      0.323 |     0.29  | 0.652 |
| CV 3  |     1000 |          6.509 |     0.274 |       0.244 | 0.128 |  0.321 |     0.258 |      0.321 |     0.29  | 0.659 |
| CV 4  |     1000 |          6.654 |     0.261 |       0.24  | 0.126 |  0.308 |     0.25  |      0.308 |     0.277 | 0.649 |
| CV 5  |     1000 |          6.389 |     0.236 |       0.211 | 0.122 |  0.281 