# Evaluation of Output Quality


In [1]:
import os
import pickle
import warnings
from typing import Tuple, Union

from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tabulate import tabulate
import tkinter as tk
from tkinter import filedialog
from tokenizers import Tokenizer

from config import extract_log_name, get_file_path
from train import check_boundary_activity_rule, check_directly_following_rule

# Suppress all warnings for the whole session (this also hides deprecation warnings raised by the libraries used).
warnings.filterwarnings("ignore")


In [2]:
def add_elusive_equivalents(metrics: pd.DataFrame) -> pd.DataFrame:
    """
    Add the elusive metrics to the metrics DataFrame.

    For every base metric an "Elus. <metric>" column is inserted directly after it. An elusive value expresses the
    gain over the iteration-0 baseline as a percentage of the gap between that baseline and 100. If the baseline
    already equals 100 there is no gap left and the elusive value is 0.

    The columns are only added when the baseline completeness (iteration 0) is greater than 0. The DataFrame is
    modified in place and also returned.

    :param metrics: DataFrame containing the metrics, including the 'Iteration' column.
    :return: DataFrame containing the metrics with the elusive metrics added. It is returned unchanged when it
             holds no iteration-0 row.
    """
    baseline = metrics.loc[metrics['Iteration'] == 0]

    # Without a baseline row (e.g. an empty metrics table) there is nothing to compare against.
    if baseline.empty:
        return metrics

    if baseline['Completeness'].values[0] > 0:
        for metric in ["Completeness", "Accuracy", "Factual Accuracy", "Overall Accuracy"]:
            start_metric = baseline[metric].values[0]

            if start_metric != 100:
                # Share of the remaining gap to 100 that has been closed since iteration 0.
                elusive_values = ((metrics[metric] - start_metric) / (100 - start_metric)) * 100
            else:
                elusive_values = 0

            metrics.insert(metrics.columns.get_loc(metric) + 1, f"Elus. {metric}", elusive_values)

    return metrics


In [None]:
def add_proportional_equivalents(metrics: pd.DataFrame) -> pd.DataFrame:
    """
    Add the proportional metrics to the metrics DataFrame.

    For each accuracy metric a "Prop. <metric>" column is inserted directly after its "Elus. <metric>" column. A
    proportional value is the elusive metric expressed as a percentage of the elusive completeness of the same row;
    rows whose elusive completeness is not greater than 0 get 0.

    The columns are only added when the baseline completeness (iteration 0) is greater than 0, in which case the
    "Elus. ..." columns must already be present. The DataFrame is modified in place and also returned.

    :param metrics: DataFrame containing the metrics, including the elusive metrics.
    :return: DataFrame containing the metrics with the proportional metrics added. It is returned unchanged when it
             holds no iteration-0 row.
    """
    baseline_completeness = metrics.loc[metrics['Iteration'] == 0, 'Completeness']

    # Without a baseline row (e.g. an empty metrics table) there is nothing to compute.
    if baseline_completeness.empty:
        return metrics

    if baseline_completeness.values[0] > 0:
        # The elusive columns may be stored with object dtype; convert so the arithmetic is purely numeric.
        elusive_completeness = pd.to_numeric(metrics['Elus. Completeness'])
        has_gain = elusive_completeness > 0

        for metric in ["Accuracy", "Factual Accuracy", "Overall Accuracy"]:
            elusive_metric = pd.to_numeric(metrics[f"Elus. {metric}"])
            # Rows without a completeness gain would divide by zero (or a negative value); they are set to 0.
            proportional_values = ((elusive_metric / elusive_completeness) * 100).where(has_gain, 0)
            column_index = metrics.columns.get_loc(f"Elus. {metric}") + 1
            metrics.insert(column_index, f"Prop. {metric}", proportional_values)

    return metrics


In [3]:
def calculate_completely_correct_cases(predicted_df: pd.DataFrame, correct_df: pd.DataFrame, column: str) -> float:
    """
    Calculate the percentage of true cases that were reconstructed completely and under their own name.

    A case counts as completely correct when every row that truly belongs to it carries the case's ID in `column`,
    and every row that carries that ID in `column` truly belongs to the case.

    :param predicted_df: DataFrame containing the determined Case ID values in `column`.
    :param correct_df: DataFrame containing the true 'Case ID' values, indexed like `predicted_df`.
    :param column: Column of `predicted_df` to evaluate.
    :return: Percentage of completely correct cases, or 0 if either DataFrame is empty.
    """
    if predicted_df.empty or correct_df.empty:
        return 0

    true_case_ids = correct_df['Case ID'].unique()
    fully_correct = 0

    for true_id in true_case_ids:
        # Forward direction: all rows of the true case must have been assigned the true ID.
        rows_of_case = correct_df.loc[correct_df['Case ID'] == true_id]
        assigned_ids = predicted_df.loc[rows_of_case.index, column]

        if not (rows_of_case['Case ID'] == assigned_ids).all():
            continue

        # Backward direction: no row of another case may have been assigned this ID.
        rows_with_id = predicted_df.loc[predicted_df[column] == true_id]
        actual_ids = correct_df.loc[rows_with_id.index, 'Case ID']

        if (rows_with_id[column] == actual_ids).all():
            fully_correct += 1

    return fully_correct / len(true_case_ids) * 100


def calculate_correct_case_different_naming(predicted_df: pd.DataFrame, correct_df: pd.DataFrame, column: str,
                                            calculation: bool = True) -> Union[pd.DataFrame, float]:
    """
    Calculate the proportion of factually completely correct Case ID values.

    A factually completely correct case is one whose rows all carry the same determined value in `column`, where
    that value differs from the true Case ID and is not carried by any row of another case. A missing (NaN)
    determined value is not a name and therefore never qualifies.

    :param predicted_df: DataFrame containing Determined Case ID values.
    :param correct_df: DataFrame containing Case ID values, indexed like `predicted_df`.
    :param column: Column to evaluate accuracy.
    :param calculation: If True, calculate the proportion of factually completely correct Case ID values.
                        If False, return the rows of the factually completely correct cases.
    :return: Percentage of factually completely correct cases if calculation is True, otherwise the rows of
             `correct_df` that belong to those cases. For empty input: 0 or an empty DataFrame respectively.
    """
    if predicted_df.empty or correct_df.empty:
        return 0 if calculation else pd.DataFrame()

    predicted_ids = predicted_df[column]
    true_ids = correct_df['Case ID']
    unique_true_ids = true_ids.unique()
    correct_cases = []

    for case_id in unique_true_ids:
        case_rows = true_ids[true_ids == case_id].index
        assigned_ids = predicted_ids.loc[case_rows].unique()

        # All rows of the case must share exactly one determined value.
        if len(assigned_ids) != 1:
            continue

        assigned_id = assigned_ids[0]

        # An undetermined (NaN) value is no alternative name; an identical value is a real, not a factual, match.
        if pd.isna(assigned_id) or assigned_id == case_id:
            continue

        # The alternative name must not occur on rows of any other case. The row mask itself is checked, so
        # conflicting rows are detected regardless of the values they hold.
        used_elsewhere = (predicted_ids == assigned_id) & (true_ids != case_id)

        if not used_elsewhere.any():
            correct_cases.append(case_id)

    if not calculation:
        return correct_df[true_ids.isin(correct_cases)]

    return len(correct_cases) / len(unique_true_ids) * 100


def calculate_factual_matching_proportion(predicted_df: pd.DataFrame, correct_df: pd.DataFrame, column: str) -> float:
    """
    Calculate the percentage of rows that belong to a factually correct (consistently renamed) case.

    The rows are obtained from `calculate_correct_case_different_naming` and set in relation to the number of rows
    in the predicted log.

    :param predicted_df: DataFrame containing the determined Case ID values in `column`.
    :param correct_df: DataFrame containing the true 'Case ID' values.
    :param column: Column of `predicted_df` to evaluate.
    :return: Percentage of factually matching rows, or 0 if either DataFrame is empty.
    """
    if predicted_df.empty or correct_df.empty:
        return 0

    renamed_rows = calculate_correct_case_different_naming(predicted_df, correct_df, column, calculation=False)

    return len(renamed_rows) / len(predicted_df) * 100


def calculate_matching_proportion(predicted_df: pd.DataFrame, correct_df: pd.DataFrame, column: str) -> float:
    """
    Calculate the percentage of rows whose determined Case ID equals the true Case ID.

    :param predicted_df: DataFrame containing the determined Case ID values in `column`.
    :param correct_df: DataFrame containing the true 'Case ID' values, labelled like `predicted_df`.
    :param column: Column of `predicted_df` to evaluate.
    :return: Percentage of matching rows, or 0 if either DataFrame is empty.
    """
    if predicted_df.empty or correct_df.empty:
        return 0

    is_match = predicted_df[column] == correct_df['Case ID']
    matched_count = len(predicted_df[is_match])

    return matched_count / len(predicted_df) * 100


def evaluate_accuracy(predicted_df: pd.DataFrame, complete_df: pd.DataFrame, column: str) -> dict:
    """
    Collect all accuracy metrics of a repaired log.

    Row-level metrics ("Accuracy", "Factual Accuracy") and case-level metrics ("Real Case Accuracy",
    "Factual Case Accuracy") are computed by the dedicated helper functions; the two "Overall" entries are the sums
    of the respective real and factual values.

    :param predicted_df: DataFrame containing the determined log.
    :param complete_df: DataFrame containing the complete log.
    :param column: Column of `predicted_df` to evaluate.
    :return: Dictionary mapping metric names to their values.
    """
    arguments = (predicted_df, complete_df, column)

    row_real = calculate_matching_proportion(*arguments)
    row_factual = calculate_factual_matching_proportion(*arguments)
    case_real = calculate_completely_correct_cases(*arguments)
    case_factual = calculate_correct_case_different_naming(*arguments)

    accuracy_metrics = {}
    accuracy_metrics["Accuracy"] = row_real
    accuracy_metrics["Factual Accuracy"] = row_factual
    accuracy_metrics["Overall Accuracy"] = row_real + row_factual
    accuracy_metrics["Real Case Accuracy"] = case_real
    accuracy_metrics["Factual Case Accuracy"] = case_factual
    accuracy_metrics["Overall Case Accuracy"] = case_real + case_factual

    return accuracy_metrics


In [4]:
def evaluate_completeness(df: pd.DataFrame, column: str) -> float:
    """
    Evaluate the completeness of the log.

    Completeness is the percentage of rows whose value in `column` is present (not NaN).

    :param df: DataFrame containing the log.
    :param column: Column to evaluate completeness.
    :return: Percentage of non-missing values in `column`; negative infinity if the column does not exist, and 0
             if the column holds no rows.
    """
    if column not in df.columns:
        return float('-inf')

    values = df[column]

    # An empty log would otherwise divide 0 by 0 and yield NaN.
    if values.empty:
        return 0

    percentage_not_na = (1 - values.isna().sum() / len(values)) * 100
    return percentage_not_na


In [5]:
def calculate_directly_following_consistency(df: pd.DataFrame, configuration: dict, column: str) -> float:
    """
    Calculate the percentage of cases that satisfy every 'always' directly-following rule.

    Only the rule pairs whose occurrence is 'always' are checked. A case satisfies a pair (predecessor, successor)
    when both activities occur in the case, they occur equally often, and neither the first nor the last
    predecessor is positioned (by index label) after the first respectively last successor.

    :param df: The DataFrame containing the log.
    :param configuration: The configuration dictionary.
    :param column: The column holding the case IDs to group by.
    :return: The percentage of cases satisfying all rules, or 0 if there are no cases.
    """
    rule = configuration['complete_expert_values']['Directly Following']
    mandatory_pairs = [pair for pair, occurrence in zip(rule['values'], rule['occurrences'])
                       if occurrence == 'always']

    def satisfies_pair(events: pd.DataFrame, predecessor, successor) -> bool:
        """Check a single (predecessor, successor) rule against the events of one case."""
        predecessor_positions = events[events['Activity'] == predecessor].index.tolist()
        successor_positions = events[events['Activity'] == successor].index.tolist()

        # Both activities have to be present ...
        if not predecessor_positions or not successor_positions:
            return False

        # ... equally often ...
        if len(predecessor_positions) != len(successor_positions):
            return False

        # ... and the predecessor must not start or finish later than the successor.
        starts_in_order = predecessor_positions[0] <= successor_positions[0]
        ends_in_order = predecessor_positions[-1] <= successor_positions[-1]
        return starts_in_order and ends_in_order

    consistent_cases = sum(
        1
        for case_id, events in df.groupby(column)
        if pd.notna(case_id)
        and all(satisfies_pair(events, predecessor, successor) for predecessor, successor in mandatory_pairs)
    )
    total_cases = len(df[column].dropna().unique())

    return (consistent_cases / total_cases * 100) if total_cases else 0


def calculate_end_activity_consistency(df: pd.DataFrame, configuration: dict, column: str) -> float:
    """
    Calculate the percentage of cases that finish with an allowed end activity.

    The last 'Activity' of each case (in row order) is looked up in the expert values configured for
    'End Activity'.

    :param df: The DataFrame containing the log.
    :param configuration: The configuration dictionary.
    :param column: The column holding the case IDs to group by.
    :return: The percentage of cases with a correct end activity, or 0 if there are no cases.
    """
    cases_ending_correctly = sum(
        1
        for case_id, events in df.groupby(column)
        if pd.notna(case_id)
        and events['Activity'].iloc[-1] in configuration['complete_expert_values']['End Activity']['values']
    )
    total_cases = len(df[column].dropna().unique())

    return (cases_ending_correctly / total_cases * 100) if total_cases else 0


def calculate_start_activity_consistency(df: pd.DataFrame, configuration: dict, column: str) -> float:
    """
    Calculate the percentage of cases that begin with an allowed start activity.

    The first 'Activity' of each case (in row order) is looked up in the expert values configured for
    'Start Activity'.

    :param df: The DataFrame containing the log.
    :param configuration: The configuration dictionary.
    :param column: The column holding the case IDs to group by.
    :return: The percentage of cases with a correct start activity, or 0 if there are no cases.
    """
    cases_starting_correctly = sum(
        1
        for case_id, events in df.groupby(column)
        if pd.notna(case_id)
        and events['Activity'].iloc[0] in configuration['complete_expert_values']['Start Activity']['values']
    )
    total_cases = len(df[column].dropna().unique())

    return (cases_starting_correctly / total_cases * 100) if total_cases else 0


def evaluate_consistency(df: pd.DataFrame, configuration: dict, column: str) -> dict:
    """
    Evaluate the consistency of the log.

    One consistency metric is computed per expert attribute that is part of the configuration; metrics of
    attributes that are not configured stay None.

    :param df: The DataFrame containing the log.
    :param configuration: The configuration dictionary. It may be empty, e.g. when no model configuration file
                          was available.
    :param column: The column to evaluate consistency.
    :return: Dictionary containing the quality metrics, or an empty dictionary if no expert attributes are
             configured or `column` is not part of `df`.
    """
    # .get() instead of indexing: an empty configuration must not raise a KeyError.
    expert_attributes = configuration.get('complete_expert_attributes')

    if not expert_attributes or column not in df.columns:
        return {}

    consistency_metrics = {
        "St. Ac. Consistency": None,
        "End Ac. Consistency": None,
        "Di. Fo. Consistency": None
    }

    if 'Start Activity' in expert_attributes:
        consistency_metrics["St. Ac. Consistency"] = calculate_start_activity_consistency(df, configuration, column)
    if 'End Activity' in expert_attributes:
        consistency_metrics["End Ac. Consistency"] = calculate_end_activity_consistency(df, configuration, column)
    if 'Directly Following' in expert_attributes:
        consistency_metrics["Di. Fo. Consistency"] = calculate_directly_following_consistency(
            df, configuration, column)

    return consistency_metrics


In [6]:
def evaluate_iteration(iteration: int, predicted_log: pd.DataFrame, complete_log: pd.DataFrame, configuration: dict, 
                       column: str = 'Determined Case ID') -> dict:
    """
    Compute all quality metrics of one iteration.

    The result combines the iteration number, the completeness of `column`, the accuracy metrics and the
    consistency metrics in a single dictionary.

    :param iteration: The iteration number.
    :param predicted_log: The DataFrame containing the predicted log.
    :param complete_log: The DataFrame containing the complete log.
    :param configuration: The configuration dictionary.
    :param column: The column of the predicted log to evaluate.
    :return: Dictionary containing the quality metrics.
    """
    return {
        "Iteration": iteration,
        "Completeness": evaluate_completeness(predicted_log, column),
        **evaluate_accuracy(predicted_log, complete_log, column),
        **evaluate_consistency(predicted_log, configuration, column),
    }


In [7]:
def evaluate_other_log(metrics: pd.DataFrame, log: pd.DataFrame, complete_log: pd.DataFrame,
                       model_configuration: dict) -> pd.DataFrame:
    """
    Evaluate a log that was created by an alternative approach.

    The baseline row (iteration 0) is copied from `metrics` and the metrics of the given log are appended to it as
    iteration 1.

    :param metrics: The DataFrame containing the quality metrics, including the iteration-0 row.
    :param log: The DataFrame containing the created log.
    :param complete_log: The DataFrame containing the complete log.
    :param model_configuration: The configuration dictionary.
    :return: The DataFrame containing the baseline row followed by the metrics of the created log.
    """
    baseline_rows = metrics[metrics['Iteration'] == 0].copy()
    created_log_row = pd.DataFrame([evaluate_iteration(1, log, complete_log, model_configuration)])

    return pd.concat([baseline_rows, created_log_row], ignore_index=True)


In [8]:
def evaluate_repaired_logs(folder_path: str, log_name: str, complete_log: pd.DataFrame, 
                           configuration: dict) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Evaluate the repaired log of every iteration found in a folder.

    The number of CSV files following the iteration naming pattern determines how many iterations are looked up;
    numbers without a file are skipped. The first iteration's file additionally provides the baseline
    (iteration 0, evaluated on 'Original Case ID') and the elusive log, i.e. the log without the columns added
    during repair.

    :param folder_path: Path to the folder containing the repaired logs.
    :param log_name: Name of the log.
    :param complete_log: DataFrame containing the complete log.
    :param configuration: Dictionary containing the expert input used for training.
    :return: DataFrame containing the quality metrics and DataFrame containing the elusive log. Without any
             repaired log the metrics hold no rows and the elusive log is empty.
    """
    quality_metrics = pd.DataFrame(columns=[
        "Iteration", "Completeness", "Accuracy", "Factual Accuracy", "Overall Accuracy", "Real Case Accuracy",
        "Factual Case Accuracy", "Overall Case Accuracy", "St. Ac. Consistency", "End Ac. Consistency", 
        "Di. Fo. Consistency"
    ])
    elusive_log = pd.DataFrame()
    columns_to_exclude = {'Determined Case ID', 'Iteration Probability',
                          'Determination Probability', 'Determination Follow-up Probability'}

    if os.path.exists(folder_path):
        iteration_files = [f for f in os.listdir(folder_path)
                           if f.endswith('.csv') and f.startswith(f"determined_{log_name}_iteration_")]

        for i in range(1, len(iteration_files) + 1):
            log_file = os.path.join(folder_path, f"determined_{log_name}_iteration_{i}.csv")

            if not os.path.exists(log_file):
                continue

            predicted_log = pd.read_csv(log_file)

            if i == 1:
                # The first file also carries the original, unrepaired case IDs: use it for the baseline.
                baseline_metrics = evaluate_iteration(0, predicted_log, complete_log, configuration,
                                                      'Original Case ID')
                quality_metrics = pd.DataFrame([baseline_metrics])
                elusive_log = predicted_log.drop(columns=columns_to_exclude.intersection(predicted_log.columns))

            current_metrics = evaluate_iteration(i, predicted_log, complete_log, configuration)
            quality_metrics = pd.concat([quality_metrics, pd.DataFrame([current_metrics])], ignore_index=True)

    quality_metrics = format_consistency_metrics(quality_metrics, configuration)

    return quality_metrics, elusive_log


In [9]:
def fill_missing_case_ids_heuristic(elusive_log: pd.DataFrame, configuration: dict, log_name: str) -> pd.DataFrame:
    """
    Fill the missing Case ID values in the elusive log using a heuristic approach.

    If a non-empty heuristic log has been stored for `log_name`, it is read and returned. Otherwise the rules of
    the configured expert attributes are applied to a copy of the elusive log; the passed DataFrame itself is
    left untouched, so it can be reused for other approaches.

    :param elusive_log: The DataFrame containing the elusive log.
    :param configuration: The configuration dictionary. It may be empty, in which case no rule is applied.
    :param log_name: The name of the log file.
    :return: The DataFrame containing the elusive log with the missing Case ID values filled.
    """
    heuristic_file = f"evaluation/{log_name}/heuristic/heuristic_{log_name}.csv"
    
    if os.path.exists(heuristic_file) and os.path.getsize(heuristic_file) > 0:
        log = pd.read_csv(heuristic_file)
        print("Heuristic log successfully read.")
        return log
    
    # Work on a copy: the caller passes the same elusive log to several approaches.
    elusive_log = elusive_log.copy()
    elusive_log['Determined Case ID'] = elusive_log['Original Case ID']
    elusive_log['Timestamp'] = pd.to_datetime(elusive_log['Timestamp'])
    
    # .get() instead of indexing: an empty configuration must not raise a KeyError.
    expert_attributes = configuration.get('complete_expert_attributes') or []
    
    if 'Start Activity' in expert_attributes:
        elusive_log = check_boundary_activity_rule(configuration, elusive_log, False, 'start')
    if 'End Activity' in expert_attributes:
        elusive_log = check_boundary_activity_rule(configuration, elusive_log, False, 'end')
    if 'Directly Following' in expert_attributes:
        elusive_log = check_directly_following_rule(configuration, elusive_log, False)
    
    # Drop the helper columns if they are present after applying the rules.
    columns_to_exclude = {'Probability', 'Follow-up Probability', 'Modification'}
    elusive_log = elusive_log.drop(columns=list(columns_to_exclude.intersection(elusive_log.columns)))
    
    return elusive_log


In [10]:
def fill_missing_case_ids_logreg(complete_log: pd.DataFrame, elusive_log: pd.DataFrame, log_name: str) -> pd.DataFrame:
    """
    Fill the missing Case ID values in the elusive log using a logistic regression model.

    If a non-empty logistic regression log has been stored for `log_name`, it is read and returned. Otherwise the
    features are encoded ('Activity' and 'Resource' as one-hot or category codes, 'Timestamp' min-max scaled), a
    model is fitted and its predictions fill the rows without a case ID. The work is done on a copy; the passed
    DataFrame itself is left untouched, so it can be reused for other approaches.

    :param complete_log: The DataFrame containing the complete log.
    :param elusive_log: The DataFrame containing the elusive log.
    :param log_name: The name of the log file.
    :return: The DataFrame containing the elusive log with the missing Case ID values filled.
    """
    log_reg_file = f"evaluation/{log_name}/logistic_regression/logistic_regression_{log_name}.csv"
    
    if os.path.exists(log_reg_file) and os.path.getsize(log_reg_file) > 0:
        log = pd.read_csv(log_reg_file)
        print("Logistic regression log successfully read.")
        return log
    
    # Work on a copy: the caller passes the same elusive log to several approaches.
    elusive_log = elusive_log.copy()
    elusive_log['Determined Case ID'] = elusive_log['Original Case ID']
    input_data = elusive_log.copy()
    
    categorical_cols = ['Activity', 'Resource']
    for col in categorical_cols:
        if col in input_data.columns:
            if len(input_data[col].unique()) > 2:
                encoder = OneHotEncoder()
                encoded_cols = pd.DataFrame(encoder.fit_transform(input_data[[col]]).toarray(),
                                            columns=encoder.get_feature_names_out([col]))
                input_data.drop(columns=[col], inplace=True)
                # NOTE(review): the positional concat assumes input_data has a default 0..n-1 index - confirm.
                input_data = pd.concat([input_data, encoded_cols], axis=1)
            else:
                input_data[col] = input_data[col].astype('category').cat.codes
    
    if 'Timestamp' in input_data.columns:
        # Scale the timestamps to the range [0, 1].
        input_data['Timestamp'] = pd.to_datetime(input_data['Timestamp'])
        min_timestamp = input_data['Timestamp'].min()
        max_timestamp = input_data['Timestamp'].max()
        input_data['Timestamp'] = (input_data['Timestamp'] - min_timestamp) / (max_timestamp - min_timestamp)
    
    missing_indices = input_data['Determined Case ID'].isna()
    
    if missing_indices.any():
        source_values = input_data.drop(columns=['Original Case ID', 'Determined Case ID'])
        missing_data = source_values[missing_indices]
        # NOTE(review): the labels come from the complete log for ALL rows, so the model may be fitted on the true
        # case IDs of the very rows it has to predict. Confirm whether this leakage is intended for the baseline.
        target_values = complete_log['Case ID']
        
        # Only the training part of the split is used; the held-out part is discarded.
        X_train, _, y_train, _ = train_test_split(source_values, target_values, test_size=0.2)
        
        model = LogisticRegression()
        model.fit(X_train, y_train)
        
        predicted_values = model.predict(missing_data)
        elusive_log.loc[missing_indices, 'Determined Case ID'] = predicted_values
        
        print("Filled missing Case ID values using logistic regression.")
    
    return elusive_log


In [11]:
def format_consistency_metrics(metrics: pd.DataFrame, configuration: dict) -> pd.DataFrame:
    """
    Format the consistency metrics DataFrame.

    A consistency column is removed when its expert attribute is not part of the configuration. The directly
    following column is additionally removed when the attribute is configured but none of its rules has the
    occurrence 'always'. Columns that are not present are ignored. The DataFrame is modified in place and also
    returned.

    :param metrics: DataFrame containing the consistency metrics.
    :param configuration: The configuration dictionary. It may be empty, in which case all consistency columns
                          are removed.
    :return: DataFrame containing the consistency metrics with the appropriate columns.
    """
    column_by_attribute = {
        'Start Activity': "St. Ac. Consistency",
        'End Activity': "End Ac. Consistency",
        'Directly Following': "Di. Fo. Consistency"
    }
    # .get() instead of indexing: an empty configuration must not raise a KeyError.
    expert_attributes = configuration.get('complete_expert_attributes') or []
    
    columns_to_drop = {column for attribute, column in column_by_attribute.items()
                       if attribute not in expert_attributes}
    
    if ('Directly Following' in expert_attributes and 
            'always' not in configuration['complete_expert_values']['Directly Following']['occurrences']):
        columns_to_drop.add("Di. Fo. Consistency")
    
    # Only drop what exists, so the function can safely run on frames that lack consistency columns.
    existing_columns = [column for column in metrics.columns if column in columns_to_drop]
    
    if existing_columns:
        metrics.drop(columns=existing_columns, inplace=True)
    
    return metrics


In [12]:
def format_metrics(metrics: pd.DataFrame) -> pd.DataFrame:
    """
    Format the quality metrics DataFrame.

    Every column except 'Iteration' is converted to text: values are rendered as percentages with two decimals and
    missing values as empty strings. 'Iteration' is cast to int. The DataFrame is modified in place and also
    returned.

    :param metrics: DataFrame containing the quality metrics.
    :return: Formatted DataFrame; an empty DataFrame is returned unchanged.
    """
    if metrics.empty:
        return metrics

    def as_percentage(value) -> str:
        """Render one metric value as percentage text."""
        return '{:.2f}%'.format(value) if pd.notnull(value) else ""

    # Series.map is used per column because DataFrame.applymap is deprecated in recent pandas versions.
    for column in metrics.columns.drop('Iteration'):
        metrics[column] = metrics[column].map(as_percentage)

    metrics['Iteration'] = metrics['Iteration'].astype(int)

    return metrics


In [13]:
def get_input() -> Tuple[pd.DataFrame, str, str, dict, Tokenizer]:
    """
    Prompt the user to input the path to the folder containing the repaired logs as well as the complete log with 
    corresponding name and the tokenizer for the case IDs, and retrieve the model configuration if available.

    The folder is chosen via a dialog when the DISPLAY environment variable is set, otherwise via console input.
    The tokenizer is only asked for when a model configuration file exists.

    :return: A tuple containing the complete log DataFrame, the name of the complete log file, the path to the folder 
     containing the repaired logs after each iteration, and a dictionary representing the model configuration if 
     available, otherwise an empty dictionary, and a Tokenizer object representing the tokenizer if available,
     otherwise a Tokenizer object initialized with None. If the complete log does not exist, an empty DataFrame,
     empty strings, an empty dictionary and a Tokenizer object initialized with None are returned.
    :raises ValueError: If no folder is selected or entered.
    """
    complete_log_path = get_file_path("preprocessed complete log")

    if not os.path.exists(complete_log_path):
        return pd.DataFrame(), "", "", {}, Tokenizer(None)

    complete_log = pd.read_csv(complete_log_path)
    print("CSV file successfully read.")
    log_name = extract_log_name(complete_log_path)

    if "DISPLAY" in os.environ:
        root = tk.Tk()
        root.withdraw()

        try:
            folder_path = filedialog.askdirectory(
                title="Select the folder that contains the repaired logs after each iteration")
        finally:
            # Release the hidden root window; otherwise it stays alive for the rest of the session.
            root.destroy()

        if not folder_path:
            raise ValueError("Error: No file selected.")
    else:
        folder_path = input("Enter the path to the folder that contains the repaired logs after each iteration: ")

        if not folder_path:
            raise ValueError("Error: No file selected.")

        # Paths pasted from a file manager are often wrapped in double quotes.
        folder_path = folder_path.strip('"')

    print("Folder path successfully read.")
    
    configuration_path = get_file_path("model configuration")
    
    if not os.path.exists(configuration_path):
        return complete_log, log_name, folder_path, {}, Tokenizer(None)
    
    # SECURITY: pickle.load executes arbitrary code embedded in the file - only open trusted configuration files.
    with open(configuration_path, 'rb') as file:
        model_configuration = pickle.load(file)
        print("Model configuration file successfully read.")
    
    # DataFrame values are converted to plain dictionaries; all other values are kept as they are.
    model_configuration = {key: value.to_dict() if isinstance(value, pd.DataFrame) else value for key, value in
                           model_configuration.items()}
    
    tokenizer_path = get_file_path("case ID tokenizer")
    
    if not os.path.exists(tokenizer_path):
        return complete_log, log_name, folder_path, model_configuration, Tokenizer(None)
    
    tokenizer = Tokenizer.from_file(tokenizer_path)
    print("Tokenizer successfully read.")
    
    return complete_log, log_name, folder_path, model_configuration, tokenizer


In [14]:
def perform_evaluation() -> None:
    """
    Perform the evaluation of the repaired logs.

    The repaired logs of all iterations are evaluated first. The elusive log is then completed by three
    alternative approaches (random, logistic regression, heuristic), each of which is evaluated against the same
    baseline. Finally all metric tables are processed, saved and printed.

    Nothing is evaluated when no folder was provided or no repaired log was found in it.
    """
    complete_log, log_name, folder_path, model_configuration, tokenizer = get_input()
    
    if not folder_path:
        return
    
    metrics, elusive_log = evaluate_repaired_logs(folder_path, log_name, complete_log, model_configuration)
    
    if elusive_log.empty:
        # Without a repaired log there is neither a baseline row nor an elusive log to complete.
        print("No repaired logs found; nothing to evaluate.")
        return
    
    # Each approach gets its own copy: the fill functions may modify and return the frame they receive, so a
    # shared frame would make the three resulting logs one and the same object.
    random_log = randomize_missing_case_ids(elusive_log.copy(), tokenizer, log_name)
    random_metrics = evaluate_other_log(metrics, random_log, complete_log, model_configuration)
    random_metrics = format_consistency_metrics(random_metrics, model_configuration)
    
    logistic_log = fill_missing_case_ids_logreg(complete_log, elusive_log.copy(), log_name)
    logistic_metrics = evaluate_other_log(metrics, logistic_log, complete_log, model_configuration)
    logistic_metrics = format_consistency_metrics(logistic_metrics, model_configuration)
    
    heuristic_log = fill_missing_case_ids_heuristic(elusive_log.copy(), model_configuration, log_name)
    heuristic_metrics = evaluate_other_log(metrics, heuristic_log, complete_log, model_configuration)
    heuristic_metrics = format_consistency_metrics(heuristic_metrics, model_configuration)
    
    process_metrics(metrics, log_name)
    process_metrics(random_metrics, log_name, 'randomized', random_log)
    process_metrics(logistic_metrics, log_name, 'logistic regression', logistic_log)
    process_metrics(heuristic_metrics, log_name, 'heuristic', heuristic_log)


In [15]:
def print_metrics(metrics: pd.DataFrame) -> None:
    """
    Print the evaluation metrics as a sequence of grid tables.

    The first column is repeated in every table and is followed by up to four of the remaining columns, so wide
    metric frames are split into several narrow tables.

    :param metrics: DataFrame containing the evaluation metrics.
    """
    total_columns = len(metrics.columns)
    
    # Walk over the non-leading columns in chunks of four.
    for chunk_start in range(1, total_columns, 4):
        chunk_end = min(chunk_start + 4, total_columns)
        selected_positions = [0, *range(chunk_start, chunk_end)]
        print(tabulate(metrics.iloc[:, selected_positions], headers='keys', tablefmt='grid', showindex=False))


In [16]:
def process_metrics(metrics: pd.DataFrame, log_name: str, model_name: str = 'transformer', 
                    log: Union[pd.DataFrame, None] = None) -> None:
    """
    Process the evaluation metrics and save them to a CSV file.

    The elusive and proportional metrics are added, the values are formatted, and the result is saved and printed
    under a heading with the upper-cased model name.
    
    :param metrics: DataFrame containing the evaluation metrics.
    :param log_name: Name of the log file.
    :param model_name: Name of the model used for evaluation. Default is 'transformer'.
    :param log: DataFrame containing the log. If omitted, an empty DataFrame is used.
    """
    # A fresh frame per call instead of a default object that is created once and shared between all calls.
    if log is None:
        log = pd.DataFrame()
    
    metrics = add_elusive_equivalents(metrics)
    metrics = add_proportional_equivalents(metrics)
    metrics = format_metrics(metrics)
    save_metrics(metrics, log_name, model_name, log)
    print("\n=== " + model_name.upper() + " ===\n")
    print_metrics(metrics)


In [17]:
def randomize_missing_case_ids(elusive_log: pd.DataFrame, tokenizer: Tokenizer, log_name: str) -> pd.DataFrame:
    """
    Fill the missing Case ID values of the elusive log with randomly drawn case ID tokens.

    If a non-empty file exists at ``evaluation/<log_name>/randomized/randomized_<log_name>.csv``
    it is read and returned instead of drawing new values. Otherwise a 'Determined Case ID'
    column is added to ``elusive_log`` in place (copied from 'Original Case ID') and every
    missing entry is replaced by a token drawn with replacement from the tokenizer vocabulary,
    excluding the special tokens.

    :param elusive_log: Log containing missing Case ID values. Modified in place.
    :param tokenizer: Tokenizer object whose vocabulary supplies the candidate case IDs.
    :param log_name: Name of the log file.
    :return: Log with missing Case ID values randomized (the cached log if one was found,
             otherwise the same object as ``elusive_log``).
    :raises ValueError: If values are missing but the vocabulary contains only special tokens.
    """
    randomized_file = f"evaluation/{log_name}/randomized/randomized_{log_name}.csv"

    # Reuse a previously saved randomized log so repeated evaluations share one baseline.
    if os.path.exists(randomized_file) and os.path.getsize(randomized_file) > 0:
        log = pd.read_csv(randomized_file)
        print("Randomized log successfully read.")
        return log

    elusive_log['Determined Case ID'] = elusive_log['Original Case ID']
    missing_indices = elusive_log['Determined Case ID'].isna()

    if not missing_indices.any():
        return elusive_log

    special_tokens = {"[UNK]", "[PAD]", "[SOS]", "[EOS]", "[NONE]"}
    # Sort the candidates: set iteration order of strings differs between interpreter runs,
    # which would make the draw irreproducible even with a seeded NumPy RNG.
    eligible_tokens = sorted(set(tokenizer.get_vocab()) - special_tokens)

    if not eligible_tokens:
        raise ValueError("Tokenizer vocabulary does not contain any eligible tokens.")

    # np.random.choice returns exactly `size` items, so no length check is needed afterwards.
    random_tokens = np.random.choice(eligible_tokens, size=int(missing_indices.sum()), replace=True)

    # The tokens are strings; cast explicitly so that a numeric column is not upcast
    # implicitly by the assignment below (implicit upcasting is deprecated in pandas).
    elusive_log['Determined Case ID'] = elusive_log['Determined Case ID'].astype(object)
    elusive_log.loc[missing_indices, 'Determined Case ID'] = random_tokens

    print("Randomized missing Case ID values.")

    return elusive_log


In [18]:
def save_metrics(metrics: pd.DataFrame, log_name: str, model_name: str, log: pd.DataFrame) -> None:
    """
    Save evaluation metrics to a CSV file and save the log to a CSV file if applicable.

    Files are written to ``evaluation/<log_name>/<model_name>/`` (spaces in the model name
    are replaced by underscores; the folder is created if needed):

    * ``metrics_<YYYYMMDD_HHMMSS>.csv`` with the metrics, stamped with the current UTC time;
    * ``<model_name>_<log_name>.csv`` with the log, only for the 'randomized',
      'logistic_regression' and 'heuristic' models.

    :param metrics: DataFrame containing evaluation metrics.
    :param log_name: Name of the log file.
    :param model_name: Name of the model used for evaluation.
    :param log: DataFrame containing the log. Only written for the models listed above.
    """
    # Local import: the file's import block only brings in `datetime` itself.
    from datetime import timezone

    model_name = model_name.replace(' ', '_')
    folder_name = f"evaluation/{log_name}/{model_name}"
    os.makedirs(folder_name, exist_ok=True)

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now" formats identically.
    current_time = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
    metrics.to_csv(os.path.join(folder_name, f'metrics_{current_time}.csv'), index=False)

    # These models persist their log under a fixed name with no timestamp
    # (randomize_missing_case_ids, for example, looks for randomized_<log_name>.csv).
    if model_name in ('randomized', 'logistic_regression', 'heuristic'):
        log.to_csv(os.path.join(folder_name, f'{model_name}_{log_name}.csv'), index=False)


## review_example_large


### Elusiveness: 10%, Input: Complete, Threshold: 0%, Rule Checking: No


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_expert_single_continuous/review_example_large.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_expert_single_continuous/review_example_large/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_expert_single_continuous/review_example_large/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_expert_single_continuous/review_example_large/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized missing Case ID values.
Filled missing Case ID values using logistic regression.
Set 99 determined Case ID values based on the input for 'Start Activity'.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 90.00%         | 0.00%                | 90.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 100.00%        | 100.00%              | 94.14%     | 41.39%           |
+-------------+----------------+----------------------+------------+------------------+
+-------------+--------------------+--------------------------+--------------------+--------------------------+
|   Iteration | Factual Accuracy   | Elus. Factual Accuracy   | Overall Accuracy   | Elus. Overall Accuracy   |
|           0 | 0.00%          

### Elusiveness: 10%, Input: Complete, Threshold: 0%, Rule Checking: Both


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_expert_single_continuous/review_example_large.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_expert_single_continuous/review_example_large/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_expert_single_continuous/review_example_large/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_expert_single_continuous/review_example_large/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized log successfully read.
Logistic regression log successfully read.
Heuristic log successfully read.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 90.00%         | 0.00%                | 90.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 98.85%         | 88.48%               | 94.03%     | 40.29%           |
+-------------+----------------+----------------------+------------+------------------+
|           2 | 99.63%         | 96.29%               | 94.29%     | 42.93%           |
+-------------+----------------+----------------------+------------+------------------+
|           3 | 99.82%         | 98.16%               | 94.34%     | 43.44%           |
+-------------+----------------+---------------

### Elusiveness: 10%, Input: Complete, Threshold: 50%, Rule Checking: No


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_expert_single_continuous/review_example_large.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_expert_single_continuous/review_example_large/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_expert_single_continuous/review_example_large/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_expert_single_continuous/review_example_large/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized log successfully read.
Logistic regression log successfully read.
Heuristic log successfully read.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 90.00%         | 0.00%                | 90.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 90.87%         | 8.69%                | 90.80%     | 8.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           2 | 91.30%         | 13.03%               | 91.19%     | 11.91%           |
+-------------+----------------+----------------------+------------+------------------+
|           3 | 91.62%         | 16.20%               | 91.46%     | 14.62%           |
+-------------+----------------+---------------

### Elusiveness: 10%, Input: Complete, Threshold: 50%, Rule Checking: Both


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_expert_single_continuous/review_example_large.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_expert_single_continuous/review_example_large/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_expert_single_continuous/review_example_large/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_expert_single_continuous/review_example_large/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized log successfully read.
Logistic regression log successfully read.
Heuristic log successfully read.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 90.00%         | 0.00%                | 90.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 90.91%         | 9.10%                | 90.80%     | 8.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           2 | 91.37%         | 13.72%               | 91.22%     | 12.20%           |
+-------------+----------------+----------------------+------------+------------------+
|           3 | 91.67%         | 16.75%               | 91.49%     | 14.93%           |
+-------------+----------------+---------------

### Elusiveness: 20%, Input: Complete, Threshold: 0%, Rule Checking: No


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_expert_single_continuous/review_example_large.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_expert_single_continuous/review_example_large/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_expert_single_continuous/review_example_large/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_expert_single_continuous/review_example_large/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized missing Case ID values.
Filled missing Case ID values using logistic regression.
Set 393 determined Case ID values based on the input for 'Start Activity'.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 80.00%         | 0.00%                | 80.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 100.00%        | 100.00%              | 88.22%     | 41.10%           |
+-------------+----------------+----------------------+------------+------------------+
+-------------+--------------------+--------------------------+--------------------+--------------------------+
|   Iteration | Factual Accuracy   | Elus. Factual Accuracy   | Overall Accuracy   | Elus. Overall Accuracy   |
|           0 | 0.00%         

### Elusiveness: 10%, Input: No Expert, Threshold: 0%, Rule Checking: No


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_single_continuous/review_example_large.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_single_continuous/review_example_large/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_single_continuous/review_example_large/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_single_continuous/review_example_large/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized missing Case ID values.
Filled missing Case ID values using logistic regression.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 90.00%         | 0.00%                | 90.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 100.00%        | 100.00%              | 93.62%     | 36.22%           |
+-------------+----------------+----------------------+------------+------------------+
+-------------+--------------------+--------------------------+--------------------+--------------------------+
|   Iteration | Factual Accuracy   | Elus. Factual Accuracy   | Overall Accuracy   | Elus. Overall Accuracy   |
|           0 | 0.00%              | 0.00%                    | 90.00%             | 0.00%               

### Elusiveness: 10%, Input: No Resource, Threshold: 0%, Rule Checking: No


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/continuous_multiple_expert/review_example_large.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/continuous_multiple_expert/review_example_large/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/continuous_multiple_expert/review_example_large/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/continuous_multiple_expert/review_example_large/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized missing Case ID values.
Filled missing Case ID values using logistic regression.
Set 99 determined Case ID values based on the input for 'Start Activity'.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 90.00%         | 0.00%                | 90.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 100.00%        | 100.00%              | 93.84%     | 38.44%           |
+-------------+----------------+----------------------+------------+------------------+
+-------------+--------------------+--------------------------+--------------------+--------------------------+
|   Iteration | Factual Accuracy   | Elus. Factual Accuracy   | Overall Accuracy   | Elus. Overall Accuracy   |
|           0 | 0.00%          

## renting_log_low


### Elusiveness: 10%, Input: Complete, Threshold: 0%, Rule Checking: Both


In [19]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_expert_single_continuous/renting_log_low.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_expert_single_continuous/renting_log_low/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_expert_single_continuous/renting_log_low/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_expert_single_continuous/renting_log_low/tokenizer_Case ID.json


Tokenizer successfully read.
Randomized missing Case ID values.
Filled missing Case ID values using logistic regression.
Set 17 determined Case ID values based on the input for 'Start Activity'.

=== TRANSFORMER ===

+-------------+----------------+----------------------+------------+------------------+
|   Iteration | Completeness   | Elus. Completeness   | Accuracy   | Elus. Accuracy   |
|           0 | 90.00%         | 0.00%                | 90.00%     | 0.00%            |
+-------------+----------------+----------------------+------------+------------------+
|           1 | 98.72%         | 87.20%               | 90.70%     | 7.03%            |
+-------------+----------------+----------------------+------------+------------------+
|           2 | 99.41%         | 94.07%               | 90.75%     | 7.50%            |
+-------------+----------------+----------------------+------------+------------------+
|           3 | 99.60%         | 96.04%               | 90.76%     | 7.60%     

## Hospital Billing - Event Log

### Elusiveness: 10%, Input: Complete, Threshold: 0%, Rule Checking: Both


In [None]:
perform_evaluation()


Enter the path to the file that contains the preprocessed complete log:  logs/preprocessed/multiple_discrete_expert_single_continuous/Hospital Billing - Event Log.csv


CSV file successfully read.


Enter the path to the folder that contains the repaired logs after each iteration:  repaired_logs/multiple_discrete_expert_single_continuous/Hospital Billing - Event Log/iterations


Folder path successfully read.


Enter the path to the file that contains the model configuration:  model_configurations/multiple_discrete_expert_single_continuous/Hospital Billing - Event Log/config.pkl


Model configuration file successfully read.


Enter the path to the file that contains the case ID tokenizer:  tokenizers/multiple_discrete_expert_single_continuous/Hospital Billing - Event Log/tokenizer_Case ID.json


Tokenizer successfully read.
