In [2]:
import glob
import io
import logging
import os
import warnings
import json
import re

import pandas as pd
from definitions import RESULTS_DIR

# Results of the paper

In [45]:
# Layer-freezing configuration per (log, setting) pair, entered as inline CSV
# under the "Results of the paper" section.
# `setting` names the log the model was pre-trained on (Helpdesk or RTFM).
# `freezed_layers` is a quoted field, so pandas loads it as a plain string
# such as "[0,1,2]" rather than as a Python list.
layer_freezing_csv = """
log,setting,freezed_layers
BPI12,Helpdesk,"[0,1,2,3]"
BPI12,RTFM,"[0,1]"
BPI13 Closed,Helpdesk,"[0,1,3]"
BPI13 Closed,RTFM,"[0]"
BPI13 Incidents,Helpdesk,"[0,1,2]"
BPI13 Incidents,RTFM,"[0]"
BPI13 Open,Helpdesk,"[0,1,2]"
BPI13 Open,RTFM,"[0]"
BPIC 15,Helpdesk,"[0,2,3]"
BPIC 15,RTFM,"[0,1,2,3]"
BPI17,Helpdesk,"[0,1]"
BPI17,RTFM,"[0,1]"
RTFM,Helpdesk,"[0,2,3]"
Sepsis,Helpdesk,"[1,2,3]"
Sepsis,RTFM,"[0,1]"
Helpdesk,RTFM,"[0,1]"
"""

# The leading blank line of the literal is skipped by read_csv, so the first
# non-empty line becomes the header.
layer_df = pd.read_csv(io.StringIO(layer_freezing_csv))
layer_df

Unnamed: 0,log,setting,freezed_layers
0,BPI12,Helpdesk,"[0,1,2,3]"
1,BPI12,RTFM,"[0,1]"
2,BPI13 Closed,Helpdesk,"[0,1,3]"
3,BPI13 Closed,RTFM,[0]
4,BPI13 Incidents,Helpdesk,"[0,1,2]"
5,BPI13 Incidents,RTFM,[0]
6,BPI13 Open,Helpdesk,"[0,1,2]"
7,BPI13 Open,RTFM,[0]
8,BPIC 15,Helpdesk,"[0,2,3]"
9,BPIC 15,RTFM,"[0,1,2,3]"


In [46]:
# DLS and MAE scores per log and model, entered as inline CSV under the
# "Results of the paper" section. Each metric has one column per setting:
# Regular (no pre-training), Helpdesk and RTFM (pre-trained on that log).
# A "-" marks a combination without a score (a log is not used as its own
# pre-training source).
scores_csv = """
Log,Model,DLS_Regular,DLS_Helpdesk,DLS_RTFM,MAE_Regular,MAE_Helpdesk,MAE_RTFM
BPI12,GPT,0.1606,0.1586,0.1509,77.19,67.15,54.83
BPI12,LSTM,0.0622,0.1377,0.1188,101.78,38.92,68.44
BPI13 Closed,GPT,0.4712,0.6850,0.7289,585.01,111.47,97.29
BPI13 Closed,LSTM,0.5014,0.6959,0.6976,154.69,95.70,97.19
BPI13 Incidents,GPT,0.2445,0.3082,0.3021,76.94,26.83,23.19
BPI13 Incidents,LSTM,0.2652,0.4893,0.4902,213.14,43.26,19.39
BPI13 Open,GPT,0.3450,0.5157,0.4171,105.17,52.55,42.79
BPI13 Open,LSTM,0.4500,0.4918,0.5341,47.70,47.31,49.81
BPIC 15,GPT,0.2554,0.0213,0.0169,285.35,211.66,164.65
BPIC 15,LSTM,0.0239,0.0332,0.0340,2357.00,331.32,827.97
BPI17,GPT,0.1428,0.1424,0.1423,36.85,32.50,57.23
BPI17,LSTM,0.0549,0.0598,0.0460,36.44,24.65,35.72
Sepsis,GPT,0.0876,0.1042,0.1030,1078.09,82.99,142.14
Sepsis,LSTM,0.0970,0.1536,0.1992,214.69,33.18,35.87
RTFM,GPT,0.8250,0.8275,-,158.02,156.72,-
RTFM,LSTM,0.5860,0.7564,-,535.44,183.19,-
Helpdesk,GPT,0.8479,-,0.8473,4.13,-,4.15
Helpdesk,LSTM,0.8556,-,0.8599,4.01,-,4.17
"""

# Declare "-" as a missing-value marker. Without it, every column that contains
# a "-" is loaded as strings (object dtype) instead of floats, which breaks any
# later numeric comparison or aggregation on the Helpdesk/RTFM scores.
scores_df = pd.read_csv(io.StringIO(scores_csv), na_values=['-'])
scores_df

Unnamed: 0,Log,Model,DLS_Regular,DLS_Helpdesk,DLS_RTFM,MAE_Regular,MAE_Helpdesk,MAE_RTFM
0,BPI12,GPT,0.1606,0.1586,0.1509,77.19,67.15,54.83
1,BPI12,LSTM,0.0622,0.1377,0.1188,101.78,38.92,68.44
2,BPI13 Closed,GPT,0.4712,0.6850,0.7289,585.01,111.47,97.29
3,BPI13 Closed,LSTM,0.5014,0.6959,0.6976,154.69,95.70,97.19
4,BPI13 Incidents,GPT,0.2445,0.3082,0.3021,76.94,26.83,23.19
5,BPI13 Incidents,LSTM,0.2652,0.4893,0.4902,213.14,43.26,19.39
6,BPI13 Open,GPT,0.345,0.5157,0.4171,105.17,52.55,42.79
7,BPI13 Open,LSTM,0.45,0.4918,0.5341,47.7,47.31,49.81
8,BPIC 15,GPT,0.2554,0.0213,0.0169,285.35,211.66,164.65
9,BPIC 15,LSTM,0.0239,0.0332,0.0340,2357.0,331.32,827.97


In [47]:
# Convert the wide score table into long format: every (log, model) row is
# expanded into one record per setting, reading the DLS_<setting> and
# MAE_<setting> columns that belong to that setting.
rows = [
    {
        'log': record['Log'],
        'model': record['Model'],
        'setting': setting,
        'dls': record[f'DLS_{setting}'],
        'mae': record[f'MAE_{setting}'],
    }
    for record in (row.to_dict() for _, row in scores_df.iterrows())
    for setting in ['Regular', 'Helpdesk', 'RTFM']
]
scores_stacked_df = pd.DataFrame(rows)
scores_stacked_df

Unnamed: 0,log,model,setting,dls,mae
0,BPI12,GPT,Regular,0.1606,77.19
1,BPI12,GPT,Helpdesk,0.1586,67.15
2,BPI12,GPT,RTFM,0.1509,54.83
3,BPI12,LSTM,Regular,0.0622,101.78
4,BPI12,LSTM,Helpdesk,0.1377,38.92
5,BPI12,LSTM,RTFM,0.1188,68.44
6,BPI13 Closed,GPT,Regular,0.4712,585.01
7,BPI13 Closed,GPT,Helpdesk,0.6850,111.47
8,BPI13 Closed,GPT,RTFM,0.7289,97.29
9,BPI13 Closed,LSTM,Regular,0.5014,154.69


In [48]:
# Translate the log and setting names used in the tables above into the
# names used in the reproduction part of this notebook. Values without an
# entry in a mapping are kept unchanged.
log_mapping = {
    'BPIC 15': 'BPI15',
    'RTFM': 'RTFMP',
}
setting_mapping = {
    'Regular': 'base',
    'Helpdesk': 'helpdesk',
    'RTFM': 'RTFMP',
}

# Left-join the freezed layers onto the stacked scores. The 'Regular' rows have
# no counterpart in layer_df, so they are kept with an empty freezed_layers.
# The join runs on the original names; renaming happens afterwards.
paper_results_df = (
    scores_stacked_df
    .merge(layer_df, how='left', on=['log', 'setting'])
    .assign(
        log=lambda df: df['log'].map(log_mapping).fillna(df['log']),
        setting=lambda df: df['setting'].map(setting_mapping).fillna(df['setting']),
    )
)
paper_results_df.to_csv('paper_results.csv', index=False)
paper_results_df

Unnamed: 0,log,model,setting,dls,mae,freezed_layers
0,BPI12,GPT,base,0.1606,77.19,
1,BPI12,GPT,helpdesk,0.1586,67.15,"[0,1,2,3]"
2,BPI12,GPT,RTFMP,0.1509,54.83,"[0,1]"
3,BPI12,LSTM,base,0.0622,101.78,
4,BPI12,LSTM,helpdesk,0.1377,38.92,"[0,1,2,3]"
5,BPI12,LSTM,RTFMP,0.1188,68.44,"[0,1]"
6,BPI13 Closed,GPT,base,0.4712,585.01,
7,BPI13 Closed,GPT,helpdesk,0.6850,111.47,"[0,1,3]"
8,BPI13 Closed,GPT,RTFMP,0.7289,97.29,[0]
9,BPI13 Closed,LSTM,base,0.5014,154.69,


# Reproduction of the results of the paper

In [105]:
# The layer freezing combinations used for reproducing the results.
# Each entry is a list of layer indices given as strings. The ORDER matters:
# later cells look entries up by position (section index in a training
# statistics file, or the index parsed from a score file name).
LAYER_FREEZING_COMBINATIONS= [['0', '1', '2', '3'], ['0', '1', '2'],
                              ['1', '2', '3'], ['0', '2', '3'],
                              ['0', '1', '3'], ['0', '1'], ['0']]
# The layer freezing combinations used for the additional experiments
# (also looked up by position).
ADDITIONAL_LAYER_FREEZING_COMBINATIONS = [['0', '1', '2'], ['0']]

# File names of the event logs used in the main experiments.
ALL_LOGS = [
    'BPI_Challenge_2013_closed_problems.xes.gz', 
    'BPI_Challenge_2012.xes.gz',
    'BPI_Challenge_2013_incidents.xes.gz', 
    'BPI_Challenge_2013_open_problems.xes.gz',
    'BPI%20Challenge%202017.xes.gz',
    'BPIC15_1.xes',
    'Road_Traffic_Fine_Management_Process.xes.gz', 
    'Sepsis%20Cases%20-%20Event%20Log.xes.gz',
    'helpdesk.csv'
]
# Log that only appears in the additional experiments.
ADDITIONAL_LOGS = [
    'RTFMP_filtered.xes'
]
# Logs used as pre-training source. The order matters: the evaluation cell maps
# BASE_LOGS[0] to the setting 'RTFMP' and BASE_LOGS[1] to 'helpdesk'.
BASE_LOGS = [
    'Road_Traffic_Fine_Management_Process.xes.gz', 
    'helpdesk.csv'
]
# Pre-training sources of the additional experiments; index 0 is mapped to the
# setting 'BPIC15' and index 1 to 'RTFMP_filtered' by the evaluation cell.
# NOTE(review): the name is misspelled ("ADDITONAL"); it is referenced with this
# spelling elsewhere in the notebook, so a rename must update all usages.
ADDITONAL_BASE_LOGS = [
    'BPIC15_1.xes',
    'RTFMP_filtered.xes'
]

# Architecture names; they double as the sub-directory names below RESULTS_DIR.
ARCHITECTURES = ['rnn', 'GPT']

## Collection of the training statistics
As training the models takes a long time, we did not train all models in a single run. The training statistics are stored in the `training_figures.csv` files in the `results/rnn` and `results/GPT` folders. The following code collects the training statistics of the models that have been trained. 

In [106]:
# Timestamps of the training runs, grouped by architecture and setting.
# Each list is passed to get_training_stat_file, which searches for files named
# "*training_figures_<timestamp>.csv". A list holds several timestamps when the
# models of one group were trained in more than one run.
# Note: the rnn and GPT RTFMP_filtered runs share the same timestamp value.
rnn_base_training_timestamps            = ['202312090718', '202312120826']
gpt_base_training_timestamps            = ['202312131309', '202312121708']
rnn_pre_trained_RTFMP_timestamps        = ['202312151300', '202312161146']
rnn_pre_trained_helpdesk_timestamps     = ['202312161442']
gpt_pre_trained_RTFMP_timestamps        = ['202312171427', '202312171342']
gpt_pre_trained_helpdesk_timestamps     = ['202312181009', '202312182038']
rnn_pre_trained_BPIC15_timestamps       = ['202401021416']
rnn_pre_trained_RTFMP_filtered_timestamps = ['202401021913']
gpt_pre_trained_BPIC15_timestamps       = ['202401021425']
gpt_pre_trained_RTFMP_filtered_timestamps = ['202401021913']

In [107]:
# Next we define a function that returns the path to the training statistics file for a given model and log based on the timestamps.
# The function raises an error if no file is found or multiple files are found.
def get_training_stat_file(architecture, log, training_timestamps):
    """Return the path of the single training statistics file for a model and log.

    All files matching
    ``{RESULTS_DIR}/{architecture}/*/*training_figures_{timestamp}.csv`` are
    collected for every timestamp in ``training_timestamps``; of those, only the
    paths that contain ``log`` as a substring are kept.

    Args:
        architecture: Name of the architecture sub-directory below RESULTS_DIR.
        log: File name of the event log; matched as a substring of the path.
        training_timestamps: Iterable of timestamp strings of the training runs
            to search in.

    Returns:
        The path of the one matching file.

    Raises:
        ValueError: If no file or more than one file matches.
    """
    files = []
    for training_timestamp in training_timestamps:
        files += glob.glob(f'{RESULTS_DIR}/{architecture}/*/*training_figures_{training_timestamp}.csv')

    matching_files = [f for f in files if log in f]

    # Report the complete list of timestamps: the loop variable would only hold
    # the last one, and would not exist at all for an empty list.
    searched_timestamps = list(training_timestamps)
    if len(matching_files) == 0:
        raise ValueError(
            f'No training stat file found for {architecture} and {log} '
            f'for timestamps {searched_timestamps}'
        )
    if len(matching_files) > 1:
        raise ValueError(
            f'Multiple training stat files found for {architecture} and {log} '
            f'for timestamps {searched_timestamps}: {matching_files}'
        )
    return matching_files[0]

In [108]:
# Here we collect the training statistics files for all models and logs that have been trained and store them in a dictionary.
# Nested lookup of the statistics files:
# training_stat_files[log][architecture][setting] -> file path
training_stat_files = {}
for log in ALL_LOGS:
    training_stat_files[log] = {'rnn': {}, 'GPT': {}}
    rnn_files = training_stat_files[log]['rnn']
    gpt_files = training_stat_files[log]['GPT']

    rnn_files['base'] = get_training_stat_file('rnn', log, rnn_base_training_timestamps)
    gpt_files['base'] = get_training_stat_file('GPT', log, gpt_base_training_timestamps)

    # For BPI17 only the base files are collected
    if log == 'BPI%20Challenge%202017.xes.gz':
        continue

    # For BPI12 no GPT transfer files and no additional-experiment files are collected
    is_bpi12 = log == 'BPI_Challenge_2012.xes.gz'

    # Pre-trained on RTFMP (not for the RTFMP log itself)
    if log != 'Road_Traffic_Fine_Management_Process.xes.gz':
        rnn_files['RTFMP'] = get_training_stat_file('rnn', log, rnn_pre_trained_RTFMP_timestamps)
        if not is_bpi12:
            gpt_files['RTFMP'] = get_training_stat_file('GPT', log, gpt_pre_trained_RTFMP_timestamps)

    # Pre-trained on helpdesk (not for the helpdesk log itself)
    if log != 'helpdesk.csv':
        rnn_files['helpdesk'] = get_training_stat_file('rnn', log, rnn_pre_trained_helpdesk_timestamps)
        if not is_bpi12:
            gpt_files['helpdesk'] = get_training_stat_file('GPT', log, gpt_pre_trained_helpdesk_timestamps)

    # Additional experiment: pre-trained on BPIC15
    if log != 'BPIC15_1.xes' and not is_bpi12:
        rnn_files['BPIC15'] = get_training_stat_file('rnn', log, rnn_pre_trained_BPIC15_timestamps)
        gpt_files['BPIC15'] = get_training_stat_file('GPT', log, gpt_pre_trained_BPIC15_timestamps)

    # Additional experiment: pre-trained on the filtered RTFMP log
    if log != 'RTFMP_filtered.xes' and not is_bpi12:
        rnn_files['RTFMP_filtered'] = get_training_stat_file('rnn', log, rnn_pre_trained_RTFMP_filtered_timestamps)
        gpt_files['RTFMP_filtered'] = get_training_stat_file('GPT', log, gpt_pre_trained_RTFMP_filtered_timestamps)

# The additional log only has base models; their files are found via the
# RTFMP_filtered timestamps.
log = ADDITIONAL_LOGS[0]
training_stat_files[log] = {
    'rnn': {'base': get_training_stat_file('rnn', log, rnn_pre_trained_RTFMP_filtered_timestamps)},
    'GPT': {'base': get_training_stat_file('GPT', log, gpt_pre_trained_RTFMP_filtered_timestamps)},
}

## Extraction of the training results
In the following we extract the training results from the training statistics files. The results are stored in a pandas DataFrame.
The available information included in the training statistics files is:
- `epoch`: The epoch number
- `elapsed_seconds`: The time in seconds training took for this epoch
- `training_loss_activity`: The training loss for the activity prediction
- `training_loss_time`: The training loss for the time prediction
- `training_loss`: The training loss for the combined activity and time prediction
- `validation_loss_activity`: The validation loss for the activity prediction
- `validation_loss_time`: The validation loss for the time prediction
- `validation_loss`: The validation loss for the combined activity and time prediction

We combine that information with the log, architecture and setting (base, RTFMP or helpdesk) and the freezed layers (only for the pre-trained models) in the DataFrame.

In [109]:
# First we define a function that extracts the training results from a training statistics file
from pandas.errors import ParserWarning


# Extract interesting information from training stat files
def extract_training_results(training_stats_file, pre_trained, additional_experiment):
    """Load the training statistics stored in ``training_stats_file``.

    A file of a base model holds a single CSV table. A file of a pre-trained
    model holds several CSV tables one after another, one per layer freezing
    combination; every table starts with its own header line, which is
    recognised by containing the word ``epoch``.

    Args:
        training_stats_file: Path of the statistics file.
        pre_trained: Whether the file belongs to a pre-trained model and has to
            be split into one table per section.
        additional_experiment: Whether the file belongs to the additional
            experiments. Only used to decide how many sections are expected
            before a warning is logged.

    Returns:
        A DataFrame if ``pre_trained`` is false. Otherwise a dict that maps the
        position of each section in the file (0, 1, ...) to its DataFrame.
    """
    # The files contain columns that do not have a header. This leads to a
    # warning when loading the file, which we ignore.
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=ParserWarning)

        if not pre_trained:
            return pd.read_csv(training_stats_file, index_col=False)

        # Load the file and determine the line number of each section header
        with open(training_stats_file, 'r') as f:
            lines = f.readlines()
        header_line_numbers = [i for i, line in enumerate(lines) if 'epoch' in line]

        # 7 and 2 are the lengths of LAYER_FREEZING_COMBINATIONS and
        # ADDITIONAL_LAYER_FREEZING_COMBINATIONS defined in this notebook.
        expected_sections = 2 if additional_experiment else 7
        if len(header_line_numbers) != expected_sections:
            logging.warning(f'{training_stats_file} may be incomplete!')

        # Every section ends where the next header starts; the last one at EOF.
        section_ends = header_line_numbers[1:] + [len(lines)]

        results = {}
        for i, (start, end) in enumerate(zip(header_line_numbers, section_ends)):
            # Parse exactly the lines of this section. Counting rows with
            # skiprows/nrows on the whole file breaks when sections are
            # separated by blank lines, because read_csv skips blank lines and
            # would read the next header as a data row.
            section = io.StringIO(''.join(lines[start:end]))
            results[i] = pd.read_csv(section, index_col=False)

    return results

In [116]:
# Iterate over all logs and extract training results
# Collect one DataFrame per (log, architecture, setting[, freezing combination])
# and tag each with the columns identifying where it came from.
training_result_dfs = []
for log in ALL_LOGS + ADDITIONAL_LOGS:
    for architecture in ARCHITECTURES:
        for setting in training_stat_files[log][architecture]:
            pre_trained = setting != 'base'
            additional_experiment = setting in ['BPIC15', 'RTFMP_filtered']
            extracted = extract_training_results(
                training_stat_files[log][architecture][setting],
                pre_trained,
                additional_experiment,
            )

            # Base models: a single table without freezed layers
            if not pre_trained:
                extracted['log'] = log
                extracted['architecture'] = architecture
                extracted['setting'] = setting
                training_result_dfs.append(extracted)
                continue

            # Pre-trained models: one table per layer freezing combination; the
            # position of the table in the file selects the combination.
            for i, results in extracted.items():
                results['log'] = log
                results['architecture'] = architecture
                results['setting'] = setting
                if additional_experiment:
                    results['freezed_layers'] = str(ADDITIONAL_LAYER_FREEZING_COMBINATIONS[i])
                elif setting in ['RTFMP', 'helpdesk']:
                    results['freezed_layers'] = str(LAYER_FREEZING_COMBINATIONS[i])
                else:
                    raise ValueError(f'Unknown setting {setting}')

                training_result_dfs.append(results)



In [117]:
# Stack all collected tables into one frame with a fresh 0..n-1 index.
# Base-model tables carry no 'freezed_layers' column, so that column is empty
# for their rows after the concatenation.
training_results_df = pd.concat(training_result_dfs, ignore_index=True)
# NOTE(review): unlike the other to_csv calls in this notebook, index=False is
# not passed here, so the row index is written as an unnamed first column.
training_results_df.to_csv('all_training_results.csv')
training_results_df

Unnamed: 0,datetime,epoch,training_loss_activity,training_loss_time,training_loss,validation_loss_activity,validation_loss_time,validation_loss,elapsed_seconds,log,architecture,setting,freezed_layers
0,202312090718,0,1.9248,0.0013,1.9260,1.8914,0.0011,1.8925,0.565,BPI_Challenge_2013_closed_problems.xes.gz,rnn,base,
1,202312090718,1,1.8428,0.0014,1.8442,1.7063,0.0020,1.7083,0.212,BPI_Challenge_2013_closed_problems.xes.gz,rnn,base,
2,202312090718,2,1.6531,0.0018,1.6549,1.5158,0.0012,1.5170,0.208,BPI_Challenge_2013_closed_problems.xes.gz,rnn,base,
3,202312090718,3,1.5426,0.0020,1.5446,1.4599,0.0012,1.4611,0.205,BPI_Challenge_2013_closed_problems.xes.gz,rnn,base,
4,202312090718,4,1.5979,0.0016,1.5995,1.4324,0.0010,1.4335,0.210,BPI_Challenge_2013_closed_problems.xes.gz,rnn,base,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49742,202401021913,206,0.5984,0.0003,0.5987,0.1147,0.0002,0.1149,8.900,RTFMP_filtered.xes,GPT,base,
49743,202401021913,207,0.5992,0.0003,0.5995,0.1149,0.0002,0.1151,6.269,RTFMP_filtered.xes,GPT,base,
49744,202401021913,208,0.5996,0.0003,0.5999,0.1150,0.0002,0.1152,6.678,RTFMP_filtered.xes,GPT,base,
49745,202401021913,209,0.5985,0.0003,0.5988,0.1150,0.0002,0.1152,6.231,RTFMP_filtered.xes,GPT,base,


## Collection of the evaluation results
The evaluation script generates the predictions for the test set and stores the predictions and the scores in the `results/ARCHITECTURE/Predictions` folder. The following code collects the scores for all models and logs that have been trained and stores them in a dataframe called `evaluation_results_df`.

In [112]:
# Gather all score files. The rnn and the GPT evaluation write their results
# under different file name prefixes, hence one pattern per architecture
# (rnn matches first, GPT matches afterwards).
score_files = [
    *glob.glob(f'{RESULTS_DIR}/rnn/Predictions/*/suffix_evaluation_result_dls_mae_*.json'),
    *glob.glob(f'{RESULTS_DIR}/GPT/Predictions/*/suffix_generation_result_*.json'),
]
len(score_files)

254

In [113]:
# Next, we define a function that extracts the architecture, log and setting from a score file path
def get_architecture_log_setting(score_file):
    """Split a score file path into architecture, log, setting and layer index.

    The architecture is the fourth-last path component and the log is the
    directory that directly contains the file. The setting and the layer index
    are parsed from the file name, which has the form
    ``suffix_(evaluation|generation)_result_[dls_mae_]<setting>_<log>[<index>].json``.

    Args:
        score_file: Path of a score file.

    Returns:
        Tuple ``(architecture, log, setting, layer_index)``; ``layer_index`` is
        the single character found between the square brackets, as a string.

    Raises:
        ValueError: If the file name does not have the expected form.
    """
    components = os.path.normpath(score_file).split(os.sep)
    architecture, log, file_name = components[-4], components[-2], components[-1]

    file_name_pattern = re.compile(
        r"suffix_(evaluation|generation)_result_(dls_mae_)?(.*\.[a-zA-Z]+)_(.*\.[a-zA-Z]+)\[([\d\w])\]\.json"
    )
    found = file_name_pattern.search(file_name)
    if found is None:
        raise ValueError(f'Could not parse {score_file}')

    # Group 3 is the setting, group 5 the character inside the brackets
    setting, layer_index = found.group(3, 5)
    return architecture, log, setting, layer_index

# Sanity check: show one score file path and the tuple parsed from it.
print(score_files[1])
get_architecture_log_setting(score_files[1])

C:\Users\Hannes\PycharmProjects\Transfer-Learning-for-Predictive-Process-Monitoring\results/rnn/Predictions\BPI%20Challenge%202017.xes.gz\suffix_evaluation_result_dls_mae_Road_Traffic_Fine_Management_Process.xes.gz_BPI%20Challenge%202017.xes.gz[1].json


('rnn',
 'BPI%20Challenge%202017.xes.gz',
 'Road_Traffic_Fine_Management_Process.xes.gz',
 '1')

In [114]:
# Build one record per score file with the identifying columns and the scores.
evaluation_results = []
for score_file in score_files:
    architecture, log, setting, layer_index = get_architecture_log_setting(score_file)

    # The scores are nested below the architecture and the setting
    with open(score_file, 'r') as f:
        score = json.load(f)[architecture][setting]

    # File names carry a 1-based combination index; a non-numeric index means
    # that there is none.
    layer_index = int(layer_index) - 1 if layer_index.isnumeric() else None

    # Determine the freezed layers and convert the setting to the same format
    # as in the training results. BASE_LOGS and ADDITONAL_BASE_LOGS each hold
    # exactly two entries, so "not the first" means "the second".
    if setting == log:
        freezed_layers = None
        setting = 'base'
    elif setting in BASE_LOGS:
        freezed_layers = str(LAYER_FREEZING_COMBINATIONS[layer_index])
        setting = 'RTFMP' if setting == BASE_LOGS[0] else 'helpdesk'
    elif setting in ADDITONAL_BASE_LOGS:
        freezed_layers = str(ADDITIONAL_LAYER_FREEZING_COMBINATIONS[layer_index])
        setting = 'BPIC15' if setting == ADDITONAL_BASE_LOGS[0] else 'RTFMP_filtered'
    else:
        raise ValueError(f'Unknown setting {setting}')

    evaluation_results.append({
        'architecture': architecture,
        'log': log,
        'setting': setting,
        'freezed_layers': freezed_layers,
        'dls': score['dls'],
        'mae': score['mae'],
        'mae_denormalised': score['mae_denormalised'],
        'nb_worst_situs': score['nb_worst_situs'],
    })
evaluation_results_df = pd.DataFrame(evaluation_results)

In [115]:
# Persist the collected evaluation scores (without the row index) and show them.
evaluation_results_df.to_csv('all_evaluation_results.csv', index=False)
evaluation_results_df

Unnamed: 0,architecture,log,setting,freezed_layers,dls,mae,mae_denormalised,nb_worst_situs
0,rnn,BPI%20Challenge%202017.xes.gz,base,,0.0691,0.7416,100.9505,223449
1,rnn,BPI%20Challenge%202017.xes.gz,RTFMP,"['0', '1', '2', '3']",0.0372,5.1177,696.6516,227572
2,rnn,BPIC15_1.xes,base,,0.0264,2.6185,2549.0971,9359
3,rnn,BPIC15_1.xes,helpdesk,"['0', '1', '2', '3']",0.0226,2.4174,2353.3574,9307
4,rnn,BPIC15_1.xes,helpdesk,"['0', '1', '2']",0.0323,1.8449,1795.9604,9226
...,...,...,...,...,...,...,...,...
249,GPT,Sepsis%20Cases%20-%20Event%20Log.xes.gz,RTFMP,"['0', '1']",0.0307,3.0372,1267.3097,2817
250,GPT,Sepsis%20Cases%20-%20Event%20Log.xes.gz,RTFMP,['0'],0.0307,1.5063,628.5306,2817
251,GPT,Sepsis%20Cases%20-%20Event%20Log.xes.gz,RTFMP_filtered,"['0', '1', '2']",0.0722,3.8692,1614.4840,2901
252,GPT,Sepsis%20Cases%20-%20Event%20Log.xes.gz,RTFMP_filtered,['0'],0.1253,0.1785,74.4924,2686
