In [1]:
import sys
from pathlib import Path
from joblib import load
import torch

# Prepend the six nearest ancestor directories ('..' up to six levels) to the
# module search path, presumably so the `reimplemented_approaches` package
# imported below resolves no matter how deep this notebook sits.
# The deepest ancestor ends up first, exactly as repeated insert(0, ...) would
# leave it; slice assignment mutates the existing sys.path list in place.
sys.path[:0] = ["/".join([".."] * levels_up) for levels_up in range(6, 0, -1)]

from reimplemented_approaches.proactive_conformance_checking.data_prep_split_encode import PrefixDataset
from reimplemented_approaches.proactive_conformance_checking.models import LSTMSeparateIDP
from reimplemented_approaches.proactive_conformance_checking.evaluation import PredictionResults, MetricsSep

In [2]:
# Input locations, relative to this notebook:
#   data_dir   - prepared Helpdesk datasets and encoders (kept as a plain str)
#   models_dir - trained per-label model files (a pathlib.Path)
models_dir = Path("../../training/Helpdesk/separate/")
data_dir = "../../data_preparation/Helpdesk/separate/"

print("Data dir:", data_dir)
print("Models dir:", models_dir)

Data dir: ../../data_preparation/Helpdesk/separate/
Models dir: ../../training/Helpdesk/separate


In [3]:
# Load the saved dataset splits. Only the third returned value is used here;
# the first two (presumably train/validation - confirm in PrefixDataset) are
# discarded. test_set_dict is keyed by deviation label.
_, _, test_set_dict = PrefixDataset.load_datasets(save_path=data_dir)

# Join with pathlib instead of `data_dir + "/encoders.pkl"`: data_dir already
# ends in "/", so plain concatenation produced ".../separate//encoders.pkl".
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only load encoder files produced by a trusted pipeline.
encoders = load(Path(data_dir) / "encoders.pkl")

deviations = encoders.get("deviations")

# Sorted so the labels are evaluated and reported in a deterministic order.
deviation_labels = sorted(test_set_dict.keys())

print(f"Deviation labels ({len(deviation_labels)}): {deviation_labels}")

Deviation labels (6): ["('>>', 'Assign seriousness')", "('Create SW anomaly', '>>')", "('Require upgrade', '>>')", "('Resolve ticket', '>>')", "('Take in charge ticket', '>>')", "('Wait', '>>')"]


In [4]:
# Run the evaluation on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Evaluating on", device)

Evaluating on cuda


In [5]:
# Evaluate one separately trained model per deviation label on that label's
# test set, collecting both metric results per label:
#   metrics_summary[label] = {"macro_dev": ..., "macro_no_dev": ...}
metrics_summary = {}

for label in deviation_labels:
    print(f"\n=== Evaluating label: {label} ===")

    # The model file name embeds the label string verbatim.
    checkpoint_path = models_dir / f"LSTM_separate_IDP_{label}.pkl"
    model = LSTMSeparateIDP.load(str(checkpoint_path), device=device)

    predictor = PredictionResults(model=model, test_set=test_set_dict[label], mode='separate')
    # probs is returned alongside preds/targets but is not used for the metrics.
    probs, preds, targets = predictor.get_predictions_targets()

    label_metrics = MetricsSep(preds=preds, targets=targets)
    metrics_summary[label] = {
        "macro_dev": label_metrics.precision_recall_dev(),
        "macro_no_dev": label_metrics.precision_recall_no_dev(),
    }


=== Evaluating label: ('>>', 'Assign seriousness') ===

=== Evaluating label: ('Create SW anomaly', '>>') ===

=== Evaluating label: ('Require upgrade', '>>') ===

=== Evaluating label: ('Resolve ticket', '>>') ===

=== Evaluating label: ('Take in charge ticket', '>>') ===

=== Evaluating label: ('Wait', '>>') ===


In [6]:
# Macro-average precision and recall over all evaluated labels, reported
# separately for the "macro_dev" and "macro_no_dev" results in metrics_summary.
n_labels = len(metrics_summary)
if n_labels == 0:
    # Without this guard the divisions below fail with a bare ZeroDivisionError.
    raise ValueError("metrics_summary is empty: no labels were evaluated, nothing to average.")

# Running sums over labels; they are divided by n_labels only when printed.
macro_precision_dev = 0.0
macro_precision_no_dev = 0.0
macro_recall_dev = 0.0
macro_recall_no_dev = 0.0

for results in metrics_summary.values():
    # Index directly so a missing metric raises a KeyError naming the key,
    # rather than `.get()` returning None and failing on `float += None`.
    dev_metrics = results["macro_dev"]
    no_dev_metrics = results["macro_no_dev"]

    macro_precision_dev += dev_metrics["precision"]
    macro_recall_dev += dev_metrics["recall"]

    macro_precision_no_dev += no_dev_metrics["precision"]
    macro_recall_no_dev += no_dev_metrics["recall"]

print("Macro average precision dev: ", macro_precision_dev / n_labels)
print("Macro average recall dev: ", macro_recall_dev / n_labels)

print("\n")

print("Macro average precision no dev: ", macro_precision_no_dev / n_labels)
print("Macro average recall no dev: ", macro_recall_no_dev / n_labels)

    

Macro average precision dev:  0.06446474773624894
Macro average recall dev:  0.35385668276972626


Macro average precision no dev:  0.9896694264336068
Macro average recall no dev:  0.8816703569306653
