# BPIC17 Simplified Model - Benchmarking

Run a simulation with BPIC17SimplifiedPredictor and compare the results with the ground truth using SimulationBenchmark.

**Workflow:**
1. Load trained model
2. Run simulation with BPIC17SimplifiedPredictor
3. Generate simulated event log
4. Compare with ground truth using SimulationBenchmark


In [1]:
import sys
import os
import warnings
from pathlib import Path
from datetime import datetime

# Locate the repository root: four levels above this file when `__file__` is
# defined (script execution), otherwise three levels above the current working
# directory (the usual situation inside a notebook kernel).
if '__file__' in globals():
    project_root = Path(__file__).parent.parent.parent.parent
else:
    project_root = Path.cwd().parent.parent.parent
sys.path.insert(0, str(project_root))

# Put the Next-Activity-Prediction folder at the front of the import path,
# unless an identical entry is already registered.
na_root = project_root / "Next-Activity-Prediction"
if sys.path.count(str(na_root)) == 0:
    sys.path.insert(0, str(na_root))

import pandas as pd
import numpy as np

from bpic17_simplified import BPIC17SimplifiedPredictor
from simulation.engine import DESEngine
from simulation.log_exporter import LogExporter
from resources import ResourceAllocator
from integration.SimulationBenchmark import SimulationBenchmark
from processing_time_prediction.ProcessingTimePredictionClass import ProcessingTimePredictionClass

# Optional components. When a package cannot be imported its public names are
# bound to None instead, so cells that use them must check for None first.
try:
    from case_arrival_times_prediction.simulation import ArrivalGenerator
    from case_arrival_times_prediction.data_loader import load_daily_sequences
except ImportError:
    ArrivalGenerator = load_daily_sequences = None

try:
    from case_attributes_prediction.case_attributes import CaseAttributePredictor
except ImportError:
    CaseAttributePredictor = None

# Notebook-wide output settings: disable the pandas chained-assignment check
# and install a catch-all "ignore" filter for Python warnings.
pd.set_option("mode.chained_assignment", None)
warnings.simplefilter("ignore")

print(f"Benchmarking started: {datetime.now()}")


  if not hasattr(np, "object"):


Benchmarking started: 2026-01-06 23:41:43.880683


## 1. Configuration


In [2]:
# Filesystem locations, all resolved against project_root and kept as plain
# strings: the trained next-activity model, the source event log, and the
# folder that receives every artefact written by this notebook.
MODEL_PATH = os.fspath(project_root / "models" / "bpic17_simplified")
EVENT_LOG_PATH = os.fspath(project_root / "eventlog" / "eventlog.xes.gz")
OUTPUT_DIR = os.fspath(project_root / "integration" / "output" / "bpic17_simplified")

# Simulation size and the simulated clock's starting point
# (4 January 2016, a Monday, at 08:00).
NUM_CASES = 100
START_TIME = datetime(2016, 1, 4, 8, 0)

# Create the output folder up front; an existing folder is left untouched.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(
    f"Model path: {MODEL_PATH}",
    f"Output directory: {OUTPUT_DIR}",
    f"Number of cases: {NUM_CASES}",
    sep="\n",
)


Model path: d:\Repos\process-simulation-engine\models\bpic17_simplified
Output directory: d:\Repos\process-simulation-engine\integration\output\bpic17_simplified
Number of cases: 100


## 2. Load Predictors and Models


In [3]:
# Instantiate the four components handed to the simulation engine further down:
# next-activity predictor, resource allocator, processing-time predictor and
# case-attribute predictor. Each step announces itself so a failure is easy to
# attribute to a specific component.
print("Loading BPIC17SimplifiedPredictor...")
next_activity_predictor = BPIC17SimplifiedPredictor(model_path=MODEL_PATH)
print("✓ Next activity predictor loaded")

print("\nLoading ResourceAllocator...")
resource_allocator = ResourceAllocator(log_path=EVENT_LOG_PATH)
print("✓ Resource allocator loaded")

print("\nLoading ProcessingTimePredictor...")
try:
    processing_time_predictor = ProcessingTimePredictionClass()
except FileNotFoundError as exc:
    # The predictor is built without an explicit path, so a missing-file error
    # is re-raised (same exception type) with the directories that matter for
    # diagnosing it.
    # NOTE(review): presumably the model files are looked up relative to the
    # working directory - confirm against ProcessingTimePredictionClass.
    raise FileNotFoundError(
        f"{exc} (working directory: {os.getcwd()}; project root: {project_root}). "
        "If the model path is relative, start the kernel from the directory that "
        "contains the model files or configure the predictor with an absolute path."
    ) from exc
print("✓ Processing time predictor loaded")

print("\nLoading CaseAttributePredictor...")
# The import of CaseAttributePredictor falls back to None when the package is
# unavailable; fail with an explicit message instead of
# "'NoneType' object is not callable".
if CaseAttributePredictor is None:
    raise ImportError(
        "CaseAttributePredictor is unavailable: "
        "case_attributes_prediction.case_attributes could not be imported."
    )
case_attribute_predictor = CaseAttributePredictor()
print("✓ Case attribute predictor loaded")


Loading BPIC17SimplifiedPredictor...

✓ Next activity predictor loaded

Loading ResourceAllocator...
[AUTO-LOAD] Found cached model at d:\Repos\process-simulation-engine\resources\resource_availabilities\bpic2017_resource_model.pkl
            Loading pre-trained model (fast)...
[LOADING] Loading model from: d:\Repos\process-simulation-engine\resources\resource_availabilities\bpic2017_resource_model.pkl
[SUCCESS] Model loaded
          - 144 resource patterns
          - 141 clustered resources
          - 0 busy periods
[AUTO-LOAD] Model loaded successfully!
✓ Resource allocator loaded

Loading ProcessingTimePredictor...


FileNotFoundError: Model metadata file not found at models/processing_time_model_metadata.joblib

## 3. Generate Case Arrivals


In [None]:
import random

print("Generating case arrivals...")

# The learned arrival model is optional: its import falls back to None when the
# package is missing. It is constructed here but not yet used to produce the
# timestamps below, so its absence must not stop the notebook.
if ArrivalGenerator is not None and load_daily_sequences is not None:
    arrival_generator = ArrivalGenerator(L=24, verbose=False, random_state=42)
    # Load training data for arrival generation
    daily_sequences = load_daily_sequences(EVENT_LOG_PATH)
else:
    arrival_generator = None
    daily_sequences = None
    print("⚠ Arrival generator package not available; using synthetic inter-arrival times only")

# Simplified arrival process: the first case arrives at START_TIME and every
# following case arrives 1-10 hours (uniformly drawn) after the previous one.
arrival_timestamps = []
current_time = START_TIME
for i in range(NUM_CASES):
    arrival_timestamps.append(current_time)
    # A dedicated generator seeded per case yields the same draw as re-seeding
    # the module-level generator with 42 + i, without disturbing global random
    # state that other code may rely on.
    hours = random.Random(42 + i).uniform(1, 10)
    current_time = current_time + pd.Timedelta(hours=hours)

print(f"Generated {len(arrival_timestamps)} case arrivals")
# Guard the first/last report so NUM_CASES == 0 does not raise IndexError.
if arrival_timestamps:
    print(f"First arrival: {arrival_timestamps[0]}")
    print(f"Last arrival: {arrival_timestamps[-1]}")


## 4. Run Simulation


In [None]:
# Wire the previously loaded components into the discrete-event engine.
print("Initializing simulation engine...")
engine_options = dict(
    resource_allocator=resource_allocator,
    arrival_timestamps=arrival_timestamps,
    next_activity_predictor=next_activity_predictor,
    processing_time_predictor=processing_time_predictor,
    case_attribute_predictor=case_attribute_predictor,
    start_time=START_TIME,
)
engine = DESEngine(**engine_options)

# Execute the run; `events` holds whatever the engine returns for the simulated log.
print(f"\nRunning simulation for {NUM_CASES} cases...")
events = engine.run(num_cases=NUM_CASES)

# Report the counters the engine exposes through `engine.stats`.
divider = "=" * 60
print("\n" + divider)
print("SIMULATION RESULTS")
print(divider)
run_stats = engine.stats
print(f"  Cases started: {run_stats['cases_started']}")
print(f"  Cases completed: {run_stats['cases_completed']}")
print(f"  Events generated: {len(events)}")
print(f"  Outside hours: {run_stats['outside_hours_count']}")
print(f"  No eligible: {run_stats['no_eligible_failures']}")
print(divider)


## 5. Export Simulated Log


In [None]:
# Target files for the simulated log inside the notebook's output folder.
simulated_csv = os.path.join(OUTPUT_DIR, "simulated_log.csv")
simulated_xes = os.path.join(OUTPUT_DIR, "simulated_log.xes")

# The CSV export is mandatory: the benchmark reads it back later.
print("Exporting simulated log...")
LogExporter.to_csv(events, simulated_csv)
print(f"✓ CSV exported to: {simulated_csv}")

# The XES export is best-effort: any failure is reported and the notebook continues.
try:
    LogExporter.to_xes(events, simulated_xes)
    print(f"✓ XES exported to: {simulated_xes}")
except Exception as export_error:
    print(f"⚠ Could not export XES: {export_error}")

# Preview the first few events as "[timestamp] case: activity (by resource)".
print(f"\nSample events (first 5):")
for event in events[:5]:
    stamp = event['time:timestamp'].strftime('%Y-%m-%d %H:%M')
    resource = event.get('org:resource', 'N/A')
    print(f"  [{stamp}] {event['case:concept:name']}: {event['concept:name']} (by {resource})")


## 6. Load Ground Truth Log


In [None]:
# Load the ground truth from the original event log and cut it down to the
# same number of cases as the simulation so the comparison is like-for-like.
import pm4py

# Fixed seed for the case sample: without it every run benchmarks against a
# different random subset of the log and the metrics are not repeatable.
GROUND_TRUTH_SAMPLE_SEED = 42

print("Loading ground truth event log...")
ground_truth_log = pm4py.read_xes(EVENT_LOG_PATH)
df_ground = pm4py.convert_to_dataframe(ground_truth_log)

# Keep start/complete lifecycle events only (matching our model)
df_ground = df_ground[df_ground['lifecycle:transition'].isin(['start', 'complete'])].copy()

# Draw NUM_CASES distinct case ids; when the log has no more than NUM_CASES
# cases, all of them are kept.
unique_cases = df_ground['case:concept:name'].unique()
if len(unique_cases) > NUM_CASES:
    sample_rng = np.random.default_rng(GROUND_TRUTH_SAMPLE_SEED)
    sampled_cases = sample_rng.choice(unique_cases, NUM_CASES, replace=False)
    df_ground = df_ground[df_ground['case:concept:name'].isin(sampled_cases)].copy()

# Persist the sampled ground truth as CSV for the benchmark step.
ground_truth_csv = os.path.join(OUTPUT_DIR, "ground_truth_log.csv")
df_ground.to_csv(ground_truth_csv, index=False)
print(f"✓ Ground truth exported to: {ground_truth_csv}")
print(f"  Cases: {df_ground['case:concept:name'].nunique()}")
print(f"  Events: {len(df_ground)}")


## 7. Run Benchmark


In [None]:
# Announce which two logs are being compared.
separator = "=" * 60
print(separator, "SIMULATION BENCHMARK", separator, sep="\n")
print(f"Ground truth: {ground_truth_csv}")
print(f"Simulated: {simulated_csv}")
print(separator)
print()

# Build the benchmark from the two CSV files and compute every metric it offers.
benchmark = SimulationBenchmark(ground_truth_csv, simulated_csv)

print("Computing metrics...")
results = benchmark.compute_all_metrics()

# Let the benchmark object print its own summary under a banner.
print("\n" + separator)
print("SUMMARY")
print(separator)
benchmark.print_summary()


## 8. Export Benchmark Results


In [None]:
# Write the benchmark results next to the exported logs.
output_path = os.path.join(OUTPUT_DIR, "benchmark_results.xlsx")
print(f"Exporting results to: {output_path}")
benchmark.export_results(output_path)

# Closing banner with the result location and the finish time.
closing_rule = "=" * 60
print("\n" + closing_rule)
print("BENCHMARK COMPLETE")
print(closing_rule)
print(f"Results saved to: {output_path}")
print(f"Benchmarking completed at: {datetime.now()}")
