In [1]:
import os

def go_up_n_directories(path, n):
    """Return the absolute path reached by ascending from the parent of ``path``.

    The parent directory of ``path`` is taken first (``os.path.dirname``), then
    ``n`` ``".."`` components are appended and the result is normalised with
    ``os.path.abspath``. For a path without a trailing separator this lands
    ``n + 1`` levels above ``path`` itself.

    Args:
        path: Starting filesystem path.
        n: Number of ``".."`` components appended after taking the parent.

    Returns:
        The normalised absolute path as a string.
    """
    parent = os.path.dirname(path)
    return os.path.abspath(os.path.join(parent, *([".."] * n)))
# Remember the directory the kernel started in, so that re-running this cell
# resolves against the same starting point instead of climbing further up the
# tree on every execution.
_NOTEBOOK_START_DIR = globals().get("_NOTEBOOK_START_DIR", os.getcwd())
os.chdir(go_up_n_directories(_NOTEBOOK_START_DIR, 4))

In [2]:
# Standard library first, then project / third-party imports
import pprint
import json
import os
import pickle
import time
from utilities import evaluation_utils
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
from ocpa.algo.predictive_monitoring import tabular

In [3]:
# Which prediction task to run; `regression` is the boolean flag derived from it.
prediction_task = "regression"
regression = (prediction_task == "regression")

# Target column looked up per prediction task.
target_name = dict(
    regression=("event_remaining_time", ()),
    classification="event_ea4",
)
# event_attributes = ["ea2", "ea4", "resource_ce", "resource_multi"]

# Pickled feature storage to read, and the directory the evaluation report goes to.
feature_storage_file = "data/CS/feature_encodings/EFG/efg/raw/CS_split_[C2_P2_P3_O3_eas].fs"
model_output_path = "models/CS/baselines/median"

In [4]:
# Load the pickled feature storage from disk.
# The file handle is deliberately not named `bin`: at notebook top level that
# binding would shadow the builtin `bin()` for every later cell.
# NOTE: pickle.load can execute arbitrary code while unpickling; only open
# feature-storage files that come from a trusted source.
with open(feature_storage_file, "rb") as fs_file:
    feature_storage: Feature_Storage = pickle.load(fs_file)

In [5]:
# flatten EFG with same train/test split
eft_train = tabular.construct_table(
    feature_storage, feature_storage.train_indices + feature_storage.validation_indices
)
eft_test = tabular.construct_table(feature_storage, feature_storage.test_indices)

In [6]:
# subset relevant data
y_train = eft_train[target_name[prediction_task]]
y_test = eft_test[target_name[prediction_task]]

In [7]:
# "Fit" the baseline: the whole model is the median of the training target.
# perf_counter() is used for the elapsed time because it is monotonic and
# high-resolution, which matters for an interval this short; time.time() can
# jump with system clock adjustments.
train_start_time = time.perf_counter()
median = y_train.median()
total_train_time = time.perf_counter() - train_start_time

In [8]:
if regression:
    # Predicting is just repeating the training median once per row.
    # Elapsed times use the monotonic, high-resolution perf_counter().
    pred_start_time = time.perf_counter()
    y_train_preds = [median] * y_train.shape[0]
    train_pred_time = time.perf_counter() - pred_start_time

    pred_start_time = time.perf_counter()
    y_test_preds = [median] * y_test.shape[0]
    test_pred_time = time.perf_counter() - pred_start_time
else:
    # Without this branch no predictions would be defined and the evaluation
    # step would fail later with a NameError (or silently reuse stale kernel
    # state); fail here with an explicit message instead.
    raise NotImplementedError(
        "The median baseline only supports regression, "
        f"got prediction_task={prediction_task!r}"
    )

In [9]:
# Score the constant-median predictions on the training split and attach the
# fit time to its report.
eval_train = evaluation_utils.get_evaluation(
    y_train,
    y_train_preds,
    regression=regression,
    time=train_pred_time,
)
eval_train["report"].update(training_time=total_train_time)

# Score the same constant prediction on the held-out test split.
eval_valid = evaluation_utils.get_evaluation(
    y_test,
    y_test_preds,
    regression=regression,
    time=test_pred_time,
)

# Bundle both evaluations under the split they belong to.
evaluation_report = dict(Train=eval_train, Test=eval_valid)

In [10]:
# Persist the evaluation report as JSON, then echo it in the notebook.
# open(..., "w") does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
os.makedirs(model_output_path, exist_ok=True)
report_path = os.path.join(model_output_path, "evaluation_report.json")
with open(report_path, "w") as fp:
    json.dump(evaluation_report, fp, indent=2)

pprint.pprint(evaluation_report)

{'Test': {'report': {'MAE': 0.7701669027198726,
                     'MAPE': 2.5283890251510908,
                     'MSE': 1.2880973350708993,
                     'R^2': -0.28415082619596976,
                     'prediction_time': 8.225440979003906e-05}},
 'Train': {'report': {'MAE': 0.7673212528401351,
                      'MAPE': 4.9429155758070715,
                      'MSE': 1.276318044653244,
                      'R^2': -0.27972726892610233,
                      'prediction_time': 0.00038743019104003906,
                      'training_time': 0.007107734680175781}}}
