In [None]:
import os
import pprint

import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Run from the project root so the relative data path below resolves.
# The root can be overridden with the OCPPM_ROOT environment variable; when it
# is unset the original hard-coded location is used, so existing setups keep
# working unchanged.
PROJECT_ROOT = os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/")
os.chdir(PROJECT_ROOT)

# Kept after the chdir on purpose: `utilities` is presumably a project-local
# package that is found through the working directory (verify if this import
# is ever moved above the chdir).
import utilities.evaluation_utils as evaluation_utils

# Only the "application" object type is considered in this notebook.
oft_out_file = "data/BPI17/feature_encodings/OFT/application_features.csv"

In [None]:
# Read the application feature table and standardise every column
# (StandardScaler: zero mean, unit variance per column).
oft_raw = pd.read_csv(oft_out_file)

# NOTE(review): the scaler is fitted on all rows and all columns, i.e. before
# the train/validation split and including the target column
# "object_lifecycle_duration". Downstream metrics are therefore expressed in
# standardised units — confirm this is intended.
scaler = StandardScaler()
oft_values = scaler.fit_transform(oft_raw.to_numpy())

# Re-wrap the scaled array so the original column names are kept.
oft = pd.DataFrame(data=oft_values, columns=oft_raw.columns)
print(oft.shape)
oft.head()

In [None]:
# Separate the regression target from the feature columns, then hold out 20%
# of the rows for validation (fixed seed so the split is reproducible).
target_column = "object_lifecycle_duration"
X = oft.drop(columns=[target_column])
y = oft[target_column]

X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Wrap both partitions in LightGBM's dataset container.
train_data = lgb.Dataset(data=X_train, label=y_train)
valid_data = lgb.Dataset(data=X_valid, label=y_valid)

In [None]:
# Experiment configuration. This dict is also what gets recorded as the
# experiment settings later on, so it is kept exactly as-is.
params = {
    "objective": "regression",
    "metric": ["mse", "mae", "mape", "rmse"],
    "num_boost_round": 100,
    "stopping_rounds": 100,
}

# "num_boost_round" and "stopping_rounds" steer the training loop rather than
# the booster: passing them inside the LightGBM params makes `lgb.train` warn
# that it found `num_boost_round` in params, and "stopping_rounds" is not a
# LightGBM parameter at all (it is only consumed by the callback below).
# Hand LightGBM the remaining keys and pass the loop settings explicitly.
training_loop_keys = ("num_boost_round", "stopping_rounds")
booster_params = {
    key: value for key, value in params.items() if key not in training_loop_keys
}

# NOTE(review): the early-stopping patience equals the total number of
# boosting rounds, so training can never actually stop early; the callback
# then only serves to select the best iteration — confirm this is intended.
bst = lgb.train(
    booster_params,
    train_data,
    num_boost_round=params["num_boost_round"],
    valid_sets=[valid_data],
    callbacks=[lgb.early_stopping(params["stopping_rounds"])],
)

In [None]:
# Score both partitions with the trained booster (train first, then validation).
y_train_preds, y_valid_preds = (
    bst.predict(features) for features in (X_train, X_valid)
)

In [None]:
# Compute the regression evaluation for each partition via the project helper.
eval_train, eval_valid = (
    evaluation_utils.get_evaluation(actual, predicted, regression=True)
    for actual, predicted in (
        (y_train, y_train_preds),
        (y_valid, y_valid_preds),
    )
)

# Keep the configuration used for this run alongside the results.
experiment_settings = {"experiment_settings": params}

# Collect both evaluations in one report and pretty-print it.
evaluation_report = {
    "train": eval_train,
    "validation": eval_valid,
}
pprint.pprint(evaluation_report)