In [None]:
import pandas as pd
from config.definitions import ROOT_DIR
from utils import utils_gn, utils_noah, utils_ivc, utils_dgrd, utils_models
import importlib
# Re-import the project helper modules so edits made on disk are picked up
# without restarting the kernel (same order as the individual reload calls).
for _module in (utils_gn, utils_noah, utils_ivc, utils_models, utils_dgrd):
    importlib.reload(_module)

In [None]:
# Read the raw training data from the project's data directory
train_raw = utils_gn.read_data(path=f"{ROOT_DIR}/data", fname="train_1238.pkl")

In [None]:
# Read the raw test data and the matching true labels; both files live in the
# same data directory.
data_dir = f"{ROOT_DIR}/data"
test_raw = utils_gn.read_data(path=data_dir, fname="test_1238.pkl")
y_test = utils_gn.read_data(path=data_dir, fname="true_test_labels_1238.pkl")

In [None]:
# Names of the targets to model; used below both as the `targets` argument of
# the feature transformation and to select columns from the test labels.
target_list = [
    'Qatk-o',
    'Qatk-p',
    'IRate-o',
    'IRate-p',
    'IRatEOL',
]

In [None]:
# Fit the feature transformation on the raw training data and obtain the
# training features together with the selected targets.
tr = utils_gn.FeatureTransformation(n=50)
X_train, y_train = tr.fit_transform(
    data=train_raw,
    targets=target_list,
    with_eol=True,
)

In [None]:
# Get test set
# Apply the transformation fitted on the training data to the raw test data.
X_test = tr.transform(test_raw)

# Build the test targets from a fresh read of the label file rather than by
# slicing the existing `y_test`: rebinding `y_test` to `y_test[target_list].values`
# makes the cell non-idempotent, because on a second run `y_test` is already the
# extracted `.values` (presumably a NumPy array) and can no longer be indexed
# by column names.
y_test_labels = utils_gn.read_data(
    path=f"{ROOT_DIR}/data",
    fname="true_test_labels_1238.pkl"
)
y_test = y_test_labels[target_list].values

In [None]:
# Configure the model pipeline and train it on the training set
params = {
    'n_estimators': 500,
    'max_depth': 6,
    'learning_rate': 0.1,
}
model = utils_models.ModelPipeline(
    params=params,
    transform_target=True,
).fit(X_train, y_train)

In [None]:
# Create a function to format errors
def format_e(n):
    """Format a number in compact scientific notation.

    The value is rendered with the ``%e`` conversion (six digits after the
    decimal point); trailing zeros and a dangling decimal point are then
    removed from the mantissa, e.g. ``1234.5 -> '1.2345e+03'`` and
    ``100 -> '1e+02'``.

    Parameters
    ----------
    n : int or float
        Number to format.

    Returns
    -------
    str
        ``'<mantissa>e<exponent>'`` for finite input. For non-finite input
        (``nan``, ``inf``, ``-inf``) the plain ``%e`` rendering is returned,
        since it carries no exponent part.
    """
    mantissa, sep, exponent = ('%e' % n).partition('e')
    if not sep:
        # nan/inf render without an 'e'; indexing a split result for the
        # exponent would raise IndexError, so return the rendering as is.
        return mantissa
    return mantissa.rstrip('0').rstrip('.') + 'e' + exponent

In [None]:
# Score the fitted model on the training set and attach a confidence-interval
# column for each metric.
train_pred = model.predict(X_train)
train_scores = pd.DataFrame.from_dict(
    utils_models.metrics_calculator(y_train, train_pred, multi=True)
)

# The CI columns are added first; the index is relabelled with the target
# names only afterwards.
for ci_column, metric in (('MAE CI', 'mae'), ('RMSE CI', 'rmse')):
    train_scores[ci_column] = utils_models.confidence_interval_metrics(
        actual=y_train,
        predictions=train_pred,
        n_bootstraps=10000,
        target_list=target_list,
        metric_type=metric,
    )
train_scores.index = target_list

# Place each metric next to its confidence interval and display the table
train_scores = train_scores[['MAE', 'MAE CI', 'RMSE', 'RMSE CI']]
train_scores

In [None]:
# Score the fitted model on the test set and attach a confidence-interval
# column for each metric.
test_pred = model.predict(X_test)
test_scores = pd.DataFrame.from_dict(
    utils_models.metrics_calculator(y_test, test_pred, multi=True)
)

# The CI columns are added first; the index is relabelled with the target
# names only afterwards.
for ci_column, metric in (('MAE CI', 'mae'), ('RMSE CI', 'rmse')):
    test_scores[ci_column] = utils_models.confidence_interval_metrics(
        actual=y_test,
        predictions=test_pred,
        n_bootstraps=10000,
        target_list=target_list,
        metric_type=metric,
    )
test_scores.index = target_list

# Place each metric next to its confidence interval and display the table
test_scores = test_scores[['MAE', 'MAE CI', 'RMSE', 'RMSE CI']]
test_scores


In [None]:
# Compute prediction intervals for the test predictions. The helper receives
# the training data, the fitted model and the test-set predictions.
interval_kwargs = {
    'X': X_train,
    'y': y_train,
    'model': model,
    'n_bootstraps': 100,
    'target_list': target_list,
    'predictions': test_pred,
    'confidence_level': 0.90,
    'plot_dist': True,
}
pred_interval, v_list = utils_models.prediction_interval(**interval_kwargs)

In [None]:
# Persist the training labels and predictions, the fitted model, the feature
# transformation object and the prediction intervals in the models directory.
# Each output file name is paired directly with the object written to it.
artifacts = {
    "capir_train_labels.pkl": y_train,
    "capir_train_pred.pkl": train_pred,
    "capacity_ir.pkl": model,
    "capacity_ir_trans.pkl": tr,
    "capir_pred_interval.pkl": pred_interval,
}

# Keep the parallel lists available under their existing names
names_to_use = list(artifacts)
data_to_save = list(artifacts.values())

for nm, dt in artifacts.items():
    utils_gn.dump_data(data=dt, fname=nm, path=f"{ROOT_DIR}/models")