In [None]:
import numpy as np
import pandas as pd
import warnings
import pickle
import os
from utils import utils_gn, utils_sig, utils_dgrd, utils_models
import importlib
# Re-import the project helper modules so that edits made to them on disk are
# picked up without restarting the kernel. The reload order is unchanged.
for _module in (utils_gn, utils_sig, utils_models, utils_dgrd):
    importlib.reload(_module)

# Suppress all warnings for the rest of the session.
warnings.filterwarnings(action="ignore")

In [None]:
# Load the raw training data through the project reader.
train_file = 'train_1238.pkl'
train_raw = utils_gn.read_data(train_file)

In [None]:
# Load the raw test data and its true labels (read in that order).
test_raw, y_test = (
    utils_gn.read_data(file_name)
    for file_name in ('test_1238.pkl', 'true_test_labels_1238.pkl')
)

In [None]:
# Names of the targets to predict; also used to select label columns and to
# index the score tables further down.
target_list = [
    'Qatk-o',
    'Qatk-p',
    'IRate-o',
    'IRate-p',
    'IRatEOL',
]

In [None]:
# Settings shared by the feature-transformation cells below.

# Passed to FeatureTransformation(n=..., step_size=...).
n = 100
step_size = 1  # an alternative value of 80 was kept here for reference

# Passed to both fit_transform(...) and transform(...).
sig_level = 2
multi_cycle = False

In [None]:
# Fit the feature transformer on the raw training data and obtain the
# training inputs and targets in one pass.
tr = utils_gn.FeatureTransformation(
    n=n,
    step_size=step_size,
)
X_train, y_train = tr.fit_transform(
    data=train_raw,
    targets=target_list,
    with_eol=True,
    sig_level=sig_level,
    multi_cycle=multi_cycle,
)

In [None]:
# Get test set
# Transform the raw test data with the transformer fitted on the training set.
X_test = tr.transform(test_raw, sig_level=sig_level, multi_cycle=multi_cycle)

# Reduce the loaded labels to a plain array holding only the modelled targets.
# The guard keeps this cell re-runnable: once y_test has been converted to an
# array, indexing it again with the target names would raise.
if not isinstance(y_test, np.ndarray):
    y_test = y_test[target_list].values

In [None]:
# Configure the model pipeline and fit it on the training set in one step;
# `model` ends up bound to whatever fit() returns, as before.
params = {
    'n_estimators': 500,
    'max_depth': 6,
    'learning_rate': 0.1,
}
model = utils_models.ModelPipeline(
    params=params,
    transform_target=True,
).fit(X_train, y_train)

In [None]:
# Score the fitted model on the training set and attach a confidence-interval
# column for each metric.
train_pred = model.predict(X_train)
train_scores = pd.DataFrame.from_dict(
    utils_models.metrics_calculator(y_train, train_pred, multi=True)
)

# Columns are added in the order MAE, MAPE, RMSE.
for ci_column, metric_type in (('MAE CI', 'mae'), ('MAPE CI', 'mape'), ('RMSE CI', 'rmse')):
    train_scores[ci_column] = utils_models.confidence_interval_metrics(
        actual=y_train,
        predictions=train_pred,
        n_bootstraps=10000,
        target_list=target_list,
        metric_type=metric_type,
    )

# Label the rows by target only after every column has been added.
train_scores.index = target_list

# For a compact view, select train_scores[['MAE', 'MAE CI', 'RMSE', 'RMSE CI']].
display(train_scores)

In [None]:
# Score the fitted model on the test set and attach a confidence-interval
# column for each metric.
test_pred = model.predict(X_test)
test_scores = pd.DataFrame.from_dict(
    utils_models.metrics_calculator(y_test, test_pred, multi=True)
)

# Columns are added in the order MAE, MAPE, RMSE.
for ci_column, metric_type in (('MAE CI', 'mae'), ('MAPE CI', 'mape'), ('RMSE CI', 'rmse')):
    test_scores[ci_column] = utils_models.confidence_interval_metrics(
        actual=y_test,
        predictions=test_pred,
        n_bootstraps=10000,
        target_list=target_list,
        metric_type=metric_type,
    )

# Label the rows by target only after every column has been added.
test_scores.index = target_list

# For a compact view, select test_scores[['MAE', 'MAE CI', 'RMSE', 'RMSE CI']].
display(test_scores)

In [None]:
# Persist the fitted model and the fitted feature transformer as pickle files.
# Create the output folder first so a fresh checkout does not fail on open().
os.makedirs("models", exist_ok=True)

# Save model
with open(os.path.join("models", "sig_capacity_ir.pkl"), "wb") as fp:
    pickle.dump(model, fp)

# Save transformation
with open(os.path.join("models", "sig_capacity_ir_trans.pkl"), "wb") as fp:
    pickle.dump(tr, fp)

In [None]:
# Save the training labels and the matching training predictions.
# Create the output folder first so this cell also works when it is missing.
os.makedirs("models", exist_ok=True)

with open(os.path.join("models", "sig_capir_train_labels.pkl"), "wb") as fp:
    pickle.dump(y_train, fp)

with open(os.path.join("models", "sig_capir_train_pred.pkl"), "wb") as fp:
    pickle.dump(train_pred, fp)

In [None]:
# Get the prediction intervals and save as pickle file.
# Create the output folder up front so a missing directory is reported before
# the bootstrap computation runs rather than after it.
os.makedirs("models", exist_ok=True)

# NOTE(review): the helper receives the training data (X_train, y_train) but
# the test-set predictions (test_pred) — confirm this pairing is intended.
pred_interval, v_list = utils_models.prediction_interval(
    X=X_train,
    y=y_train,
    model=model,
    n_bootstraps=100,
    target_list=target_list,
    predictions=test_pred,
    confidence_level=0.90,
    plot_dist=True,
)

with open(os.path.join("models", "sig_capir_pred_interval.pkl"), "wb") as fp:
    pickle.dump(pred_interval, fp)