# Prediction Pipeline: Tree-Based

In [None]:
from predictor import BasePredictor
from utils import Utils

# Read the project configuration through the shared Utils helper.
config = Utils.read_config_for_env(config_path='../config/config.yml')

# Gather the predictor settings in one mapping so they are easy to scan and tweak.
# NOTE(review): the exact meaning of each option is defined by BasePredictor,
# which is not visible in this notebook -- confirm there before changing values.
predictor_options = {
    'refresh_monthly': False,
    'refresh_ts_features': False,
    'num_lag_mon': 3,
    'val_ratio': 0.2,
    'scaler_type': 'standard',
}
predictor = BasePredictor(config, **predictor_options)

## Basic LightGBM model

In [None]:
import lightgbm as lgb
# from lightgbm import LGBMRegressor 
import numpy as np
import shap
from sklearn.metrics import mean_squared_error as mse 
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import make_scorer

In [None]:
# Training set: features and labels taken from the predictor's train split.
train_data = lgb.Dataset(
    data=predictor.X_train,
    label=predictor.y_train,
)

# Validation set (not a test set): passing the training Dataset as `reference`
# is how LightGBM expects validation data to be declared.
val_data = lgb.Dataset(
    data=predictor.X_val,
    label=predictor.y_val,
    reference=train_data,
)
# The LightGBM regression parameters are defined in the next cell.

In [None]:
# Booster configuration for a plain gradient-boosted regression model.
params = {
    'objective': 'regression',   # regression objective
    'metric': 'rmse',            # metric reported on the validation set
    'boosting_type': 'gbdt',     # gradient-boosted decision trees
    'num_leaves': 30,            # cap on leaves per tree
    'learning_rate': 0.1,        # shrinkage applied at each boosting round
    'feature_fraction': 0.9,     # share of features sampled for each tree
}

# Upper bound on boosting rounds; early stopping may end training sooner.
num_round = 100

# Stop once the validation metric has failed to improve for 5 consecutive rounds.
callback_early_stopping = lgb.early_stopping(stopping_rounds=5)

# Train against the training set, evaluating (and logging) on the validation set.
model = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=num_round,
    valid_sets=[val_data],
    callbacks=[callback_early_stopping, lgb.log_evaluation()],
)

In [None]:
# Make predictions on the training and validation data. 
pred_train = model.predict(predictor.X_train)
pred_val = model.predict(predictor.X_val)

# Calculate and print the Root Mean Squared Error (RMSE) for training and validation predictions. 
print("Training RMSE: ", np.sqrt(mse(predictor.y_train, pred_train)))
print("Validation RMSE: ", np.sqrt(mse(predictor.y_val, pred_val)))

In [None]:
# Largest prediction on the training split (displayed as the cell output).
np.max(pred_train)