In [1]:
from models.xgboost_model import XgboostModel
from models.arima_model import ArimaModel
from models.sarima_model import SarimaModel
from models.prophet_model import ProphetModel
from models.random_forest_model import RandomForestModel
from models.lightgbm_model import LightGBMModel
from models.lstm_model import LstmModel
from models.stacked_lstm_model import StackedLstmModel
from models.bidirectional_lstm_model import BidirectionalLstmModel
from models.gru_model import GruModel

import pandas as pd
import numpy as np
import os

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [2]:
# Create one instance of each model below. No arguments are passed, so every
# model starts from whatever defaults its constructor defines.
model_xgb, model_arima, model_sarima = XgboostModel(), ArimaModel(), SarimaModel()
model_prophet, model_rf, model_lgbm = ProphetModel(), RandomForestModel(), LightGBMModel()

In [3]:
# Directory the processed train/test splits were saved to.
input_dir = 'processed_data'
print(f"--- Loading processed data from '{input_dir}' directory ---")

# Restore the four splits in a fixed order: train features, train target,
# test features, test target.
# NOTE(review): unpickling can execute arbitrary code -- only load files that
# come from a trusted source.
X_train, y_train, X_test, y_test = [
    pd.read_pickle(os.path.join(input_dir, pickle_name))
    for pickle_name in ('X_train.pkl', 'y_train.pkl', 'X_test.pkl', 'y_test.pkl')
]

print("Data loaded successfully.")

# --- Sanity check: report the shape of every split, then preview the features ---
print("\nData Shapes:")
print(f"  X_train: {X_train.shape}")
print(f"  y_train: {y_train.shape}")
print(f"  X_test:  {X_test.shape}")
print(f"  y_test:  {y_test.shape}")

print("\nFirst 5 rows of X_train:")
print(X_train.head())

--- Loading processed data from 'processed_data' directory ---
Data loaded successfully.

Data Shapes:
  X_train: (3949, 26)
  y_train: (3949,)
  X_test:  (988, 26)
  y_test:  (988,)

First 5 rows of X_train:
            Open  High   Low  Close      Volume  price_return  log_return  \
Timestamp                                                                   
2012-02-03  6.26  6.35  5.93   6.29  283.382106      0.004792    0.004781   
2012-02-04  6.29  6.50  5.94   6.50   67.694994      0.033386    0.032841   
2012-02-05  6.50  6.50  5.70   5.70   49.866684     -0.123077   -0.131336   
2012-02-06  5.70  6.15  5.20   5.90   26.362078      0.035088    0.034486   
2012-02-07  5.90  5.90  5.50   5.51  151.424746     -0.066102   -0.068388   

               RSI_14  MACD_12_26_9  MACDh_12_26_9  ...   ATRr_14         OBV  \
Timestamp                                           ...                         
2012-02-03  52.119945     -0.113606      -0.000737  ...  0.909106  243.558728   
2012-02-

In [None]:
PERFORM_TUNING = True  # Switch that controls whether hyperparameter tuning is run

# Settings that every variant of an architecture has in common. The variants
# below differ only in 'sequence_length', which is placed first so each dict
# keeps the key order: sequence_length, <units...>, dropout_rate, epochs, batch_size.
_lstm_shared = {'lstm_units': 50, 'dropout_rate': 0.2, 'epochs': 70, 'batch_size': 32}
_stacked_lstm_shared = {
    'lstm_units_1': 64,
    'lstm_units_2': 32,
    'dropout_rate': 0.2,
    'epochs': 70,
    'batch_size': 32,
}
_gru_shared = {'gru_units': 50, 'dropout_rate': 0.2, 'epochs': 50, 'batch_size': 32}

# Single-layer LSTM settings for 7-, 10- and 15-step sequences.
# Each unpacking builds a brand-new dict, so no two parameter sets share state.
lstm_params_7 = {'sequence_length': 7, **_lstm_shared}
lstm_params_10 = {'sequence_length': 10, **_lstm_shared}
lstm_params_15 = {'sequence_length': 15, **_lstm_shared}

# Two-layer (stacked) LSTM settings.
stacked_lstm_params_10 = {'sequence_length': 10, **_stacked_lstm_shared}
stacked_lstm_params_15 = {'sequence_length': 15, **_stacked_lstm_shared}

# GRU settings (50 epochs instead of the 70 used for the LSTM variants).
gru_params_10 = {'sequence_length': 10, **_gru_shared}
gru_params_15 = {'sequence_length': 15, **_gru_shared}

def _make_sequence_model(model_cls, label, params):
    """Instantiate a sequence model and label it with its sequence length.

    Args:
        model_cls: Model class to instantiate; it is called as
            ``model_cls(params=...)``.
        label: Architecture label used as the prefix of ``model_name``
            (e.g. ``"LSTM"``).
        params: Hyperparameter dict; must contain ``'sequence_length'``.

    Returns:
        The new model instance, with ``model_name`` set to
        ``"<label> (<sequence_length>-day seq)"``.

    Raises:
        KeyError: If ``params`` has no ``'sequence_length'`` entry.
    """
    # Give every model its own shallow copy of the settings. The same dict
    # (e.g. lstm_params_7) is used for more than one model, and this notebook's
    # training loop notes that `tune` updates model.params -- with a shared dict,
    # tuning one model could silently change another model's settings.
    # NOTE(review): whether the model classes already copy `params` internally
    # is not visible from this notebook; the copy is harmless either way.
    model = model_cls(params=dict(params))
    # Derive the label from the params so the name always matches the settings
    # the model was created with.
    model.model_name = f"{label} ({params['sequence_length']}-day seq)"
    return model


# Single-layer LSTMs
model_lstm_7 = _make_sequence_model(LstmModel, "LSTM", lstm_params_7)
model_lstm_10 = _make_sequence_model(LstmModel, "LSTM", lstm_params_10)
model_lstm_15 = _make_sequence_model(LstmModel, "LSTM", lstm_params_15)

# Bidirectional LSTMs (start from the same settings as the single-layer LSTMs)
model_bidir_lstm_7 = _make_sequence_model(BidirectionalLstmModel, "Bidirectional LSTM", lstm_params_7)
model_bidir_lstm_10 = _make_sequence_model(BidirectionalLstmModel, "Bidirectional LSTM", lstm_params_10)
model_bidir_lstm_15 = _make_sequence_model(BidirectionalLstmModel, "Bidirectional LSTM", lstm_params_15)

# Stacked LSTMs
model_stacked_lstm_10 = _make_sequence_model(StackedLstmModel, "Stacked LSTM", stacked_lstm_params_10)
model_stacked_lstm_15 = _make_sequence_model(StackedLstmModel, "Stacked LSTM", stacked_lstm_params_15)

# GRUs
model_gru_10 = _make_sequence_model(GruModel, "GRU", gru_params_10)
model_gru_15 = _make_sequence_model(GruModel, "GRU", gru_params_15)

# Train, predict and evaluate every model in turn, collecting the outcome in
# `results` as {model_name: {'metrics': ..., 'predictions': pd.Series}}.
ml_models_to_run = [
    model_xgb,
    model_arima,
    model_sarima,
    model_prophet,
    model_rf,
    model_lgbm,
    model_lstm_7,
    model_lstm_10,
    model_lstm_15,
    model_bidir_lstm_7,
    model_bidir_lstm_10,
    model_bidir_lstm_15,
    model_stacked_lstm_10,
    model_stacked_lstm_15,
    model_gru_10,
    model_gru_15
]
results = {}

# Models trained on the historical target series only: they receive None in
# place of X_train and do not use the engineered features.
TARGET_ONLY_MODEL_NAMES = ("ARIMA", "SARIMA", "Prophet")

# Substrings of model_name that identify sequence models. Their predictions are
# compared against y_test with the first `sequence_length` rows removed.
SEQUENCE_MODEL_TAGS = ("LSTM", "GRU")


for model in ml_models_to_run:

    if PERFORM_TUNING and hasattr(model, 'tune'):
        # The 'tune' method updates model.params with the best ones it finds
        model.tune(X_train, y_train)

    if model.model_name in TARGET_ONLY_MODEL_NAMES:
        # ARIMA / SARIMA / Prophet: fit on y_train alone; pass None for X_train to be explicit.
        model.train(None, y_train)
    else:
        # Standard supervised models: use the engineered features in X_train to predict y_train.
        model.train(X_train, y_train)

    # Every model receives X_test when predicting, whether or not it was trained on features.
    predictions = model.predict(X_test)

    # Convert it to a flat NumPy array. This prevents the .flatten() error.
    predictions_array = np.array(predictions).flatten()

    if any(tag in model.model_name for tag in SEQUENCE_MODEL_TAGS):
        # Read the sequence length after tuning, since tune() may have changed the params.
        seq_len = model.params['sequence_length']
        # Drop the first `seq_len` targets so y_test lines up with the predictions.
        # This assumes the model's first prediction comes only after it has seen
        # `seq_len` timesteps; the length check below guards that assumption.
        y_test_aligned = y_test.iloc[seq_len:]

        # Just in case the shapes still don't match
        if len(predictions_array) != len(y_test_aligned):
            print(f"Skipping evaluation for {model.model_name} due to length mismatch after sequencing.")
            # Add empty results to avoid breaking the plotting code. The explicit
            # dtype avoids pandas' empty-Series dtype warning and keeps the
            # placeholder numeric like the real prediction series.
            results[model.model_name] = {
                'metrics': {'rmse': np.nan, 'mae': np.nan},
                'predictions': pd.Series(dtype=float),
            }
            continue  # Skip to the next model in the loop

        # Use the unified 'predictions_array'
        predictions_series = pd.Series(predictions_array, index=y_test_aligned.index)

        # Evaluate against the aligned true values
        metrics = model.evaluate(y_test_aligned, predictions_series)
    else:
        # All other models are expected to return one prediction per test row;
        # pandas raises ValueError here if the lengths differ.
        predictions_series = pd.Series(predictions_array, index=y_test.index)
        # Evaluate against the true values
        metrics = model.evaluate(y_test, predictions_series)

    results[model.model_name] = {'metrics': metrics, 'predictions': predictions_series}

print("\n----- All models have been run. -----")

--- Tuning Hyperparameters for XGBoost ---
Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=10, n_estimators=500, subsample=0.8; total time=   2.5s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=10, n_estimators=500, subsample=0.8; total time=   2.8s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=10, n_estimators=500, subsample=0.8; total time=   2.3s
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.7; total time=   1.1s
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.7; total time=   1.0s
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.7; total time=   1.1s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=10, n_estimators=100, subsample=0.7; total time=   0.3s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=10, n_estimators=100, 

22:22:47 - cmdstanpy - INFO - Chain [1] start processing


 ARIMA(2,1,2)(0,0,0)[7]             : AIC=63088.874, Time=0.81 sec

Best model:  ARIMA(1,1,1)(0,0,0)[7]          
Total fit time: 16.990 seconds

--- Auto-SARIMA Found Best Model ---
                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 3953
Model:               SARIMAX(1, 1, 1)   Log Likelihood              -31540.487
Date:                Wed, 20 Aug 2025   AIC                          63086.974
Time:                        22:22:47   BIC                          63105.820
Sample:                    02-03-2012   HQIC                         63093.658
                         - 11-29-2022                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.9303      

22:22:48 - cmdstanpy - INFO - Chain [1] done processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] done processing
22:22:50 - cmdstanpy - INFO - Chain [1] done processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] done processing
22:22:50 - cmdstanpy - INFO - Chain [1] done processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] done processing
22:22:50 - cmdstanpy - INFO - Chain [1] start processing
22:22:50 - cmdstanpy - INFO - Chain [1] done processing
22:22:50 - cmdstanpy - INFO - Chain [1

Testing params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.1}


22:22:52 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:22:54 - cmdstanpy - INFO - Chain [1] done processing
22:22:54 - cmdstanpy - INFO - Chain [1] 

Testing params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 1.0}


22:22:57 - cmdstanpy - INFO - Chain [1] done processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] done processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] done processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] done processing
22:22:58 - cmdstanpy - INFO - Chain [1] done processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:58 - cmdstanpy - INFO - Chain [1] done processing
22:22:58 - cmdstanpy - INFO - Chain [1] done processing
22:22:58 - cmdstanpy - INFO - Chain [1] start processing
22:22:59 - cmdstanpy - INFO - Chain [1] done processing
22:22:59 - cmdstanpy - INFO - Chain [1]

Testing params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 10.0}


22:23:00 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] start processing
22:23:02 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1] done processing
22:23:02 - cmdstanpy - INFO - Chain [1]

Testing params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 0.01}


22:23:04 - cmdstanpy - INFO - Chain [1] start processing
22:23:04 - cmdstanpy - INFO - Chain [1] done processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] done processing
22:23:06 - cmdstanpy - INFO - Chain [1] done processing
22:23:06 - cmdstanpy - INFO - Chain [1] done processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] done processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] start processing
22:23:06 - cmdstanpy - INFO - Chain [1] done processing
22:23:06 - cmdstanpy - INFO - Chain [

Testing params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 0.1}


22:23:09 - cmdstanpy - INFO - Chain [1] start processing
22:23:09 - cmdstanpy - INFO - Chain [1] done processing
22:23:10 - cmdstanpy - INFO - Chain [1] start processing
22:23:10 - cmdstanpy - INFO - Chain [1] done processing
22:23:10 - cmdstanpy - INFO - Chain [1] start processing
22:23:10 - cmdstanpy - INFO - Chain [1] start processing
22:23:10 - cmdstanpy - INFO - Chain [1] done processing
22:23:10 - cmdstanpy - INFO - Chain [1] start processing
22:23:10 - cmdstanpy - INFO - Chain [1] start processing
22:23:11 - cmdstanpy - INFO - Chain [1] done processing
22:23:11 - cmdstanpy - INFO - Chain [1] start processing
22:23:11 - cmdstanpy - INFO - Chain [1] start processing
22:23:11 - cmdstanpy - INFO - Chain [1] start processing
22:23:11 - cmdstanpy - INFO - Chain [1] start processing
22:23:11 - cmdstanpy - INFO - Chain [1] done processing
22:23:11 - cmdstanpy - INFO - Chain [1] done processing
22:23:11 - cmdstanpy - INFO - Chain [1] start processing
22:23:11 - cmdstanpy - INFO - Chain [

Testing params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 1.0}


22:23:13 - cmdstanpy - INFO - Chain [1] start processing
22:23:13 - cmdstanpy - INFO - Chain [1] done processing
22:23:14 - cmdstanpy - INFO - Chain [1] start processing
22:23:14 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] done processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] done processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] done processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] start processing
22:23:15 - cmdstanpy - INFO - Chain [1] done processing
22:23:15 - cmdstanpy - INFO - Chain [1] done processing
22:23:15 - cmdstanpy - INFO - Chain [

Testing params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 10.0}


22:23:17 - cmdstanpy - INFO - Chain [1] done processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] done processing
22:23:19 - cmdstanpy - INFO - Chain [1] done processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] done processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] start processing
22:23:19 - cmdstanpy - INFO - Chain [1] done processing
22:23:19 - cmdstanpy - INFO - Chain [1] done processing
22:23:19 - cmdstanpy - INFO - Chain [1] done processing
22:23:20 - cmdstanpy - INFO - Chain [1

Testing params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 0.01}


22:23:22 - cmdstanpy - INFO - Chain [1] done processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] done processing
22:23:24 - cmdstanpy - INFO - Chain [1] start processing
22:23:24 - cmdstanpy - INFO - Chain [1] done processing
22:23:24 - cmdstanpy - INFO - Chain [1] done processing
22:23:24 - cmdstanpy - INFO - Chain [1] done processing
22:23:25 - cmdstanpy - INFO - Chain [1] start processing
22:23:25 - cmdstanpy - INFO - Chain 

Testing params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 0.1}


22:23:28 - cmdstanpy - INFO - Chain [1] done processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:29 - cmdstanpy - INFO - Chain [1] start processing
22:23:30 - cmdstanpy - INFO - Chain [1] start processing
22:23:30 - cmdstanpy - INFO - Chain [1] done processing
22:23:30 - cmdstanpy - INFO - Chain [1] done processing
22:23:30 - cmdstanpy - INFO - Chain [1] done processing
22:23:30 - cmdstanpy - INFO - Chain [1] done processing
22:23:30 - cmdstanpy - INFO - Chain [1] start processing
22:23:30 - cmdstanpy - INFO - Chain [1] start processing
22:23:31 - cmdstanpy - INFO - Chain 

Testing params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 1.0}


22:23:33 - cmdstanpy - INFO - Chain [1] start processing
22:23:34 - cmdstanpy - INFO - Chain [1] done processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] start processing
22:23:35 - cmdstanpy - INFO - Chain [1] done processing
22:23:36 - cmdstanpy - INFO - Chain [1] done processing
22:23:36 - cmdstanpy - INFO - Chain [1] done processing
22:23:36 - cmdstanpy - INFO - Chain [1] done processing
22:23:36 - cmdstanpy - INFO - Chain [1] start processing
22:23:36 - cmdstanpy - INFO - Chain 

Testing params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 10.0}


22:23:40 - cmdstanpy - INFO - Chain [1] done processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] start processing
22:23:41 - cmdstanpy - INFO - Chain [1] done processing
22:23:42 - cmdstanpy - INFO - Chain [1] done processing
22:23:42 - cmdstanpy - INFO - Chain [1] start processing
22:23:42 - cmdstanpy - INFO - Chain [1] done processing
22:23:42 - cmdstanpy - INFO - Chain [1] done processing
22:23:42 - cmdstanpy - INFO - Chain [1] start processing
22:23:42 - cmdstanpy - INFO - Chain 

Testing params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 0.01}


22:23:45 - cmdstanpy - INFO - Chain [1] start processing
22:23:46 - cmdstanpy - INFO - Chain [1] done processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:47 - cmdstanpy - INFO - Chain [1] start processing
22:23:48 - cmdstanpy - INFO - Chain [1] start processing
22:23:48 - cmdstanpy - INFO - Chain [1] done processing
22:23:48 - cmdstanpy - INFO - Chain [1] done processing
22:23:49 - cmdstanpy - INFO - Chain [1] start processing
22:23:49 - cmdstanpy - INFO - Chain [1] done processing
22:23:49 - cmdstanpy - INFO - Chain [1] start processing
22:23:49 - cmdstanpy - INFO - Chain

Testing params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 0.1}


22:23:52 - cmdstanpy - INFO - Chain [1] start processing
22:23:53 - cmdstanpy - INFO - Chain [1] done processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] start processing
22:23:55 - cmdstanpy - INFO - Chain [1] done processing
22:23:56 - cmdstanpy - INFO - Chain [1] start processing
22:23:56 - cmdstanpy - INFO - Chain [1] done processing
22:23:56 - cmdstanpy - INFO - Chain [1] done processing
22:23:56 - cmdstanpy - INFO - Chain [1] start processing
22:23:56 - cmdstanpy - INFO - Chain

Testing params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 1.0}


22:24:00 - cmdstanpy - INFO - Chain [1] start processing
22:24:01 - cmdstanpy - INFO - Chain [1] done processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:02 - cmdstanpy - INFO - Chain [1] start processing
22:24:03 - cmdstanpy - INFO - Chain [1] done processing
22:24:03 - cmdstanpy - INFO - Chain [1] done processing
22:24:03 - cmdstanpy - INFO - Chain [1] done processing
22:24:03 - cmdstanpy - INFO - Chain [1] start processing
22:24:03 - cmdstanpy - INFO - Chain [1] start processing
22:24:03 - cmdstanpy - INFO - Chain

Testing params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 10.0}


22:24:07 - cmdstanpy - INFO - Chain [1] start processing
22:24:08 - cmdstanpy - INFO - Chain [1] done processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:09 - cmdstanpy - INFO - Chain [1] start processing
22:24:10 - cmdstanpy - INFO - Chain [1] done processing
22:24:10 - cmdstanpy - INFO - Chain [1] start processing
22:24:10 - cmdstanpy - INFO - Chain [1] done processing
22:24:10 - cmdstanpy - INFO - Chain [1] done processing
22:24:10 - cmdstanpy - INFO - Chain [1] done processing
22:24:11 - cmdstanpy - INFO - Chain 


Best parameters found: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01} with RMSE: 14034.847344933105
Training Prophet with params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01}


22:24:14 - cmdstanpy - INFO - Chain [1] start processing
22:24:14 - cmdstanpy - INFO - Chain [1] done processing


Training complete.
Predicting with Prophet...
--- Prophet Metrics ---
RMSE: 34276.6466
MAE: 26234.9859
--- Tuning Hyperparameters for Random Forest ---
Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] END max_depth=None, max_features=1.0, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.4s
[CV] END max_depth=None, max_features=1.0, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.4s
[CV] END max_depth=None, max_features=1.0, min_samples_leaf=4, min_samples_split=5, n_estimators=100; total time=   0.4s
[CV] END max_depth=10, max_features=1.0, min_samples_leaf=1, min_samples_split=2, n_estimators=1000; total time=   3.7s
[CV] END max_depth=10, max_features=1.0, min_samples_leaf=1, min_samples_split=2, n_estimators=1000; total time=   3.8s
[CV] END max_depth=10, max_features=1.0, min_samples_leaf=1, min_samples_split=2, n_estimators=1000; total time=   4.0s
[CV] END max_depth=20, max_features=1.0, min_samples_leaf=1, min

Epoch 1/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3319332864.0000
Epoch 2/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 620038976.0000
Epoch 3/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 566404352.0000
Epoch 4/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 241687888.0000
Epoch 5/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 209214080.0000
Epoch 6/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 141467120.0000
Epoch 7/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 84425944.0000
Epoch 8/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 92083400.0000
Epoch 9/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 83117128.0000
Epoch 10/70
[1m124/1

Epoch 1/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3697553152.0000
Epoch 2/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1305086976.0000
Epoch 3/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 472914560.0000
Epoch 4/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 252217792.0000
Epoch 5/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 160853504.0000
Epoch 6/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 76586888.0000 
Epoch 7/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 45052948.0000
Epoch 8/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 30643568.0000
Epoch 9/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 28959792.0000
Epoch 10/70
[1m124/

Epoch 1/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 4386308096.0000
Epoch 2/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1372758272.0000
Epoch 3/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 428817344.0000
Epoch 4/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 256898512.0000
Epoch 5/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 185732320.0000
Epoch 6/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 103675056.0000
Epoch 7/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 92477032.0000
Epoch 8/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 99987632.0000
Epoch 9/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 914822720.0000
Epoch 10/70
[1m123

Epoch 1/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1126533888.0000
Epoch 2/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 234330480.0000
Epoch 3/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 136553632.0000
Epoch 4/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 107655408.0000
Epoch 5/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 84696888.0000
Epoch 6/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 73767488.0000
Epoch 7/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 63578676.0000
Epoch 8/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 58395920.0000
Epoch 9/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 61505536.0000
Epoch 10/70
[1m124/124

Epoch 1/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 8141628416.0000 
Epoch 2/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1045493696.0000
Epoch 3/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 739705792.0000
Epoch 4/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 966204032.0000
Epoch 5/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2306524928.0000
Epoch 6/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 636936704.0000
Epoch 7/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 342517696.0000
Epoch 8/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 237847696.0000
Epoch 9/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 233398928.0000
Epoch 10/70
[1

Epoch 1/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 38662799360.0000
Epoch 2/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 9542609920.0000 
Epoch 3/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 3201524992.0000
Epoch 4/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2467162112.0000
Epoch 5/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1961229568.0000
Epoch 6/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1619803008.0000
Epoch 7/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1009113856.0000
Epoch 8/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 567905856.0000
Epoch 9/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 572712576.0000
Epoch 10/7

Epoch 1/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 7689307648.0000 
Epoch 2/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 856614144.0000
Epoch 3/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 495697408.0000
Epoch 4/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 246424896.0000
Epoch 5/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 198881456.0000
Epoch 6/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 160440992.0000
Epoch 7/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 293760544.0000
Epoch 8/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 308532384.0000
Epoch 9/70
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 233466064.0000
Epoch 10/70
[1m1

Epoch 1/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 105316016128.0000
Epoch 2/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 3214307840.0000
Epoch 3/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 1155681792.0000
Epoch 4/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 741323712.0000
Epoch 5/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 635555840.0000
Epoch 6/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 368408480.0000
Epoch 7/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 274614944.0000
Epoch 8/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 275446944.0000
Epoch 9/70
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 213971232.0000
Epoch 10/70
[

Epoch 1/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 764951232.0000 
Epoch 2/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 130672152.0000
Epoch 3/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 45409408.0000
Epoch 4/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 26063848.0000
Epoch 5/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22362054.0000
Epoch 6/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 23204172.0000
Epoch 7/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 21791062.0000
Epoch 8/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 19077354.0000
Epoch 9/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 18705398.0000
Epoch 10/50
[1m124/124[

Epoch 1/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 873827840.0000 
Epoch 2/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 127696160.0000
Epoch 3/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 40738560.0000
Epoch 4/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 21634130.0000
Epoch 5/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 18797936.0000
Epoch 6/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16454221.0000
Epoch 7/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14381921.0000
Epoch 8/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 12524850.0000
Epoch 9/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 10834588.0000
Epoch 10/50
[1m123/123[

In [None]:
import plotly.graph_objects as go

# Fixed colour per model so a model looks the same in every figure.
color_map = {
    'XGBoost': 'cyan',
    'ARIMA': 'orange',
    'SARIMA': 'gold',
    'Prophet': 'royalblue',
    'Random Forest': 'lightgreen',
    'LightGBM': 'magenta',
    'LSTM (7-day seq)': 'deeppink',
    'LSTM (10-day seq)': 'red',
    'LSTM (15-day seq)': 'fuchsia',
    'Bidirectional LSTM (7-day seq)' : 'mediumspringgreen',
    'Bidirectional LSTM (10-day seq)': 'aliceblue',
    'Bidirectional LSTM (15-day seq)': 'aquamarine',
    'Stacked LSTM (10-day seq)': 'darkviolet',
    'Stacked LSTM (15-day seq)': 'yellow',
    'GRU (10-day seq)': 'salmon',
    'GRU (15-day seq)': 'darkorchid'
}

ml_model_names = ['XGBoost', 'ARIMA', 'SARIMA', 'Prophet', 'Random Forest', 'LightGBM']
# Every other entry in `results` is treated as a deep learning model.
# 'Ensemble ...' entries are skipped: they are added to `results` by later
# cells, and without this filter re-running this cell would plot them as
# deep learning forecasts.
dl_model_names = [
    name for name in results
    if name not in ml_model_names and not name.startswith('Ensemble')
]


def plot_forecasts(model_names, title, results, y_true, colors):
    """Build a figure comparing model forecasts with the actual series.

    Args:
        model_names: Names of the models to draw, in legend order. Names that
            are missing from ``results`` (model was not run) are skipped.
        title: Figure title.
        results: Mapping of model name -> dict holding a 'predictions' Series.
        y_true: Series of actual values; drawn as a thick white line.
        colors: Mapping of model name -> line colour; unknown names use grey.

    Returns:
        The populated ``plotly.graph_objects.Figure`` (not yet shown).
    """
    fig = go.Figure()

    # Actual price first so it is the first legend entry.
    fig.add_trace(go.Scatter(
        x=y_true.index, y=y_true, mode='lines', name='Actual Price',
        line=dict(color='white', width=3)
    ))

    for model_name in model_names:
        if model_name not in results:
            continue
        predictions = results[model_name]['predictions']
        # Sequence models produce fewer predictions than there are test rows;
        # using each prediction's own index keeps it on the right dates.
        fig.add_trace(go.Scatter(
            x=predictions.index,
            y=predictions,
            mode='lines', name=f'{model_name} Forecast',
            line=dict(color=colors.get(model_name, 'grey'), width=1.5, dash='dot')
        ))

    fig.update_layout(
        title=title,
        xaxis_title='Date', yaxis_title='Bitcoin Price (USD)', template='plotly_dark',
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    return fig


# --- PLOT 1: MACHINE LEARNING MODELS ---
print("\n--- Generating Plot for Machine Learning Models ---")
fig_ml = plot_forecasts(
    ml_model_names, 'Machine Learning Model Forecasts vs. Actual Price',
    results, y_test, color_map
)
fig_ml.show()


# --- PLOT 2: DEEP LEARNING MODELS ---
print("\n--- Generating Plot for Deep Learning Models ---")
fig_dl = plot_forecasts(
    dl_model_names, 'Deep Learning Model Forecasts vs. Actual Price',
    results, y_test, color_map
)
fig_dl.show()


--- Generating Plot for Machine Learning Models ---



--- Generating Plot for Deep Learning Models ---


In [None]:
# Lookup of trained model objects by display name; the stacking cell reads
# the base models from here.
all_models = {model.model_name: model for model in ml_models_to_run}


# --- Implement the Ensemble Methods ---
from sklearn.linear_model import LinearRegression

# A. SIMPLE AVERAGING
print("\n\n--- Running Ensemble: Simple Averaging ---")

# Base models whose test-set predictions are combined.
top_3_model_names = [
    "GRU (15-day seq)",
    "Bidirectional LSTM (10-day seq)",
    "GRU (10-day seq)"
]

# One column per base model, joined on the prediction index. Driven by
# `top_3_model_names`, so changing the list is the only edit needed to
# ensemble a different set of models.
ensemble_df = pd.concat(
    [results[name]['predictions'] for name in top_3_model_names],
    axis=1
)
ensemble_df.columns = top_3_model_names

# Models with different sequence lengths start predicting on different dates;
# .dropna() keeps only the dates where every base model has a prediction.
ensemble_df_aligned = ensemble_df.dropna()
if ensemble_df_aligned.empty:
    raise ValueError(
        "No overlapping prediction dates across the selected base models: "
        f"{top_3_model_names}"
    )

# Row-wise mean; the rows are already NaN-free, so no further dropna is needed.
average_predictions = ensemble_df_aligned.mean(axis=1)

# Restrict the ground truth to the same dates (explicit label-based lookup).
aligned_y_test = y_test.loc[ensemble_df_aligned.index]
# evaluate() is borrowed from the XGBoost wrapper only for its metric
# computation. NOTE(review): it prints a "--- XGBoost Metrics ---" header,
# which is misleading for an ensemble; a standalone metrics helper would
# read better.
ensemble_avg_metrics = model_xgb.evaluate(aligned_y_test, average_predictions)

results['Ensemble Average'] = {
    'metrics': ensemble_avg_metrics,
    'predictions': average_predictions
}
print(f"Simple Averaging Metrics: {ensemble_avg_metrics}")



--- Running Ensemble: Simple Averaging ---
--- XGBoost Metrics ---
RMSE: 4408.9659
MAE: 3498.4533
Simple Averaging Metrics: {'rmse': 4408.965862739233, 'mae': 3498.453301644399}


In [None]:
# B. STACKING WITH A META-LEARNER
print("\n\n--- Running Ensemble: Stacking ---")

# Stacking uses the base models' predictions as input features for a
# second-level model (the meta-learner).
# NOTE(review): the meta-features below are base-model predictions on
# X_train. If the base models were fitted on that same data (presumably they
# were - confirm), these are in-sample predictions and the meta-learner may
# overfit; out-of-fold predictions are the usual remedy.

meta_X_train_list = []
for model_name in top_3_model_names:
    print(f"Generating training predictions for meta-learner from: {model_name}")
    model = all_models[model_name]

    # Rebuild the model's input windows from the training data.
    seq_len = model.params['sequence_length']
    X_train_seq, _ = model._create_sequences(X_train, y_train, seq_len)

    # Base-model predictions on the training windows, flattened to 1-D.
    train_preds = model.model.predict(X_train_seq).flatten()

    # Label each prediction with its target date.
    # Assumes _create_sequences yields one sample per row from position
    # seq_len onward - TODO confirm. pd.Series raises on a length mismatch.
    aligned_train_preds = pd.Series(train_preds, index=y_train.iloc[seq_len:].index)
    meta_X_train_list.append(aligned_train_preds)

# One column per base model; keep only dates every base model predicted.
meta_X_train = pd.concat(meta_X_train_list, axis=1).dropna()
meta_X_train.columns = top_3_model_names
# Targets for exactly the rows of meta_X_train (explicit label-based lookup).
meta_y_train = y_train.loc[meta_X_train.index]

# Test-side features and labels are reused from the simple-averaging cell, so
# both ensembles are scored on the same dates.
meta_X_test = ensemble_df_aligned
aligned_y_test_stacking = aligned_y_test

# --- Train the meta-learner and make the final prediction ---
print("\nTraining the meta-learner (Linear Regression)...")
# Learns one weight per base model plus an intercept, i.e.
# final = w1*GRU(15) + w2*BiLSTM(10) + w3*GRU(10) + b
meta_learner = LinearRegression()
meta_learner.fit(meta_X_train, meta_y_train)

print("Making final stacked prediction...")
stacked_predictions = meta_learner.predict(meta_X_test)
stacked_predictions_series = pd.Series(stacked_predictions, index=meta_X_test.index)

# --- Evaluate the stacked predictions ---
# evaluate() is borrowed from the XGBoost wrapper only for its metric
# computation; the header it prints does not describe this model.
stacked_metrics = model_xgb.evaluate(aligned_y_test_stacking, stacked_predictions_series)

# --- Store the results ---
results['Ensemble Stacking'] = {
    'metrics': stacked_metrics,
    'predictions': stacked_predictions_series
}
print(f"Stacking Metrics: {stacked_metrics}")



--- Running Ensemble: Stacking ---
Generating training predictions for meta-learner from: GRU (15-day seq)
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Generating training predictions for meta-learner from: Bidirectional LSTM (10-day seq)
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Generating training predictions for meta-learner from: GRU (10-day seq)
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 778us/step

Training the meta-learner (Linear Regression)...
Making final stacked prediction...
--- XGBoost Metrics ---
RMSE: 2903.5796
MAE: 1951.3465
Stacking Metrics: {'rmse': 2903.5795886399455, 'mae': 1951.346508061408}


In [None]:
# --- Plot the Ensemble Results ---
print("\n--- Generating Plot for Ensemble Models ---")

# Register colours for the two ensemble entries.
color_map.update({
    'Ensemble Average': 'brown',
    'Ensemble Stacking': 'lime',
})

fig_ensemble = go.Figure()

# Ground truth, restricted to the dates shared by all base models.
actual_trace = go.Scatter(
    x=aligned_y_test.index, y=aligned_y_test, mode='lines', name='Actual Price',
    line=dict(color='white', width=3)
)
fig_ensemble.add_trace(actual_trace)

# Each group is (model names, legend suffix, line styling).
# Base models are drawn thin and dotted; ensembles thicker and solid.
trace_groups = [
    (top_3_model_names, ' (Individual)', dict(width=1.5, dash='dot')),
    (['Ensemble Average', 'Ensemble Stacking'], '', dict(width=2.5)),
]
for group_names, legend_suffix, line_style in trace_groups:
    for model_name in group_names:
        forecast = results[model_name]['predictions']
        fig_ensemble.add_trace(go.Scatter(
            x=forecast.index,
            y=forecast,
            mode='lines', name=f'{model_name}{legend_suffix}',
            line=dict(color=color_map.get(model_name), **line_style)
        ))

fig_ensemble.update_layout(
    title='Ensemble Model Forecasts vs. Actual Price',
    xaxis_title='Date', yaxis_title='Bitcoin Price (USD)', template='plotly_dark',
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)
fig_ensemble.show()

# --- Display the Final Metrics Table with Ensembles ---
# One row per entry in `results`, best (lowest) RMSE first.
metrics_list = [
    {
        'Model': name,
        'RMSE': entry['metrics']['rmse'],
        'MAE': entry['metrics']['mae'],
    }
    for name, entry in results.items()
]

metrics_df = pd.DataFrame(metrics_list)
metrics_df_sorted = metrics_df.sort_values(by='RMSE', ascending=True).reset_index(drop=True)

print("\n\n--- FINAL MODEL RANKING (Including Ensembles) ---")
print(metrics_df_sorted)


--- Generating Plot for Ensemble Models ---




--- FINAL MODEL RANKING (Including Ensembles) ---
                              Model          RMSE           MAE
0                 Ensemble Stacking   2903.579589   1951.346508
1                  Ensemble Average   4408.965863   3498.453302
2                  GRU (15-day seq)   6053.050844   4851.200502
3   Bidirectional LSTM (10-day seq)   7000.922463   5910.034421
4                  GRU (10-day seq)   8319.737181   5583.319332
5                     Random Forest  19313.250817  10730.283808
6                 LSTM (10-day seq)  19822.162095  12218.471929
7                           XGBoost  20724.097238  12201.060001
8                          LightGBM  20901.771924  12273.938354
9   Bidirectional LSTM (15-day seq)  22352.367357  17767.923349
10   Bidirectional LSTM (7-day seq)  27325.750321  13106.048911
11                LSTM (15-day seq)  29187.114984  15905.238029
12                          Prophet  34276.646558  26234.985851
13        Stacked LSTM (15-day seq)  42609.513730  3

In [None]:
# --- VISUALIZATION: RESIDUAL PLOTS ---
import plotly.express as px

print("\n\n--- Generating Residual Plots ---")

# Residuals are inspected only for the best-ranked models.
models_to_analyze = [
    "Ensemble Stacking",
    "Ensemble Average",
    "GRU (15-day seq)"
]

for model_name in models_to_analyze:
    # Skip anything that was not produced in this session.
    if model_name not in results:
        continue

    predictions = results[model_name]['predictions']
    # Actual minus predicted, using only the dates the model predicted.
    residuals = y_test[predictions.index] - predictions

    fig_res = px.scatter(
        x=residuals.index,
        y=residuals,
        title=f'Residuals (Errors) for {model_name}',
        labels={'x': 'Date', 'y': 'Error (Actual - Predicted)'},
        template='plotly_dark'
    )
    # Dashed zero line as the "no error" baseline.
    fig_res.add_hline(y=0, line_width=2, line_dash="dash", line_color="red")
    fig_res.show()



--- Generating Residual Plots ---


In [None]:
print("\n\n--- Generating Feature Importance Plots (for applicable models) ---")

# Trained model objects keyed by their display name.
all_models = {trained.model_name: trained for trained in ml_models_to_run}

for model_name, model_instance in all_models.items():
    # Only estimators that expose `feature_importances_` can be plotted.
    if not hasattr(model_instance.model, 'feature_importances_'):
        continue

    print(f"-> Found feature importances for: {model_name}")

    # Pair each training column with its importance score, highest first.
    importance_df = pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': model_instance.model.feature_importances_
    })
    importance_df = importance_df.sort_values(by='Importance', ascending=False)

    # Keep the plot readable: top 20 only.
    top_20_features = importance_df.iloc[:20]

    # Horizontal bars: bar length is the importance, bar label is the feature.
    fig_importance = px.bar(
        top_20_features,
        x='Importance',
        y='Feature',
        orientation='h',
        title=f'Top 20 Feature Importances for {model_name}',
        template='plotly_dark'
    )

    # Order the bars so the largest sits at the top.
    fig_importance.update_yaxes(categoryorder='total ascending')

    fig_importance.show()



--- Generating Feature Importance Plots (for applicable models) ---
-> Found feature importances for: XGBoost


-> Found feature importances for: Random Forest


-> Found feature importances for: LightGBM
