## Crime forecasting with NeuralProphet - Optimizing parameters using snow and precipation as additional regressors.

1. Importing libraries

In [23]:
import optuna
import pandas as pd
import numpy as np
from neuralprophet import NeuralProphet
from joblib import parallel_backend

2. Importing daily crime count with weather data. Dropping all columns except Snow Depth and precipitation.    

In [24]:
df = pd.read_csv("../../data/daily-count-w-weather.csv")
df = df[["ds", "y", "SNWD_x", "PRCP_x"]]
df["ds"] = pd.to_datetime(df["ds"])

# Random seed for reproducibility
import random
random.seed(42)
np.random.seed(42)

3. Defining objective function with parameters to optimize and RMSE as definitive metric.

In [25]:
def objective(trial):
    # Hyperparameters to optimize
    n_changepoints = trial.suggest_int("n_changepoints", 5, 20)
    changepoints_range = trial.suggest_float("changepoints_range", 0.5, 1.0)
    trend_reg = trial.suggest_float("trend_reg", 0.001, 10, log=True)
    num_hidden_layers = trial.suggest_int("num_hidden_layers", 0, 5)
    d_hidden = trial.suggest_int("d_hidden", 8, 512, log=True)
    learning_rate = trial.suggest_float("learning_rate", 0.001, 1, log=True)
    epochs = trial.suggest_int("epochs", 5, 200)
    batch_size = trial.suggest_int("batch_size", 8, 1024, log=True)

    m = NeuralProphet(
        n_changepoints=n_changepoints,
        changepoints_range=changepoints_range,
        trend_reg=trend_reg,
        num_hidden_layers=num_hidden_layers,
        d_hidden=d_hidden,
        learning_rate=learning_rate,
        epochs=epochs,
        batch_size=batch_size
    )

    #Adding regressors
    m = m.add_future_regressor("SNWD_x")
    m = m.add_future_regressor("PRCP_x")

    df_train, df_test = m.split_df(df, freq="D", valid_p=0.2)
    try:
        metrics = m.fit(df_train, freq="D", validation_df=df_test, early_stopping_patience=10)
        # Returning the validation loss as the objective to minimize
        rmse = metrics["RMSE"].tail(1).values[0]
        # If model fails to produce a result, return infinity
    except Exception as e:
        print(f"Exception encountered during trial: {e}")
        rmse = np.inf

    return rmse

4. Using Optuna for automatic hyperparameter optimization

In [26]:
study = optuna.create_study(direction="minimize")

[32m[I 2023-05-12 16:43:13,826][0m A new study created in memory with name: no-name-da8eacfd-9b97-4649-a2d2-49f7168e61fe[0m


5. Define properties for Optuna - Max CPU cores for parallelization and 50 trials for given objective.

In [27]:
n_jobs = -1  
n_trials = 50
timeout = None  

with parallel_backend("threading", n_jobs=n_jobs):
    study.optimize(objective, n_trials=n_trials, timeout=timeout, n_jobs=n_jobs)

# Printing the best hyperparameters
print("Best hyperparameters:", study.best_params)

INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argum

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
[32m[I 2023-05-12 16:43:14,981][0m Trial 19 finished with value: inf and parameters: {'n_changepoints': 15, 'changepoints_range': 0.6323494904632276, 'trend_reg': 0.001973471236921979, 'num_hidden_layers': 3, 'd_hidden': 503, 'learning_rate': 0.7711163435434142, 'epochs': 27, 'batch_size': 268}. Best is trial 0 with value: inf.[0m
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:14,990][0m Trial 17 finished with value: inf and parameters: {'n_changepoints': 17, 'changepoints_range': 0.9479351849398898, 'trend_reg': 0.005336504631717149, 'num_hidden_layers': 5, 'd_hidden': 93, 'learning_rate': 0.16447091493866345, 'epochs': 40, 'batch_size': 477}. Best is trial 0 with value: inf.[0m
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularizati

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:15,253][0m Trial 23 finished with value: inf and parameters: {'n_changepoints': 14, 'changepoints_range': 0.8528036960429314, 'trend_reg': 2.6608896225982677, 'num_hidden_layers': 3, 'd_hidden': 508, 'learning_rate': 0.007413759459247041, 'epochs': 134, 'batch_size': 82}. Best is trial 0 with value: inf.[0m
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._in

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:15,464][0m Trial 24 finished with value: inf and parameters: {'n_changepoints': 15, 'changepoints_range': 0.9818990265549079, 'trend_reg': 0.001588181286809703, 'num_hidden_layers': 2, 'd_hidden': 502, 'learning_rate': 0.008439198947661038, 'epochs': 139, 'batch_size': 60}. Best is trial 0 with value: inf.[0m
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:15,477][0m Trial 25 finished with value: inf and parameters: {'n_changepoints': 9, 'changepoints_range': 0.993763146

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:15,704][0m Trial 33 finished with value: inf and parameters: {'n_changepoints': 8, 'changepoints_range': 0.756627623351679, 'trend_reg': 0.0011600962960659474, 'num_hidden_layers': 2, 'd_hidden': 307, 'learning_rate': 0.0010104648535548964, 'epochs': 79, 'batch_size': 184}. Best is trial 0 with value: inf.[0m
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:15,845][0m Trial 34 finished 

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:15,978][0m Trial 35 finished with value: inf and parameters: {'n_changepoints': 8, 'changepoints_range': 0.9984818668095492, 'trend_reg': 0.0013794992114484603, 'num_hidden_layers': 2, 'd_hidden': 298, 'learning_rate': 0.0012294520799410204, 'epochs': 84, 'batch_size': 9}. Best is trial 0 with value: inf.[0m
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.__post_init__) - Note:

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.955% of the data.
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:16,299][0m Trial 36 finished with value: inf and parameters: {'n_changepoints': 8, 'changepoints_range': 0.9048930703755634, 'trend_reg': 8.709881770130147, 'num_hi

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:16,524][0m Trial 39 finished with value: inf and parameters: {'n_changepoints': 8, 'changepoints_range': 0.7421586042685545, 'trend_reg': 0.010394127787866551, 'num_hidden_layers': 4, 'd_hidden': 280, 'learning_rate': 0.01904090136781748, 'epochs': 93, 'batch_size': 123}. Best is trial 0 with value: inf.[0m
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:16,592][0m Trial 42 finished with value: inf and parameters: {'n_changepoints': 6, 'changepoints_range': 0.7528813799134325, 

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'


[32m[I 2023-05-12 16:43:16,714][0m Trial 46 finished with value: inf and parameters: {'n_changepoints': 6, 'changepoints_range': 0.9065223909952755, 'trend_reg': 0.01203155088603785, 'num_hidden_layers': 4, 'd_hidden': 219, 'learning_rate': 0.020370976149208905, 'epochs': 171, 'batch_size': 121}. Best is trial 0 with value: inf.[0m
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
[32m[I 2023-05-12 16:43:16,724][0m Trial 47 finished with value: inf and parameters: {'n_changepoints': 10, 'changepoints_range': 0.9167806442521833, 'trend_reg': 0.011158947681602428, 'num_hidden_layers': 4, 'd_hidden': 227, 'learning_rate': 0.05389020423661378, 'epochs': 165, 'batch_size': 26}. Best is trial 0 with value: inf.[0m
INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Trend changepoint regulariza

Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Exception encountered during trial: fit() got an unexpected keyword argument 'early_stopping_patience'
Best hyperparameters: {'n_changepoints': 14, 'changepoints_range': 0.9116230261091516, 'trend_reg': 0.0014209946662592436, 'num_hidden_layers': 2, 'd_hidden': 480, 'learning_rate': 0.008254072786634332, 'epochs': 84, 'batch_size': 314}


In [28]:
print("Best hyperparameters:", study.best_params)

Best hyperparameters: {'n_changepoints': 14, 'changepoints_range': 0.9116230261091516, 'trend_reg': 0.0014209946662592436, 'num_hidden_layers': 2, 'd_hidden': 480, 'learning_rate': 0.008254072786634332, 'epochs': 84, 'batch_size': 314}
