## Optimizing Crime forecasting with NeuralProphet - Optimizing parameters with Optuna using snow and precipitation as additional regressors.

1. Importing libraries

In [1]:
import random
from pathlib import Path

import numpy as np
import optuna
import pandas as pd
from joblib import parallel_backend
from neuralprophet import NeuralProphet
from neuralprophet import utils
# One shared seed for every random source seeded here: Python's `random`,
# NumPy's global generator, and NeuralProphet's own seeding helper.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
utils.set_random_seed(seed=SEED)

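2. Importing daily crime count with weather data. Keeping only the date (`ds`), the crime count (`y`), snow depth (`SNWD_x`) and precipitation (`PRCP_x`), then splitting the rows (in file order) into the first 80% for training and the remaining 20% for testing.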

In [2]:
# Load the daily crime counts joined with weather observations and keep only
# the columns used below: date, target, and the two weather regressors.
# `ds` deliberately stays a regular column (not the index) because the
# objective later merges the forecast back onto the test rows with on='ds'.
df = (
    pd.read_csv("../../../data/daily-count-w-weather.csv")[["ds", "y", "SNWD_x", "PRCP_x"]]
    .assign(ds=lambda frame: pd.to_datetime(frame["ds"]))
)

# Positional 80/20 split: the first 80% of rows train the model, the rest
# are held out. Copies keep the two frames independent of `df`.
split_at = int(0.8 * len(df))
train = df.iloc[:split_at].copy()
test = df.iloc[split_at:].copy()
train.tail(), test.head()

(             ds   y  SNWD_x  PRCP_x
 1788 2021-11-24  33     0.0    0.00
 1789 2021-11-25  16     0.0    0.17
 1790 2021-11-26  48     0.0    0.00
 1791 2021-11-27  35     0.0    0.00
 1792 2021-11-28  25     0.0    0.00,
              ds   y  SNWD_x  PRCP_x
 1793 2021-11-29  47     0.0     0.0
 1794 2021-11-30  45     0.0     0.0
 1795 2021-12-01  45     0.0     0.0
 1796 2021-12-02  43     0.0     0.0
 1797 2021-12-03  45     0.0     0.0)

3. Defining the objective function: the hyperparameters to optimize, with RMSE on the test split as the metric to minimize.

In [3]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def objective(trial):
    """Fit one NeuralProphet model with trial-suggested hyperparameters and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample ``learning_rate``, ``trend_reg``,
        ``seasonality_reg`` and ``ar_reg``.

    Returns
    -------
    float
        RMSE between ``test['y']`` and the forecast column ``yhat1`` over the
        test dates, or ``np.inf`` when scoring raises (for example when the
        forecast contains NaNs), so the trial ranks last instead of aborting
        the study.
    """
    # Sample the learning rate on a log scale: a linear grid with step=0.1
    # starting at 0.001 contains a single value below 0.1, and the range
    # [0.001, 10] is not divisible by that step.
    learning_rate = trial.suggest_float('learning_rate', 0.001, 10, log=True)
    trend_reg = trial.suggest_float('trend_reg', 0, 100, step=0.1)
    seasonality_reg = trial.suggest_float('seasonality_reg', 0, 100, step=0.1)
    ar_reg = trial.suggest_float('ar_reg', 0, 100, step=0.1)

    m = NeuralProphet(
        yearly_seasonality="auto",
        weekly_seasonality="auto",
        learning_rate=learning_rate,
        trend_reg=trend_reg,
        seasonality_reg=seasonality_reg,
        ar_reg=ar_reg,
    )
    m.set_plotting_backend("plotly")
    m = m.add_country_holidays(country_name='US')
    m = m.add_future_regressor("SNWD_x")
    m = m.add_future_regressor("PRCP_x")
    m.fit(train)

    # Forecast exactly as many periods as there are test rows, supplying the
    # test-period regressor values.
    future = m.make_future_dataframe(
        train,
        periods=test.shape[0],
        regressors_df=test[["SNWD_x", "PRCP_x"]],
    )
    forecast = m.predict(future)

    try:
        # Left-join so every test row is kept; dates missing from the forecast
        # become NaN and make the metric raise, which is handled below.
        scored = test.merge(forecast[['ds', 'yhat1']], how='left', on='ds')
        # Take the square root explicitly instead of passing squared=False,
        # which newer scikit-learn releases no longer accept.
        rmse = float(np.sqrt(mean_squared_error(scored['y'], scored['yhat1'])))
        print(f"RMSE: {rmse}")
        print(f"Trial: {trial.number}")
        return rmse

    except Exception as e:
        print(f"Exception encountered during trial: {trial.number}")
        print(e)
        return np.inf
   

4. Using Optuna for automatic hyperparameter optimization

In [4]:
# Seed the sampler explicitly: seeding `random`/NumPy alone does not fix the
# sequence of hyperparameters Optuna suggests, so the search would otherwise
# differ from run to run.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)

[32m[I 2023-05-14 15:04:43,375][0m A new study created in memory with name: no-name-18cc881e-2556-40b1-a8f2-458b19d1b13e[0m


5. Running the Optuna study - 100 trials of the objective. No `n_jobs` or parallel backend is configured, so the trials are not parallelized.

In [5]:

# Number of objective evaluations the study performs.
N_TRIALS = 100
study.optimize(func=objective, n_trials=N_TRIALS)

# Report the best parameter set found by the study.
print("Best hyperparameters:", study.best_params)


INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Fourier-based seasonality regularization is experimental.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.944% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 163


Training: 0it [00:00, ?it/s]

In [None]:
# Tabulate every trial, best (lowest objective value) first. Stored under its
# own name so the crime dataset held in `df` is not overwritten.
trials_df = study.trials_dataframe().sort_values(by='value', ascending=True)
trials_df

6. Printing and saving optimized parameters.

In [None]:
print("Best hyperparameters:", study.best_params)
print("RMSE:", study.best_value)

# Create the output directory first so a fresh checkout does not lose the
# result of the search to a FileNotFoundError.
output_dir = Path('./hyperparameters')
output_dir.mkdir(parents=True, exist_ok=True)

# One `key=value` line per hyperparameter; the file name carries the best RMSE.
with open(output_dir / f'{study.best_value}_optuna.txt', 'w') as f:
    for key, value in study.best_params.items():
        f.write(f'{key}={value}\n')


