## Optimizing Crime forecasting with HyperOpt - using weather data as regressor.

1. Importing libraries

In [1]:
import optuna
import pandas as pd
import numpy as np
from neuralprophet import NeuralProphet 
from joblib import parallel_backend
import random
from neuralprophet import utils
random.seed(42)
np.random.seed(42)
utils.set_random_seed(seed=42)

2. Importing daily crime count with weather data.

In [2]:
df = pd.read_csv("../../../data/daily-count-w-weather.csv")
df = df[["ds", "y", "SNWD_x", "PRCP_x"]]
df["ds"] = pd.to_datetime(df["ds"])
df.set_index('ds')

train = df[:int(0.8 * (len(df)))]
test = df[int(0.8 * (len(df))):]
train.tail(), test.head()

(             ds   y  SNWD_x  PRCP_x
 1788 2021-11-24  33     0.0    0.00
 1789 2021-11-25  16     0.0    0.17
 1790 2021-11-26  48     0.0    0.00
 1791 2021-11-27  35     0.0    0.00
 1792 2021-11-28  25     0.0    0.00,
              ds   y  SNWD_x  PRCP_x
 1793 2021-11-29  47     0.0     0.0
 1794 2021-11-30  45     0.0     0.0
 1795 2021-12-01  45     0.0     0.0
 1796 2021-12-02  43     0.0     0.0
 1797 2021-12-03  45     0.0     0.0)

3. Defining search space and objective to minimize RMSE on train - test split evaluation.

In [None]:
import hyperopt
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

search_space = {
    'num_hidden_layers': hp.uniform('num_hidden_layers', 1, 30),
    'learning_rate': hp.quniform('learning_rate', 0.001, 10, 0.1),
    'trend_reg': hp.quniform('trend_reg', 0, 100, 0.1),
    'seasonality_reg': hp.quniform('seasonality_reg',  0, 100, 0.1),
    'd_hidden': hp.uniform('d_hidden', 1, 50),
    'ar_reg': hp.quniform('ar_reg',  0, 100, 0.1)}

#Disable verbose for neuralprophet
import sys
import logging
log = logging.getLogger("")
logging.disable(sys.maxsize)

#Minimal RMSE
def objective(params):
    print(params)
    m = NeuralProphet(**params, loss_func="MAE", yearly_seasonality=True)
    m = m.add_country_holidays(country_name='US')
    m = m.add_future_regressor("SNWD_x")
    m = m.add_future_regressor("PRCP_x")
    m.fit(train)
    future = m.make_future_dataframe(train, periods=test.shape[0], regressors_df=test[["SNWD_x", "PRCP_x"]])
    forecast = m.predict(future)
    forecast_on_train = m.predict(train)
    
    toTest = test.merge(forecast[['ds', 'yhat1']], how='left', on='ds')
    rmse = mean_squared_error(toTest['y'], toTest['yhat1'], squared=False)
    print("RMSE: ", rmse)
    return {'loss': rmse, 'status': STATUS_OK}

trials = Trials()
best = fmin(fn=objective,
            space=search_space,
            algo=tpe.suggest,
            max_evals=200,
            trials=trials)


4. Printing and saving optimized parameters.

In [4]:
print(best)
rmse = trials.best_trial['result']['loss']
with open(f'./hyperparameters/{rmse}_hyperopt.txt', 'w') as f:
    for key, value in best.items():
        f.write(f'{key}={value}\n')


{'ar_reg': 15.9, 'd_hidden': 2.043179564006717, 'learning_rate': 4.800000000000001, 'num_hidden_layers': 9.869792383069202, 'seasonality_reg': 32.6, 'trend_reg': 5.9}
