In [1]:
from neuralprophet import NeuralProphet, set_random_seed
import pandas as pd


In [2]:

# Read the cleaned incident export; every row carries an INCIDENT_DATE string.
INCIDENTS_CSV = '../data/lrpd-clean.csv'
df = pd.read_csv(INCIDENTS_CSV)

# Parse the date column into real timestamps so it can be grouped by day later.
incident_timestamps = pd.to_datetime(df['INCIDENT_DATE'])
df['INCIDENT_DATE'] = incident_timestamps

# Transposed preview: one column per record keeps the wide frame readable.
df.head().T

Unnamed: 0,0,1,2,3,4
INCIDENT_NUMBER,2021-092971,2017-029450,2017-156453,2019-130717,2020-073313
INCIDENT_DATE,2021-08-04 10:21:00,2017-03-16 18:30:00,2017-09-01 12:00:00,2019-10-17 21:00:00,2020-07-02 19:10:00
LOCATION_DISTRICT,72.0,82.0,53.0,51.0,50.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON,NO WEAPON
ZIP,72204.0,72206.0,0.0,0.0,0.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0
WEEK_OF_MONTH,0,2,0,2,0
YEAR,2021,2017,2017,2019,2020


In [3]:
# Count incidents per calendar day and shape the result into the two-column
# (`ds` = date, `y` = value) layout that NeuralProphet expects.
by_day = pd.Grouper(key='INCIDENT_DATE', freq='D')
df2 = (
    df.groupby(by_day)
    .size()
    .rename_axis('ds')
    .reset_index(name='y')
)
df2.tail()

Unnamed: 0,ds,y
2112,2022-10-14,49
2113,2022-10-15,28
2114,2022-10-16,31
2115,2022-10-17,47
2116,2022-10-18,35


In [4]:
# Chronological hold-out: the first 80% of the days are used for fitting,
# the remaining days are kept back for evaluation.
split_at = int(0.8 * len(df2))
train, test = df2.iloc[:split_at], df2.iloc[split_at:]

In [5]:
# Fix the random seed before building the model: training is stochastic, so
# without this the fitted model (and every score computed from it) changes on
# each "Restart & Run All".
set_random_seed(42)

# Default NeuralProphet model extended with US public-holiday events.
m = NeuralProphet()
m.add_country_holidays(country_name='US')

# Fit on the training days only; keep what fit() returns for later inspection.
metrics = m.fit(train, freq="D")

# Build a frame covering as many future days as there are held-out rows,
# then forecast those days.
future = m.make_future_dataframe(train, periods=len(test))
forecast = m.predict(future)

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.941% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 165


  0%|          | 0/131 [00:00<?, ?it/s]

INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 6.54E-02, min: 1.45E-01


  0%|          | 0/131 [00:00<?, ?it/s]

INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 6.54E-02, min: 1.45E-01
INFO - (NP.forecaster._init_train_loader) - lr-range-test selected learning rate: 7.53E-02
Epoch[165/165]: 100%|██████████| 165/165 [00:19<00:00,  8.61it/s, SmoothL1Loss=0.015, MAE=5.85, RMSE=7.37, RegLoss=0] 
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.941% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.764% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.764% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


In [6]:
# Preview the first five forecast rows (prediction plus its components).
forecast.head(n=5)

Unnamed: 0,ds,y,yhat1,residual1,trend,season_yearly,season_weekly,events_additive,event_Christmas Day,event_Christmas Day (Observed),...,event_Juneteenth National Independence Day (Observed),event_Labor Day,event_Martin Luther King Jr. Day,event_Memorial Day,event_New Year's Day,event_New Year's Day (Observed),event_Thanksgiving,event_Veterans Day,event_Veterans Day (Observed),event_Washington's Birthday
0,2021-08-21,,38.282417,,38.629818,1.970118,-2.317519,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2021-08-22,,35.878098,,38.646717,1.929807,-4.698422,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2021-08-23,,43.583611,,38.663609,1.893498,3.026503,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2021-08-24,,41.049545,,38.680504,1.861782,0.50726,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2021-08-25,,41.042366,,38.697403,1.835188,0.509777,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from statsmodels.tools.eval_measures import rmse
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# Pair every forecast with the observation for the same calendar day.
# `forecast` and `test` are built independently, so joining on `ds` (rather
# than trusting that both frames share row order) guarantees that only
# same-day values are compared.
scored = forecast[['ds', 'yhat1']].merge(test[['ds', 'y']], on='ds', how='inner')

# Fail loudly if the forecast horizon does not cover the whole hold-out set;
# otherwise the metrics below would be computed on a silent subset.
assert len(scored) == len(test), 'forecast does not cover every test date'

predictions = scored['yhat1']
actual = scored['y']

# Hold-out error of the daily incident-count forecast.
print('RMSE: ', rmse(actual, predictions))
print('MSE: ', mean_squared_error(actual, predictions))
print('MAE: ', mean_absolute_error(actual, predictions))
print('R2: ', r2_score(actual, predictions))
# TODO - use hyperopt for optimization

RMSE:  9.264107439209369
MSE:  85.82368664521435
MAE:  7.45585833855395
R2:  -0.03256324327300053
