In [1]:
from neuralprophet import NeuralProphet
import pandas as pd


In [2]:

df = pd.read_csv('../data/lrpd-clean.csv')
df['INCIDENT_DATE'] = pd.to_datetime(df['INCIDENT_DATE'])
df.head().T

Unnamed: 0,0,1,2,3,4
INCIDENT_NUMBER,2022-036059,2017-029450,2017-114829,2018-046714,2020-042339
INCIDENT_DATE,2022-03-30 17:38:00,2017-03-16 18:30:00,2017-09-15 03:14:00,2018-04-20 16:18:00,2020-04-18 18:15:00
LOCATION_DISTRICT,42.0,82.0,64.0,82.0,61.0
OFFENSE_DESCRIPTION,THEFT FROM MOTOR VEHICLE,THEFT OF MOTOR VEHICLE PARTS,RAPE,RAPE,RAPE
WEAPON_TYPE,NO WEAPON,NO WEAPON,NO WEAPON,UNKNOWN,HANDGUN
ZIP,72202.0,72206.0,0.0,0.0,0.0
LATITUDE,0.0,34.649362,0.0,0.0,0.0
LONGITUDE,0.0,-92.301959,0.0,0.0,0.0
WEEK_OF_MONTH,4,2,2,2,2
YEAR,2022,2017,2017,2018,2020


In [3]:
df2 = df.groupby(pd.Grouper(key='INCIDENT_DATE', freq='D')).size().reset_index(name='INCIDENT_COUNT')
df2.columns = ['ds', 'y']
df2.tail()

Unnamed: 0,ds,y
2237,2023-02-16,42
2238,2023-02-17,32
2239,2023-02-18,25
2240,2023-02-19,36
2241,2023-02-20,30


In [4]:
train = df2[:int(0.8 * (len(df2)))]
test = df2[int(0.8 * (len(df2))):]

In [5]:
m = NeuralProphet()
m.add_country_holidays(country_name='US')
metrics = m.fit(train, freq="D")

future = m.make_future_dataframe(train, periods=len(test))
forecast = m.predict(future)

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.944% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 163


  0%|          | 0/131 [00:00<?, ?it/s]

INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 8.99E-02, min: 1.58E+00


  0%|          | 0/131 [00:00<?, ?it/s]

INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 8.99E-02, min: 9.82E-01
INFO - (NP.forecaster._init_train_loader) - lr-range-test selected learning rate: 9.32E-02
Epoch[163/163]: 100%|██████████| 163/163 [00:15<00:00, 10.79it/s, SmoothL1Loss=0.0152, MAE=5.85, RMSE=7.42, RegLoss=0]
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.944% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.777% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.777% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


In [6]:
forecast.head()

Unnamed: 0,ds,y,yhat1,residual1,trend,season_yearly,season_weekly,events_additive,event_Christmas Day,event_Christmas Day (Observed),...,event_Juneteenth National Independence Day (Observed),event_Labor Day,event_Martin Luther King Jr. Day,event_Memorial Day,event_New Year's Day,event_New Year's Day (Observed),event_Thanksgiving,event_Veterans Day,event_Veterans Day (Observed),event_Washington's Birthday
0,2021-11-29,,42.357899,,37.940418,0.966821,3.450661,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2021-11-30,,39.400238,,37.944393,0.90707,0.548773,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2021-12-01,,39.602921,,37.948368,0.844054,0.810497,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2021-12-02,,39.541672,,37.952339,0.777688,0.811644,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2021-12-03,,40.989662,,37.956314,0.707915,2.325429,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from statsmodels.tools.eval_measures import rmse
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

predictions = forecast['yhat1']
actual = test['y']

print('RMSE: ', rmse(actual, predictions))
print('MSE: ', mean_squared_error(actual, predictions))
print('MAE: ', mean_absolute_error(actual, predictions))
print('R2: ', r2_score(actual, predictions))
# TODO - use hyperopt for optimization

RMSE:  8.164387149993262
MSE:  66.6572175349751
MAE:  6.44964623398133
R2:  0.19175350015791137
