## Comparing performance of NeuralProphet vs Prophet with additional regressors and default parameters.

1. Importing libraries

In [None]:
import pandas as pd
import numpy as np
from neuralprophet import NeuralProphet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import random
from prophet import Prophet 
import holidays
from neuralprophet import set_random_seed

# Seed NeuralProphet, NumPy and Python's `random` module with one shared value.
seed = 42
set_random_seed(seed)
np.random.seed(seed)
random.seed(seed)


2. Importing daily crime count with regressor data.

In [None]:
import reduce_mem_usage

# Load the daily series and pass it through the project's memory-reduction helper.
df = pd.read_csv("../../data/final-lrpd-data.csv")
df = reduce_mem_usage.reduce_mem_usage(df)

# Columns passed to both models as additional regressors.
regressors = ['is_holiday', 'dayofweek', 'quarter', 'month', 'year',
       'dayofyear', 'dayofmonth', 'weekofyear', 'is_weekend', 'is_weekday',
       'season', 'AWND', 'PRCP', 'SNWD', 'SNOW', 'TMAX', 'TMIN']

# Keep only the timestamp, the target and the regressors. The explicit .copy()
# makes the column assignments below write to an independent frame instead of
# a selection of the original one, which can trigger SettingWithCopyWarning.
df = df[["ds", "y"] + regressors].copy()
df["ds"] = pd.to_datetime(df["ds"])
df.set_index('ds').plot(figsize=(25, 9))

# Boolean flags used as conditions for the pre/post-COVID weekly seasonalities.
# `ds` is already datetime at this point, so it can be compared directly.
df['pre_covid'] = df['ds'] < pd.to_datetime('2020-03-21')
df['post_covid'] = ~df['pre_covid']

In [None]:
def _split_in_order(frame, train_fraction=0.8):
    """Cut `frame` positionally into a leading train part and a trailing test part."""
    cut = int(train_fraction * len(frame))
    return frame[:cut], frame[cut:]


# Full data set: first 80% of the rows for training, the rest for testing.
train, test = _split_in_order(df)

# First half of the rows, split 80/20 the same way.
df_half = df[:int(0.5 * len(df))]
train_half, test_half = _split_in_order(df_half)

# First quarter of the rows, split 80/20 the same way.
df_quarter = df[:int(0.25 * len(df))]
train_quarter, test_quarter = _split_in_order(df_quarter)

train.shape, test.shape

3. Fitting all the models with default settings and additional regressors/seasonality modifiers.

In [None]:
def neural_prophet(train, test, regressor_enabled=True):
    """Fit a default NeuralProphet model on `train` and return its RMSE on `test`.

    US country holidays are always added. When `regressor_enabled` is true,
    every column named in the module-level `regressors` list is registered as
    a future regressor and two weekly seasonalities are added, conditioned on
    the `pre_covid` and `post_covid` columns.

    Args:
        train: DataFrame with `ds` and `y`; when `regressor_enabled` is true it
            is also fitted with the regressor and condition columns it carries.
        test: DataFrame with `ds` and `y`; when `regressor_enabled` is true it
            must contain the regressor, `pre_covid` and `post_covid` columns.
        regressor_enabled: Whether to use the regressors and the conditional
            weekly seasonalities.

    Returns:
        Root mean squared error between `test['y']` and the forecast column
        `yhat1`, matched on `ds`.
    """
    m = NeuralProphet()
    m = m.add_country_holidays(country_name='US')

    if regressor_enabled:
        for regressor in regressors:
            m = m.add_future_regressor(regressor)

        # One weekly seasonality per regime, active only where its flag is set.
        for condition in ('pre_covid', 'post_covid'):
            m.add_seasonality(
                name=f'weekly_{condition}',
                period=7,
                fourier_order=3,
                condition_name=condition,
            )

    m.fit(train)

    if regressor_enabled:
        future = m.make_future_dataframe(
            train,
            periods=test.shape[0],
            regressors_df=test[regressors + ['pre_covid', 'post_covid']],
        )
        # Recompute the regime flags from the future timestamps themselves.
        future['pre_covid'] = pd.to_datetime(future['ds']) < pd.to_datetime('2020-03-21')
        future['post_covid'] = ~future['pre_covid']
    else:
        future = m.make_future_dataframe(train, periods=test.shape[0])

    forecast = m.predict(future)

    # The previous version also ran m.predict(train) and discarded the result;
    # that extra prediction pass is dropped.
    scored = test.merge(forecast[['ds', 'yhat1']], how='left', on='ds')

    # Take the square root explicitly (as fb_prophet does): the `squared=False`
    # shortcut is deprecated in newer scikit-learn releases.
    return np.sqrt(mean_squared_error(scored['y'], scored['yhat1']))

def fb_prophet(train, test, regressor_enabled=True):
    """Fit a default Prophet model on `train` and return its RMSE on `test`.

    US country holidays are always added. When `regressor_enabled` is true,
    two weekly seasonalities conditioned on `pre_covid` / `post_covid` are
    added and every column named in the module-level `regressors` list is
    registered as an extra regressor.

    Args:
        train: DataFrame with `ds` and `y`, plus the regressor and condition
            columns when `regressor_enabled` is true.
        test: DataFrame with `ds` and `y`; when `regressor_enabled` is true it
            must also carry the regressor columns, which are now read from
            `test` itself rather than from the global `df`.
        regressor_enabled: Whether to use the regressors and the conditional
            weekly seasonalities.

    Returns:
        Root mean squared error between `test['y']` and the predicted `yhat`.
    """
    # Re-seed NumPy on every call, as the original implementation did.
    np.random.seed(42)
    m = Prophet()
    m.add_country_holidays(country_name='US')

    if regressor_enabled:
        # One weekly seasonality per regime, active only where its flag is set.
        for condition in ('pre_covid', 'post_covid'):
            m.add_seasonality(
                name=f'weekly_{condition}',
                period=7,
                fourier_order=3,
                condition_name=condition,
            )

        for f in regressors:
            m.add_regressor(f)

    m.fit(train)

    # The previous version also forecast the training rows and computed a
    # train RMSE that was never used; that work is dropped.
    #
    # It also re-filled every regressor column from the global `df` by row
    # position, which is only correct when `train`/`test` are consecutive
    # slices of that exact frame. `test` already holds those columns, so they
    # are used directly.
    future_test = test.drop(columns=['y']).copy()
    if regressor_enabled:
        future_test['pre_covid'] = pd.to_datetime(future_test['ds']) < pd.to_datetime('2020-03-21')
        future_test['post_covid'] = ~future_test['pre_covid']

    forecast_test = m.predict(future_test)

    # Evaluate performance on the test set.
    y_true_test = test['y'].values
    y_pred_test = forecast_test['yhat'].values[-len(test):]
    return np.sqrt(mean_squared_error(y_true_test, y_pred_test))


In [None]:
def _rmse_pair(train_df, test_df, regressor_enabled=True):
    """Run Prophet first, then NeuralProphet, on the same split; return both RMSEs."""
    prophet_rmse = fb_prophet(train_df, test_df, regressor_enabled=regressor_enabled)
    neural_rmse = neural_prophet(train_df, test_df, regressor_enabled=regressor_enabled)
    return prophet_rmse, neural_rmse


# With regressors and conditional seasonalities: full, half, quarter data.
fb_rmse_full_data, neural_rmse_full_data = _rmse_pair(train, test)
fb_rmse_half_data, neural_rmse_half_data = _rmse_pair(train_half, test_half)
fb_rmse_quarter_data, neural_rmse_quarter_data = _rmse_pair(train_quarter, test_quarter)

# Without regressors: only the timestamp and target columns are passed in.
target_only = ['ds', 'y']
fb_rmse_full_data_wo_regressor, neural_rmse_full_data_wo_regressor = _rmse_pair(
    train[target_only], test[target_only], regressor_enabled=False
)
fb_rmse_half_data_wo_regressor, neural_rmse_half_data_wo_regressor = _rmse_pair(
    train_half[target_only], test_half[target_only], regressor_enabled=False
)
fb_rmse_quarter_data_wo_regressor, neural_rmse_quarter_data_wo_regressor = _rmse_pair(
    train_quarter[target_only], test_quarter[target_only], regressor_enabled=False
)

In [None]:
# Label/value pairs in the order they are reported: with regressors first
# (full, half, quarter), then the runs without regressors.
rmse_report = [
    ("NeuralProphet RMSE: ", neural_rmse_full_data),
    ("Prophet RMSE: ", fb_rmse_full_data),
    ("NeuralProphet RMSE (half data): ", neural_rmse_half_data),
    ("Prophet RMSE (half data): ", fb_rmse_half_data),
    ("NeuralProphet RMSE (quarter data): ", neural_rmse_quarter_data),
    ("Prophet RMSE (quarter data): ", fb_rmse_quarter_data),
    ("NeuralProphet RMSE (full data, no regressor): ", neural_rmse_full_data_wo_regressor),
    ("Prophet RMSE (full data, no regressor): ", fb_rmse_full_data_wo_regressor),
    ("NeuralProphet RMSE (half data, no regressor): ", neural_rmse_half_data_wo_regressor),
    ("Prophet RMSE (half data, no regressor): ", fb_rmse_half_data_wo_regressor),
    ("NeuralProphet RMSE (quarter data, no regressor): ", neural_rmse_quarter_data_wo_regressor),
    ("Prophet RMSE (quarter data, no regressor): ", fb_rmse_quarter_data_wo_regressor),
]

for label, value in rmse_report:
    print(label, value)