
# Rolling horizon forecast

In [1]:
import os
from datetime import timedelta

import pandas as pd
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, simulated_historical_forecasts
from bokeh.io import output_notebook, show
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import Range1d

# Show the working directory: the relative pickle path below is resolved against it.
print(os.getcwd())  # see why your pickle is not found

# Make bokeh's show() render inside the notebook.
output_notebook()

# Load some data: only the `y` attribute of the pickled object is kept
# (presumably the measurement column of a DataFrame -- confirm against the pickle).
df = pd.read_pickle('../data/pickles/df_ejj_pv_res15T.pickle').y
# df = pd.read_pickle('data/pickles/df_ps_pv_res1h.pickle')

# Sampling frequency string, reused for every date range built in this notebook.
resolution_ = '15T'

# Keyword arguments passed to every Prophet model created in this notebook.
preconditions = {
    'yearly_seasonality': True,
    'weekly_seasonality': False,
    'daily_seasonality': True,
}

print("Loading and preparing data for use in fbprophet...")

# Turn the index into a regular column, then rename the columns
# to 'ds' (timestamp) and 'y' (value) for use in fbprophet.
df = df.reset_index().rename(columns={'datetime': 'ds', 'actual': 'y'})

print("Done loading and preparing data.")

/home/nicolas/workspace/seita/load-forecasting/notebooks


Loading and preparing data for use in fbprophet...
Done loading and preparing data.


In [2]:
print("Starting to generate inner-sample forecasts ...")

# Cheap rolling horizon forecast: fit a single model on all available data
# and predict over the same period the model was fitted on.
model = Prophet(**preconditions)
model.fit(df)

# The forecast window spans the first to the last timestamp of the fitted history,
# sampled at the notebook's resolution.
history_dates = model.history_dates
start_, end_ = history_dates.min(), history_dates.max()
dates = pd.date_range(start=start_, end=end_, freq=resolution_)
window = pd.DataFrame({'ds': dates})

inner_sample_forecast = model.predict(window)

print("Done generating inner-sample forecasts.")

Starting to generate inner-sample forecasts ...


  elif np.issubdtype(np.asarray(v).dtype, float):


Done generating inner-sample forecasts.


In [3]:
print("Plotting inner-sample forecasts ...")

# Only data strictly before this timestamp is plotted.
plot_end = pd.Timestamp("2015-01-13")

plot_df = df.set_index('ds')
# Index a copy by timestamp instead of mutating inner_sample_forecast in place,
# so this cell can be re-run without failing on a missing 'ds' column.
forecast_by_time = inner_sample_forecast.set_index('ds')

actual = plot_df.loc[plot_df.index < plot_end, 'y']
predicted = forecast_by_time.loc[forecast_by_time.index < plot_end, 'yhat']

# The visible x range follows the actual measurements.
x = actual.index
xdr = Range1d(start=x.min(), end=x.max())
s1 = figure(x_range=xdr, plot_width=1200, plot_height=750, title=None, sizing_mode='scale_width')

s1.circle(x, actual, size=4,
          color="green", alpha=0.5, legend="actual")
# Plot the forecast against its own timestamps: it was predicted on a gap-free
# date range, which need not line up one-to-one with the measured timestamps.
s1.square(predicted.index, predicted,
          color="blue", legend="inner sample forecast")

show(s1)

Plotting inner-sample forecasts ...


In [4]:
print("Starting to generate rolling forecasts ...")

# No model is built until this much time has passed since the first modeling time.
initial_training = timedelta(days=21)
modeling_times = pd.date_range(start='2015-01-01 00:00', end="2015-02-01 23:45", freq="6h")
forecast_times = pd.date_range(start='2015-01-01 00:00', end="2015-02-01 23:45", freq=resolution_)

# periods_forward: number of steps (52 hours' worth) to forecast beyond each modeling time.
# window: offsets of +/- 3 hours around a horizon, one per time step.
if resolution_ == "1h":
    periods_forward = 52
    window = [timedelta(hours=step) for step in range(-3, 4)]
elif resolution_ == "15T":
    periods_forward = 52 * 4
    window = [timedelta(minutes=15 * step) for step in range(-12, 13)]
else:
    # Fail loudly instead of continuing with an undefined periods_forward
    # or a stale `window` left over from an earlier cell.
    raise ValueError("Unsupported resolution %r: expected '1h' or '15T'." % resolution_)

# Modeling rolling forecasts with a time-saving measure:
# We build a model every 6 hours. We forecast 52 hours from there. From this forecast,
# we pick two windows, around 6h and 48h, and apply these forecasts, *as if they were made
# exactly 6h/48h before*, where in reality they are from *roughly* 6h/48h before.
# The results will probably not differ a lot, but our computation time is cut by a factor of six to twenty-four.

yhats = ["yhat", "yhat_upper", "yhat_lower"]


def _empty_forecast_frame(times, value_columns):
    """Return a frame with a 'ds' column holding ``times`` and NaN-filled float value columns."""
    frame = pd.DataFrame({"ds": times})
    for column in value_columns:
        frame[column] = float("nan")
    return frame


def _copy_forecast_window(target, source, center, offsets, columns):
    """Copy ``columns`` from ``source`` into ``target`` at the timestamps ``center + offset``.

    Rows are matched on the 'ds' column. A timestamp is only copied when it occurs
    in both frames; anything else is skipped. ``target`` is modified in place.
    Assumes 'ds' values are unique within ``source``.
    """
    wanted = pd.DatetimeIndex([center + offset for offset in offsets])
    available = source.loc[source["ds"].isin(wanted)].set_index("ds")[columns]
    rows = target["ds"].isin(available.index)
    if rows.any():
        # Look the values up in target order so rows and values line up.
        stamps = pd.DatetimeIndex(target.loc[rows, "ds"])
        target.loc[rows, columns] = available.loc[stamps].values


forecast_6h_ago = _empty_forecast_frame(forecast_times, yhats)
forecast_48h_ago = _empty_forecast_frame(forecast_times, yhats)

model = None
first_modeling_time = modeling_times[0] + initial_training

for dt in modeling_times:
    if dt < first_modeling_time:
        continue  # wait for initial training
    if dt.hour == 0:
        print(dt)  # progress: one line per modeled day
    model = Prophet(**preconditions)
    model.fit(df[df["ds"] <= dt])  # train only on data known at modeling time
    future = model.make_future_dataframe(freq=resolution_, periods=periods_forward)
    forecast_at_dt = model.predict(future)
    # Where the windows of two consecutive models touch, the later model overwrites the earlier one.
    _copy_forecast_window(forecast_6h_ago, forecast_at_dt, dt + timedelta(hours=6), window, yhats)
    _copy_forecast_window(forecast_48h_ago, forecast_at_dt, dt + timedelta(hours=48), window, yhats)

# We fill NaN values with zeroes for now.
# There might be a better way for our app to handle times without forecasts data.
forecast_6h_ago = forecast_6h_ago.fillna(0)
forecast_48h_ago = forecast_48h_ago.fillna(0)

print("Done generating rolling forecasts.")


Starting to generate rolling forecasts ...
2015-01-22 00:00:00


  elif np.issubdtype(np.asarray(v).dtype, float):


2015-01-23 00:00:00
2015-01-24 00:00:00
2015-01-25 00:00:00
2015-01-26 00:00:00
2015-01-27 00:00:00
2015-01-28 00:00:00
2015-01-29 00:00:00
2015-01-30 00:00:00
2015-01-31 00:00:00
2015-02-01 00:00:00
Done generating rolling forecasts.


In [5]:
print("Plotting rolling forecasts ...")

# Only data strictly before this timestamp is plotted.
plot_end = pd.Timestamp("2015-02-01")

plot_df = df.set_index('ds')
actual = plot_df.loc[plot_df.index < plot_end, 'y']

# The forecast frames keep 'ds' as a regular column. Each forecast is plotted against
# its own timestamps, because the forecast rows do not cover the same timestamps as
# the measurements selected above.
recent_forecast = forecast_6h_ago[forecast_6h_ago["ds"] < plot_end]
early_forecast = forecast_48h_ago[forecast_48h_ago["ds"] < plot_end]

# The visible x range follows the actual measurements.
x = actual.index
xdr = Range1d(start=x.min(), end=x.max())
s1 = figure(x_range=xdr, plot_width=1400, plot_height=750, title=None, sizing_mode='scale_width')

s1.circle(x, actual, size=4, color="green", alpha=0.5, legend="actual")
s1.square(recent_forecast["ds"], recent_forecast["yhat"], color="blue", legend="6h ago")
s1.square(early_forecast["ds"], early_forecast["yhat"], color="orange", legend="48h ago")

show(s1)

Plotting rolling forecasts ...


