# Electricity Demand Forecasting
This notebook walks through some basic code to get a simple time series forecasting algorithm up and running. The focus here is not on building an optimised algorithm, but on creating a simple base model from which we can explore the concepts of machine learning engineering in the rest of the book.

# Set Up

In [None]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit #Splitting for time series CV!

In [None]:
# Load the hourly series. 'Datetime' is parsed while reading so the column
# holds real timestamps (not strings) for the inspection and plotting cells.
df = pd.read_csv('../data/AEP_hourly.csv', parse_dates=['Datetime'])

In [None]:
# Peek at the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Sort first: a line plot joins rows in frame order, and the file order is not
# assumed to be chronological (the forecasting cell also sorts explicitly).
ax = df.sort_values('Datetime').plot(x='Datetime', y='AEP_MW', figsize=(20, 10))
# Label the figure so it stands alone; the trailing ';' hides the repr output.
ax.set(title='AEP_MW over time', xlabel='Datetime', ylabel='AEP_MW');

# Functions

In [None]:
# Default seasonality switches forwarded to Prophet by time_split_train_test.
seasonality = {
    'yearly': True,
    'weekly': True,
    'daily': True
}


def time_split_train_test(df, time_series_splits, seasonality=seasonality):
    """Fit and score one Prophet model per cross-validation split.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that is passed (in slices) to ``Prophet.fit`` and
        ``Prophet.predict``; its ``y`` column is used as the ground truth.
        NOTE(review): the test-set ``y`` values are attached to the
        predictions by position, so this assumes ``predict`` returns rows in
        the same order as the slice it was given -- pass ``df`` sorted by
        ``ds`` (as the calling cell does).
    time_series_splits : object
        Anything with a ``split(df)`` method yielding
        ``(train_positions, test_positions)`` pairs.
    seasonality : dict
        Flags under the keys ``'yearly'``, ``'weekly'`` and ``'daily'``,
        forwarded to the Prophet constructor. Only read, never modified.

    Returns
    -------
    pandas.DataFrame
        Training and test rows of every split stacked on top of each other,
        with these columns added: ``train`` (True for training rows),
        ``yhat`` (prediction; NaN for training rows), ``split`` (split
        number) and ``rmse`` (test RMSE of the split, repeated on each of
        its rows). An empty frame is returned when there are no splits.
    """
    # Collect per-split frames and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0 and growing a frame inside
    # the loop re-copies all earlier rows on every iteration.
    split_frames = []

    for i, (train_i, test_i) in enumerate(time_series_splits.split(df)):

        # Slice first, then copy only the training slice (it gets a new column).
        df_train = df.iloc[train_i, :].copy()
        df_test = df.iloc[test_i, :]

        # create Prophet model
        model = Prophet(
            yearly_seasonality=seasonality['yearly'],
            weekly_seasonality=seasonality['weekly'],
            daily_seasonality=seasonality['daily']
        )

        # Train and predict. The 'train' flag is added only after fitting so
        # the model never sees it.
        model.fit(df_train)
        predicted = model.predict(df_test)

        # Keep the forecast next to the observed values of the test window.
        df_pred = predicted.loc[:, ["ds", "yhat"]].copy()
        df_pred["y"] = df_test["y"].to_numpy()

        # RMSE over the test rows only (training rows have no yhat).
        rmse = float(np.sqrt(np.mean((df_pred["yhat"] - df_pred["y"]) ** 2)))

        # Train or Test?
        df_train["train"] = True
        df_pred["train"] = False

        df_sub = pd.concat([df_train, df_pred], ignore_index=True, sort=False)
        df_sub["split"] = i
        df_sub["rmse"] = rmse

        split_frames.append(df_sub)

    if not split_frames:
        return pd.DataFrame()
    return pd.concat(split_frames, ignore_index=True, sort=False)

## Prep for Prophet

In [None]:
# Rename the timestamp and target columns to 'ds' and 'y', the names the
# Prophet cells below work with.
prophet_column_names = {'Datetime': 'ds', 'AEP_MW': 'y'}
df = df.rename(columns=prophet_column_names)

In [None]:
# Cast the 'ds' column to nanosecond-precision datetimes.
df = df.astype({'ds': 'datetime64[ns]'})

In [None]:
# Show the column dtypes to check the result of the datetime cast on 'ds'.
df.dtypes

In [None]:
# Set up the splitter used for time-series cross-validation: the data is
# split five times.
N_SPLITS = 5
ts_splits = TimeSeriesSplit(n_splits=N_SPLITS)

## Train and Forecast

In [None]:
# Cross-validate on the 1000 most recent observations only, in chronological order.
df_recent = df.sort_values('ds').tail(1000)
tmp = time_split_train_test(df_recent, ts_splits)

In [None]:
# Peek at the first five rows of the cross-validation results.
tmp.head(n=5)

## Plot

In [None]:
# One panel per cross-validation split present in `tmp`. The panel count is
# taken from the results rather than hard-coded, so changing n_splits neither
# leaves empty panels nor breaks the RMSE lookup.
split_ids = sorted(tmp['split'].unique())
nrow = len(split_ids); ncol = 1
# squeeze=False keeps `axs` a 2-D array even when there is a single panel.
fig, axs = plt.subplots(nrows=nrow, ncols=ncol, figsize=(20, 6 * nrow), squeeze=False)
fig.subplots_adjust(hspace=0.4, wspace=0.4)

is_train = tmp['train'].astype(bool)
for split_id, ax in zip(split_ids, axs.ravel()):
    in_split = tmp['split'] == split_id
    train_rows = tmp[in_split & is_train]
    test_rows = tmp[in_split & ~is_train]

    # rmse is repeated on every row of a split, so the first test row suffices.
    split_rmse = test_rows['rmse'].iloc[0]
    ax.set_title('Split '+str(split_id)+' - RMSE: '+"{:.2f}".format(split_rmse))

    # Explicit labels keep the legend readable (two of the lines plot 'y').
    train_rows.plot(x='ds', y='y', ax=ax, color='blue', marker='o', label='train (actual)')
    test_rows.plot(x='ds', y='y', ax=ax, color='red', marker='o', label='test (actual)')
    test_rows.plot(x='ds', y='yhat', ax=ax, color='orange', marker='^', label='test (forecast)')

In [None]:
# Split 3 was picked as the winner, so its sizes are recorded here as the
# configuration for the final model.
# NOTE(review): 668 is presumably the number of training rows in split 3 --
# the commented-out expressions below compute that figure from `tmp`; confirm.
model_config = dict(train_size=668, forecast_horizon=100)

# tmp[(tmp['split']==3) & (tmp['train']==True)].shape[0]
# tmp[tmp['split']==3]['train'].groupby(tmp['train']).count().to_dict()

# Final Model Param and Training cycle

In [None]:

# NOTE(review): disabled draft of the final training step. It builds a Prophet
# model from the seasonality flags, fits it on the whole frame it is given and
# returns the fitted model. Despite the function name, nothing here exports
# the model or makes a prediction.
# def forecaster_train_and_export(df, seasonality):
#     # create Prophet model
#     model=Prophet(
#         yearly_seasonality=seasonality['yearly'],
#         weekly_seasonality=seasonality['weekly'],
#         daily_seasonality=seasonality['daily']
#     )

#     # train only (this draft does not predict or export)
#     model.fit(df)
#     return model
    