# Project time series with Facebook Prophet

This project tries to predict the electricity consumption of India from historical data.

Data is in the form of a time series for a period of 17 months beginning from 2nd Jan 2019 till 23rd May 2020.

In [97]:
import pandas as pd
from prophet import Prophet
from matplotlib import pyplot
from matplotlib.pyplot import figure
from sklearn.metrics import mean_absolute_error
import plotly.express as px
import plotly.graph_objects as go

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [98]:
df=pd.read_csv('long_data_.csv')

In [99]:
df.head()

Unnamed: 0,States,Regions,latitude,longitude,Dates,Usage
0,Punjab,NR,31.519974,75.980003,02/01/2019 00:00:00,119.9
1,Haryana,NR,28.450006,77.019991,02/01/2019 00:00:00,130.3
2,Rajasthan,NR,26.449999,74.639981,02/01/2019 00:00:00,234.1
3,Delhi,NR,28.669993,77.230004,02/01/2019 00:00:00,85.8
4,UP,NR,27.599981,78.050006,02/01/2019 00:00:00,313.9


In [100]:
df.shape

(16599, 6)

In [101]:
df.dtypes

States        object
Regions       object
latitude     float64
longitude    float64
Dates         object
Usage        float64
dtype: object

In [102]:
df['Dates'] = pd.to_datetime(df['Dates'])

In [103]:
df.dtypes

States               object
Regions              object
latitude            float64
longitude           float64
Dates        datetime64[ns]
Usage               float64
dtype: object

In [104]:
df=df.groupby('Dates',as_index=False).mean()

In [105]:
df.head()

Unnamed: 0,Dates,latitude,longitude,Usage
0,2019-01-07,23.17822,81.794533,103.736364
1,2019-01-08,23.17822,81.794533,113.951515
2,2019-01-09,23.17822,81.794533,107.836364
3,2019-01-10,23.17822,81.794533,98.045455
4,2019-01-11,23.17822,81.794533,84.463636


In [106]:
df.shape

(498, 4)

In [107]:
df=df[['Dates','Usage']]

In [108]:
df.head()

Unnamed: 0,Dates,Usage
0,2019-01-07,103.736364
1,2019-01-08,113.951515
2,2019-01-09,107.836364
3,2019-01-10,98.045455
4,2019-01-11,84.463636


In [109]:
df.shape

(498, 2)

In [110]:
df.dtypes

Dates    datetime64[ns]
Usage           float64
dtype: object

We plot the dataset

In [111]:
fig=px.line(df, x='Dates', y='Usage')
fig.show()

In [112]:
df.columns = ['ds','y']

In [113]:
df.head()

Unnamed: 0,ds,y
0,2019-01-07,103.736364
1,2019-01-08,113.951515
2,2019-01-09,107.836364
3,2019-01-10,98.045455
4,2019-01-11,84.463636


The model is created and fit with the dataset

In [114]:
model=Prophet()

In [115]:
model.fit(df)

14:59:43 - cmdstanpy - INFO - Chain [1] start processing
14:59:43 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1c14a2d6970>

In [116]:
model.component_modes

{'additive': ['weekly',
  'additive_terms',
  'extra_regressors_additive',
  'holidays'],
 'multiplicative': ['multiplicative_terms', 'extra_regressors_multiplicative']}

In [117]:
future_dates= model.make_future_dataframe(periods=365, freq='d',include_history=True)

In [118]:
future_dates.head()

Unnamed: 0,ds
0,2019-01-07
1,2019-01-08
2,2019-01-09
3,2019-01-10
4,2019-01-11


In [120]:
prediction = model.predict(future_dates)

In [121]:
prediction.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2019-01-07,103.380234,89.840038,115.551553,103.380234,103.380234,-0.184897,-0.184897,-0.184897,-0.184897,-0.184897,-0.184897,0.0,0.0,0.0,103.195337
1,2019-01-08,103.378101,91.2615,116.19399,103.378101,103.378101,0.650456,0.650456,0.650456,0.650456,0.650456,0.650456,0.0,0.0,0.0,104.028557
2,2019-01-09,103.375967,90.746417,116.138835,103.375967,103.375967,0.151431,0.151431,0.151431,0.151431,0.151431,0.151431,0.0,0.0,0.0,103.527398
3,2019-01-10,103.373834,89.653192,115.890659,103.373834,103.373834,-0.29326,-0.29326,-0.29326,-0.29326,-0.29326,-0.29326,0.0,0.0,0.0,103.080574
4,2019-01-11,103.3717,89.762679,114.869689,103.3717,103.3717,-0.470863,-0.470863,-0.470863,-0.470863,-0.470863,-0.470863,0.0,0.0,0.0,102.900837


In [122]:
trace_open = go.Scatter(
    x = prediction["ds"],
    y = prediction["yhat"],
    mode = 'lines',
    name="Forecast"
)
trace_high = go.Scatter(
    x = prediction["ds"],
    y = prediction["yhat_upper"],
    mode = 'lines',
    fill = "tonexty", 
    line = {"color": "#57b8ff"}, 
    name="Higher uncertainty interval"
)
trace_low = go.Scatter(
    x = prediction["ds"],
    y = prediction["yhat_lower"],
    mode = 'lines',
    fill = "tonexty", 
    line = {"color": "#57b8ff"}, 
    name="Lower uncertainty interval"
)
trace_close = go.Scatter(
    x = df["ds"],
    y = df["y"],
    name="Data values"
)

data = [trace_open,trace_high,trace_low,trace_close]
layout = go.Layout(title="Power consumption forecasting")
fig = go.Figure(data=data)
fig.show()

In [123]:
fig = go.Figure([go.Scatter(x=df['ds'], y=df['y'],mode='lines',
                    name='Actual')])
fig.add_trace(go.Scatter(x=prediction['ds'], y=prediction['yhat'],
                   mode='lines+markers',
                    name='predicted'))
fig.show()

Mean Absolute Error

In [124]:
y_true = df['y'].values
y_pred = prediction['yhat'][:498].values 
mae = mean_absolute_error(y_true, y_pred)
print('MAE: %.3f' % mae)

MAE: 7.910


In [125]:
df.shape

(498, 2)

In [126]:
future_dates.shape

(863, 1)

Optimizing the model

In [158]:
modelOpt = Prophet(daily_seasonality=True).add_seasonality(name='yearly',period=365,fourier_order=70)

In [159]:
modelOpt.fit(df)

15:24:24 - cmdstanpy - INFO - Chain [1] start processing
15:24:24 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1c14a775970>

In [160]:
modelOpt.component_modes

{'additive': ['yearly',
  'weekly',
  'daily',
  'additive_terms',
  'extra_regressors_additive',
  'holidays'],
 'multiplicative': ['multiplicative_terms', 'extra_regressors_multiplicative']}

In [161]:
future_datesOpt = modelOpt.make_future_dataframe(periods=365)

In [162]:
predictionOpt = modelOpt.predict(future_datesOpt)

Mean Absolute Error for the optimized model

In [163]:
#from sklearn.metrics import mean_absolute_error
y_true = df['y'].values
y_pred = predictionOpt['yhat'][:498].values
mae = mean_absolute_error(y_true, y_pred)
print('MAE: %.3f' % mae)

MAE: 5.600


In [164]:
#import plotly.graph_objects as go
fig = go.Figure([go.Scatter(x=df['ds'], y=df['y'],mode='lines',
                    name='Actual')])

fig.add_trace(go.Scatter(x=predictionOpt['ds'], y=predictionOpt['yhat'],
                   mode='lines+markers',
                    name='predicted'))

fig.show()