# Project time series with Facebook Prophet

This project tries to predict the electricity consumption of India from historical data.

Data is in the form of a time series for a period of 17 months beginning from 2nd Jan 2019 till 23rd May 2020.

In [97]:
import pandas as pd
from prophet import Prophet
from matplotlib import pyplot
from matplotlib.pyplot import figure
from sklearn.metrics import mean_absolute_error
import plotly.express as px
import plotly.graph_objects as go

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [98]:
df=pd.read_csv('long_data_.csv')

In [99]:
df.head()

Unnamed: 0,States,Regions,latitude,longitude,Dates,Usage
0,Punjab,NR,31.519974,75.980003,02/01/2019 00:00:00,119.9
1,Haryana,NR,28.450006,77.019991,02/01/2019 00:00:00,130.3
2,Rajasthan,NR,26.449999,74.639981,02/01/2019 00:00:00,234.1
3,Delhi,NR,28.669993,77.230004,02/01/2019 00:00:00,85.8
4,UP,NR,27.599981,78.050006,02/01/2019 00:00:00,313.9


In [100]:
df.shape

(16599, 6)

In [101]:
df.dtypes

States        object
Regions       object
latitude     float64
longitude    float64
Dates         object
Usage        float64
dtype: object

In [102]:
df['Dates'] = pd.to_datetime(df['Dates'])

In [103]:
df.dtypes

States               object
Regions              object
latitude            float64
longitude           float64
Dates        datetime64[ns]
Usage               float64
dtype: object

In [104]:
df=df.groupby('Dates',as_index=False).mean()

In [105]:
df.head()

Unnamed: 0,Dates,latitude,longitude,Usage
0,2019-01-07,23.17822,81.794533,103.736364
1,2019-01-08,23.17822,81.794533,113.951515
2,2019-01-09,23.17822,81.794533,107.836364
3,2019-01-10,23.17822,81.794533,98.045455
4,2019-01-11,23.17822,81.794533,84.463636


In [106]:
df.shape

(498, 4)

In [107]:
df=df[['Dates','Usage']]

In [108]:
df.head()

Unnamed: 0,Dates,Usage
0,2019-01-07,103.736364
1,2019-01-08,113.951515
2,2019-01-09,107.836364
3,2019-01-10,98.045455
4,2019-01-11,84.463636


In [109]:
df.shape

(498, 2)

In [110]:
df.dtypes

Dates    datetime64[ns]
Usage           float64
dtype: object

We plot the dataset

In [111]:
fig=px.line(df, x='Dates', y='Usage')
fig.show()

In [112]:
df.columns = ['ds','y']

In [113]:
df.head()

Unnamed: 0,ds,y
0,2019-01-07,103.736364
1,2019-01-08,113.951515
2,2019-01-09,107.836364
3,2019-01-10,98.045455
4,2019-01-11,84.463636


The model is created and fit with the dataset

In [114]:
model=Prophet()

In [115]:
model.fit(df)

14:59:43 - cmdstanpy - INFO - Chain [1] start processing
14:59:43 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1c14a2d6970>

In [116]:
model.component_modes

{'additive': ['weekly',
  'additive_terms',
  'extra_regressors_additive',
  'holidays'],
 'multiplicative': ['multiplicative_terms', 'extra_regressors_multiplicative']}

In [117]:
future_dates= model.make_future_dataframe(periods=365, freq='d',include_history=True)

In [118]:
future_dates.head()

Unnamed: 0,ds
0,2019-01-07
1,2019-01-08
2,2019-01-09
3,2019-01-10
4,2019-01-11


In [119]:
print(future_dates)

            ds
0   2019-01-07
1   2019-01-08
2   2019-01-09
3   2019-01-10
4   2019-01-11
5   2019-01-12
6   2019-01-13
7   2019-01-14
8   2019-01-15
9   2019-01-16
10  2019-01-17
11  2019-01-18
12  2019-01-19
13  2019-01-20
14  2019-01-21
15  2019-01-22
16  2019-01-23
17  2019-01-24
18  2019-01-25
19  2019-01-26
20  2019-01-27
21  2019-01-28
22  2019-01-29
23  2019-01-30
24  2019-01-31
25  2019-02-01
26  2019-02-02
27  2019-02-03
28  2019-02-04
29  2019-02-05
30  2019-02-06
31  2019-02-07
32  2019-02-08
33  2019-02-09
34  2019-02-10
35  2019-02-11
36  2019-02-12
37  2019-02-13
38  2019-02-14
39  2019-02-15
40  2019-02-16
41  2019-02-17
42  2019-02-18
43  2019-02-19
44  2019-02-20
45  2019-02-21
46  2019-02-22
47  2019-02-23
48  2019-02-24
49  2019-02-25
50  2019-02-26
51  2019-02-27
52  2019-02-28
53  2019-03-01
54  2019-03-02
55  2019-03-03
56  2019-03-04
57  2019-03-05
58  2019-03-06
59  2019-03-07
60  2019-03-08
61  2019-03-09
62  2019-03-10
63  2019-03-11
64  2019-03-12
65  2019-0

In [120]:
prediction = model.predict(future_dates)

In [121]:
prediction.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2019-01-07,103.380234,89.840038,115.551553,103.380234,103.380234,-0.184897,-0.184897,-0.184897,-0.184897,-0.184897,-0.184897,0.0,0.0,0.0,103.195337
1,2019-01-08,103.378101,91.2615,116.19399,103.378101,103.378101,0.650456,0.650456,0.650456,0.650456,0.650456,0.650456,0.0,0.0,0.0,104.028557
2,2019-01-09,103.375967,90.746417,116.138835,103.375967,103.375967,0.151431,0.151431,0.151431,0.151431,0.151431,0.151431,0.0,0.0,0.0,103.527398
3,2019-01-10,103.373834,89.653192,115.890659,103.373834,103.373834,-0.29326,-0.29326,-0.29326,-0.29326,-0.29326,-0.29326,0.0,0.0,0.0,103.080574
4,2019-01-11,103.3717,89.762679,114.869689,103.3717,103.3717,-0.470863,-0.470863,-0.470863,-0.470863,-0.470863,-0.470863,0.0,0.0,0.0,102.900837


In [122]:
trace_open = go.Scatter(
    x = prediction["ds"],
    y = prediction["yhat"],
    mode = 'lines',
    name="Forecast"
)
trace_high = go.Scatter(
    x = prediction["ds"],
    y = prediction["yhat_upper"],
    mode = 'lines',
    fill = "tonexty", 
    line = {"color": "#57b8ff"}, 
    name="Higher uncertainty interval"
)
trace_low = go.Scatter(
    x = prediction["ds"],
    y = prediction["yhat_lower"],
    mode = 'lines',
    fill = "tonexty", 
    line = {"color": "#57b8ff"}, 
    name="Lower uncertainty interval"
)
trace_close = go.Scatter(
    x = df["ds"],
    y = df["y"],
    name="Data values"
)

data = [trace_open,trace_high,trace_low,trace_close]
layout = go.Layout(title="Power consumption forecasting")
fig = go.Figure(data=data)
fig.show()

In [123]:
fig = go.Figure([go.Scatter(x=df['ds'], y=df['y'],mode='lines',
                    name='Actual')])
fig.add_trace(go.Scatter(x=prediction['ds'], y=prediction['yhat'],
                   mode='lines+markers',
                    name='predicted'))
fig.show()

Mean Absolute Error

In [124]:
y_true = df['y'].values
y_pred = prediction['yhat'][:498].values 
mae = mean_absolute_error(y_true, y_pred)
print('MAE: %.3f' % mae)

MAE: 7.910


In [125]:
df.shape

(498, 2)

In [126]:
future_dates.shape

(863, 1)

In [127]:
print(future_dates)

            ds
0   2019-01-07
1   2019-01-08
2   2019-01-09
3   2019-01-10
4   2019-01-11
5   2019-01-12
6   2019-01-13
7   2019-01-14
8   2019-01-15
9   2019-01-16
10  2019-01-17
11  2019-01-18
12  2019-01-19
13  2019-01-20
14  2019-01-21
15  2019-01-22
16  2019-01-23
17  2019-01-24
18  2019-01-25
19  2019-01-26
20  2019-01-27
21  2019-01-28
22  2019-01-29
23  2019-01-30
24  2019-01-31
25  2019-02-01
26  2019-02-02
27  2019-02-03
28  2019-02-04
29  2019-02-05
30  2019-02-06
31  2019-02-07
32  2019-02-08
33  2019-02-09
34  2019-02-10
35  2019-02-11
36  2019-02-12
37  2019-02-13
38  2019-02-14
39  2019-02-15
40  2019-02-16
41  2019-02-17
42  2019-02-18
43  2019-02-19
44  2019-02-20
45  2019-02-21
46  2019-02-22
47  2019-02-23
48  2019-02-24
49  2019-02-25
50  2019-02-26
51  2019-02-27
52  2019-02-28
53  2019-03-01
54  2019-03-02
55  2019-03-03
56  2019-03-04
57  2019-03-05
58  2019-03-06
59  2019-03-07
60  2019-03-08
61  2019-03-09
62  2019-03-10
63  2019-03-11
64  2019-03-12
65  2019-0

In [128]:
print(df)

            ds           y
0   2019-01-07  103.736364
1   2019-01-08  113.951515
2   2019-01-09  107.836364
3   2019-01-10   98.045455
4   2019-01-11   84.463636
5   2019-01-12  122.618182
6   2019-01-13   98.800000
7   2019-01-14   98.833333
8   2019-01-15   96.166667
9   2019-01-16  105.151515
10  2019-01-17  107.472727
11  2019-01-18  108.775758
12  2019-01-19  109.839394
13  2019-01-20  107.675758
14  2019-01-21  105.715152
15  2019-01-22  103.378788
16  2019-01-23  117.257576
17  2019-01-24  117.112121
18  2019-01-25  111.145455
19  2019-01-26  112.681818
20  2019-01-27  109.715152
21  2019-01-28  110.254545
22  2019-01-29  108.175758
23  2019-01-30   78.966667
24  2019-01-31   79.766667
25  2019-02-01  102.224242
26  2019-02-02   80.939394
27  2019-02-03  105.157576
28  2019-02-04  110.500000
29  2019-02-05  102.709091
30  2019-02-06   78.557576
31  2019-02-07  108.057576
32  2019-02-08  116.960606
33  2019-02-09  109.627273
34  2019-02-10   92.700000
35  2019-02-11   87.263636
3

In [129]:
print(prediction)

            ds       trend  yhat_lower  yhat_upper  trend_lower  trend_upper  \
0   2019-01-07  103.380234   89.840038  115.551553   103.380234   103.380234   
1   2019-01-08  103.378101   91.261500  116.193990   103.378101   103.378101   
2   2019-01-09  103.375967   90.746417  116.138835   103.375967   103.375967   
3   2019-01-10  103.373834   89.653192  115.890659   103.373834   103.373834   
4   2019-01-11  103.371700   89.762679  114.869689   103.371700   103.371700   
5   2019-01-12  103.369567   89.872028  115.213470   103.369567   103.369567   
6   2019-01-13  103.367434   93.006730  116.392511   103.367434   103.367434   
7   2019-01-14  103.365300   90.427365  115.759136   103.365300   103.365300   
8   2019-01-15  103.363167   92.241534  116.883799   103.363167   103.363167   
9   2019-01-16  103.361034   90.150702  115.939695   103.361034   103.361034   
10  2019-01-17  103.358900   90.141494  115.293159   103.358900   103.358900   
11  2019-01-18  103.356767   90.676518  

Optimizing the model

In [158]:
modelOpt = Prophet(daily_seasonality=True).add_seasonality(name='yearly',period=365,fourier_order=70)

In [159]:
modelOpt.fit(df)

15:24:24 - cmdstanpy - INFO - Chain [1] start processing
15:24:24 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1c14a775970>

In [160]:
modelOpt.component_modes

{'additive': ['yearly',
  'weekly',
  'daily',
  'additive_terms',
  'extra_regressors_additive',
  'holidays'],
 'multiplicative': ['multiplicative_terms', 'extra_regressors_multiplicative']}

In [161]:
future_datesOpt = modelOpt.make_future_dataframe(periods=365)

In [162]:
predictionOpt = modelOpt.predict(future_datesOpt)

Mean Absolute Error for the optimized model

In [163]:
#from sklearn.metrics import mean_absolute_error
y_true = df['y'].values
y_pred = predictionOpt['yhat'][:498].values
mae = mean_absolute_error(y_true, y_pred)
print('MAE: %.3f' % mae)

MAE: 5.600


In [164]:
#import plotly.graph_objects as go
fig = go.Figure([go.Scatter(x=df['ds'], y=df['y'],mode='lines',
                    name='Actual')])

fig.add_trace(go.Scatter(x=predictionOpt['ds'], y=predictionOpt['yhat'],
                   mode='lines+markers',
                    name='predicted'))

fig.show()