### https://towardsdatascience.com/how-to-forecast-sales-with-python-using-sarima-model-ba600992fa7d

In [7]:
import warnings
import itertools
import numpy as np
from numpy import concatenate, savetxt, unique, array, subtract
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
import pandas as pd
from pandas import merge, DataFrame, Series
import statsmodels.api as sm
import matplotlib
from math import sqrt
from statistics import mean
matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['text.color'] = 'g'


In [8]:
df = pd.read_csv('CharlestownAllWithDate1.csv', header=0, sep='[,]', parse_dates=True, squeeze=True, dayfirst=True, engine='python')
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
y = df.set_index(['Date'])

In [9]:
meter_ids = unique(y['MeterNo'])
#print(meter_ids)
prediction_d = dict()
actual_d = dict()
results_RMSE = dict()
results_Error = dict()
prediction_df = []
actual_df = []
results_dfRMSE = []
results_dfError = []
results_df = []
pred_date = '2021-02-01'

In [10]:
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]
print('Examples of parameter for SARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

Examples of parameter for SARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 12)
SARIMAX: (0, 0, 1) x (0, 1, 0, 12)
SARIMAX: (0, 1, 0) x (0, 1, 1, 12)
SARIMAX: (0, 1, 0) x (1, 0, 0, 12)


In [11]:
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,order=param,seasonal_order=param_seasonal,enforce_stationarity=False,enforce_invertibility=False, period=1)
            results = mod.fit()
            print('ARIMA{}x{}12 - AIC:{}'.format(param,param_seasonal,results.aic))
        except: 
            continue

In [12]:
for m in meter_ids:
    df = pd.read_csv('CharlestownAllWithDate1.csv', header=0, sep='[,]', parse_dates=True, squeeze=True, dayfirst=True, engine='python')
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
    y = df.set_index(['Date'])
    y = y.loc[y['MeterNo'] == m]
    y.drop(y.columns[[1]], axis = 1, inplace = True)
    #print(y)
    mod = sm.tsa.statespace.SARIMAX(y,
                                order=(0, 1, 1),
                                seasonal_order=(1, 1, 1, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
    results = mod.fit()
    pred = results.get_prediction(start=pd.to_datetime(pred_date), dynamic=False)
    y_forecasted = pred.predicted_mean
    y_truth = y[pred_date:]
    prediction = y_forecasted.mean()
    actual = y_truth.mean()
    error = (prediction - actual)
    mse = (error **2)
    rmse = round(np.sqrt(mse),2)
    print('MeterNo %s - Prediction: %.3f - Actual: %.3f - Test Error: %.3f - Test RMSE: %.3f' % (m, prediction, actual, error, rmse))
    results_RMSE[m] = rmse #dict
    results_Error[m] = error #dict
    prediction_d[m] = prediction #dict
    actual_d[m] = actual #dict
    results_dfRMSE.append([m, mean(rmse)]) #array
    results_dfError.append([m, mean(error)]) #array
    prediction_df.append([m, prediction]) #array
    actual_df.append([m, mean(actual)]) #array

MeterNo 1.0 - Prediction: 7.066 - Actual: 6.829 - Test Error: 0.237 - Test RMSE: 0.240
MeterNo 2.0 - Prediction: 25.353 - Actual: 24.500 - Test Error: 0.853 - Test RMSE: 0.850
MeterNo 3.0 - Prediction: 16.493 - Actual: 16.114 - Test Error: 0.379 - Test RMSE: 0.380
MeterNo 4.0 - Prediction: 16.929 - Actual: 16.471 - Test Error: 0.457 - Test RMSE: 0.460
MeterNo 5.0 - Prediction: 4.166 - Actual: 4.086 - Test Error: 0.080 - Test RMSE: 0.080
MeterNo 6.0 - Prediction: 13.897 - Actual: 12.371 - Test Error: 1.525 - Test RMSE: 1.530
MeterNo 7.0 - Prediction: 8.505 - Actual: 8.271 - Test Error: 0.234 - Test RMSE: 0.230
MeterNo 8.0 - Prediction: 11.184 - Actual: 11.014 - Test Error: 0.170 - Test RMSE: 0.170
MeterNo 9.0 - Prediction: 1.666 - Actual: 1.529 - Test Error: 0.138 - Test RMSE: 0.140
MeterNo 10.0 - Prediction: 10.291 - Actual: 9.814 - Test Error: 0.476 - Test RMSE: 0.480
MeterNo 11.0 - Prediction: 20.582 - Actual: 20.229 - Test Error: 0.353 - Test RMSE: 0.350
MeterNo 12.0 - Prediction: 2

MeterNo 94.0 - Prediction: 17.265 - Actual: 17.057 - Test Error: 0.208 - Test RMSE: 0.210
MeterNo 95.0 - Prediction: 6.278 - Actual: 6.100 - Test Error: 0.178 - Test RMSE: 0.180
MeterNo 96.0 - Prediction: 13.554 - Actual: 13.200 - Test Error: 0.354 - Test RMSE: 0.350
MeterNo 97.0 - Prediction: 11.799 - Actual: 11.871 - Test Error: -0.072 - Test RMSE: 0.070
MeterNo 98.0 - Prediction: 7.168 - Actual: 7.000 - Test Error: 0.168 - Test RMSE: 0.170
MeterNo 99.0 - Prediction: 10.209 - Actual: 10.057 - Test Error: 0.151 - Test RMSE: 0.150
MeterNo 100.0 - Prediction: 27.014 - Actual: 26.043 - Test Error: 0.972 - Test RMSE: 0.970
MeterNo 101.0 - Prediction: 16.894 - Actual: 16.557 - Test Error: 0.337 - Test RMSE: 0.340
MeterNo 102.0 - Prediction: 10.975 - Actual: 10.414 - Test Error: 0.561 - Test RMSE: 0.560
MeterNo 103.0 - Prediction: 23.462 - Actual: 22.486 - Test Error: 0.977 - Test RMSE: 0.980
MeterNo 104.0 - Prediction: 18.033 - Actual: 18.257 - Test Error: -0.224 - Test RMSE: 0.220
MeterNo

MeterNo 185.0 - Prediction: 17.259 - Actual: 16.829 - Test Error: 0.430 - Test RMSE: 0.430
MeterNo 186.0 - Prediction: 13.333 - Actual: 13.343 - Test Error: -0.010 - Test RMSE: 0.010
MeterNo 187.0 - Prediction: 4.869 - Actual: 4.814 - Test Error: 0.054 - Test RMSE: 0.050
MeterNo 188.0 - Prediction: 17.921 - Actual: 17.471 - Test Error: 0.450 - Test RMSE: 0.450
MeterNo 189.0 - Prediction: 10.229 - Actual: 8.729 - Test Error: 1.500 - Test RMSE: 1.500
MeterNo 190.0 - Prediction: 6.074 - Actual: 6.114 - Test Error: -0.041 - Test RMSE: 0.040
MeterNo 191.0 - Prediction: 10.893 - Actual: 10.600 - Test Error: 0.293 - Test RMSE: 0.290
MeterNo 192.0 - Prediction: 18.013 - Actual: 17.771 - Test Error: 0.242 - Test RMSE: 0.240
MeterNo 193.0 - Prediction: 23.462 - Actual: 23.143 - Test Error: 0.319 - Test RMSE: 0.320
MeterNo 194.0 - Prediction: 7.172 - Actual: 7.186 - Test Error: -0.014 - Test RMSE: 0.010
MeterNo 195.0 - Prediction: 8.072 - Actual: 8.086 - Test Error: -0.014 - Test RMSE: 0.010
Mete

MeterNo 276.0 - Prediction: 16.716 - Actual: 16.129 - Test Error: 0.587 - Test RMSE: 0.590
MeterNo 277.0 - Prediction: 23.762 - Actual: 23.071 - Test Error: 0.691 - Test RMSE: 0.690
MeterNo 278.0 - Prediction: 25.465 - Actual: 25.200 - Test Error: 0.265 - Test RMSE: 0.260
MeterNo 279.0 - Prediction: 6.320 - Actual: 5.757 - Test Error: 0.563 - Test RMSE: 0.560
MeterNo 280.0 - Prediction: 48.723 - Actual: 47.843 - Test Error: 0.880 - Test RMSE: 0.880
MeterNo 281.0 - Prediction: 25.175 - Actual: 24.529 - Test Error: 0.647 - Test RMSE: 0.650
MeterNo 282.0 - Prediction: 22.147 - Actual: 21.386 - Test Error: 0.761 - Test RMSE: 0.760
MeterNo 283.0 - Prediction: 1.618 - Actual: 0.000 - Test Error: 1.618 - Test RMSE: 1.620
MeterNo 284.0 - Prediction: 16.564 - Actual: 15.114 - Test Error: 1.450 - Test RMSE: 1.450
MeterNo 285.0 - Prediction: 45.042 - Actual: 43.800 - Test Error: 1.242 - Test RMSE: 1.240


IndexError: index 0 is out of bounds for axis 0 with size 0

In [13]:
prediction_df=DataFrame(prediction_df)
prediction_df.columns = ['MeterID', 'Prediction']
print(prediction_df)

     MeterID  Prediction
0        1.0    7.066052
1        2.0   25.352763
2        3.0   16.493191
3        4.0   16.928545
4        5.0    4.166146
..       ...         ...
279    281.0   25.175114
280    282.0   22.147112
281    283.0    1.617809
282    284.0   16.564455
283    285.0   45.041791

[284 rows x 2 columns]


In [14]:
actual_df=DataFrame(actual_df)
actual_df.columns = ['MeterID', 'Actual']
print(actual_df)

     MeterID     Actual
0        1.0   6.828571
1        2.0  24.500000
2        3.0  16.114286
3        4.0  16.471429
4        5.0   4.085714
..       ...        ...
279    281.0  24.528571
280    282.0  21.385714
281    283.0   0.000000
282    284.0  15.114286
283    285.0  43.800000

[284 rows x 2 columns]


In [15]:
results_dfRMSE=DataFrame(results_dfRMSE)
results_dfRMSE.columns = ['MeterID', 'RMSE']
print(results_dfRMSE)

     MeterID  RMSE
0        1.0  0.24
1        2.0  0.85
2        3.0  0.38
3        4.0  0.46
4        5.0  0.08
..       ...   ...
279    281.0  0.65
280    282.0  0.76
281    283.0  1.62
282    284.0  1.45
283    285.0  1.24

[284 rows x 2 columns]


In [16]:
results_dfError=DataFrame(results_dfError)
results_dfError.columns = ['MeterID', 'Error']
print(results_dfError)

     MeterID     Error
0        1.0  0.237481
1        2.0  0.852763
2        3.0  0.378905
3        4.0  0.457117
4        5.0  0.080431
..       ...       ...
279    281.0  0.646542
280    282.0  0.761397
281    283.0  1.617809
282    284.0  1.450170
283    285.0  1.241791

[284 rows x 2 columns]


In [17]:
results_df = merge(actual_df, prediction_df, on="MeterID", how='left')
results_df = merge(results_df, results_dfRMSE, on="MeterID", how='left')
results_df = merge(results_df, results_dfError, on="MeterID", how='left')
print(results_df)

     MeterID     Actual  Prediction  RMSE     Error
0        1.0   6.828571    7.066052  0.24  0.237481
1        2.0  24.500000   25.352763  0.85  0.852763
2        3.0  16.114286   16.493191  0.38  0.378905
3        4.0  16.471429   16.928545  0.46  0.457117
4        5.0   4.085714    4.166146  0.08  0.080431
..       ...        ...         ...   ...       ...
279    281.0  24.528571   25.175114  0.65  0.646542
280    282.0  21.385714   22.147112  0.76  0.761397
281    283.0   0.000000    1.617809  1.62  1.617809
282    284.0  15.114286   16.564455  1.45  1.450170
283    285.0  43.800000   45.041791  1.24  1.241791

[284 rows x 5 columns]


In [18]:
results_df.drop(results_df.columns[[0]], axis = 1, inplace = True)

In [19]:
def Summary(x):
    return Series(index=['min','max', 'mean','sum'],data=[x.min(),x.max(),x.mean(),x.sum()])
results_df.apply(Summary)

Unnamed: 0,Actual,Prediction,RMSE,Error
min,0.0,-0.408818,0.0,-0.56157
max,56.528571,57.362911,1.62,1.617809
mean,16.148767,16.470172,0.376796,0.321406
sum,4586.249762,4677.52899,107.01,91.279228
