In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv(r'C:\Users\mahat\OneDrive\Desktop\Optimizing-Household-Electricity-through-Machine-Learning-main\Datasets\final_electricity_data.csv')
df['Date'] = pd.to_datetime(df['Date'])
df = df[df['Date'].dt.year == 2022]
df.reset_index(drop=True, inplace=True)

In [3]:
df

Unnamed: 0,Date,Total_Consumption
0,2022-01-01,11.8
1,2022-01-02,9.2
2,2022-01-03,2.5
3,2022-01-04,21.8
4,2022-01-05,12.3
...,...,...
359,2022-12-27,16.3
360,2022-12-28,23.5
361,2022-12-29,21.0
362,2022-12-30,17.9


In [4]:
df.tail()

Unnamed: 0,Date,Total_Consumption
359,2022-12-27,16.3
360,2022-12-28,23.5
361,2022-12-29,21.0
362,2022-12-30,17.9
363,2022-12-31,12.8


In [5]:
df.shape

(364, 2)

In [6]:
import math
import statsmodels.api as sm
import statsmodels.tsa.api as smt
# from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
import matplotlib.pyplot as plt

actual_vals = df.Total_Consumption.values
actual_log = np.log10(actual_vals)

train, test = actual_vals[0:-80], actual_vals[-80:]
train_log, test_log = np.log10(train), np.log10(test)
my_order = (1, 1, 1)
my_seasonal_order = (1, 1, 1, 7)

In [7]:
history = [x for x in train_log]
predictions = list()
predict_log=list()
for t in range(len(test_log)):
    model = sm.tsa.SARIMAX(history, order=my_order, seasonal_order=my_seasonal_order,enforce_stationarity=False,enforce_invertibility=False)
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    predict_log.append(output[0])
    yhat = 10**output[0]
    predictions.append(yhat)
    # obs = test_log[t]
    # history.append(obs)
   # print('predicted=%f, expected=%f' % (output[0], obs))
#error = math.sqrt(mean_squared_error(test_log, predict_log))
#print('Test rmse: %.3f' % error)
# plot
# figsize=(12, 7)
# plt.figure(figsize=figsize)
# pyplot.plot(test,label='Actuals')
# pyplot.plot(predictions, color='red',label='Predicted')
# pyplot.legend(loc='upper right')
# pyplot.show()

In [8]:
import pickle 
with open('module.pkl','wb') as f:
    pickle.dump(model_fit,f)

In [9]:
df_preds = df[-80:]
df_preds['Predicted_Consumption'] = predictions
df_preds

Unnamed: 0,Date,Total_Consumption,Predicted_Consumption
284,2022-10-13,39.3,28.707322
285,2022-10-14,57.9,28.707322
286,2022-10-15,20.7,28.707322
287,2022-10-16,49.2,28.707322
288,2022-10-17,24.9,28.707322
...,...,...,...
359,2022-12-27,16.3,28.707322
360,2022-12-28,23.5,28.707322
361,2022-12-29,21.0,28.707322
362,2022-12-30,17.9,28.707322


In [10]:
import plotly.graph_objects as go
import plotly.offline as py

In [11]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_preds['Date'], y=df_preds['Total_Consumption'],
                    mode='lines',
                    name='lines'))
fig.add_trace(go.Scatter(x=df_preds['Date'], y=df_preds['Predicted_Consumption'],
                    mode='lines',
                    name='lines'))

In [12]:
new_dates = pd.date_range(start='2021/07/31', end='2021/09/30')

In [13]:
train = df.Total_Consumption.values

my_order = (1, 1, 1)
my_seasonal_order = (1, 1, 1, 62)

model = sm.tsa.SARIMAX(train, order=my_order, seasonal_order=my_seasonal_order,enforce_stationarity=False,enforce_invertibility=False)
model_fit = model.fit()
output = model_fit.forecast(62)

In [14]:
listofzeros = [np.nan] * 62

In [15]:
data_new = {'Date':new_dates,
            'Total_Consumption':listofzeros,
            'Predicted_Consumption':output}

df_new_preds = pd.DataFrame(data_new)

In [16]:
df_all = pd.concat([df_preds, df_new_preds], ignore_index=True)

In [17]:
df_all

Unnamed: 0,Date,Total_Consumption,Predicted_Consumption
0,2022-10-13,39.3,28.707322
1,2022-10-14,57.9,28.707322
2,2022-10-15,20.7,28.707322
3,2022-10-16,49.2,28.707322
4,2022-10-17,24.9,28.707322
...,...,...,...
137,2021-09-26,,7.952769
138,2021-09-27,,22.041882
139,2021-09-28,,2.668141
140,2021-09-29,,1.757678


In [18]:
df_all['MAE'] = df_all['Total_Consumption'] - df_all['Predicted_Consumption']

In [19]:
df_anoms = df_all[df_all['MAE'] >= 5]
df_anoms.reset_index(drop=True, inplace=True)

In [20]:
df_anoms['extra_MAE'] = df_anoms['MAE'] - 5

In [21]:
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE']*9

In [22]:
df_anoms.groupby(df_anoms['Date'].dt.month)['extra_MAE_cost'].sum()

Date
10     828.306901
11    2121.211502
Name: extra_MAE_cost, dtype: float64

In [23]:
df_all.groupby(df_all['Date'].dt.month)['Total_Consumption'].sum().reset_index()['Total_Consumption']*7

0       0.0
1       0.0
2       0.0
3    4187.4
4    7591.5
5    2811.9
Name: Total_Consumption, dtype: float64

In [24]:
180/2300 # 8-15%

0.0782608695652174

In [25]:
df_anoms = df_all[df_all['MAE'] >= 10]
df_anoms.reset_index(drop=True, inplace=True)

In [26]:
df_anoms.tail(2)

Unnamed: 0,Date,Total_Consumption,Predicted_Consumption,MAE
18,2022-11-27,71.4,28.707322,42.692678
19,2022-11-30,53.1,28.707322,24.392678


In [27]:
df_anoms['extra_MAE'] = df_anoms['MAE'] - 10

In [28]:
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE']*9

In [29]:
df_anoms.groupby(df_anoms['Date'].dt.month)['extra_MAE_cost'].sum()

Date
10     453.138701
11    1521.943302
Name: extra_MAE_cost, dtype: float64

In [30]:
df_all.groupby(df_all['Date'].dt.month)['Total_Consumption'].sum().reset_index()['Total_Consumption']*7

0       0.0
1       0.0
2       0.0
3    4187.4
4    7591.5
5    2811.9
Name: Total_Consumption, dtype: float64

In [31]:
638/8862 # 8-15%

0.0719927781539156

In [32]:
df_anoms = df_all[df_all['MAE'] >= 12]
df_anoms.reset_index(drop=True, inplace=True)

In [33]:
df_anoms.tail(2)

Unnamed: 0,Date,Total_Consumption,Predicted_Consumption,MAE
15,2022-11-27,71.4,28.707322,42.692678
16,2022-11-30,53.1,28.707322,24.392678


In [34]:
df_anoms['extra_MAE'] = df_anoms['MAE'] - 12

In [35]:
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE']*9

In [36]:
df_anoms.groupby(df_anoms['Date'].dt.month)['extra_MAE_cost'].sum()

Date
10     347.070501
11    1300.609202
Name: extra_MAE_cost, dtype: float64

In [37]:
df_all.groupby(df_all['Date'].dt.month)['Total_Consumption'].sum().reset_index()['Total_Consumption']*7

0       0.0
1       0.0
2       0.0
3    4187.4
4    7591.5
5    2811.9
Name: Total_Consumption, dtype: float64

In [38]:
180/2300 # 8-15%

0.0782608695652174

In [39]:
ved = pd.DataFrame(columns=['Date', 'Total_Consumption', 'Predicted_Consumption', 'MAE'])
for i in range(len(df_all['Date'])):
    ved.loc[i,'Date'] = df_all.loc[i,'Date']
    ved.loc[i,'Total_Consumption'] = df_all.loc[i,'Total_Consumption']
    ved.loc[i,'Predicted_Consumption'] = df_all.loc[i,'Predicted_Consumption']
    ved.loc[i,'MAE'] = df_all.loc[i,'MAE']

ved.to_csv('new_pred2022.csv', index=False) 