In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv(r'C:\Users\mahat\OneDrive\Desktop\Optimizing-Household-Electricity-through-Machine-Learning-main\Datasets\final_electricity_data.csv')
df['Date'] = pd.to_datetime(df['Date'])
df = df[df['Date'].dt.year == 2023]
df.reset_index(drop=True, inplace=True)

In [3]:
df

Unnamed: 0,Date,Total_Consumption
0,2023-01-01,16.7
1,2023-01-02,7.1
2,2023-01-03,6.0
3,2023-01-04,16.7
4,2023-01-05,3.0
...,...,...
361,2023-12-27,12.4
362,2023-12-28,8.5
363,2023-12-29,2.5
364,2023-12-30,5.1


In [4]:
df.tail()

Unnamed: 0,Date,Total_Consumption
361,2023-12-27,12.4
362,2023-12-28,8.5
363,2023-12-29,2.5
364,2023-12-30,5.1
365,2023-12-31,11.4


In [5]:
import math
import statsmodels.api as sm
import statsmodels.tsa.api as smt
# from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
import matplotlib.pyplot as plt

actual_vals = df.Total_Consumption.values
actual_log = np.log10(actual_vals)

train, test = actual_vals[0:-80], actual_vals[-80:]
train_log, test_log = np.log10(train), np.log10(test)
my_order = (1, 1, 1)
my_seasonal_order = (1, 1, 1, 7)

In [6]:
history = [x for x in train_log]
predictions = list()
predict_log=list()
for t in range(len(test_log)):
    model = sm.tsa.SARIMAX(history, order=my_order, seasonal_order=my_seasonal_order,enforce_stationarity=False,enforce_invertibility=False)
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    predict_log.append(output[0])
    yhat = 10**output[0]
    predictions.append(yhat)
    # obs = test_log[t]
    # history.append(obs)
   # print('predicted=%f, expected=%f' % (output[0], obs))
#error = math.sqrt(mean_squared_error(test_log, predict_log))
#print('Test rmse: %.3f' % error)
# plot
# figsize=(12, 7)
# plt.figure(figsize=figsize)
# pyplot.plot(test,label='Actuals')
# pyplot.plot(predictions, color='red',label='Predicted')
# pyplot.legend(loc='upper right')
# pyplot.show()

In [7]:
import pickle 
with open('module.pkl','wb') as f:
    pickle.dump(model_fit,f)

In [8]:
df_preds = df[-80:]
df_preds['Predicted_Consumption'] = predictions
df_preds

Unnamed: 0,Date,Total_Consumption,Predicted_Consumption
286,2023-10-13,23.4,25.502121
287,2023-10-14,25.8,25.502121
288,2023-10-15,54.9,25.502121
289,2023-10-16,24.6,25.502121
290,2023-10-17,39.9,25.502121
...,...,...,...
361,2023-12-27,12.4,25.502121
362,2023-12-28,8.5,25.502121
363,2023-12-29,2.5,25.502121
364,2023-12-30,5.1,25.502121


In [9]:
import plotly.graph_objects as go
import plotly.offline as py

In [10]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_preds['Date'], y=df_preds['Total_Consumption'],
                    mode='lines',
                    name='lines'))
fig.add_trace(go.Scatter(x=df_preds['Date'], y=df_preds['Predicted_Consumption'],
                    mode='lines',
                    name='lines'))

In [11]:
new_dates = pd.date_range(start='2021/07/31', end='2021/09/30')

In [12]:
train = df.Total_Consumption.values

my_order = (1, 1, 1)
my_seasonal_order = (1, 1, 1, 62)

model = sm.tsa.SARIMAX(train, order=my_order, seasonal_order=my_seasonal_order,enforce_stationarity=False,enforce_invertibility=False)
model_fit = model.fit()
output = model_fit.forecast(62)

In [13]:
listofzeros = [np.nan] * 62

In [14]:
data_new = {'Date':new_dates,
            'Total_Consumption':listofzeros,
            'Predicted_Consumption':output}

df_new_preds = pd.DataFrame(data_new)

In [15]:
df_all = pd.concat([df_preds, df_new_preds], ignore_index=True)

In [16]:
df_all

Unnamed: 0,Date,Total_Consumption,Predicted_Consumption
0,2023-10-13,23.4,25.502121
1,2023-10-14,25.8,25.502121
2,2023-10-15,54.9,25.502121
3,2023-10-16,24.6,25.502121
4,2023-10-17,39.9,25.502121
...,...,...,...
137,2021-09-26,,0.633771
138,2021-09-27,,-1.347484
139,2021-09-28,,3.372470
140,2021-09-29,,5.499232


In [17]:
df_all['MAE'] = df_all['Total_Consumption'] - df_all['Predicted_Consumption']

In [18]:
df_anoms = df_all[df_all['MAE'] >= 5]
df_anoms.reset_index(drop=True, inplace=True)

In [19]:
df_anoms['extra_MAE'] = df_anoms['MAE'] - 5

In [20]:
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE']*9

In [21]:
df_anoms.groupby(df_anoms['Date'].dt.month)['extra_MAE_cost'].sum()

Date
10     869.228186
11    1905.970915
Name: extra_MAE_cost, dtype: float64

In [22]:
df_all.groupby(df_all['Date'].dt.month)['Total_Consumption'].sum().reset_index()['Total_Consumption']*7

0       0.0
1       0.0
2       0.0
3    3960.6
4    6951.0
5    2346.4
Name: Total_Consumption, dtype: float64

In [23]:
180/2300 # 8-15%

0.0782608695652174

In [24]:
df_anoms = df_all[df_all['MAE'] >= 10]
df_anoms.reset_index(drop=True, inplace=True)

In [25]:
df_anoms.tail(2)

Unnamed: 0,Date,Total_Consumption,Predicted_Consumption,MAE
15,2023-11-29,53.4,25.502121,27.897879
16,2023-11-30,44.1,25.502121,18.597879


In [26]:
df_anoms['extra_MAE'] = df_anoms['MAE'] - 10

In [27]:
df_anoms['extra_MAE_cost'] = df_anoms['extra_MAE']*9

In [28]:
df_anoms.groupby(df_anoms['Date'].dt.month)['extra_MAE_cost'].sum()

Date
10     583.085457
11    1374.990005
Name: extra_MAE_cost, dtype: float64

In [29]:
df_all.groupby(df_all['Date'].dt.month)['Total_Consumption'].sum().reset_index()['Total_Consumption']*7

0       0.0
1       0.0
2       0.0
3    3960.6
4    6951.0
5    2346.4
Name: Total_Consumption, dtype: float64

In [30]:
638/8862 # 8-15%

0.0719927781539156

In [31]:
df_anoms = df_all[df_all['MAE'] >= 12]
df_anoms.reset_index(drop=True, inplace=True)

In [32]:
df_all.groupby(df_all['Date'].dt.month)['Total_Consumption'].sum().reset_index()['Total_Consumption']*7

0       0.0
1       0.0
2       0.0
3    3960.6
4    6951.0
5    2346.4
Name: Total_Consumption, dtype: float64

In [33]:
180/2300 # 8-15%

0.0782608695652174

In [34]:
ved = pd.DataFrame(columns=['Date', 'Total_Consumption', 'Predicted_Consumption', 'MAE'])
for i in range(len(df_all['Date'])):
    ved.loc[i,'Date'] = df_all.loc[i,'Date']
    ved.loc[i,'Total_Consumption'] = df_all.loc[i,'Total_Consumption']
    ved.loc[i,'Predicted_Consumption'] = df_all.loc[i,'Predicted_Consumption']
    ved.loc[i,'MAE'] = df_all.loc[i,'MAE']

ved.to_csv('new_pred2023.csv', index=False) 