In [1]:
# Import Necessary Modules
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import math

In [2]:
# Read the dataset
data = pd.read_excel(os.path.join(os.getcwd(), 'Assets', 'assignment2_3_4_5.xlsx'))
print(data.shape)
data.head()

(2184, 3)


Unnamed: 0,Date,Time,Load (kW)
0,01.09.2018,00:00:00,5551.82208
1,,01:00:00,4983.17184
2,,02:00:00,4888.3968
3,,03:00:00,5072.95872
4,,04:00:00,5196.2598


In [3]:
#Reshape the dataset
x = []
y = []
for i in range(25, data.shape[0]):
    x.append(data['Load (kW)'].iloc[i-25]) #Since x is 25 hours before y
    y.append(data['Load (kW)'].iloc[i])

In [4]:
data = pd.DataFrame({'Prev Day Prev Hour' : x, 'Present Hour' : y})
data.head()

Unnamed: 0,Prev Day Prev Hour,Present Hour
0,5551.82208,4775.53968
1,4983.17184,4713.8112
2,4888.3968,4689.02628
3,5072.95872,4844.7504
4,5196.2598,5571.46296


In [5]:
# Data Normalization
maxX = np.max(data['Prev Day Prev Hour'])
minX = np.min(data['Prev Day Prev Hour'])
maxY = np.max(data['Present Hour'])
minY = np.min(data['Present Hour'])
data['Prev Day Prev Hour'] = (data['Prev Day Prev Hour'] - minY) / (maxY - minY)
data['Present Hour'] = (data['Present Hour'] - minY) / (maxY - minY)

In [6]:
# Splitting Data
x = data['Prev Day Prev Hour']
y = data['Present Hour']
trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.1, random_state=101)

In [7]:
# Model Training

#Initialization
m = np.random.uniform(-2, 2)
c = np.random.uniform(-2, 2)
eta = 0.01
epochs = 500
v = 0.9
em2 = 0
ec2 = 0
e = 1E-8

# Loop for epochs
for _ in range(epochs):

    for i in range(trainX.shape[0]):

        # Calculating Grad M
        gradM = -1 * (trainY.iloc[i] - m * trainX.iloc[i] - c) * trainX.iloc[i]

        # Calculating Grad C
        gradC = -1 * (trainY.iloc[i] - m * trainX.iloc[i] - c)

        # Calculating updated values of gm2 and gc2
        em2 = (v * em2) + (1-v) * (gradM * gradM)
        ec2 = v * ec2 + (1-v) * (gradC * gradC)

        # Updating m and c values
        m -= (eta * gradM) / math.sqrt(e + em2)
        c -= (eta * gradC) / math.sqrt(e + ec2)

# Print model parameters after training
print(f'm = {m}\nc = {c}')

m = 0.5859833653282469
c = 0.17591323657715263


In [8]:
# Calculating Predictions
train_pred = [m * trainX.iloc[i] + c for i in range(trainX.shape[0])]
test_pred = [m * testX.iloc[i] + c for i in range(testX.shape[0])]

# Denormalization of Output Values
train_pred = [i * (maxY - minY) + minY for i in train_pred]
test_pred = [i * (maxY - minY) + minY for i in test_pred]
trainY = [i * (maxY - minY) + minY for i in trainY]
testY = [i * (maxY - minY) + minY for i in testY]

# Training Error Calculation
training_MAE = sum([abs(train_pred[i] - trainY[i]) for i in range(len(trainY))]) / len(trainY)
training_MSE = sum([(train_pred[i] - trainY[i]) ** 2 for i in range(len(trainY))]) / len(trainY)
training_RMSE = training_MSE ** 0.5
print('Training Error :')
print(f'MAE : {training_MAE}')
print(f'MSE : {training_MSE}')
print(f'RMSE : {training_RMSE}')

# Testing Error Calculation
testing_MAE = sum([abs(test_pred[i] - testY[i]) for i in range(len(testY))]) / len(testY)
testing_MSE = sum([(test_pred[i] - testY[i]) ** 2 for i in range(len(testY))]) / len(testY)
testing_RMSE = testing_MSE ** 0.5
print('Testing Error :')
print(f'MAE : {testing_MAE}')
print(f'MSE : {testing_MSE}')
print(f'RMSE : {testing_RMSE}')

Training Error :
MAE : 617.5743470776646
MSE : 668914.7976232431
RMSE : 817.8721156900039
Testing Error :
MAE : 671.0754626322865
MSE : 778269.986757529
RMSE : 882.1961158141249


In [9]:
# Checking predicted training values
pd.DataFrame({'Actual' : trainY, 'Predicted' : train_pred})

Unnamed: 0,Actual,Predicted
0,5173.50132,5401.385627
1,6218.67672,6418.947616
2,5675.27904,5683.270393
3,7306.09560,6800.213661
4,6439.55868,5685.279941
...,...,...
1938,5900.83740,5660.617308
1939,5354.78976,5817.544731
1940,5062.98240,5469.710256
1941,7613.17920,7178.556727


In [10]:
# Checking predicted testing values
pd.DataFrame({'Actual' : testY, 'Predicted' : test_pred})

Unnamed: 0,Actual,Predicted
0,5190.80400,7333.291917
1,6813.35892,5386.405361
2,4486.84992,5043.320726
3,7493.15160,5229.021222
4,6177.05676,5904.137978
...,...,...
211,6138.55440,6217.992825
212,4827.44772,5685.097255
213,5097.89952,5705.558107
214,6082.74936,5930.810159


In [11]:
# Real Time Prediction
x = float(input('Enter the load at same hour in previous week: '))
x = (x - minX) / (maxX - minX)
prediction = m * x + c
prediction = (prediction * (maxY - minY)) + minY
print('Predicted load at present hour :', prediction)

Enter the load at same hour in previous week: 5642.5742
Predicted load at present hour : 5666.115449052924
