In [334]:
import pandas as pd
import numpy as np


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import RMSprop

from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers import Adam
from datetime import datetime


from sklearn.preprocessing import MinMaxScaler,OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import mean_squared_error, r2_score

In [335]:
#Load dataset into a pandas dataframe
df = pd.read_csv("../data/Cleaned data/data.csv") 

df.dropna(inplace = True)

df.head()

Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,DATE,TOTALDEMAND,HOLIDAY,MIN,MAX,RAIN,SOLAR,RRP,FORECASTDEMAND,OUTPUT,MONTHDATE,WEEKDAY,WEEKEND,TEMPAVE
0,2016,1,1,0.656341,15.902439,2016-01-01,6853.633437,2.0,15.3,28.6,0.0,32.2,38.472917,6665.366167,23.465,01-2016,4,0,21.95
1,2016,1,2,0.656341,15.902439,2016-01-02,6727.613958,0.0,15.9,26.1,0.0,21.7,36.907292,6236.849955,23.465,01-2016,5,1,21.0
2,2016,1,3,0.688837,14.488372,2016-01-03,6616.406076,0.0,17.5,25.6,0.0,10.3,31.997083,6551.924748,23.465,01-2016,6,1,21.55
3,2016,1,4,0.679545,22.477273,2016-01-04,7367.750278,0.0,18.2,23.6,14.0,6.4,33.424583,6729.993123,23.465,01-2016,0,0,20.9
4,2016,1,5,0.768837,22.581395,2016-01-05,7462.242014,0.0,17.6,20.5,39.0,4.4,33.053958,7333.898202,23.465,01-2016,1,0,19.05


In [336]:
#X = df.iloc[:,[3,5,9,16]]
#y = df.iloc[:,10]

df = df.drop(['MIN','MAX','FORECASTDEMAND', 'MONTHDATE','WEEKEND'], axis=1)

#y = df['FORECASTDEMAND']

df.head()


Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,DATE,TOTALDEMAND,HOLIDAY,RAIN,SOLAR,RRP,OUTPUT,WEEKDAY,TEMPAVE
0,2016,1,1,0.656341,15.902439,2016-01-01,6853.633437,2.0,0.0,32.2,38.472917,23.465,4,21.95
1,2016,1,2,0.656341,15.902439,2016-01-02,6727.613958,0.0,0.0,21.7,36.907292,23.465,5,21.0
2,2016,1,3,0.688837,14.488372,2016-01-03,6616.406076,0.0,0.0,10.3,31.997083,23.465,6,21.55
3,2016,1,4,0.679545,22.477273,2016-01-04,7367.750278,0.0,14.0,6.4,33.424583,23.465,0,20.9
4,2016,1,5,0.768837,22.581395,2016-01-05,7462.242014,0.0,39.0,4.4,33.053958,23.465,1,19.05


In [337]:
# Onehot Encoding for categorial data (Weekday)


# Select the "WEEKDAY" column and create a new dataframe
weekday_df = df[['WEEKDAY']]

# Create a one-hot encoder object

encoder = OneHotEncoder(categories='auto')

# Fit and transform the weekday data
weekday_encoded = encoder.fit_transform(df[['WEEKDAY']]).toarray()

# Create a new dataframe with the encoded weekday data
weekday_df = pd.DataFrame(weekday_encoded, columns=['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'])

#Drop weekday column
df = df.drop(['WEEKDAY'], axis=1)

# Concatenate the original dataframe with the encoded weekday dataframe
df = pd.concat([df, weekday_df], axis=1)


df.head()

Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,DATE,TOTALDEMAND,HOLIDAY,RAIN,SOLAR,RRP,OUTPUT,TEMPAVE,MON,TUE,WED,THU,FRI,SAT,SUN
0,2016,1,1,0.656341,15.902439,2016-01-01,6853.633437,2.0,0.0,32.2,38.472917,23.465,21.95,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2016,1,2,0.656341,15.902439,2016-01-02,6727.613958,0.0,0.0,21.7,36.907292,23.465,21.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,2016,1,3,0.688837,14.488372,2016-01-03,6616.406076,0.0,0.0,10.3,31.997083,23.465,21.55,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2016,1,4,0.679545,22.477273,2016-01-04,7367.750278,0.0,14.0,6.4,33.424583,23.465,20.9,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2016,1,5,0.768837,22.581395,2016-01-05,7462.242014,0.0,39.0,4.4,33.053958,23.465,19.05,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [338]:
# Convert the 'date' column to a datetime object
df['DATE'] = pd.to_datetime(df['DATE'])

# identify response variable and predictors
X = df.drop(['TOTALDEMAND', 'DATE'], axis=1).values
y = df['TOTALDEMAND'].values

# Split the dataset into training and testing sets. 

train_set = (df['DATE'] >= datetime(2018, 1, 1)) & (df['DATE'] < datetime(2022, 1, 1))
test_set = df['DATE'] >= datetime(2022, 1, 1)



In [339]:
# Normalize the data

scaler = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler.fit_transform(X[train_set.index])
X_test_scaled = scaler.transform(X[test_set.index])

train_set_scaled = np.hstack((X_train_scaled, y[train_set.index][:, None]))
test_set_scaled = np.hstack((X_test_scaled, y[test_set.index][:, None]))


print(train_set_scaled[:5])


[[0.00000000e+00 0.00000000e+00 0.00000000e+00 6.79161283e-01
  3.56595002e-01 1.00000000e+00 0.00000000e+00 1.00000000e+00
  2.35697584e-01 2.49868284e-02 6.38081395e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00
  0.00000000e+00 0.00000000e+00 6.85363344e+03]
 [0.00000000e+00 0.00000000e+00 3.33333333e-02 6.79161283e-01
  3.56595002e-01 0.00000000e+00 0.00000000e+00 6.73913043e-01
  2.26106057e-01 2.49868284e-02 6.10465116e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  1.00000000e+00 0.00000000e+00 6.72761396e+03]
 [0.00000000e+00 0.00000000e+00 6.66666667e-02 7.12786847e-01
  3.24886080e-01 0.00000000e+00 0.00000000e+00 3.19875776e-01
  1.96024526e-01 2.49868284e-02 6.26453488e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 1.00000000e+00 6.61640608e+03]
 [0.00000000e+00 0.00000000e+00 1.00000000e-01 7.03172035e-01
  5.04028540e-01 0.00000000e+00 8.77192982e-02 1.9

In [340]:
#Reshape the data for input to the LSTM model
#(This step is fit the data to 3D tensor format for LSTM model to process the sequential data efficiently 
#and capture any temporal dependencies in the data)


X_train = []
y_train = []
for i in range(1, len(train_set)):
    X_train.append(X_train_scaled[i-1:i, :])
    y_train.append(X_train_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))

X_test = []
y_test = []
for i in range(1, len(test_set)):
    X_test.append(X_test_scaled[i-1:i, :])
    y_test.append(X_test_scaled[i, 0])
X_test, y_test = np.array(X_test), np.array(y_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_train.shape[2]))

print(X_test[:5])




[[[0.         0.         0.         0.67916128 0.356595   1.
   0.         1.         0.23569758 0.02498683 0.6380814  0.
   0.         0.         0.         1.         0.         0.        ]]

 [[0.         0.         0.03333333 0.67916128 0.356595   0.
   0.         0.67391304 0.22610606 0.02498683 0.61046512 0.
   0.         0.         0.         0.         1.         0.        ]]

 [[0.         0.         0.06666667 0.71278685 0.32488608 0.
   0.         0.31987578 0.19602453 0.02498683 0.62645349 0.
   0.         0.         0.         0.         0.         1.        ]]

 [[0.         0.         0.1        0.70317203 0.50402854 0.
   0.0877193  0.19875776 0.20476985 0.02498683 0.60755814 1.
   0.         0.         0.         0.         0.         0.        ]]

 [[0.         0.         0.13333333 0.7955683  0.50636338 0.
   0.2443609  0.13664596 0.20249929 0.02498683 0.55377907 0.
   1.         0.         0.         0.         0.         0.        ]]]


In [347]:
# Build the LSTM model with three LSTM layers and one dense output layer


model = Sequential()
model.add(LSTM(units=64, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=32, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=16, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='linear'))


optimizer = RMSprop(learning_rate=0.0001)
model.compile(loss='mean_squared_error', optimizer=optimizer)

In [348]:
# Train the LSTM model
model.fit(X_train, y_train, epochs=100, batch_size=32)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fdb0fc80730>

In [349]:
# Make predictions on the test set
y_pred = model.predict(X_test)

# Inverse transform the scaled data back to the original scale
#y_pred = scaler.inverse_transform(y_pred)
#y_test = scaler.inverse_transform(y_test.reshape(-1, 1))



In [358]:
# Evaluate the LSTM model on the test set
# Calculate MSE and R-squared

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE: %.4f' % mse)
print('R-squared: %.4f' % r2)

MSE: 0.0011
R-squared: 0.9893


In [359]:
## Plot Accuracy 
