In [73]:
import pandas as pd
import numpy as np


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import RMSprop
from datetime import datetime


from sklearn.preprocessing import MinMaxScaler,OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import mean_squared_error, r2_score

In [74]:
#Load dataset into a pandas dataframe
df = pd.read_csv("../data/Cleaned data/data.csv") 

df.dropna(inplace = True)

df.head()

Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,DATE,TOTALDEMAND,HOLIDAY,MIN,MAX,RAIN,SOLAR,RRP,FORECASTDEMAND,OUTPUT,MONTHDATE,WEEKDAY,WEEKEND,TEMPAVE
0,2016,1,1,0.656341,15.902439,2016-01-01,6853.633437,2.0,15.3,28.6,0.0,32.2,38.472917,6665.366167,46.93,01-2016,4,0,21.95
1,2016,1,2,0.656341,15.902439,2016-01-02,6727.613958,0.0,15.9,26.1,0.0,21.7,36.907292,6236.849955,46.93,01-2016,5,1,21.0
2,2016,1,3,0.688837,14.488372,2016-01-03,6616.406076,0.0,17.5,25.6,0.0,10.3,31.997083,6551.924748,46.93,01-2016,6,1,21.55
3,2016,1,4,0.679545,22.477273,2016-01-04,7367.750278,0.0,18.2,23.6,14.0,6.4,33.424583,6729.993123,46.93,01-2016,0,0,20.9
4,2016,1,5,0.768837,22.581395,2016-01-05,7462.242014,0.0,17.6,20.5,39.0,4.4,33.053958,7333.898202,46.93,01-2016,1,0,19.05


In [75]:
#X = df.iloc[:,[3,5,9,16]]
#y = df.iloc[:,10]

df = df.drop(['MIN','MAX','FORECASTDEMAND', 'MONTHDATE','WEEKEND'], axis=1)

#y = df['FORECASTDEMAND']

df.head()


Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,DATE,TOTALDEMAND,HOLIDAY,RAIN,SOLAR,RRP,OUTPUT,WEEKDAY,TEMPAVE
0,2016,1,1,0.656341,15.902439,2016-01-01,6853.633437,2.0,0.0,32.2,38.472917,46.93,4,21.95
1,2016,1,2,0.656341,15.902439,2016-01-02,6727.613958,0.0,0.0,21.7,36.907292,46.93,5,21.0
2,2016,1,3,0.688837,14.488372,2016-01-03,6616.406076,0.0,0.0,10.3,31.997083,46.93,6,21.55
3,2016,1,4,0.679545,22.477273,2016-01-04,7367.750278,0.0,14.0,6.4,33.424583,46.93,0,20.9
4,2016,1,5,0.768837,22.581395,2016-01-05,7462.242014,0.0,39.0,4.4,33.053958,46.93,1,19.05


In [76]:
# Onehot Encoding for categorial data (Weekday)


# Select the "WEEKDAY" column and create a new dataframe
weekday_df = df[['WEEKDAY']]

# Create a one-hot encoder object

encoder = OneHotEncoder(categories='auto')

# Fit and transform the weekday data
weekday_encoded = encoder.fit_transform(df[['WEEKDAY']]).toarray()

# Create a new dataframe with the encoded weekday data
weekday_df = pd.DataFrame(weekday_encoded, columns=['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'])

#Drop weekday column
df = df.drop(['WEEKDAY'], axis=1)

# Concatenate the original dataframe with the encoded weekday dataframe
df = pd.concat([df, weekday_df], axis=1)


df.head()

Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,DATE,TOTALDEMAND,HOLIDAY,RAIN,SOLAR,RRP,OUTPUT,TEMPAVE,MON,TUE,WED,THU,FRI,SAT,SUN
0,2016,1,1,0.656341,15.902439,2016-01-01,6853.633437,2.0,0.0,32.2,38.472917,46.93,21.95,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2016,1,2,0.656341,15.902439,2016-01-02,6727.613958,0.0,0.0,21.7,36.907292,46.93,21.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,2016,1,3,0.688837,14.488372,2016-01-03,6616.406076,0.0,0.0,10.3,31.997083,46.93,21.55,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2016,1,4,0.679545,22.477273,2016-01-04,7367.750278,0.0,14.0,6.4,33.424583,46.93,20.9,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2016,1,5,0.768837,22.581395,2016-01-05,7462.242014,0.0,39.0,4.4,33.053958,46.93,19.05,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [77]:
# Convert the 'date' column to a datetime object
#df['DATE'] = pd.to_datetime(df['DATE'])

# identify response variable and predictors
X = df.drop(['TOTALDEMAND', 'DATE'], axis=1).values
y = df['TOTALDEMAND'].values

# Split the dataset into training and testing sets

train_set = df['DATE'] < datetime(2022, 1, 1)
test_set = df['DATE'] >= datetime(2022, 1, 1)



TypeError: '<' not supported between instances of 'str' and 'datetime.datetime'

In [78]:
# Normalize the data

scaler = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler.fit_transform(X[train_set.index])
X_test_scaled = scaler.transform(X[test_set.index])

train_set_scaled = np.hstack((X_train_scaled, y[train_set.index][:, None]))
test_set_scaled = np.hstack((X_test_scaled, y[test_set.index][:, None]))


print(train_set_scaled[:5])


[[0.00000000e+00 0.00000000e+00 0.00000000e+00 6.85729887e-01
  3.56595002e-01 1.00000000e+00 0.00000000e+00 1.00000000e+00
  2.40545934e-01 4.73872188e-02 6.38081395e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00
  0.00000000e+00 0.00000000e+00 6.85363344e+03]
 [0.00000000e+00 0.00000000e+00 3.33333333e-02 6.85729887e-01
  3.56595002e-01 0.00000000e+00 0.00000000e+00 6.63461538e-01
  2.30757107e-01 4.73872188e-02 6.10465116e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  1.00000000e+00 0.00000000e+00 6.72761396e+03]
 [0.00000000e+00 0.00000000e+00 6.66666667e-02 7.19680666e-01
  3.24886080e-01 0.00000000e+00 0.00000000e+00 2.98076923e-01
  2.00056792e-01 4.73872188e-02 6.26453488e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 1.00000000e+00 6.61640608e+03]
 [0.00000000e+00 0.00000000e+00 1.00000000e-01 7.09972863e-01
  5.04028540e-01 0.00000000e+00 8.77192982e-02 1.7

In [79]:
#Reshape the data for input to the LSTM model
#(This step is fit the data to 3D tensor format for LSTM model to process the sequential data efficiently 
#and capture any temporal dependencies in the data)


X_train = []
y_train = []
for i in range(1, len(train_set)):
    X_train.append(X_train_scaled[i-1:i, :])
    y_train.append(X_train_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))

X_test = []
y_test = []
for i in range(1, len(test_set)):
    X_test.append(X_test_scaled[i-1:i, :])
    y_test.append(X_test_scaled[i, 0])
X_test, y_test = np.array(X_test), np.array(y_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_train.shape[2]))

print(X_test[:5])





[[[0.         0.         0.         0.68572989 0.356595   1.
   0.         1.         0.24054593 0.04738722 0.6380814  0.
   0.         0.         0.         1.         0.         0.        ]]

 [[0.         0.         0.03333333 0.68572989 0.356595   0.
   0.         0.66346154 0.23075711 0.04738722 0.61046512 0.
   0.         0.         0.         0.         1.         0.        ]]

 [[0.         0.         0.06666667 0.71968067 0.32488608 0.
   0.         0.29807692 0.20005679 0.04738722 0.62645349 0.
   0.         0.         0.         0.         0.         1.        ]]

 [[0.         0.         0.1        0.70997286 0.50402854 0.
   0.0877193  0.17307692 0.20898201 0.04738722 0.60755814 1.
   0.         0.         0.         0.         0.         0.        ]]

 [[0.         0.         0.13333333 0.80326276 0.50636338 0.
   0.2443609  0.10897436 0.20666474 0.04738722 0.55377907 0.
   1.         0.         0.         0.         0.         0.        ]]]


In [80]:
# Build the LSTM model with three LSTM layers and one dense output layer


model = Sequential()
model.add(LSTM(units=64, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=32, activation='tanh', return_sequences=True))
model.add(LSTM(units=16, activation='relu'))
model.add(Dense(units=1, activation='linear'))


optimizer = RMSprop(learning_rate=0.0001)
model.compile(loss='mean_squared_error', optimizer=optimizer)



In [81]:
# define the hyperparameters to tune


#TO DO here



In [None]:


# Train the LSTM model
model.fit(X_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100

In [None]:
# Evaluate the LSTM model

# Get model predictions on the test set
y_pred = model.predict(X_test)

# Calculate MSE and R-squared
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE: %.4f' % mse)
print('R-squared: %.4f' % r2)