In [2]:
import pandas as pd
import numpy as np


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import RMSprop


from sklearn.preprocessing import MinMaxScaler,OneHotEncoder
from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error, r2_score


In [3]:
#Load dataset into a pandas dataframe
df = pd.read_csv("../data/Cleaned data/data.csv") 

df.dropna(inplace = True)

df.head()

Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,DATE,TOTALDEMAND,HOLIDAY,MIN,MAX,RAIN,SOLAR,RRP,FORECASTDEMAND,OUTPUT,MONTHDATE,WEEKDAY,WEEKEND,TEMPAVE
0,2016,1,1,0.656341,15.902439,2016-01-01,6853.633437,2.0,15.3,28.6,0.0,32.2,38.472917,6665.366167,46.93,01-2016,4,0,21.95
1,2016,1,2,0.656341,15.902439,2016-01-02,6727.613958,0.0,15.9,26.1,0.0,21.7,36.907292,6236.849955,46.93,01-2016,5,1,21.0
2,2016,1,3,0.688837,14.488372,2016-01-03,6616.406076,0.0,17.5,25.6,0.0,10.3,31.997083,6551.924748,46.93,01-2016,6,1,21.55
3,2016,1,4,0.679545,22.477273,2016-01-04,7367.750278,0.0,18.2,23.6,14.0,6.4,33.424583,6729.993123,46.93,01-2016,0,0,20.9
4,2016,1,5,0.768837,22.581395,2016-01-05,7462.242014,0.0,17.6,20.5,39.0,4.4,33.053958,7333.898202,46.93,01-2016,1,0,19.05


In [4]:
#X = df.iloc[:,[3,5,9,16]]
#y = df.iloc[:,10]

df = df.drop(['DATE','MIN','MAX','FORECASTDEMAND', 'MONTHDATE','WEEKEND'], axis=1)

#y = df['FORECASTDEMAND']

df.head()

Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,TOTALDEMAND,HOLIDAY,RAIN,SOLAR,RRP,OUTPUT,WEEKDAY,TEMPAVE
0,2016,1,1,0.656341,15.902439,6853.633437,2.0,0.0,32.2,38.472917,46.93,4,21.95
1,2016,1,2,0.656341,15.902439,6727.613958,0.0,0.0,21.7,36.907292,46.93,5,21.0
2,2016,1,3,0.688837,14.488372,6616.406076,0.0,0.0,10.3,31.997083,46.93,6,21.55
3,2016,1,4,0.679545,22.477273,7367.750278,0.0,14.0,6.4,33.424583,46.93,0,20.9
4,2016,1,5,0.768837,22.581395,7462.242014,0.0,39.0,4.4,33.053958,46.93,1,19.05


In [5]:
# Onehot Encoding for categorial data (Weekday)


# Select the "WEEKDAY" column and create a new dataframe
weekday_df = df[['WEEKDAY']]

# Create a one-hot encoder object

encoder = OneHotEncoder(categories='auto')

# Fit and transform the weekday data
weekday_encoded = encoder.fit_transform(df[['WEEKDAY']]).toarray()

# Create a new dataframe with the encoded weekday data
weekday_df = pd.DataFrame(weekday_encoded, columns=['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'])

# Concatenate the original dataframe with the encoded weekday dataframe
df = pd.concat([df, weekday_df], axis=1)

#Drop weekday column
df = df.drop(['WEEKDAY'], axis=1)

df.head()

Unnamed: 0,YEAR,MONTH,DAY,HUMIDITY,WINDSPEED,TOTALDEMAND,HOLIDAY,RAIN,SOLAR,RRP,OUTPUT,TEMPAVE,MON,TUE,WED,THU,FRI,SAT,SUN
0,2016,1,1,0.656341,15.902439,6853.633437,2.0,0.0,32.2,38.472917,46.93,21.95,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2016,1,2,0.656341,15.902439,6727.613958,0.0,0.0,21.7,36.907292,46.93,21.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,2016,1,3,0.688837,14.488372,6616.406076,0.0,0.0,10.3,31.997083,46.93,21.55,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2016,1,4,0.679545,22.477273,7367.750278,0.0,14.0,6.4,33.424583,46.93,20.9,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2016,1,5,0.768837,22.581395,7462.242014,0.0,39.0,4.4,33.053958,46.93,19.05,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# identify response variable and predictors
X = df.drop(['TOTALDEMAND'], axis=1).values
y = df['TOTALDEMAND'].values

# Split the dataset into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Normalize the data

scaler = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


print(X_train_scaled[:5])

[[0.66666667 0.27272727 0.         0.82446463 0.42215465 0.
  0.         0.18269231 0.37598142 0.46602747 0.60465116 0.
  0.         1.         0.         0.         0.         0.        ]
 [0.66666667 0.18181818 0.3        0.84998342 0.40512547 0.
  0.         0.53205128 0.30048508 0.64777414 0.57122093 0.
  1.         0.         0.         0.         0.         0.        ]
 [0.33333333 0.72727273 0.86666667 0.79107722 0.29346085 0.
  0.00125313 0.67628205 0.57016328 0.21803648 0.40843023 0.
  0.         0.         1.         0.         0.         0.        ]
 [0.66666667 0.90909091 0.1        0.70522388 0.31685972 0.
  0.         0.81089744 0.3494768  0.95154981 0.57994186 0.
  0.         1.         0.         0.         0.         0.        ]
 [0.5        0.90909091 0.9        0.46202322 0.43203417 0.
  0.         0.94871795 0.44530892 0.51957991 0.60755814 0.
  0.         0.         1.         0.         0.         0.        ]]


In [11]:
# Build the LSTM model with three LSTM layers and one dense output layer

n_steps = 1
n_features = X_train_scaled.shape[1]

X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], n_steps, n_features)
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], n_steps, n_features)

model = Sequential()
model.add(LSTM(units=64, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)))
model.add(LSTM(units=32, activation='relu', return_sequences=True))
model.add(LSTM(units=16, activation='relu'))
model.add(Dense(units=1, activation='linear'))

optimizer = RMSprop(learning_rate=0.0001)
model.compile(loss='mean_squared_error', optimizer=optimizer)


In [14]:
# Train the LSTM model
model.fit(X_train_reshaped, y_train, epochs=100, batch_size=32)

Epoch 1/100


ValueError: Creating variables on a non-first call to a function decorated with tf.function.

In [15]:
# Evaluate the LSTM model

# Get model predictions on the test set
y_pred = model.predict(X_test)

# Calculate MSE and R-squared
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE: %.4f' % mse)
print('R-squared: %.4f' % r2)

ValueError: in user code:

    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1569 predict_function  *
        return step_function(self, iterator)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1559 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1552 run_step  **
        outputs = model.predict_step(data)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1525 predict_step
        return self(x, training=False)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1013 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:215 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_1 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 18)
