In [21]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

In [22]:
data_path = "/content/drive/MyDrive/Colab Notebooks/Hitachi/Data/final_data.csv"
data_df = pd.read_csv(data_path)
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27552 entries, 0 to 27551
Data columns (total 28 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Unnamed: 0                      27552 non-null  int64  
 1   datetime                        27552 non-null  object 
 2   Demand (MW)                     27552 non-null  float64
 3   Temperature                     27552 non-null  float64
 4   dewpoint                        27552 non-null  float64
 5   humidity                        27552 non-null  float64
 6   precipitation                   27552 non-null  float64
 7   precipprob                      27552 non-null  float64
 8   snow                            27552 non-null  float64
 9   snowdepth                       27552 non-null  float64
 10  windgust                        27552 non-null  float64
 11  windspeed                       27552 non-null  float64
 12  sealevelpressure                

In [23]:
data_df.isnull().sum()

Unnamed: 0                        0
datetime                          0
Demand (MW)                       0
Temperature                       0
dewpoint                          0
humidity                          0
precipitation                     0
precipprob                        0
snow                              0
snowdepth                         0
windgust                          0
windspeed                         0
sealevelpressure                  0
cloudcover                        0
visibility                        0
solarradiation                    0
severerisk                        0
freezingrain                      0
ice                               0
rain                              0
snow.1                            0
Clear                             0
Freezing Drizzle/Freezing Rain    0
Ice                               0
Overcast                          0
Partially cloudy                  0
Rain                              0
Snow                        

In [24]:
data_df.head()

Unnamed: 0.1,Unnamed: 0,datetime,Demand (MW),Temperature,dewpoint,humidity,precipitation,precipprob,snow,snowdepth,...,ice,rain,snow.1,Clear,Freezing Drizzle/Freezing Rain,Ice,Overcast,Partially cloudy,Rain,Snow
0,0,2020-01-01 00:00:00,445.8,38.0,29.0,69.76,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
1,1,2020-01-01 01:00:00,424.5,38.0,29.9,72.37,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
2,2,2020-01-01 02:00:00,423.5,38.0,30.8,75.06,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
3,3,2020-01-01 03:00:00,418.8,37.1,29.9,74.97,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,1,0,0
4,4,2020-01-01 04:00:00,414.8,35.1,29.0,78.41,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0


In [25]:
data_df.drop(columns=["Unnamed: 0", "datetime"], inplace=True)

In [26]:
data_df.head()

Unnamed: 0,Demand (MW),Temperature,dewpoint,humidity,precipitation,precipprob,snow,snowdepth,windgust,windspeed,...,ice,rain,snow.1,Clear,Freezing Drizzle/Freezing Rain,Ice,Overcast,Partially cloudy,Rain,Snow
0,445.8,38.0,29.0,69.76,0.0,0.0,0.0,0.0,15.410436,4.7,...,0,0,0,0,0,0,1,0,0,0
1,424.5,38.0,29.9,72.37,0.0,0.0,0.0,0.0,15.410436,4.7,...,0,0,0,0,0,0,1,0,0,0
2,423.5,38.0,30.8,75.06,0.0,0.0,0.0,0.0,15.410436,3.4,...,0,0,0,0,0,0,1,0,0,0
3,418.8,37.1,29.9,74.97,0.0,0.0,0.0,0.0,15.410436,4.7,...,0,0,0,0,0,0,0,1,0,0
4,414.8,35.1,29.0,78.41,0.0,0.0,0.0,0.0,15.410436,3.4,...,0,0,0,1,0,0,0,0,0,0


In [27]:
def min_max_normalize_dataframe(df, columns):
    df_normalized = df.copy()
    for column in columns:
        if df[column].dtype in [int, float]:
            min_val = df[column].min()
            max_val = df[column].max()
            df_normalized[column] = (df[column] - min_val) / (max_val - min_val)
    return df_normalized

In [28]:
features_columns = list(data_df.columns)
features_columns.remove('Demand (MW)')# removing the Demand (MW)
features_columns

['Temperature',
 'dewpoint',
 'humidity',
 'precipitation',
 'precipprob',
 'snow',
 'snowdepth',
 'windgust',
 'windspeed',
 'sealevelpressure',
 'cloudcover',
 'visibility',
 'solarradiation',
 'severerisk',
 'freezingrain',
 'ice',
 'rain',
 'snow.1',
 'Clear',
 'Freezing Drizzle/Freezing Rain',
 'Ice',
 'Overcast',
 'Partially cloudy',
 'Rain',
 'Snow']

In [29]:
normalized_df = min_max_normalize_dataframe(data_df, columns = features_columns)
transformed_df = normalized_df[features_columns].apply(np.log1p)

In [30]:
transformed_df["Demand (MW)"] = data_df["Demand (MW)"]

In [31]:
transformed_df.head()

Unnamed: 0,Temperature,dewpoint,humidity,precipitation,precipprob,snow,snowdepth,windgust,windspeed,sealevelpressure,...,rain,snow.1,Clear,Freezing Drizzle/Freezing Rain,Ice,Overcast,Partially cloudy,Rain,Snow,Demand (MW)
0,0.349596,0.383234,0.491779,0.0,0.0,0.0,0.0,0.23543,0.116613,0.387838,...,0.0,0.0,0.0,0.0,0.0,0.693147,0.0,0.0,0.0,445.8
1,0.349596,0.389739,0.51085,0.0,0.0,0.0,0.0,0.23543,0.116613,0.386469,...,0.0,0.0,0.0,0.0,0.0,0.693147,0.0,0.0,0.0,424.5
2,0.349596,0.396202,0.530131,0.0,0.0,0.0,0.0,0.23543,0.085695,0.387838,...,0.0,0.0,0.0,0.0,0.0,0.693147,0.0,0.0,0.0,423.5
3,0.343529,0.389739,0.529492,0.0,0.0,0.0,0.0,0.23543,0.116613,0.386469,...,0.0,0.0,0.0,0.0,0.0,0.0,0.693147,0.0,0.0,418.8
4,0.329914,0.383234,0.553636,0.0,0.0,0.0,0.0,0.23543,0.085695,0.385098,...,0.0,0.0,0.693147,0.0,0.0,0.0,0.0,0.0,0.0,414.8


In [32]:
transformed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27552 entries, 0 to 27551
Data columns (total 26 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Temperature                     27552 non-null  float64
 1   dewpoint                        27552 non-null  float64
 2   humidity                        27552 non-null  float64
 3   precipitation                   27552 non-null  float64
 4   precipprob                      27552 non-null  float64
 5   snow                            27552 non-null  float64
 6   snowdepth                       27552 non-null  float64
 7   windgust                        27552 non-null  float64
 8   windspeed                       27552 non-null  float64
 9   sealevelpressure                27552 non-null  float64
 10  cloudcover                      27552 non-null  float64
 11  visibility                      27552 non-null  float64
 12  solarradiation                  

In [33]:
dataset = transformed_df.values
dataset.shape

(27552, 26)

In [34]:
def split_sequences(sequences, n_steps, number_of_predictions):
  X, y = list(), list()
  for i in range(len(sequences)):
    # find the end of this pattern
    end_ix = i + n_steps
    # check if we are beyond the dataset
    if end_ix > len(sequences):
      break
		# gather input and output parts of the pattern
    seq_x, seq_y = sequences[i:end_ix, :], sequences[end_ix: end_ix + number_of_predictions, -1]

    if len(seq_y) != number_of_predictions:
      continue

    X.append(seq_x)
    y.append(seq_y)
  return array(X), array(y)

In [35]:
# choose a number of time steps
number_of_weeks = 2
n_steps = 24*7*number_of_weeks
number_of_predictions = 7*24
# convert into input/output
X, y = split_sequences(dataset, n_steps, number_of_predictions)
# the dataset knows the number of features, e.g. 2
n_features = X.shape[2]
print(n_features)

26


In [36]:
X.shape, y.shape

((27049, 336, 26), (27049, 168))

In [37]:
validation_split = 0.1

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=validation_split, random_state=42)

# Print the shapes of the resulting arrays
print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("y_train shape:", y_train.shape)
print("y_val shape:", y_val.shape)

X_train shape: (24344, 336, 26)
X_val shape: (2705, 336, 26)
y_train shape: (24344, 168)
y_val shape: (2705, 168)


In [39]:
# define model
model = Sequential()
model.add(LSTM(20, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(512))
model.add(Dense(number_of_predictions))
model.compile(optimizer='adam', loss='mse')



In [40]:
# fit model
# history = model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_val, y_val))

In [44]:
history = model.fit(X_val, y_val, epochs=20, verbose=1, validation_data=(X_val, y_val))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
 9/85 [==>...........................] - ETA: 36s - loss: nan

KeyboardInterrupt: 

In [None]:
model_name = "/content/drive/MyDrive/Colab Notebooks/Hitachi/MODEL ARCHIVE/lstm_v1.h5"
model.save(model_name)

In [42]:
X_val[-1].shape

(336, 26)

In [43]:
x_input = X_val[-1]
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan nan nan nan nan]]
