In [1]:
import os
import sys

module_path = os.path.abspath(os.path.join('../..'))

if module_path not in sys.path:
    sys.path.append(module_path)


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from math import sqrt
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *

from helper import series_to_supervised
from model.mlp import mlp_layer

2024-01-20 14:37:22.831304: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
# import random

# random.seed(10)
# print(random.random())

In [None]:
dataset = pd.read_csv('../../data/energy_weather.csv', index_col=0)
# https://www.kaggle.com/datasets/nicholasjhana/energy-consumption-generation-prices-and-weather

dataset.fillna(0, inplace=True)
data = dataset
data

In [5]:
print(data['price'].min())
print(data['price'].max())

9.33
116.8


In [6]:
data.columns

Index(['price', 'price_dayahead', 'gen_coal', 'gen_gas', 'load_actual',
       'gen_lig', 'gen_oil', 'gen_oth_renew', 'pressure_Barcelona',
       'pressure_Bilbao', 'gen_waste', 'gen_bio', 'temp_min_Valencia',
       'pressure_Valencia', 'temp_min_Barcelona', 'humidity_Seville',
       'wind_deg_Bilbao', 'clouds_all_Bilbao', 'gen_hyd_river',
       'wind_deg_Seville', 'wind_speed_Barcelona', 'wind_speed_Valencia',
       'wind_speed_Bilbao', 'gen_wind', 'wind_speed_Madrid', 'gen_hyd_pump'],
      dtype='object')

In [7]:
# for col in data.columns:
#     plt.hist(data[col], bins=20)
#     plt.title(col)
#     plt.show()
#     plt.close()

In [8]:
np.percentile(data['price'], 95)

79.17

### Preprocess

In [74]:
values = data.values

# specify the number of lag hours
n_hours = 24*3
n_features = data.shape[-1]
k = 12
split1 = 0.7
split2 = 0.85

# frame as supervised learning
reframed = series_to_supervised(values, n_hours, k)
print("reframed.shape:", reframed.shape)

reframed.shape: (34980, 2184)


In [75]:
# split into train and test sets
reframed_values = reframed.values
n_train_hours = int(len(reframed_values)*split1)
n_valid_hours = int(len(reframed_values)*split2)

train = reframed_values[:n_train_hours, :]
val = reframed_values[n_train_hours:n_valid_hours, :]
test = reframed_values[n_valid_hours:, :]


# split into input and outputs
n_obs = n_hours * n_features
feature_idx = 0
train_X, train_y = train[:, :n_obs], train[:, [n_obs + feature_idx + n_features * i for i in range(k)]]
val_X, val_y = val[:, :n_obs], val[:, [n_obs + feature_idx + n_features * i for i in range(k)]]
test_X, test_y = test[:, :n_obs], test[:, [n_obs + feature_idx + n_features * i for i in range(k)]]


print("train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape", 
      train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape
     )

train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape (24486, 1872) (24486, 12) (5247, 1872) (5247, 12) (5247, 1872) (5247, 12)


In [76]:
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))

train_X = scaler.fit_transform(train_X)
train_y = scaler.fit_transform(train_y)

val_X = scaler.fit_transform(val_X)
val_y = scaler.fit_transform(val_y)

test_X = scaler.fit_transform(test_X)
test_y = scaler.fit_transform(test_y)

# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
val_X = val_X.reshape((val_X.shape[0], n_hours, n_features))
test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))

print("train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape", 
      train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape
     )

train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape (24486, 72, 26) (24486, 12) (5247, 72, 26) (5247, 12) (5247, 72, 26) (5247, 12)


### Model & training

In [77]:
# plt.hist(train_y[:,0])

In [78]:
# ===== model parameters ======
mlp_unit1 = 128
mlp_unit2 = 128
mlp_unit3 = 64
mlp_unit4 = 64
mlp_unit5 = 32
mlp_unit6 = 32
mlp_unit7 = 16
mlp_unit8 = 16
dropout = 0.0  # 0.1
kernel_size = 2
pool_size = 2
learning_rate = 1e-4
decay_steps = 10000
decay_rate = 0.95
PATIENCE = 100
EPOCHS = 1000
BATCH = 512
opt_num = k
input_shape = train_X.shape[1:]

In [79]:
model = mlp_layer(input_shape=input_shape,
                   mlp_unit1=mlp_unit1,
                   mlp_unit2=mlp_unit2,
                   mlp_unit3=mlp_unit3,
                   mlp_unit4=mlp_unit4,
                   mlp_unit5=mlp_unit5,
                   mlp_unit6=mlp_unit6,
                   mlp_unit7=mlp_unit7,
                   mlp_unit8=mlp_unit8,
                   dropout=dropout,
                   masked_value=-1,
                   opt_num=opt_num
                  )
# model.summary()

In [80]:
# lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=learning_rate, 
#                                                           decay_steps=decay_steps,
#                                                           decay_rate=decay_rate)

# model.compile(optimizer=Adam(learning_rate=lr_schedule),
#               loss='mse',
#               metrics=['mae']
#              )

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mae']
             )


es = EarlyStopping(monitor='val_mae', mode='min', verbose=2, patience=PATIENCE)
mc = ModelCheckpoint('../../saved_models/price_all.h5', 
                     monitor='val_mae', 
                     mode='min', 
                     verbose=2, 
                     save_best_only=True
                    )


model.fit(train_X, train_y,
          validation_data=(val_X, val_y),
          epochs=EPOCHS,
          batch_size=BATCH,
          verbose=2,
          shuffle=True,
          callbacks=[es, mc]
         )

Epoch 1/1000
48/48 - 2s - loss: 0.0426 - mae: 0.1509 - val_loss: 0.0149 - val_mae: 0.0921

Epoch 00001: val_mae improved from inf to 0.09212, saving model to ../../saved_models/energy_all.h5
Epoch 2/1000
48/48 - 1s - loss: 0.0102 - mae: 0.0788 - val_loss: 0.0119 - val_mae: 0.0820

Epoch 00002: val_mae improved from 0.09212 to 0.08196, saving model to ../../saved_models/energy_all.h5
Epoch 3/1000
48/48 - 1s - loss: 0.0078 - mae: 0.0694 - val_loss: 0.0114 - val_mae: 0.0793

Epoch 00003: val_mae improved from 0.08196 to 0.07930, saving model to ../../saved_models/energy_all.h5
Epoch 4/1000
48/48 - 1s - loss: 0.0052 - mae: 0.0555 - val_loss: 0.0062 - val_mae: 0.0580

Epoch 00004: val_mae improved from 0.07930 to 0.05802, saving model to ../../saved_models/energy_all.h5
Epoch 5/1000
48/48 - 1s - loss: 0.0039 - mae: 0.0479 - val_loss: 0.0057 - val_mae: 0.0557

Epoch 00005: val_mae improved from 0.05802 to 0.05575, saving model to ../../saved_models/energy_all.h5
Epoch 6/1000
48/48 - 1s - los


Epoch 00055: val_mae did not improve from 0.05314
Epoch 56/1000
48/48 - 1s - loss: 8.1652e-04 - mae: 0.0222 - val_loss: 0.0121 - val_mae: 0.0818

Epoch 00056: val_mae did not improve from 0.05314
Epoch 57/1000
48/48 - 1s - loss: 8.0583e-04 - mae: 0.0221 - val_loss: 0.0123 - val_mae: 0.0823

Epoch 00057: val_mae did not improve from 0.05314
Epoch 58/1000
48/48 - 1s - loss: 7.7875e-04 - mae: 0.0217 - val_loss: 0.0124 - val_mae: 0.0832

Epoch 00058: val_mae did not improve from 0.05314
Epoch 59/1000
48/48 - 1s - loss: 8.1596e-04 - mae: 0.0223 - val_loss: 0.0131 - val_mae: 0.0852

Epoch 00059: val_mae did not improve from 0.05314
Epoch 60/1000
48/48 - 1s - loss: 8.2121e-04 - mae: 0.0223 - val_loss: 0.0124 - val_mae: 0.0829

Epoch 00060: val_mae did not improve from 0.05314
Epoch 61/1000
48/48 - 1s - loss: 7.5665e-04 - mae: 0.0214 - val_loss: 0.0123 - val_mae: 0.0831

Epoch 00061: val_mae did not improve from 0.05314
Epoch 62/1000
48/48 - 1s - loss: 7.2857e-04 - mae: 0.0210 - val_loss: 0.0

48/48 - 1s - loss: 4.6988e-04 - mae: 0.0169 - val_loss: 0.0168 - val_mae: 0.0959

Epoch 00112: val_mae did not improve from 0.05314
Epoch 113/1000
48/48 - 1s - loss: 4.1251e-04 - mae: 0.0157 - val_loss: 0.0167 - val_mae: 0.0959

Epoch 00113: val_mae did not improve from 0.05314
Epoch 114/1000
48/48 - 1s - loss: 4.3999e-04 - mae: 0.0163 - val_loss: 0.0170 - val_mae: 0.0966

Epoch 00114: val_mae did not improve from 0.05314
Epoch 115/1000
48/48 - 1s - loss: 4.5276e-04 - mae: 0.0166 - val_loss: 0.0165 - val_mae: 0.0952

Epoch 00115: val_mae did not improve from 0.05314
Epoch 116/1000
48/48 - 1s - loss: 4.8805e-04 - mae: 0.0173 - val_loss: 0.0157 - val_mae: 0.0930

Epoch 00116: val_mae did not improve from 0.05314
Epoch 117/1000
48/48 - 1s - loss: 4.0280e-04 - mae: 0.0155 - val_loss: 0.0164 - val_mae: 0.0953

Epoch 00117: val_mae did not improve from 0.05314
Epoch 118/1000
48/48 - 1s - loss: 3.9345e-04 - mae: 0.0154 - val_loss: 0.0171 - val_mae: 0.0968

Epoch 00118: val_mae did not improve

48/48 - 1s - loss: 3.0151e-04 - mae: 0.0135 - val_loss: 0.0166 - val_mae: 0.0970

Epoch 00168: val_mae did not improve from 0.05314
Epoch 169/1000
48/48 - 1s - loss: 4.8775e-04 - mae: 0.0174 - val_loss: 0.0153 - val_mae: 0.0930

Epoch 00169: val_mae did not improve from 0.05314
Epoch 170/1000
48/48 - 1s - loss: 3.3684e-04 - mae: 0.0143 - val_loss: 0.0159 - val_mae: 0.0945

Epoch 00170: val_mae did not improve from 0.05314
Epoch 171/1000
48/48 - 1s - loss: 2.8675e-04 - mae: 0.0131 - val_loss: 0.0163 - val_mae: 0.0960

Epoch 00171: val_mae did not improve from 0.05314
Epoch 172/1000
48/48 - 1s - loss: 2.9630e-04 - mae: 0.0133 - val_loss: 0.0155 - val_mae: 0.0937

Epoch 00172: val_mae did not improve from 0.05314
Epoch 173/1000
48/48 - 1s - loss: 3.1105e-04 - mae: 0.0137 - val_loss: 0.0163 - val_mae: 0.0960

Epoch 00173: val_mae did not improve from 0.05314
Epoch 174/1000
48/48 - 1s - loss: 2.8965e-04 - mae: 0.0132 - val_loss: 0.0164 - val_mae: 0.0961

Epoch 00174: val_mae did not improve

<tensorflow.python.keras.callbacks.History at 0x7f4ef071d580>