In [None]:
# Import modules
import sys

sys.path.append('../')
from functions import *

In [None]:
# Load the hourly weather/energy CSV into a DataFrame.
# NOTE(review): `pd` is not imported in this notebook; presumably it is
# re-exported by `from functions import *` — confirm.
df = pd.read_csv('../Data/ProductionData/Weather-Energy_rev_time_hourly_data.csv')

# Uncomment to inspect column dtypes and non-null counts:
# print(df.info())

# Preview the first rows to confirm the file parsed as expected.
print(df.head())

In [None]:
# Columns used by the model. 'energy' is kept as an input feature and is also
# extracted separately as the prediction target further down.
cols = ['day', 'hour', 'temperature', 'wind_speed(m/s)', 'humidity(%)', 'solar_radiation(MJ/m^2)', 'energy']

# Take an explicit copy: `dataset` is modified by later cells (index and
# humidity column), and those writes must neither reach `df` nor trigger
# pandas' SettingWithCopyWarning.
dataset = df.loc[:, cols].copy()

In [None]:
# Use the 'time' column of the source frame as the row index of the feature frame.
dataset.index = df['time']

In [None]:
# Replace the humidity column with its complement (100 - value).
# NOTE: this cell is not idempotent — running it a second time restores the
# original values, so run it exactly once per kernel session.
dataset['humidity(%)'] = 100 - dataset['humidity(%)']

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Separate min-max scalers for the target (y) and the feature matrix (x).
# copy=False asks scikit-learn to scale the input array in place when it can,
# rather than allocating a new array.
scaler_y = MinMaxScaler(copy=False)
scaler_x = MinMaxScaler(copy=False)

In [None]:
# Index labels in `df` of the first and last timestamps of the training period.
# Each lookup raises IndexError when the timestamp is absent from df['time'].
start_idx_531 = df.index[df['time'] == "2020-04-01 0:00"].tolist()[0]
end_idx_531 = df.index[df['time'] == "2020-05-31 0:00"].tolist()[0]

In [None]:
# Materialise features and target as independent NumPy arrays.
# The scalers are created with copy=False and may scale their input in place;
# copy=True guarantees neither array is a view onto `dataset`, so scaling can
# never silently rewrite the DataFrame.
X = dataset.to_numpy(copy=True)
Y = dataset.loc[:, 'energy'].to_numpy(copy=True)
# The scalers require 2-D input: one column, one row per time step.
Y = np.reshape(Y, (-1, 1))

In [None]:
# Sanity check: X is (rows, features) and Y is (rows, 1).
print(X.shape)
print(Y.shape)

In [None]:
# Record the target's extrema before Y is scaled; they are used later in the
# notebook to convert predictions back to the original units by hand.
energy_max = np.max(Y)
energy_min = np.min(Y)

In [None]:
# Fit each scaler and keep the array it returns. Even with copy=False,
# scikit-learn only scales in place when the input needs no conversion (e.g. an
# integer array is converted to float first, which produces a new array), so
# relying on the side effect could leave X / Y unscaled.
X = scaler_x.fit_transform(X)
Y = scaler_y.fit_transform(Y)


In [None]:
def scale_train_data(x_seq, y_seq, scaler_x, scaler_y):
    """Fit the scalers on the given data and return the scaled results.

    Previously this function returned nothing and relied on the scalers
    modifying their inputs in place, which is not guaranteed. Callers that
    ignore the return value are unaffected.

    :param x_seq: feature data passed to ``scaler_x.fit_transform``
    :param y_seq: target data passed to ``scaler_y.fit_transform``
    :param scaler_x: object exposing ``fit_transform`` for the features
    :param scaler_y: object exposing ``fit_transform`` for the targets
    :return: tuple ``(x_scaled, y_scaled)`` as returned by the two scalers
    """
    x_scaled = scaler_x.fit_transform(x_seq)
    y_scaled = scaler_y.fit_transform(y_seq)

    return x_scaled, y_scaled


In [None]:
def gen_train_data_hour(start_idx, end_idx, seq_in_days, target_hrs, stride, data_array, label_array):
    """Cut hourly data into (input window, target window) training pairs.

    Each sample takes ``24 * seq_in_days`` consecutive rows of ``data_array``
    as input and the ``abs(target_hrs)`` rows of ``label_array`` that
    immediately follow as target. Windows start at ``start_idx`` and advance
    by ``stride`` rows; every window lies entirely inside
    ``[start_idx, end_idx)``.

    :param start_idx: row at which the first window starts
    :param end_idx: exclusive upper bound for the rows a window may use
    :param seq_in_days: input window length in days (24 rows per day)
    :param target_hrs: target window length in rows; the sign is ignored
    :param stride: number of rows between consecutive window starts
    :param data_array: 2-D array of features, one row per hour
    :param label_array: array of labels, one row per hour
    :return: ``(x, y)`` where ``x`` has shape (n_samples, 24 * seq_in_days,
        n_features) and ``y`` has shape (n_samples, abs(target_hrs), 1)
    """
    num_data_in_seq = 24 * seq_in_days
    num_data_in_target_hrs = abs(target_hrs)

    # Start of the last window whose target still ends at or before end_idx.
    last_window_start = end_idx - num_data_in_seq - num_data_in_target_hrs

    x = []
    y = []

    # The stop value is last_window_start + 1 so that the final valid window
    # is included; an exclusive stop at last_window_start would drop it.
    for i in range(start_idx, last_window_start + 1, stride):
        target_start = i + num_data_in_seq
        target_end = target_start + num_data_in_target_hrs

        x.append(data_array[i:target_start])
        y.append(np.reshape(label_array[target_start:target_end], (num_data_in_target_hrs, 1)))

    return np.array(x), np.array(y)


In [None]:
# Raw (unwindowed) rows of the training period.
# NOTE(review): both names are reassigned by the gen_train_data_hour call in
# the following cell, so these slices are never used — consider removing.
trainX_531 = X[start_idx_531:end_idx_531,:]
trainY_531 = Y[start_idx_531:end_idx_531,:]

In [None]:
# Build the windowed training set: each sample uses 4 days (96 hourly rows) as
# input and the following 24 rows as target; windows start every 96 rows.
trainX_531, trainY_531 = gen_train_data_hour(
    start_idx_531,
    end_idx_531,
    seq_in_days=4,
    target_hrs=24,
    stride=96,
    data_array=X,
    label_array=Y,
)

In [None]:
# Plot feature column 5 ('solar_radiation(MJ/m^2)' in `cols`) over the first
# training window.
plt.plot(trainX_531[0, :, 5])

In [None]:
# The earliest outputs of each sequence tend to be inaccurate and can degrade
# the overall training signal, so the first 50 time steps are left out of the
# loss computed below.
warmup_steps = 50

def loss_mse_warmup(y_true, y_pred):
    """
    Mean squared error that skips the leading 'warmup' steps of each sequence.

    Both tensors are sliced as ``[:, warmup_steps:, :]`` before being compared.

    NOTE(review): if the sequences have ``warmup_steps`` or fewer time steps
    the slices are empty — confirm the sequence length before using this loss.

    :param y_true: desired output
    :param y_pred: prediction made by model
    :return: scalar mean of the MSE over the non-warmup steps
    """
    kept_steps = slice(warmup_steps, None)

    per_step_mse = tf.losses.MSE(y_true[:, kept_steps, :], y_pred[:, kept_steps, :])

    # Reduce explicitly to a single scalar rather than depending on how Keras
    # aggregates a tensor of losses.
    return tf.reduce_mean(per_step_mse)

In [None]:
# Import modules related to build a RNN
from keras.models import Sequential, load_model
from keras.layers import GRU, Dense, RepeatVector, TimeDistributed
from keras import optimizers
from keras import losses
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

In [None]:
# Encoder-decoder GRU network.
# Encoder: two GRU layers compress each input window into one 64-unit vector.
model = Sequential()
model.add(GRU(units=128, activation='relu', return_sequences=True, input_shape=trainX_531.shape[-2:]))
model.add(GRU(units=64, activation='relu', return_sequences=False))
# Repeat the encoded vector once per target time step.
model.add(RepeatVector(trainY_531.shape[1]))
# Decoder: two GRU layers followed by a per-time-step dense layer emitting one value.
model.add(GRU(units=64, activation='relu', return_sequences=True))
model.add(GRU(units=128, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(1)))

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases reject.
optimizer = optimizers.RMSprop(learning_rate=1e-3)

# NOTE(review): `custom_loss` is not defined in this notebook; presumably it is
# provided by `from functions import *` — confirm.
model.compile(loss=losses.mean_squared_error, optimizer=optimizer, metrics=[custom_loss])

In [None]:
# Stop training once val_loss has not improved for 5 epochs.
callback_early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
# Multiply the learning rate by 0.1 (never going below 1e-4) when val_loss does
# not improve; patience=0 means no grace epochs before the reduction.
callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_lr=1e-4, patience=0, verbose=1)

# Callbacks intended to be passed to model.fit.
callbacks = [callback_early_stopping,
             callback_reduce_lr]

In [None]:
# Split the windowed samples into training and validation sets.
# NOTE(review): split_data is defined outside this notebook; 0.1 is presumably
# the validation fraction — confirm against its definition.

x_train, y_train, x_val, y_val = split_data(trainX_531, trainY_531, 0.1)

In [None]:
# Train with validation monitoring. The early-stopping and learning-rate
# callbacks defined above only take effect when passed to fit().
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=30,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Save to the same location the model is loaded from in the next cell, so a
# fresh run reloads the model that was just trained rather than a stale file.
model.save('../Data/Model/gru_531_20200727.h5')

In [None]:
# Reload the saved model; the custom metric must be supplied by name so Keras
# can deserialize the compiled model.
model = load_model('../Data/Model/gru_531_20200727.h5', custom_objects = {'custom_loss' : custom_loss })

In [None]:
# Predict from the 96 hourly rows preceding end_idx_531. The model was trained
# on the scaled array X, so the input must be taken from X; the `dataset`
# DataFrame holds a different representation of the features.
pred_531 = model.predict(np.expand_dims(X[end_idx_531-96:end_idx_531], axis=0))
# Drop the leading batch axis, keeping (time steps, 1).
pred_531 = np.reshape(pred_531, pred_531.shape[-2:])

In [None]:
# Plot the prediction in scaled units.
plt.plot(pred_531)

In [None]:
# Undo the min-max scaling by hand, using the extrema of the energy column
# recorded before scaling, to express the prediction in original units.

restored_vals = (energy_max - energy_min) * pred_531 + energy_min

In [None]:
# Plot the prediction in original units.
plt.plot(restored_vals, label='Prediction of energy production on 5/31')
plt.legend()
# Render the figure (a bare plt.plot() would only add an empty artist).
plt.show()

# %% [markdown]
# Time Series Cross-Validation

In [None]:
from sklearn.model_selection import TimeSeriesSplit

In [None]:
# Time-ordered splitter with scikit-learn's default settings.
tscv = TimeSeriesSplit()

In [None]:
# Training settings shared by every cross-validation fold.
batch_size = 64
num_epochs = 20
verbosity = 1

In [None]:
# Per-fold results collected by the cross-validation loop.
# acc_per_iter receives the second evaluation score multiplied by 100;
# loss_per_iter receives the first evaluation score (the loss).
acc_per_iter = []
loss_per_iter = []

In [None]:
# Windowed samples used for cross-validation (same arrays as the training set above).
X_seq = trainX_531
Y_seq = trainY_531

In [None]:
# Time-series cross-validation: for every split a fresh model is built, trained
# on the earlier samples and evaluated on the later ones.
iter_num = 1
for train_idx, test_idx in tscv.split(X_seq):

    # Rebuild the network from scratch so that no weights leak between folds.
    model = Sequential()
    model.add(GRU(units=128, activation='relu', return_sequences=True, input_shape=trainX_531.shape[-2:]))
    model.add(GRU(units=64, activation='relu', return_sequences=False))
    model.add(RepeatVector(trainY_531.shape[1]))
    model.add(GRU(units=64, activation='relu', return_sequences=True))
    model.add(GRU(units=128, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(1)))

    # `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
    # releases reject.
    optimizer = optimizers.RMSprop(learning_rate=1e-3)

    # Compile the model
    model.compile(loss=losses.mean_squared_error, optimizer=optimizer, metrics=[custom_loss])

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for {iter_num} ...')

    # Fit data to model
    history = model.fit(X_seq[train_idx], Y_seq[train_idx],
              batch_size=batch_size,
              epochs=num_epochs,
              verbose=verbosity)

    # Generate generalization metrics: scores[0] is the loss, scores[1] the
    # compiled metric.
    # NOTE(review): the metric is `custom_loss`, not an accuracy, so the
    # "* 100" / "%" presentation below looks misleading — confirm the intent.
    scores = model.evaluate(X_seq[test_idx], Y_seq[test_idx], verbose=0)
    print(f'Score for iteration {iter_num}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_iter.append(scores[1] * 100)
    loss_per_iter.append(scores[0])

    # Increase fold number
    iter_num = iter_num + 1

In [None]:
# == Report per-fold scores, then their averages ==
separator = '------------------------------------------------------------------------'

print(separator)
print('Score per iteration')
for fold_no, (fold_loss, fold_acc) in enumerate(zip(loss_per_iter, acc_per_iter), start=1):
    print(separator)
    print(f'> Fold {fold_no} - Loss: {fold_loss} - Accuracy: {fold_acc}%')
print(separator)
print('Average scores for all folds:')
mean_acc = np.mean(acc_per_iter)
std_acc = np.std(acc_per_iter)
print(f'> Accuracy: {mean_acc} (+- {std_acc})')
print(f'> Loss: {np.mean(loss_per_iter)}')
print(separator)

In [None]:
# Predict from the 96 scaled rows preceding end_idx_531. `model` here is the
# one left over from the last cross-validation fold.
timesplit_res = model.predict(np.expand_dims(X[end_idx_531-96:end_idx_531], axis = 0))

In [None]:
# Display the raw prediction array.
timesplit_res

In [None]:
# Drop the leading batch axis, keeping the last two dimensions.
timesplit_res = np.reshape(timesplit_res, timesplit_res.shape[-2:])

In [None]:
# Plot the last fold model's prediction (still in scaled units).
plt.plot(timesplit_res, label='5/31')
plt.legend()
plt.show()