In [1]:
import os
import json
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Reshape

In [2]:
print("Num GPUs Available: ", tf.config.list_physical_devices())

Num GPUs Available:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# 1. Horizonte N normal
Modelos entrenados sin tratamiento de outliers, hay dos casos:
- Datos normalizados entre 0 y 1
- Datos normalizados entre -1 y 1

Los modelos en la carpeta models se identifican de la siguiente forma:

{estación}\_{ventana temporal}\_{numero de outputs}\_{batch size}\_{epochs}\_{drop out}\_{neuronas}\_{optimizer}\_{normalization}.h5

Por ejemplo: **C6_24_1_6_20_0.05_64_adam_-11.h5**
- Datos de la estación C6
- La ventana temporal es de 24 (24 datos previos al instante predicho, cada una de estas 24 representa una medición cada 30min)
- Solo hay una salida (5 valores pero solo 30 minutos)
- Batch size de 6.
- Entrenado durante 20 epochs
- Drop out de 0.05
- La capa LSTM tiene 64 neuronas.
- El optimizador es ADAM
- Los datos son normalizados entre -1 y 1. 



## 1.1. Training phase
In here the training parameters for the sesion are decided. A list of dicts that contains the parameters of each model is created.

In [13]:
input_width = [48]
prediction_width = [48]
batch_size = [6]
epochs = [10]
dropout = [0.05]
neurons = [64]
optimizer = ['adagrad']
normalization = [[-1, 1]]#, [0, 1]]

station = 'D6.zip'
variables = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time', 'date']
input_vars = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time']
cols = ['T', 'HR', 'P', 'u10', 'v10']

df_initial = pd.read_csv(f'data/data_by_station/{station}', compression='zip', header=0, sep=',')
df_initial['date'] = pd.to_datetime(df_initial['date'], format='%Y-%m-%d %H:%M:%S')
df_initial['day'] = df_initial['date'].dt.dayofyear / 365
df_initial['time'] = df_initial['date'].dt.hour / 24
df_initial = df_initial.astype({'T': 'float', 'HR': 'float', 'P': 'float', 'u2': 'float', 'v2': 'float', 'u6': 'float', 'v6': 'float', 'u10': 'float', 'v10': 'float', 'altitud': 'float', 'latitud': 'float', 'longitud': 'float'})
df_initial = df_initial[variables]

parameters = []
for i in input_width:
    for j in prediction_width:
        for k in batch_size:
            for l in epochs:
                for m in dropout:
                    for n in neurons:
                            for p in optimizer:
                                for s in normalization:
                                    parameters.append({'width':i, 'output': j, 'batch': k, 'epochs': l, 'dropout': m, 'neurons': n, 'opt': p, 'norm': s})

In [14]:
last_params = {'width': 0, 'norm': []}
for params in parameters:
    df = df_initial.copy()
    for col in cols:
        df[col] = ((params['norm'][1] - params['norm'][0]) * (df[col] - df_initial[col].min()) / (df_initial[col].max() - df_initial[col].min())) + params['norm'][0]

    df_train = df[df['date'] < '2019-01-01'].copy()
    df_test = df[df['date'] >= '2019-01-01'].copy()

    train_X = []
    train_Y = []
    for i in range(params['width'], len(df_train) - params['output']):
        train_X.append(df_train.iloc[i - params['width']:i][input_vars].values)
        train_Y.append(df_train.iloc[i:i + params['output']][cols].values)
    train_X = np.array(train_X)
    train_Y = np.array(train_Y)
        
    model = Sequential()
    model.add(LSTM(params['neurons'], activation='tanh', input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=False))
    model.add(Dropout(params['dropout']))
    model.add(Dense(units=len(cols)*params['output'], activation='linear'))
    model.add(Reshape((params['output'], len(cols))))
    model.compile(optimizer=params['opt'], loss='mse', metrics=['mae'])

    with tf.device('/device:GPU:0'):
        history = model.fit(train_X, train_Y, epochs=params['epochs'], batch_size=params['batch'], validation_split=0.1, verbose=1, shuffle=False)
    model.save(f'models/{station.strip(".zip")}_{params["width"]}_{params["output"]}_{params["batch"]}_{params["epochs"]}_{params["dropout"]}_{params["neurons"]}_{params["opt"]}_{params["norm"][0]}{params["norm"][1]}.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
input_width = [48]
prediction_width = [48]
batch_size = [6]
epochs = [10]
dropout = [0.05]
neurons = [64]
optimizer = ['adagrad']
normalization = [[-1, 1]]#, [0, 1]]

station = 'CL.zip'
variables = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time', 'date']
input_vars = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time']
cols = ['T', 'HR', 'P', 'u10', 'v10']

df_initial = pd.read_csv(f'data/data_by_station/{station}', compression='zip', header=0, sep=',')
df_initial['date'] = pd.to_datetime(df_initial['date'], format='%Y-%m-%d %H:%M:%S')
df_initial['day'] = df_initial['date'].dt.dayofyear / 365
df_initial['time'] = df_initial['date'].dt.hour / 24
df_initial = df_initial.astype({'T': 'float', 'HR': 'float', 'P': 'float', 'u2': 'float', 'v2': 'float', 'u6': 'float', 'v6': 'float', 'u10': 'float', 'v10': 'float', 'altitud': 'float', 'latitud': 'float', 'longitud': 'float'})
df_initial = df_initial[variables]

parameters = []
for i in input_width:
    for j in prediction_width:
        for k in batch_size:
            for l in epochs:
                for m in dropout:
                    for n in neurons:
                            for p in optimizer:
                                for s in normalization:
                                    parameters.append({'width':i, 'output': j, 'batch': k, 'epochs': l, 'dropout': m, 'neurons': n, 'opt': p, 'norm': s})

In [12]:
last_params = {'width': 0, 'norm': []}
for params in parameters:
    df = df_initial.copy()
    for col in cols:
        df[col] = ((params['norm'][1] - params['norm'][0]) * (df[col] - df_initial[col].min()) / (df_initial[col].max() - df_initial[col].min())) + params['norm'][0]

    df_train = df[df['date'] < '2019-01-01'].copy()
    df_test = df[df['date'] >= '2019-01-01'].copy()

    train_X = []
    train_Y = []
    for i in range(params['width'], len(df_train) - params['output']):
        train_X.append(df_train.iloc[i - params['width']:i][input_vars].values)
        train_Y.append(df_train.iloc[i:i + params['output']][cols].values)
    train_X = np.array(train_X)
    train_Y = np.array(train_Y)
        
    model = Sequential()
    model.add(LSTM(params['neurons'], activation='tanh', input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=False))
    model.add(Dropout(params['dropout']))
    model.add(Dense(units=len(cols)*params['output'], activation='linear'))
    model.add(Reshape((params['output'], len(cols))))
    model.compile(optimizer=params['opt'], loss='mse', metrics=['mae'])

    with tf.device('/device:GPU:0'):
        history = model.fit(train_X, train_Y, epochs=params['epochs'], batch_size=params['batch'], validation_split=0.1, verbose=1, shuffle=False)
    model.save(f'models/{station.strip(".zip")}_{params["width"]}_{params["output"]}_{params["batch"]}_{params["epochs"]}_{params["dropout"]}_{params["neurons"]}_{params["opt"]}_{params["norm"][0]}{params["norm"][1]}.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 1.2. Prediction and plotting phase 
In this phase the predictions of the models trained are done and saved to plot afterwards

In [6]:
directories = os.listdir('models')

norm = [model for model in directories if '11' in model]

results = []

for model_path in norm:
    params = model_path.strip('.h5').split('_')
    if '01' in params[-1]:
        norm_min, norm_max = 0, 1
    else:
        norm_min, norm_max = -1, 1
    df_test = df_initial[df_initial['date'] >= '2019-01-01'].copy()

    for col in cols:
        df_test[col] = ((norm_max - norm_min) * (df_test[col] - df_initial[col].min()) / (df_initial[col].max() - df_initial[col].min())) + norm_min
    
    test_X = []
    test_Y = []
    for i in range(int(params[1]), len(df_test) - int(params[2])):
        test_X.append(df_test.iloc[i - int(params[1]):i][input_vars].values)
        test_Y.append(df_test.iloc[i:i + int(params[2])][cols].values)
    test_X = np.array(test_X)
    test_Y = np.array(test_Y)

    model = keras.models.load_model(f'models/{model_path}')
    y_pred = model.predict(test_X)
    for idx, col in enumerate(cols):
        y_pred[:, :, idx] = ((y_pred[:, :, idx] - norm_min) * (df_initial[col].max() - df_initial[col].min()) / (norm_max - norm_min)) + df_initial[col].min()
    results.append(y_pred)

ValueError: in user code:

    File "d:\MARC\UNI\MASTER\TFM\Wind-Fire_prediction\venv\lib\site-packages\keras\engine\training.py", line 1845, in predict_function  *
        return step_function(self, iterator)
    File "d:\MARC\UNI\MASTER\TFM\Wind-Fire_prediction\venv\lib\site-packages\keras\engine\training.py", line 1834, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "d:\MARC\UNI\MASTER\TFM\Wind-Fire_prediction\venv\lib\site-packages\keras\engine\training.py", line 1823, in run_step  **
        outputs = model.predict_step(data)
    File "d:\MARC\UNI\MASTER\TFM\Wind-Fire_prediction\venv\lib\site-packages\keras\engine\training.py", line 1791, in predict_step
        return self(x, training=False)
    File "d:\MARC\UNI\MASTER\TFM\Wind-Fire_prediction\venv\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "d:\MARC\UNI\MASTER\TFM\Wind-Fire_prediction\venv\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 48, 6), found shape=(None, 48, 7)


In [32]:
for idx, col in enumerate(cols):
    test_Y[:, :, idx] = ((test_Y[:, :, idx] - norm_min) * (df_initial[col].max() - df_initial[col].min()) / (norm_max - norm_min)) + df_initial[col].min()
    
rmse = np.sqrt(np.mean(((y_pred - test_Y) ** 2), axis=0))
test = [test_Y[:, idx, :] for idx in range(test_Y.shape[1])]

s1 = results[0].shape[1]
for idx, col in enumerate(cols):
    # plot each column
    plt.figure(figsize=(10, 6))
    plt.plot(test[0][s1:480+s1, idx], label='Real', color='blue')
    for idx2, result in enumerate(results):
        for i in range(1, result.shape[1], 2):
            params = norm[idx2].strip('.h5').split('_')
            rmse = np.sqrt(np.mean(np.power((test[0][:, idx] - result[:, i, idx]), 2)))
            plt.plot(result[s1-i:480+(s1-i), i, idx], label=f'{i*30} Minutes: {rmse:.4f}')
    plt.legend()
    plt.title(f'{col} - Batch size - {params[-1]} Normalization')
    plt.savefig(f'plots/{col}_batchSize_{params[-1]}norm.png')
    plt.clf()

# 2. Horizonte N "recursivo"

Modelos entrenados usando IQR como tratamiento de outliers, hay dos casos:
- Datos normalizados entre 0 y 1
- Datos normalizados entre -1 y 1

Los modelos en la carpeta models se identifican de la siguiente forma:

{estación}\_{ventana temporal}\_{numero de outputs}\_{batch size}\_{epochs}\_{drop out}\_{neuronas}\_{optimizer}\_{normalization}.h5

Por ejemplo: **C6_24_1_6_20_0.05_64_adam_o-11.h5**
- Datos de la estación C6
- La ventana temporal es de 24 (24 datos previos al instante predicho, cada una de estas 24 representa una medición cada 30min)
- Solo hay una salida (5 valores pero solo 30 minutos)
- Batch size de 6.
- Entrenado durante 20 epochs
- Drop out de 0.05
- La capa LSTM tiene 64 neuronas.
- El optimizador es ADAM
- Los datos son normalizados entre -1 y 1 y con los valores extremos tratados usando IQR. 



## 2.1. Training phase
In here the training parameters for the sesion are decided. A list of dicts that contains the parameters of each model is created.

In [22]:
input_width = [24]
prediction_width = [1]
batch_size = [6]
epochs = [15]
dropout = [0.05]
neurons = [64]
optimizer = ['adam']
normalization = [[-1, 1]]#, [0, 1]]

station = 'D6.zip'
variables = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time', 'date']
input_vars = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time']
cols = ['T', 'HR', 'P', 'u10', 'v10']

df_initial = pd.read_csv(f'data/data_by_station/{station}', compression='zip', header=0, sep=',')
df_initial['date'] = pd.to_datetime(df_initial['date'], format='%Y-%m-%d %H:%M:%S')
df_initial['day'] = df_initial['date'].dt.dayofyear / 365
df_initial['time'] = df_initial['date'].dt.hour / 24
df_initial = df_initial.astype({'T': 'float', 'HR': 'float', 'P': 'float', 'u2': 'float', 'v2': 'float', 'u6': 'float', 'v6': 'float', 'u10': 'float', 'v10': 'float', 'altitud': 'float', 'latitud': 'float', 'longitud': 'float'})
df_initial = df_initial[variables]

parameters = []
for i in input_width:
    for j in prediction_width:
        for k in batch_size:
            for l in epochs:
                for m in dropout:
                    for n in neurons:
                            for p in optimizer:
                                for s in normalization:
                                    parameters.append({'width':i, 'output': j, 'batch': k, 'epochs': l, 'dropout': m, 'neurons': n, 'opt': p, 'norm': s})

## 2.2. Prediction and plotting phase 
In this phase the predictions of the models trained are done and saved to plot afterwards

In [4]:
directories = os.listdir('models')

norm = [model for model in directories if '11' in model]

results = []

for model_path in norm:
    params = model_path.strip('.h5').split('_')
    params[2] = 1
    total_time = 48
    if '01' in params[-1]:
        norm_min, norm_max = 0, 1
    else:
        norm_min, norm_max = -1, 1
    df_test = df_initial[df_initial['date'] >= '2019-01-01'].copy()

    for col in cols:
        df_test[col] = ((norm_max - norm_min) * (df_test[col] - df_initial[col].min()) / (df_initial[col].max() - df_initial[col].min())) + norm_min
    
    test_X = []
    test_Y = []
    for i in range(int(params[1]), len(df_test) - total_time):
        test_X.append(df_test.iloc[i - int(params[1]):i][input_vars].values)
        test_Y.append(df_test.iloc[i:i + total_time][cols].values)
    test_X = np.array(test_X)
    test_Y = np.array(test_Y)

    model = keras.models.load_model(f'models/{model_path}')
    y_pred = model.predict(test_X)
    
    new_day, new_time = np.array((test_X[:, -1, 5] + (1/365)) % 1).reshape(test_X.shape[0], 1, 1), np.array((test_X[:, -1, 6] + (1/24)) % 1).reshape(test_X.shape[0], 1, 1)
    y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
    new_info = np.concatenate((y_pred, new_day, new_time), axis=2)
    new_X = np.concatenate((test_X[:, 1:, :], new_info), axis=1)

    for idx, col in enumerate(cols):
            y_pred[:, :, idx] = ((y_pred[:, :, idx] - norm_min) * (df_initial[col].max() - df_initial[col].min()) / (norm_max - norm_min)) + df_initial[col].min()
    results.append(y_pred)

    for i in range(0, total_time - 1):
        y_pred = model.predict(new_X)

        new_day, new_time = np.array((new_X[:, -1, 5] + (1/365)) % 1).reshape(new_X.shape[0], 1, 1), np.array((new_X[:, -1, 6] + (1/24)) % 1).reshape(new_X.shape[0], 1, 1)
        y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
        new_info = np.concatenate((y_pred, new_day, new_time), axis=2)
        new_X = np.concatenate((new_X[:, 1:, :], new_info), axis=1)
        for idx, col in enumerate(cols):
            y_pred[:, :, idx] = ((y_pred[:, :, idx] - norm_min) * (df_initial[col].max() - df_initial[col].min()) / (norm_max - norm_min)) + df_initial[col].min()

        results.append(y_pred)



In [51]:
test_Y.shape

(17448, 48, 5)

In [5]:
results = [np.concatenate(results, axis=1)]

for idx, col in enumerate(cols):
    test_Y[:, :, idx] = ((test_Y[:, :, idx] - norm_min) * (df_initial[col].max() - df_initial[col].min()) / (norm_max - norm_min)) + df_initial[col].min()
    
rmse = np.sqrt(np.mean(((y_pred - test_Y) ** 2), axis=0))
test = [test_Y[:, idx, :] for idx in range(test_Y.shape[1])]

s1 = results[0].shape[1]
for idx, col in enumerate(cols):
    # plot each column
    plt.figure(figsize=(10, 6))
    plt.plot(test[0][s1:96+s1, idx], label='Real', color='blue')
    for idx2, result in enumerate(results):
        for i in range(1, result.shape[1], 10):
            params = norm[idx2].strip('.h5').split('_')
            rmse = np.sqrt(np.mean(np.power((test[0][:, idx] - result[:, i, idx]), 2)))
            plt.plot(result[s1-i:96+(s1-i), i, idx], label=f'{i*30} Minutes: {rmse:.4f}')
    plt.legend()
    plt.title(f'{col} - Batch size - {params[-1]} Normalization')
    plt.savefig(f'plots/{col}_batchSize_{params[-1]}norm.png')
    plt.clf()


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

# 3. Only 1 output at a given hour
This last method will predict one output for, for example, hour 8 and will ignore the other ones that lead to this point.

In [5]:
input_width = [48]
prediction_width = [1]
batch_size = [6]
epochs = [10]
dropout = [0.05]
neurons = [64]
optimizer = ['adagrad']
normalization = [[-1, 1]]#, [0, 1]]

station = 'CL.zip'
variables = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'date', 'time'] # 'time' 'day'
input_vars = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time'] # 'time'
cols = ['T', 'HR', 'P', 'u10', 'v10']

df_initial = pd.read_csv(f'data/data_by_station/{station}', compression='zip', header=0, sep=',')
df_initial['date'] = pd.to_datetime(df_initial['date'], format='%Y-%m-%d %H:%M:%S')
df_initial['day'] = df_initial['date'].dt.dayofyear / 365
df_initial['time'] = df_initial['date'].dt.hour / 24
df_initial = df_initial.astype({'T': 'float', 'HR': 'float', 'P': 'float', 'u2': 'float', 'v2': 'float', 'u6': 'float', 'v6': 'float', 'u10': 'float', 'v10': 'float', 'altitud': 'float', 'latitud': 'float', 'longitud': 'float'})
df_initial = df_initial[variables]

parameters = []
for i in input_width:
    for j in prediction_width:
        for k in batch_size:
            for l in epochs:
                for m in dropout:
                    for n in neurons:
                            for p in optimizer:
                                for s in normalization:
                                    parameters.append({'width':i, 'output': j, 'batch': k, 'epochs': l, 'dropout': m, 'neurons': n, 'opt': p, 'norm': s})

In [6]:
last_params = {'width': 0, 'norm': []}
for params in parameters:
    df = df_initial.copy()
    for col in cols:
        df[col] = ((params['norm'][1] - params['norm'][0]) * (df[col] - df_initial[col].min()) / (df_initial[col].max() - df_initial[col].min())) + params['norm'][0]

    df_train = df[(df['date'] < '2019-01-01')].copy()
    df_test = df[df['date'] >= '2019-01-01'].copy()

    train_X = []
    train_Y = []
    for i in range(params['width'], len(df_train) - params['output']):
        train_X.append(df_train.iloc[i - params['width']:i][input_vars].values)
        train_Y.append(df_train.iloc[i + params['output'] - 1:i + params['output']][cols].values)
    train_X = np.array(train_X)
    train_Y = np.array(train_Y)

    test_X = []
    test_Y = []
    for i in range(params['width'], len(df_test) - params['output']):
        test_X.append(df_test.iloc[i - params['width']:i][input_vars].values)
        test_Y.append(df_test.iloc[i + params['output'] - 1:i + params['output']][cols].values)
    test_X = np.array(test_X)
    test_Y = np.array(test_Y)
        
    model = Sequential()
    model.add(LSTM(params['neurons'], activation='tanh', input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=False))
    model.add(Dropout(params['dropout']))
    model.add(Dense(units=len(cols), activation='linear'))
    model.compile(optimizer=params['opt'], loss='mse', metrics=['mae'])

    with tf.device('/device:GPU:0'):
        history = model.fit(train_X, train_Y, epochs=params['epochs'], batch_size=params['batch'], validation_split=0.1, verbose=1, shuffle=False)
    model.save(f'models/{station.strip(".zip")}_{params["width"]}_{params["output"]}_{params["batch"]}_{params["epochs"]}_{params["dropout"]}_{params["neurons"]}_{params["opt"]}_{params["norm"][0]}{params["norm"][1]}.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
input_width = [48]
prediction_width = [1]
batch_size = [6]
epochs = [10]
dropout = [0.05]
neurons = [64]
optimizer = ['adagrad']
normalization = [[-1, 1]]#, [0, 1]]

station = 'D6.zip'
variables = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'date', 'time'] # 'time' 'day'
input_vars = ['T', 'HR', 'P', 'u10', 'v10', 'day', 'time'] # 'time'
cols = ['T', 'HR', 'P', 'u10', 'v10']

df_initial = pd.read_csv(f'data/data_by_station/{station}', compression='zip', header=0, sep=',')
df_initial['date'] = pd.to_datetime(df_initial['date'], format='%Y-%m-%d %H:%M:%S')
df_initial['day'] = df_initial['date'].dt.dayofyear / 365
df_initial['time'] = df_initial['date'].dt.hour / 24
df_initial = df_initial.astype({'T': 'float', 'HR': 'float', 'P': 'float', 'u2': 'float', 'v2': 'float', 'u6': 'float', 'v6': 'float', 'u10': 'float', 'v10': 'float', 'altitud': 'float', 'latitud': 'float', 'longitud': 'float'})
df_initial = df_initial[variables]

parameters = []
for i in input_width:
    for j in prediction_width:
        for k in batch_size:
            for l in epochs:
                for m in dropout:
                    for n in neurons:
                            for p in optimizer:
                                for s in normalization:
                                    parameters.append({'width':i, 'output': j, 'batch': k, 'epochs': l, 'dropout': m, 'neurons': n, 'opt': p, 'norm': s})

In [8]:
last_params = {'width': 0, 'norm': []}
for params in parameters:
    df = df_initial.copy()
    for col in cols:
        df[col] = ((params['norm'][1] - params['norm'][0]) * (df[col] - df_initial[col].min()) / (df_initial[col].max() - df_initial[col].min())) + params['norm'][0]

    df_train = df[(df['date'] < '2019-01-01')].copy()
    df_test = df[df['date'] >= '2019-01-01'].copy()

    train_X = []
    train_Y = []
    for i in range(params['width'], len(df_train) - params['output']):
        train_X.append(df_train.iloc[i - params['width']:i][input_vars].values)
        train_Y.append(df_train.iloc[i + params['output'] - 1:i + params['output']][cols].values)
    train_X = np.array(train_X)
    train_Y = np.array(train_Y)

    test_X = []
    test_Y = []
    for i in range(params['width'], len(df_test) - params['output']):
        test_X.append(df_test.iloc[i - params['width']:i][input_vars].values)
        test_Y.append(df_test.iloc[i + params['output'] - 1:i + params['output']][cols].values)
    test_X = np.array(test_X)
    test_Y = np.array(test_Y)
        
    model = Sequential()
    model.add(LSTM(params['neurons'], activation='tanh', input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=False))
    model.add(Dropout(params['dropout']))
    model.add(Dense(units=len(cols), activation='linear'))
    model.compile(optimizer=params['opt'], loss='mse', metrics=['mae'])

    with tf.device('/device:GPU:0'):
        history = model.fit(train_X, train_Y, epochs=params['epochs'], batch_size=params['batch'], validation_split=0.1, verbose=1, shuffle=False)
    model.save(f'models/{station.strip(".zip")}_{params["width"]}_{params["output"]}_{params["batch"]}_{params["epochs"]}_{params["dropout"]}_{params["neurons"]}_{params["opt"]}_{params["norm"][0]}{params["norm"][1]}.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [54]:
directories = os.listdir('models')

norm = [model for model in directories if '11' in model]

results = []

for model_path in norm:
    params = model_path.strip('.h5').split('_')
    if '01' in params[-1]:
        norm_min, norm_max = 0, 1
    else:
        norm_min, norm_max = -1, 1
    df_test = df_initial[df_initial['date'] >= '2019-01-01'].copy()

    for col in cols:
        df_test[col] = ((norm_max - norm_min) * (df_test[col] - df_initial[col].min()) / (df_initial[col].max() - df_initial[col].min())) + norm_min
    
    test_X = []
    test_Y = []
    for i in range(int(params[1]), len(df_test) - int(params[2])):
        test_X.append(df_test.iloc[i - int(params[1]):i][input_vars].values)
        test_Y.append(df_test.iloc[i+ int(params[2]) - 1:i + int(params[2])][cols].values)
    test_X = np.array(test_X)
    test_Y = np.array(test_Y)

    model = keras.models.load_model(f'models/{model_path}')
    y_pred = model.predict(test_X)
    for idx, col in enumerate(cols):
        y_pred[:, idx] = ((y_pred[:, idx] - norm_min) * (df_initial[col].max() - df_initial[col].min()) / (norm_max - norm_min)) + df_initial[col].min()
    
    results.append(y_pred)

for idx, col in enumerate(cols):
    test_Y[:, 0, idx] = ((test_Y[:, 0, idx] - norm_min) * (df_initial[col].max() - df_initial[col].min()) / (norm_max - norm_min)) + df_initial[col].min()



In [55]:

for idx, col in enumerate(cols):
    # plot each column
    plt.figure(figsize=(10, 6))
    plt.plot(test_Y[:480, :, idx], label='Real', color='blue')
    for idx2, result in enumerate(results):
        params = norm[idx2].strip('.h5').split('_')
        rmse = np.sqrt(np.mean(np.power((test_Y[:, 0, idx] - result[:, idx]), 2)))
        plt.plot(result[:480, idx], label=f'{params[3]} Batch size: {rmse:.4f}')
    plt.legend()
    plt.title(f'{col} at {int(params[2]) * 30} min horizon')
    plt.savefig(f'plots/{params[0]}_{col}_{params[2]}_notd.png')
    plt.clf()


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

In [19]:
params[2]

'16'

In [None]:
%reset -f