Things to consider

- The source of the data (`source`): OWM, OM or ERA
- Which regression models to train (rfr, xgb, knn, ridge), by setting the corresponding True/False flags
- Which deep learning models to train, by listing their names in `model_names` (DNN, LSTM, GRU, CNN)

# Imports

In [24]:
from shared_utilities import *

# Utilities

In [25]:
def setup_data(data_ = 'data\\Sere Wind Farm_hourly_OWM.csv', train_ = False, uni = True, window_size = 24*4, step = 24, sanity_check = False, tensor_ = False):
    """Create a ``WeatherDataModule`` for a CSV file and run its preparation steps.

    Parameters
    ----------
    data_ : str
        Path handed to ``WeatherDataModule`` as ``data_dir``. The default keeps
        the literal backslash of the original Windows-style path; it is written
        with an escaped backslash because ``'\\S'`` is not a valid escape sequence.
    train_ : bool
        Currently unused by this function; kept so existing callers keep working.
    uni : bool
        When True ``column=0`` is passed to the data module, otherwise
        ``column=None`` (presumably single-column vs. all columns -- confirm
        against ``WeatherDataModule``).
    window_size : int
        Forwarded as ``window_size``; also the number of input points drawn by
        the sanity-check plot.
    step : int
        Forwarded as ``step_``; also the number of target points drawn by the
        sanity-check plot.
    sanity_check : bool
        When True, plot the first test window (``dm.f_test[0]``) next to its
        target (``dm.t_test[0]``).
    tensor_ : bool
        Forwarded as ``return_tensor``.

    Returns
    -------
    The data module, after ``prepare_data()`` and ``setup('')`` have been called.
    """
    column_ = 0 if uni else None

    dm = WeatherDataModule(data_dir=data_,
                        window_size=window_size, column=column_,
                        batch_size=32, step_=step,
                        normalize_=True, return_tensor=tensor_)

    dm.prepare_data()
    dm.setup('')

    if sanity_check:
        # The target is drawn right after the input window on the x axis.
        target_x = np.arange(window_size, window_size+step)
        plt.plot(np.arange(window_size), dm.f_test[0], label='Input')
        if step == 1:
            # A single target point would not be visible as a line, so use a marker.
            plt.scatter(target_x, dm.t_test[0], label='Target', s=5, c='r')
        else:
            plt.plot(target_x, dm.t_test[0], label='Target', c='r')
        plt.legend()
        plt.show()

    return dm

def train_deep_models(dm, window_size, step, source, name, folder='deep_models', verbose = 1):
    """Build, compile and fit one deep model, checkpointing the best epoch.

    The network comes from ``build_model`` (hidden sizes 64 and 32, ``step``
    outputs, ``window_size`` inputs, architecture selected by ``name``). It is
    compiled with the Adam optimizer and MSE loss, then fitted on
    ``dm.f_train`` / ``dm.t_train`` with ``dm.f_valid`` / ``dm.t_valid`` as
    validation data for up to 150 epochs with batch size 32.

    Two callbacks watch ``val_loss``: early stopping with patience 3 and
    ``restore_best_weights=True``, and a checkpoint that only keeps the best
    model at ``{folder}/keras_model_{name}_ws_{window_size}_{step}_{source}.h5``.

    Returns the fitted model.
    """
    best_model_file = f"{folder}/keras_model_{name}_ws_{window_size}_{step}_{source}.h5"

    network = build_model(hidden_size=[64, 32], out=step, input_shape_= window_size, type_=name)
    network.compile(optimizer='adam', loss='mse')

    # Both callbacks track the validation loss.
    training_callbacks = [
        EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
        ModelCheckpoint(best_model_file, monitor='val_loss', save_best_only=True, mode='min'),
    ]

    network.fit(
        dm.f_train,
        dm.t_train,
        validation_data=(dm.f_valid, dm.t_valid),
        epochs=150,
        batch_size=32,
        verbose=verbose,
        callbacks=training_callbacks,
    )

    return network



# Setup

In [31]:
# Run configuration shared by the training cells below.
window_size = 24*6   # number of input points per sample
step = 38            # number of points to predict
source = 'ERA'       # data-source tag, used in the saved deep-model file names
save_folder = 'models_compare'
train_reg = True     # run the regression training cell
train_deep = True    # run the deep-model training cell

# Backslashes in the paths are escaped: "\S" and "\E" are not valid escape
# sequences, so the unescaped form only worked by accident (with a warning).
# Alternative input -- the per-source wind-farm CSV:
# dm = setup_data(data_=f'data\\Sere Wind Farm_hourly_{source}.csv', train_ = False, uni = True, window_size = window_size, step = step, sanity_check = False, tensor_=False)
dm = setup_data(data_='ERA5_Data\\ERA5_Reanalysis.csv', train_ = False, uni = True, window_size = window_size, step = step, sanity_check = False, tensor_=False)


Train: (6132, 144)
Valid: (1752, 144)
Test: (694, 144)


# Train Regression

In [27]:
if train_reg:
    # All four regressor flags (rfr, xgb_, knn, ridge) are switched on here;
    # set one to False to skip that model.
    train(
        dm=dm,
        folder=save_folder,
        train_models=True,
        rfr=True,
        xgb_=True,
        knn=True,
        ridge=True,
        window_size=window_size,
        step=step,
    )

Training Random Forest Regressor...
Elapsed minutes: 2.1648295203844707



Training XGBoost Model...




Elapsed minutes: 1.2796406428019205



Training KNN Regressor...
Elapsed minutes: 8.751948674519856e-05



Training ridge Regressor...
Elapsed minutes: 0.0005638957023620606







# Train Deep Models

In [30]:
def _conv_feature_layers(hidden_size, input_shape_):
    """Return the Conv1D/MaxPooling1D stack shared by the 'CNN' and 'test' models."""
    return [
        Conv1D(filters=hidden_size[0], kernel_size=3, activation='relu', input_shape=(input_shape_, 1)),
        Conv1D(filters=hidden_size[0], kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=hidden_size[1], kernel_size=3, activation='relu'),
        Conv1D(filters=hidden_size[1], kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
    ]


def build_model(hidden_size = [128, 64], out = 1, input_shape_ = 24 * 2, type_ = 'DNN'):
    """Build an (uncompiled) Keras model of the requested architecture.

    Parameters
    ----------
    hidden_size : sequence of two ints
        ``hidden_size[0]`` and ``hidden_size[1]`` size the first and second
        hidden stage (units for Dense/LSTM/GRU, filters for Conv1D). The list
        is only read, never modified.
    out : int
        Number of units in the final ``Dense`` layer.
    input_shape_ : int
        Length of the input window. 'DNN' declares ``(input_shape_,)`` inputs;
        every other type declares ``(input_shape_, 1)``.
    type_ : str
        One of ``'DNN'``, ``'LSTM'``, ``'GRU'``, ``'CNN'`` or ``'test'``
        (the CNN stack followed by an LSTM).

    Returns
    -------
    The assembled model.

    Raises
    ------
    ValueError
        If ``type_`` is not one of the supported names. Previously this case
        fell through and failed with ``UnboundLocalError`` at ``return``.
    """
    if type_ == 'DNN':
        model = Sequential()
        # First layer is created without an activation argument.
        model.add(Dense(hidden_size[0], input_shape=(input_shape_,)))
        model.add(Dense(hidden_size[1], activation='relu'))
        model.add(Dense(out))

    elif type_ == 'LSTM':
        model = Sequential()
        model.add(LSTM(hidden_size[0], return_sequences=True, input_shape=(input_shape_, 1)))
        model.add(LSTM(hidden_size[1], activation='relu'))
        # model.add(Dense(hidden_size[1], activation='relu'))
        model.add(Dense(out))

    elif type_ == 'GRU':
        model = Sequential()
        model.add(GRU(hidden_size[0], input_shape=(input_shape_, 1)))
        model.add(Dense(hidden_size[1], activation='relu'))
        model.add(Dense(out))

    elif type_ == 'CNN':
        # The CNN is more effective when using less layers and filters.
        #
        # Hyperparameters will include:
        # - Number of filters or hidden size
        # - Kernel size
        # - Total layers
        #
        # NOTE(review): the sigmoid output bounds predictions to (0, 1);
        # presumably intended for normalized targets -- confirm.
        model = tf.keras.Sequential(
            _conv_feature_layers(hidden_size, input_shape_)
            + [
                Flatten(),
                Dense(512, activation='relu'),
                Dropout(0.5),
                Dense(out, activation='sigmoid'),
            ]
        )

    elif type_ == 'test':
        # Experimental variant: same convolutional stack, then an LSTM head.
        model = tf.keras.Sequential(
            _conv_feature_layers(hidden_size, input_shape_)
            + [
                LSTM(64, activation='relu'),
                Flatten(),
                Dropout(0.5),
                Dense(out, activation='sigmoid'),
            ]
        )

    else:
        raise ValueError(
            f"Unknown model type {type_!r}; expected one of "
            "'DNN', 'LSTM', 'GRU', 'CNN', 'test'"
        )

    return model

In [32]:
if train_deep:
    # Architectures to train in this run; the commented list shows the other
    # names that can be added.
    model_names = ['LSTM'] #['DNN', 'LSTM', 'GRU', 'CNN']

    for name in model_names:
        print(f'Training {name} model')
        train_deep_models(
            dm,
            window_size,
            step,
            source,
            name,
            folder=save_folder,
            verbose=1,
        )

Training LSTM model
Epoch 1/150
Epoch 2/150
  1/192 [..............................] - ETA: 19s - loss: 0.0196

  saving_api.save_model(


Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
