In [None]:
%cd drive/MyDrive/Colab\ Notebooks/step-counts-prediction

/content/drive/MyDrive/Colab Notebooks/step-counts-prediction


In [None]:
import pandas as pd
import numpy as np
import yaml
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, SimpleRNN, GRU
from keras.layers import Flatten, Dropout, TimeDistributed, ConvLSTM2D, Embedding
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
import keras
import tensorflow as tf
from IPython.display import clear_output
import warnings
warnings.filterwarnings("ignore")

from utils.preprocessing import aggregate_steps, augment, to_supervised

In [None]:
! pip install optuna
clear_output()

In [None]:
import optuna
from optuna.integration import KerasPruningCallback
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
# Read the raw step-count export
PATH = 'data/StepCount.csv'
data = pd.read_csv(PATH)

# Read the project configuration
with open("config.yml", 'r') as handle:
    config = yaml.safe_load(handle)

# Dataset 1: steps aggregated by date (keep the date / day-of-week columns aside)
daily_frame = aggregate_steps(data, ['date'])
date_dow = daily_frame[['date', 'dow']]
steps_date = daily_frame.value

# Dataset 2: steps aggregated by date and hour (keep the date / hour columns aside)
hourly_frame = aggregate_steps(data, ['date', 'hour'])
date_hour = hourly_frame[['date', 'hour']]
steps_hour = hourly_frame.value

# Dataset 3: the output of `augment` on the raw data
augmented_steps_date = augment(data)

# Report the size of each dataset
for dataset in (steps_date, steps_hour, augmented_steps_date):
    print(dataset.shape)

(1671,)
(20334,)
(1671, 7)


In [None]:
# Daily totals below this threshold are replaced by the median daily total
# (presumably days on which the tracker was barely used — TODO confirm).
MIN_VALID_STEPS = 300
median = steps_date.median()

# Series.mask builds a new series instead of writing through a possible view.
steps_date = steps_date.mask(steps_date < MIN_VALID_STEPS, median)

# Assign the whole column back rather than using chained indexing
# (`frame.value[mask] = ...`), which may write to a temporary copy and is a
# silent no-op when pandas copy-on-write is enabled.
augmented_steps_date['value'] = augmented_steps_date['value'].mask(
    augmented_steps_date['value'] < MIN_VALID_STEPS, median
)

In [None]:
# Number of Optuna trials per study; passed as `n_trials` to `study.optimize` in the search cells.
N_TRIALS = 100

def scheduler(epoch, lr):
  """Learning-rate schedule with a linear ramp over the first 20 epochs.

  Args:
    epoch: Index of the epoch about to start.
    lr: Learning rate handed to the schedule.

  Returns:
    ``epoch * lr / 20`` while ``epoch <= 20``, otherwise ``lr`` unchanged.
  """
  # NOTE(review): Keras' LearningRateScheduler passes the optimizer's *current*
  # learning rate as `lr`; because epoch 0 yields 0 here, confirm the ramp
  # behaves as intended before attaching this schedule to a model.
  warmup_epochs = 20
  if epoch <= warmup_epochs:
    return (epoch * lr) / warmup_epochs
  return lr

# Keras callback wrapping `scheduler`; verbose=0 disables its per-epoch message.
# NOTE(review): no `model.fit(...)` call in the visible cells passes this callback — confirm it is still needed.
warmup = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=0)

In [None]:
# Check the device
tf.test.gpu_device_name()

'/device:GPU:0'

# Steps/Date

## SimpleRNN

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of a SimpleRNN stack trained on ``steps_date``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, lag, 1)``. ``trial`` supplies the learning rate
    (``lr``), the number of extra recurrent layers (``n_hidden``) and the width
    of every layer (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 1
    horizon = 1

    # Chronological split into training, validation and test parts
    n = len(steps_date)
    train_data = steps_date[0:int(n*0.8)]
    val_data = steps_date[int(n*0.8):int(n*0.9)]
    test_data = steps_date[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first value of the following part before
    # windowing. The test part itself is not windowed: tuning only scores on
    # validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(SimpleRNN(
            trial.suggest_categorical("units_h_{}".format(i), unit_choices),
            input_shape=input_shape,
            activation='tanh',
            return_sequences=True,
        ))
    model.add(SimpleRNN(
        trial.suggest_categorical("units", unit_choices),
        input_shape=input_shape,
        activation='tanh',
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

## LSTM

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of an LSTM stack trained on ``steps_date``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, lag, 1)``. ``trial`` supplies the learning rate
    (``lr``), the number of extra recurrent layers (``n_hidden``) and the width
    of every layer (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 1
    horizon = 1

    # Chronological split into training, validation and test parts
    n = len(steps_date)
    train_data = steps_date[0:int(n*0.8)]
    val_data = steps_date[int(n*0.8):int(n*0.9)]
    test_data = steps_date[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first value of the following part before
    # windowing. The test part itself is not windowed: tuning only scores on
    # validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(LSTM(
            trial.suggest_categorical("units_h_{}".format(i), unit_choices),
            input_shape=input_shape,
            activation='tanh',
            return_sequences=True,
        ))
    model.add(LSTM(
        trial.suggest_categorical("units", unit_choices),
        input_shape=input_shape,
        activation='tanh',
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.0984
  Params: 
    lr: 0.00817775724699038
    n_hidden: 0
    units: 1


In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

## BLSTM

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of a bidirectional-LSTM stack trained on ``steps_date``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, lag, 1)``. ``trial`` supplies the learning rate
    (``lr``), the number of extra recurrent layers (``n_hidden``) and the width
    of every layer (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 1
    horizon = 1

    # Chronological split into training, validation and test parts
    n = len(steps_date)
    train_data = steps_date[0:int(n*0.8)]
    val_data = steps_date[int(n*0.8):int(n*0.9)]
    test_data = steps_date[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first value of the following part before
    # windowing. The test part itself is not windowed: tuning only scores on
    # validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer,
    # each LSTM wrapped in Bidirectional
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(Bidirectional(
            LSTM(
                trial.suggest_categorical("units_h_{}".format(i), unit_choices),
                activation='tanh',
                return_sequences=True,
            ),
            input_shape=input_shape,
        ))
    model.add(Bidirectional(
        LSTM(
            trial.suggest_categorical("units", unit_choices),
            activation='tanh',
        ),
        input_shape=input_shape,
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.0984
  Params: 
    lr: 0.006062420955727154
    n_hidden: 1
    units_h_0: 16
    units: 256


In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

## GRU

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of a GRU stack trained on ``steps_date``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, lag, 1)``. ``trial`` supplies the learning rate
    (``lr``), the number of extra recurrent layers (``n_hidden``) and the width
    of every layer (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 1
    horizon = 1

    # Chronological split into training, validation and test parts
    n = len(steps_date)
    train_data = steps_date[0:int(n*0.8)]
    val_data = steps_date[int(n*0.8):int(n*0.9)]
    test_data = steps_date[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first value of the following part before
    # windowing. The test part itself is not windowed: tuning only scores on
    # validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(GRU(
            trial.suggest_categorical("units_h_{}".format(i), unit_choices),
            input_shape=input_shape,
            activation='tanh',
            return_sequences=True,
        ))
    model.add(GRU(
        trial.suggest_categorical("units", unit_choices),
        input_shape=input_shape,
        activation='tanh',
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.0984
  Params: 
    lr: 0.007426098539860644
    n_hidden: 0
    units: 1


In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

# Steps/Hour

## SimpleRNN

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of a SimpleRNN stack trained on ``steps_hour``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, 24, 24)``; the network ends in a 24-unit dense
    layer. ``trial`` supplies the learning rate (``lr``), the number of extra
    recurrent layers (``n_hidden``) and the width of every layer
    (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 24
    horizon = 24

    # Chronological split into training, validation and test parts
    n = len(steps_hour)
    train_data = steps_hour[0:int(n*0.8)]
    val_data = steps_hour[int(n*0.8):int(n*0.9)]
    test_data = steps_hour[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first 24 values of the following part
    # before windowing. The test part itself is not windowed: tuning only
    # scores on validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(SimpleRNN(
            trial.suggest_categorical("units_h_{}".format(i), unit_choices),
            input_shape=input_shape,
            activation='tanh',
            return_sequences=True,
        ))
    model.add(SimpleRNN(
        trial.suggest_categorical("units", unit_choices),
        input_shape=input_shape,
        activation='tanh',
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=16,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`.
# NOTE(review): this study hard-codes 50 trials instead of N_TRIALS — confirm this is deliberate.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2021-05-10 06:28:44,937][0m A new study created in memory with name: no-name-a7460ac0-352e-47f2-b2e7-2fa12bc3d98d[0m
[32m[I 2021-05-10 06:47:11,332][0m Trial 0 finished with value: 0.0616 and parameters: {'lr': 0.0067101401159993905, 'n_hidden': 2, 'units_h_0': 1, 'units_h_1': 32, 'units': 16}. Best is trial 0 with value: 0.0616.[0m
[32m[I 2021-05-10 06:59:02,544][0m Trial 1 finished with value: 0.0674 and parameters: {'lr': 0.008875451342630063, 'n_hidden': 0, 'units': 1}. Best is trial 0 with value: 0.0616.[0m
[32m[I 2021-05-10 07:28:16,092][0m Trial 2 finished with value: 0.0609 and parameters: {'lr': 0.00806013278356797, 'n_hidden': 2, 'units_h_0': 256, 'units_h_1': 16, 'units': 16}. Best is trial 2 with value: 0.0609.[0m
[32m[I 2021-05-10 07:43:16,709][0m Trial 3 finished with value: 0.0695 and parameters: {'lr': 0.004089434576758066, 'n_hidden': 1, 'units_h_0': 16, 'units': 1}. Best is trial 2 with value: 0.0609.[0m
[32m[I 2021-05-10 07:47:24,773][0m Trial

In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

## LSTM

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of an LSTM stack trained on ``steps_hour``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, 24, 24)``; the network ends in a 24-unit dense
    layer. ``trial`` supplies the learning rate (``lr``), the number of extra
    recurrent layers (``n_hidden``) and the width of every layer
    (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 24
    horizon = 24

    # Chronological split into training, validation and test parts
    n = len(steps_hour)
    train_data = steps_hour[0:int(n*0.8)]
    val_data = steps_hour[int(n*0.8):int(n*0.9)]
    test_data = steps_hour[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first 24 values of the following part
    # before windowing. The test part itself is not windowed: tuning only
    # scores on validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(LSTM(
            trial.suggest_categorical("units_h_{}".format(i), unit_choices),
            input_shape=input_shape,
            activation='tanh',
            return_sequences=True,
        ))
    model.add(LSTM(
        trial.suggest_categorical("units", unit_choices),
        input_shape=input_shape,
        activation='tanh',
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights.
    # verbose=0 like the other objectives: per-epoch progress for every trial floods the output.
    # NOTE(review): batch_size is 32 here while the other hourly objectives use 16 — confirm.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=32,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

## BLSTM

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of a bidirectional-LSTM stack trained on ``steps_hour``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, 24, 24)``; the network ends in a 24-unit dense
    layer. ``trial`` supplies the learning rate (``lr``), the number of extra
    recurrent layers (``n_hidden``) and the width of every layer
    (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 24
    horizon = 24

    # Chronological split into training, validation and test parts
    n = len(steps_hour)
    train_data = steps_hour[0:int(n*0.8)]
    val_data = steps_hour[int(n*0.8):int(n*0.9)]
    test_data = steps_hour[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first 24 values of the following part
    # before windowing. The test part itself is not windowed: tuning only
    # scores on validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer,
    # each LSTM wrapped in Bidirectional
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(Bidirectional(
            LSTM(
                trial.suggest_categorical("units_h_{}".format(i), unit_choices),
                activation='tanh',
                return_sequences=True,
            ),
            input_shape=input_shape,
        ))
    model.add(Bidirectional(
        LSTM(
            trial.suggest_categorical("units", unit_choices),
            activation='tanh',
        ),
        input_shape=input_shape,
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=16,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

## GRU

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of a GRU stack trained on ``steps_hour``.

    The series is split chronologically 80/10/10, min-max scaled with a scaler
    fitted on the training part only, and turned into samples by
    ``to_supervised(series, 24, 24)``; the network ends in a 24-unit dense
    layer. ``trial`` supplies the learning rate (``lr``), the number of extra
    recurrent layers (``n_hidden``) and the width of every layer
    (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 24
    horizon = 24

    # Chronological split into training, validation and test parts
    n = len(steps_hour)
    train_data = steps_hour[0:int(n*0.8)]
    val_data = steps_hour[int(n*0.8):int(n*0.9)]
    test_data = steps_hour[int(n*0.9):]

    # Fit the scaler on the training part only, then apply it to the others
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
    val_data = scaler.transform(val_data.to_numpy().reshape(-1, 1))
    test_data = scaler.transform(test_data.to_numpy().reshape(-1, 1))

    # Each part is extended with the first 24 values of the following part
    # before windowing. The test part itself is not windowed: tuning only
    # scores on validation data.
    X_train, y_train = to_supervised(train_data.tolist()+val_data.tolist()[:horizon], lag, horizon)
    X_val, y_val = to_supervised(val_data.tolist()+test_data.tolist()[:horizon], lag, horizon)

    # Add a trailing feature axis of size 1 for the recurrent layers
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(GRU(
            trial.suggest_categorical("units_h_{}".format(i), unit_choices),
            input_shape=input_shape,
            activation='tanh',
            return_sequences=True,
        ))
    model.add(GRU(
        trial.suggest_categorical("units", unit_choices),
        input_shape=input_shape,
        activation='tanh',
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=16,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

# Augmented Steps/Date

## SimpleRNN

In [None]:
def objective(trial):
    """Optuna objective: validation MAE of a SimpleRNN stack trained on ``augmented_steps_date``.

    The frame is split chronologically 80/10/10 (on copies, so the shared
    notebook frame is never modified), its first column is min-max scaled with
    a scaler fitted on the training part only, and samples are built by
    ``to_supervised(frame, lag, 1)``. Only column 0 of the targets is kept.
    ``trial`` supplies the learning rate (``lr``), the number of extra
    recurrent layers (``n_hidden``) and the width of every layer
    (``units_h_<i>``, ``units``).

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean absolute error on the scaled validation samples, rounded to 4 decimals.
    """
    # Drop Keras state left over from earlier trials; otherwise every model
    # built during the study stays alive and memory use grows trial after trial.
    keras.backend.clear_session()

    unit_choices = [1, 16, 32, 64, 128, 256]
    lag = 1
    horizon = 1

    # Chronological split. The slices are copied because the scaling below
    # assigns into them; writing into plain slices could leak scaled values
    # back into the global frame and change what later trials and cells see.
    n = len(augmented_steps_date)
    train_data = augmented_steps_date[0:int(n*0.8)].copy()
    val_data = augmented_steps_date[int(n*0.8):int(n*0.9)].copy()
    test_data = augmented_steps_date[int(n*0.9):].copy()

    # Scale the first column only, with a scaler fitted on the training part.
    # .ravel() turns the (n, 1) scaler output into the 1-D shape a column expects.
    steps_col = augmented_steps_date.columns[0]
    scaler = MinMaxScaler()
    train_data[steps_col] = scaler.fit_transform(train_data.iloc[:, 0].to_numpy().reshape(-1, 1)).ravel()
    val_data[steps_col] = scaler.transform(val_data.iloc[:, 0].to_numpy().reshape(-1, 1)).ravel()
    test_data[steps_col] = scaler.transform(test_data.iloc[:, 0].to_numpy().reshape(-1, 1)).ravel()

    # Each part is extended with the first row of the following part before
    # windowing. The test part itself is not windowed: tuning only scores on
    # validation data.
    X_train, y_train = to_supervised(pd.concat([train_data, val_data.iloc[:horizon, :]]), lag, horizon)
    X_val, y_val = to_supervised(pd.concat([val_data, test_data.iloc[:horizon, :]]), lag, horizon)

    # Keep only column 0 (the scaled one) as the target
    y_train = y_train[:, 0]
    y_val = y_val[:, 0]

    # One feature per column of the frame (7 for the frame loaded in this notebook)
    n_features = train_data.shape[1]
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], n_features)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], n_features)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # n_hidden sequence-returning layers followed by one final recurrent layer
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    for i in range(n_layers):
        model.add(SimpleRNN(
            trial.suggest_categorical("units_h_{}".format(i), unit_choices),
            input_shape=input_shape,
            activation='tanh',
            return_sequences=True,
        ))
    model.add(SimpleRNN(
        trial.suggest_categorical("units", unit_choices),
        input_shape=input_shape,
        activation='tanh',
    ))
    model.add(Dense(horizon))

    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Stop once validation MAE has not improved for 10 epochs and restore the best weights
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[EarlyStopping("val_mean_absolute_error", patience=10, restore_best_weights=True)]
    )

    # Score the restored model on the validation samples
    predictions = model.predict(X_val)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Run the search: a new study that minimises the value returned by `objective`
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Remove the output produced while optimising before printing the summary
clear_output()

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial's objective value and sampled hyper-parameters
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.0971
  Params: 
    lr: 0.0014627429828170343
    n_hidden: 0
    units: 128


In [None]:
plot_optimization_history(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

## LSTM

In [None]:
def objective(trial):
    """Optuna objective: fit one stacked-LSTM configuration and score it on validation.

    Hyperparameters drawn from ``trial`` (names and draw order are unchanged):
    ``lr`` (Adam learning rate), ``n_hidden`` (0-2 extra sequence-returning
    LSTM layers), ``units_h_<i>`` (width of extra layer ``i``) and ``units``
    (width of the last LSTM layer). A ``Dense(1)`` head produces the output.

    Args:
        trial: Optuna trial used to draw the hyperparameters.

    Returns:
        Mean absolute error on the validation split, measured on the
        MinMax-scaled target and rounded to 4 decimals.
    """
    # Drop the Keras state left behind by the previous trial's model so that
    # memory does not grow over the whole study.
    keras.backend.clear_session()

    # Chronological 80/10/10 split. Each part is an explicit copy: scaling a
    # bare slice is chained assignment and may write through to the shared
    # `augmented_steps_date` frame that every other cell reads.
    n = len(augmented_steps_date)
    train_data = augmented_steps_date[0:int(n*0.8)].copy()
    val_data = augmented_steps_date[int(n*0.8):int(n*0.9)].copy()
    test_data = augmented_steps_date[int(n*0.9):].copy()

    # MinMax-normalize the first column only; the scaler is fitted on the
    # training split and merely applied to the other two.
    steps_col = train_data.columns[0]
    scaler = MinMaxScaler()
    scaler.fit(train_data[steps_col].to_numpy().reshape(-1, 1))
    for split in (train_data, val_data, test_data):
        split[steps_col] = scaler.transform(
            split[steps_col].to_numpy().reshape(-1, 1)
        ).ravel()

    lag = 1
    # Each split is extended with the first row of the following split before
    # windowing (presumably so its last window still has a target; confirm
    # against `to_supervised`). The test split itself is not windowed here
    # because the objective only ever scores on validation data.
    X_train, y_train = to_supervised(pd.concat([train_data, val_data.iloc[:1, :]]), lag, 1)
    X_val, y_val = to_supervised(pd.concat([val_data, test_data.iloc[:1, :]]), lag, 1)

    # Leave only steps as a target
    y_train = y_train[:, 0]
    y_val = y_val[:, 0]

    # Reshape with a feature axis sized from the frame instead of a literal 7.
    n_features = train_data.shape[1]
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], n_features)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], n_features)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # Build the model: the input shape is declared once up front instead of
    # being repeated on every recurrent layer.
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    model.add(keras.Input(shape=input_shape))
    for i in range(n_layers):
        model.add(
            LSTM(
                trial.suggest_categorical("units_h_{}".format(i), [1, 16, 32, 64, 128, 256]),
                activation='tanh',
                return_sequences=True
            )
        )
    model.add(
        LSTM(
            trial.suggest_categorical("units", [1, 16, 32, 64, 128, 256]),
            activation='tanh'
        )
    )
    model.add(Dense(1))

    # Compile the model
    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Train the model. The epoch count is only an upper bound: early stopping
    # on validation MAE ends the run and restores the best weights.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[
            EarlyStopping(
                monitor="val_mean_absolute_error",
                mode="min",  # an error metric: lower is better
                patience=10,
                restore_best_weights=True
            )
        ]
    )

    # Evaluate the model
    predictions = model.predict(X_val, verbose=0)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Hyperparameter search for the LSTM objective defined above: a fresh study
# that minimizes the objective's return value over N_TRIALS trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Wipe whatever the trials printed so only the summary stays in the cell output.
clear_output()

# Summary header, then the best trial's score and sampled hyperparameters.
print(
    "Number of finished trials: {}".format(len(study.trials)),
    "Best trial:",
    sep="\n",
)
trial = study.best_trial
print(
    "  Value: {}".format(trial.value),
    "  Params: ",
    sep="\n",
)
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.0996
  Params: 
    lr: 0.008630314174108707
    n_hidden: 2
    units_h_0: 1
    units_h_1: 32
    units: 16


In [None]:
# Optimization-history plot for the LSTM search above. `study` is rebound by
# every search cell, so this shows whichever study was run most recently.
plot_optimization_history(study)

In [None]:
# Parallel-coordinate plot of the hyperparameters sampled in the LSTM search
# (uses the current `study`, i.e. the most recently run search cell).
plot_parallel_coordinate(study)

In [None]:
# Slice plot of the LSTM search (uses the current `study`, i.e. the most
# recently run search cell).
plot_slice(study)

In [None]:
# Hyperparameter-importance plot for the LSTM search (uses the current
# `study`, i.e. the most recently run search cell).
plot_param_importances(study)

## BLSTM

In [None]:
def objective(trial):
    """Optuna objective: fit one stacked bidirectional-LSTM configuration and score it.

    Hyperparameters drawn from ``trial`` (names and draw order are unchanged):
    ``lr`` (Adam learning rate), ``n_hidden`` (0-2 extra sequence-returning
    bidirectional LSTM layers), ``units_h_<i>`` (LSTM width inside extra layer
    ``i``) and ``units`` (LSTM width inside the last bidirectional layer).
    A ``Dense(1)`` head produces the output.

    Args:
        trial: Optuna trial used to draw the hyperparameters.

    Returns:
        Mean absolute error on the validation split, measured on the
        MinMax-scaled target and rounded to 4 decimals.
    """
    # Drop the Keras state left behind by the previous trial's model so that
    # memory does not grow over the whole study.
    keras.backend.clear_session()

    # Chronological 80/10/10 split. Each part is an explicit copy: scaling a
    # bare slice is chained assignment and may write through to the shared
    # `augmented_steps_date` frame that every other cell reads.
    n = len(augmented_steps_date)
    train_data = augmented_steps_date[0:int(n*0.8)].copy()
    val_data = augmented_steps_date[int(n*0.8):int(n*0.9)].copy()
    test_data = augmented_steps_date[int(n*0.9):].copy()

    # MinMax-normalize the first column only; the scaler is fitted on the
    # training split and merely applied to the other two.
    steps_col = train_data.columns[0]
    scaler = MinMaxScaler()
    scaler.fit(train_data[steps_col].to_numpy().reshape(-1, 1))
    for split in (train_data, val_data, test_data):
        split[steps_col] = scaler.transform(
            split[steps_col].to_numpy().reshape(-1, 1)
        ).ravel()

    lag = 1
    # Each split is extended with the first row of the following split before
    # windowing (presumably so its last window still has a target; confirm
    # against `to_supervised`). The test split itself is not windowed here
    # because the objective only ever scores on validation data.
    X_train, y_train = to_supervised(pd.concat([train_data, val_data.iloc[:1, :]]), lag, 1)
    X_val, y_val = to_supervised(pd.concat([val_data, test_data.iloc[:1, :]]), lag, 1)

    # Leave only steps as a target
    y_train = y_train[:, 0]
    y_val = y_val[:, 0]

    # Reshape with a feature axis sized from the frame instead of a literal 7.
    n_features = train_data.shape[1]
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], n_features)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], n_features)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # Build the model: the input shape is declared once up front instead of
    # being repeated on every bidirectional wrapper.
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    model.add(keras.Input(shape=input_shape))
    for i in range(n_layers):
        model.add(
            Bidirectional(
                LSTM(
                    trial.suggest_categorical("units_h_{}".format(i), [1, 16, 32, 64, 128, 256]),
                    activation='tanh',
                    return_sequences=True
                )
            )
        )
    model.add(
        Bidirectional(
            LSTM(
                trial.suggest_categorical("units", [1, 16, 32, 64, 128, 256]),
                activation='tanh'
            )
        )
    )
    model.add(Dense(1))

    # Compile the model
    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Train the model. The epoch count is only an upper bound: early stopping
    # on validation MAE ends the run and restores the best weights.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[
            EarlyStopping(
                monitor="val_mean_absolute_error",
                mode="min",  # an error metric: lower is better
                patience=10,
                restore_best_weights=True
            )
        ]
    )

    # Evaluate the model
    predictions = model.predict(X_val, verbose=0)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Hyperparameter search for the bidirectional-LSTM objective defined above: a
# fresh study that minimizes the objective's return value over N_TRIALS trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Wipe whatever the trials printed so only the summary stays in the cell output.
clear_output()

# Summary header, then the best trial's score and sampled hyperparameters.
print(
    "Number of finished trials: {}".format(len(study.trials)),
    "Best trial:",
    sep="\n",
)
trial = study.best_trial
print(
    "  Value: {}".format(trial.value),
    "  Params: ",
    sep="\n",
)
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.0966
  Params: 
    lr: 0.0066929996994330495
    n_hidden: 0
    units: 128


In [None]:
# Optimization-history plot for the bidirectional-LSTM search above. `study`
# is rebound by every search cell, so this shows whichever study was run most
# recently.
plot_optimization_history(study)

In [None]:
# Parallel-coordinate plot of the hyperparameters sampled in the
# bidirectional-LSTM search (uses the current `study`, i.e. the most recently
# run search cell).
plot_parallel_coordinate(study)

In [None]:
# Slice plot of the bidirectional-LSTM search (uses the current `study`, i.e.
# the most recently run search cell).
plot_slice(study)

In [None]:
# Hyperparameter-importance plot for the bidirectional-LSTM search (uses the
# current `study`, i.e. the most recently run search cell).
plot_param_importances(study)

## GRU

In [None]:
def objective(trial):
    """Optuna objective: fit one stacked-GRU configuration and score it on validation.

    Hyperparameters drawn from ``trial`` (names and draw order are unchanged):
    ``lr`` (Adam learning rate), ``n_hidden`` (0-2 extra sequence-returning
    GRU layers), ``units_h_<i>`` (width of extra layer ``i``) and ``units``
    (width of the last GRU layer). A ``Dense(1)`` head produces the output.

    Args:
        trial: Optuna trial used to draw the hyperparameters.

    Returns:
        Mean absolute error on the validation split, measured on the
        MinMax-scaled target and rounded to 4 decimals.
    """
    # Drop the Keras state left behind by the previous trial's model so that
    # memory does not grow over the whole study.
    keras.backend.clear_session()

    # Chronological 80/10/10 split. Each part is an explicit copy: scaling a
    # bare slice is chained assignment and may write through to the shared
    # `augmented_steps_date` frame that every other cell reads.
    n = len(augmented_steps_date)
    train_data = augmented_steps_date[0:int(n*0.8)].copy()
    val_data = augmented_steps_date[int(n*0.8):int(n*0.9)].copy()
    test_data = augmented_steps_date[int(n*0.9):].copy()

    # MinMax-normalize the first column only; the scaler is fitted on the
    # training split and merely applied to the other two.
    steps_col = train_data.columns[0]
    scaler = MinMaxScaler()
    scaler.fit(train_data[steps_col].to_numpy().reshape(-1, 1))
    for split in (train_data, val_data, test_data):
        split[steps_col] = scaler.transform(
            split[steps_col].to_numpy().reshape(-1, 1)
        ).ravel()

    lag = 1
    # Each split is extended with the first row of the following split before
    # windowing (presumably so its last window still has a target; confirm
    # against `to_supervised`). The test split itself is not windowed here
    # because the objective only ever scores on validation data.
    X_train, y_train = to_supervised(pd.concat([train_data, val_data.iloc[:1, :]]), lag, 1)
    X_val, y_val = to_supervised(pd.concat([val_data, test_data.iloc[:1, :]]), lag, 1)

    # Leave only steps as a target
    y_train = y_train[:, 0]
    y_val = y_val[:, 0]

    # Reshape with a feature axis sized from the frame instead of a literal 7.
    n_features = train_data.shape[1]
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], n_features)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], n_features)

    input_shape = (X_train.shape[1], X_train.shape[2])
    lr = trial.suggest_float('lr', 0.0001, 0.01)
    optimizer = Adam(learning_rate=lr)

    # Build the model: the input shape is declared once up front instead of
    # being repeated on every recurrent layer.
    n_layers = trial.suggest_int("n_hidden", 0, 2)
    model = Sequential()
    model.add(keras.Input(shape=input_shape))
    for i in range(n_layers):
        model.add(
            GRU(
                trial.suggest_categorical("units_h_{}".format(i), [1, 16, 32, 64, 128, 256]),
                activation='tanh',
                return_sequences=True
            )
        )
    model.add(
        GRU(
            trial.suggest_categorical("units", [1, 16, 32, 64, 128, 256]),
            activation='tanh'
        )
    )
    model.add(Dense(1))

    # Compile the model
    model.compile(loss="mse", optimizer=optimizer, metrics=['mean_absolute_error'])

    # Train the model. The epoch count is only an upper bound: early stopping
    # on validation MAE ends the run and restores the best weights.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=False,
        batch_size=8,
        epochs=1000,
        verbose=0,
        callbacks=[
            EarlyStopping(
                monitor="val_mean_absolute_error",
                mode="min",  # an error metric: lower is better
                patience=10,
                restore_best_weights=True
            )
        ]
    )

    # Evaluate the model
    predictions = model.predict(X_val, verbose=0)
    score = mean_absolute_error(y_val, predictions)

    return round(score, 4)


# Hyperparameter search for the GRU objective defined above: a fresh study
# that minimizes the objective's return value over N_TRIALS trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

# Wipe whatever the trials printed so only the summary stays in the cell output.
clear_output()

# Summary header, then the best trial's score and sampled hyperparameters.
print(
    "Number of finished trials: {}".format(len(study.trials)),
    "Best trial:",
    sep="\n",
)
trial = study.best_trial
print(
    "  Value: {}".format(trial.value),
    "  Params: ",
    sep="\n",
)
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.099
  Params: 
    lr: 0.0011917608776142465
    n_hidden: 2
    units_h_0: 256
    units_h_1: 64
    units: 64


In [None]:
# Optimization-history plot for the GRU search above. `study` is rebound by
# every search cell, so this shows whichever study was run most recently.
plot_optimization_history(study)

In [None]:
# Parallel-coordinate plot of the hyperparameters sampled in the GRU search
# (uses the current `study`, i.e. the most recently run search cell).
plot_parallel_coordinate(study)

In [None]:
# Slice plot of the GRU search (uses the current `study`, i.e. the most
# recently run search cell).
plot_slice(study)

In [None]:
# Hyperparameter-importance plot for the GRU search (uses the current
# `study`, i.e. the most recently run search cell).
plot_param_importances(study)