# Preliminary operations

In [None]:
import copy
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import ParameterGrid
from tensorflow.keras.models import Model

# Fix the random seed once, before any other notebook is executed, so runs are repeatable.
# Per the Keras documentation, set_random_seed seeds Python's `random`, NumPy and the backend in one call.
seed = 123
tf.keras.utils.set_random_seed(seed)

In [None]:
# Execute utils.ipynb in this kernel so that whatever it defines is available to the cells below.
%run utils.ipynb

In [None]:
# Execute preprocessing.ipynb in this kernel.
# NOTE(review): presumably the source of `preprocessing`, called in the Preprocessing section - confirm.
%run preprocessing.ipynb

In [None]:
# Execute preparation.ipynb in this kernel.
# NOTE(review): presumably the source of `create_sequences`, used by the grid search - confirm.
%run preparation.ipynb

In [None]:
# Execute training_transformer.ipynb in this kernel.
# NOTE(review): presumably the source of `training_transformer_autoencoder`, used by the grid search - confirm.
%run training_transformer.ipynb

In [None]:
# Execute predict.ipynb in this kernel.
# NOTE(review): nothing in the visible cells references it - confirm it is still needed here.
%run predict.ipynb

# Loading data

In [None]:
# Read the raw indicators CSV and record how many columns it has
data_path = 'datasets/raw/sf_normal_final_indicators_93600.csv'
df = pd.read_csv(filepath_or_buffer = data_path)
number_of_features = df.shape[1]

# Preprocessing

In [None]:
# Preprocessing options; the position of each entry matters because the scaler
# filename below is built from the values in this exact order
params = [
    [
        'timestamp',
        'rides_requested',
        'rides_accepted',
        'rides_not_served',
        'rides_canceled',
        'rides_completed',
        'pending_customers',
        'active_customers',
        'responding_drivers',
    ],      # columns to remove
    False,  # ratio
    False,  # smoothing
    False,  # collinearity
    False,  # log
    300,    # aggregation
    False,  # statistics
    1,      # differencing
    True,   # seasonality
    True,   # normalize
]

# Keyword names of `preprocessing`, aligned index-by-index with `params`
param_names = [
    'columns_to_remove',
    'ratio',
    'smoothing',
    'collinearity',
    'log',
    'aggregation',
    'statistics',
    'differencing',
    'seasonality',
    'normalize',
]

# Scaler filename: every option value, in order, joined with underscores
scaler_path = "df_" + "_".join(str(value) for value in params) + "_scaler.pkl"

# Run the selected preprocessing strategy on a deep copy of the raw frame
# (presumably so that `df` itself cannot be modified by the call - confirm)
df_ = df.copy(deep = True)
df_proc, scaler = preprocessing(
    df_,
    **dict(zip(param_names, params)),
    load_scaler = False,
    save = False,
    continual = False,
)

# Hyperparameter Tuning

In [None]:
# Hyperparameter search space explored by `hyper_tuning_transformer`.
# Each key maps to the list of candidate values tried for it.
param_grid_transformer = dict(
    n_heads = [4, 8],                      # forwarded as `n_heads`
    d_model = [64, 128],                   # forwarded as `d_model`
    feed_forward_size = [128, 256, 512],   # forwarded as `feed_forward_dim`
    n_layers_encoder = [1, 2],             # forwarded as `num_encoder_layers`
    n_layers_decoder = [1, 2],             # forwarded as `num_decoder_layers`
    learning_rate = [0.0001, 0.001],       # forwarded as `learning_rate`
    dropout_rate = [0.0, 0.2],             # forwarded as `dropout_rate`
    batch_size = [32, 64],                 # forwarded as `batch_size`
)

In [None]:
def hyper_tuning_transformer(data, window_size, param_grid, n_epochs = 500, output_dir = 'hyper_tuning'):
    """Grid-search the transformer autoencoder hyperparameters for one window size.

    Every combination produced by ``ParameterGrid(param_grid)`` that passes the
    filters below is trained with ``training_transformer_autoencoder`` and scored
    with the minimum of ``history.history['val_loss']``. The scores are written to
    ``<output_dir>/transformer_performance_w_size_<window_size>.csv`` with the
    columns ``params`` and ``score``. The file is rewritten after every
    combination, so an interrupted search keeps the results obtained so far.

    A combination is trained only when:
      * its ``(n_heads, d_model)`` pair and ``feed_forward_size`` appear together
        in the table of allowed settings defined in the body, and
      * ``n_layers_encoder`` equals ``n_layers_decoder``.

    Args:
        data: Input handed to ``create_sequences`` (the notebook passes the
            preprocessed DataFrame).
        window_size: Window length forwarded to ``create_sequences`` and to the
            training routine; it is also part of the output filename.
        param_grid: Dict mapping hyperparameter names to lists of candidate
            values. The keys ``n_heads``, ``d_model``, ``feed_forward_size``,
            ``n_layers_encoder``, ``n_layers_decoder``, ``learning_rate``,
            ``dropout_rate`` and ``batch_size`` are read.
        n_epochs: Value forwarded as ``n_epochs`` to the training routine.
        output_dir: Directory that receives the CSV; created if it does not exist.

    Returns:
        None. The results are only persisted to the CSV file.
    """
    # Feed-forward sizes considered meaningful for each (n_heads, d_model) pair;
    # pairs that are not listed here are skipped altogether.
    allowed_ff_sizes = {
        (4, 64): (128, 256),
        (4, 128): (256, 512),
        (8, 128): (256, 512),
    }

    # Create the output directory up front: a missing directory would otherwise
    # only surface in to_csv, after all the training work has been done.
    os.makedirs(output_dir, exist_ok = True)
    output_path = f"{output_dir}/transformer_performance_w_size_{window_size}.csv"

    results = []
    count = 0

    def save_results():
        # Explicit columns keep the CSV header valid even when no combination was trained
        pd.DataFrame(results, columns = ['params', 'score']).to_csv(output_path, index = False)

    # The windows depend only on `data` and `window_size`, so build them once.
    # NOTE(review): assumes create_sequences is deterministic and that the training
    # routine does not modify its inputs in place - confirm.
    train_set = create_sequences(data, window_size, 0)

    # Grid Search
    for hyperp in ParameterGrid(param_grid):

        # Compute only meaningful combinations
        head_model_pair = (hyperp['n_heads'], hyperp['d_model'])
        if hyperp['feed_forward_size'] not in allowed_ff_sizes.get(head_model_pair, ()):
            continue
        if hyperp['n_layers_encoder'] != hyperp['n_layers_decoder']:
            continue

        count += 1
        print(f"Combination: {count}, testing the following hyperparameters: {hyperp}")

        # The same windows are passed as the first two positional arguments of the training routine
        history, _ = training_transformer_autoencoder(train_set,
                            train_set,
                            n_heads = hyperp['n_heads'],
                            d_model = hyperp['d_model'],
                            num_encoder_layers = hyperp['n_layers_encoder'],
                            num_decoder_layers = hyperp['n_layers_decoder'],
                            feed_forward_dim = hyperp['feed_forward_size'],
                            dropout_rate = hyperp['dropout_rate'],
                            learning_rate = hyperp['learning_rate'],
                            n_epochs = n_epochs,
                            batch_size = hyperp['batch_size'],
                            window_size = window_size,
                            metric = 'mse',
                            plot = False,
                            save = False)

        # Save each score
        score = np.min(history.history['val_loss'])
        results.append({"params": hyperp, "score": score})
        print(f"Score for params {hyperp}: {score}\n")

        # Checkpoint after every combination so partial results survive an interruption
        save_results()

    print(f"Total number of combinations: {count}")
    save_results()

In [None]:
# Run the grid search once per window size; each run writes its own CSV
windows = [20, 30]
for window in windows:
    hyper_tuning_transformer(data = df_proc, window_size = window, param_grid = param_grid_transformer)

In [None]:
# The loop over `windows` already runs the search for window size 20, so repeating it
# unconditionally would redo the whole grid search and overwrite the same CSV.
# Only run it when the results file is missing (delete the file to force a re-run).
if not os.path.exists('hyper_tuning/transformer_performance_w_size_20.csv'):
    hyper_tuning_transformer(df_proc, 20, param_grid_transformer)

In [27]:
# Load the per-window results and tag every row with the window size it came from;
# without this tag the best combination cannot be traced back to a window size.
df_scores_20 = pd.read_csv('hyper_tuning/transformer_performance_w_size_20.csv').assign(window_size = 20)
df_scores_30 = pd.read_csv('hyper_tuning/transformer_performance_w_size_30.csv').assign(window_size = 30)

# Merge both tables with a fresh index and persist the combined results
final_df_scores = pd.concat([df_scores_20, df_scores_30], ignore_index = True)
final_df_scores.to_csv('hyper_tuning/hyper_tuning_transformer_performance.csv', index = False)

# Lowest score first, so the first row is the best combination
final_df_scores = final_df_scores.sort_values(by = 'score', ascending = True)
print('Best combination: ' + str(final_df_scores['params'].iloc[0])
      + ', window size: ' + str(final_df_scores['window_size'].iloc[0])
      + ', loss: ' + str(final_df_scores['score'].iloc[0]))

Best combination: {'batch_size': 32, 'd_model': 128, 'dropout_rate': 0.2, 'feed_forward_size': 256, 'learning_rate': 0.0001, 'n_heads': 8, 'n_layers_decoder': 1, 'n_layers_encoder': 1}, loss: 9.77929012151435e-05
