## Model selection.
This is a minimal example of model selection via hyperparameter optimization.

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import functools
import sklearn
import copy
import sklearn.model_selection
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import tsgm

#### 0. Install optuna

In [None]:
import sys
!{sys.executable} -m pip install optuna

In [None]:
import optuna

#### 1. Load data
We work with a toy dataset and use the `tsgm` utility `tsgm.utils.gen_sine_dataset` to generate the data. We define a function that generates the dataset and then scales it feature-wise using `tsgm.utils.TSFeatureWiseScaler`, so that each feature lies in $[0, 1]$.

In [None]:
def get_data():
    """Generate the toy sine dataset and scale it feature-wise.

    The raw data comes from ``tsgm.utils.gen_sine_dataset(10000, 24, 5)``
    (presumably 10000 samples of length 24 with 5 features, matching the
    ``seq_len=24`` / ``n_features=5`` used when the model is built -- confirm
    against the tsgm documentation). It is then passed through a freshly
    fitted ``tsgm.utils.TSFeatureWiseScaler``.

    Returns:
        The scaled dataset as returned by ``TSFeatureWiseScaler.fit_transform``.
    """
    raw_sines = tsgm.utils.gen_sine_dataset(10000, 24, 5)
    return tsgm.utils.TSFeatureWiseScaler().fit_transform(raw_sines)

#### 2. Define the optimization problem

In [None]:
# We aim to minimize the objective value, i.e. the discrepancy metric
# defined in the next cell, hence direction="minimize".
study = optuna.create_study(direction="minimize")

In [None]:
# Distance between two datasets: each statistic below is handed to
# DistanceMetric together with a discrepancy that takes the norm of the
# difference of its two arguments.
# Statistics order is: max (axis=None), min (axis=None), max (axis=1), min (axis=1).
metric_to_optimize = tsgm.metrics.metrics.DistanceMetric(
    statistics=[
        functools.partial(stat_fn, axis=stat_axis)
        for stat_axis in (None, 1)
        for stat_fn in (
            tsgm.metrics.statistics.axis_max_s,
            tsgm.metrics.statistics.axis_min_s,
        )
    ],
    discrepancy=lambda x, y: np.linalg.norm(x - y),
)

#### 3. Define the search space for the optimizer
We can optimize both the choice of the optimizer and its hyperparameters.

In [None]:
# optimizers and the search space for the hyperparameters
def _create_optimizer(trial):
    """Sample an optimizer and its hyperparameters from an Optuna trial.

    The optimizer class is chosen among ``RMSprop``, ``Adam`` and ``SGD``.
    Every hyperparameter of the chosen optimizer is then sampled
    log-uniformly from ``[1e-5, 1e-1]``.

    Args:
        trial: Optuna trial used to draw the categorical and float suggestions.

    Returns:
        An instance of the selected class looked up on ``tf.optimizers``,
        constructed with the sampled keyword arguments.
    """
    # Optimizer name -> (constructor kwarg, Optuna parameter name) pairs.
    # Pair order is the order in which the values are suggested.
    search_space = {
        "RMSprop": (
            ("learning_rate", "rmsprop_learning_rate"),
            ("momentum", "rmsprop_momentum"),
        ),
        "Adam": (
            ("learning_rate", "adam_learning_rate"),
        ),
        "SGD": (
            ("learning_rate", "sgd_opt_learning_rate"),
            ("momentum", "sgd_opt_momentum"),
        ),
    }

    chosen = trial.suggest_categorical("optimizer", list(search_space))
    hyperparams = {
        kwarg: trial.suggest_float(param_name, 1e-5, 1e-1, log=True)
        for kwarg, param_name in search_space.get(chosen, ())
    }
    return getattr(tf.optimizers, chosen)(**hyperparams)

#### 4. Define the objective function
In the objective function, we load the data and use them to train a TimeGAN model (`tsgm.models.timeGAN.TimeGAN`) while changing its parameters. We use the fitted TimeGAN model to generate synthetic samples, and finally use them to compute the value of the metric we want to optimize. 

In [None]:
def objective(trial):
    """Train a TimeGAN with trial-sampled hyperparameters and score it.

    Steps: load the scaled data via ``get_data``, sample ``n_layers`` and
    ``num_hidden`` from the trial, build and compile a GRU-based TimeGAN with
    the optimizer returned by ``_create_optimizer``, fit it for 100 epochs,
    generate 10 synthetic samples and compare them with the first 10 training
    samples using ``metric_to_optimize``.

    Args:
        trial: Optuna trial that provides the hyperparameter suggestions.

    Returns:
        The value of ``metric_to_optimize`` for the generated samples versus
        the first 10 training samples (the quantity the study optimizes).
    """
    real_data = get_data()

    # Search space for the architecture; the optimizer settings are sampled
    # afterwards, once the model has been built.
    depth = trial.suggest_int(name="n_layers", low=1, high=10)
    width = trial.suggest_int(name="num_hidden", low=4, high=128, log=True)

    timegan = tsgm.models.timeGAN.TimeGAN(
        seq_len=24,
        module="gru",
        hidden_dim=width,
        n_features=5,
        n_layers=depth,
        batch_size=256,
        gamma=1.0,
    )
    timegan.compile(_create_optimizer(trial))

    # Train on the full dataset; no separate validation split is used here.
    timegan.fit(data=real_data, epochs=100)

    # Score: synthetic samples vs. the same number of leading training samples.
    n_eval = 10
    synthetic = timegan.generate(n_samples=n_eval)
    reference = np.array(real_data[:n_eval])
    return metric_to_optimize(synthetic, reference)

In [None]:
# Run the search: evaluate `objective` for 10 trials.
study.optimize(objective, n_trials=10)

In [None]:
# Show the best trial found by the study (its objective value and the
# parameters that produced it); "best" means lowest here, since the study
# was created with direction="minimize".
study.best_trial