# Step 1: Install dependencies

In [None]:
!pip install gym[box2d]
!pip install stable-baselines3[extra]
!pip install botorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Step 2: Import libraries

Libraries used for the Bayesian Optimization Loop

In [None]:
import os
import torch
import numpy as np
import plotly
import plotly.graph_objects as go

import gym

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env

import botorch
from botorch.utils.transforms import standardize, normalize, unnormalize

Libraries used to save checkpoints in GDrive

In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive at /content/gdrive; the checkpoint helpers below build their CSV paths under this mount point
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Step 3: Define objective function
The objective is a pessimistic estimate of a trained model's performance: the mean evaluation reward minus one standard deviation.

In [None]:
def get_hyp_values(hyperparams_tensor):
  '''
  Unpacks the first row of a hyperparameter tensor into a tuple of Python scalars

          Parameters:
                  hyperparams_tensor (torch.DoubleTensor): A tensor of size 1xn (1 row, n columns) with n being the number of hyperparameters to tune

          Returns:
                  hyperparams_tuple (tuple): A tuple with the n values stored in the first row of hyperparams_tensor

  '''
  # Only row 0 is read; .item() converts each element into a plain Python number
  return tuple(value.item() for value in hyperparams_tensor[0])


def create_model(hyp,
                 policy='MlpPolicy',
                 env_name='LunarLander-v2'):
  '''
  Builds an untrained PPO model on a newly created vectorized environment

          Parameters:
                  hyp (float): The value handed to PPO as learning_rate
                  policy (str): The policy identifier forwarded to PPO. Default is 'MlpPolicy'
                  env_name (str): The environment id forwarded to make_vec_env. Default is 'LunarLander-v2'

          Returns:
                  model: The PPO instance returned by the constructor (no training is done here)
  '''
  # Every PPO setting except the learning rate is held fixed
  fixed_settings = dict(n_steps=1024,
                        batch_size=64,
                        n_epochs=4,
                        gamma=0.999,
                        gae_lambda=0.98,
                        ent_coef=0.01,
                        verbose=0)

  # A single (n_envs=1) vectorized environment is created for training
  training_env = make_vec_env(env_name, n_envs=1)

  return PPO(policy=policy,
             env=training_env,
             learning_rate=hyp,
             **fixed_settings)


def train_model(model, timesteps=50000):
  '''
  Runs the learning routine of a model for a fixed budget of timesteps

          Parameters:
                  model: Any object exposing learn(total_timesteps=...); it is trained in place
                  timesteps (int): The budget passed to learn as total_timesteps. Default is 50000

          Returns:
                  None
  '''
  # Whatever learn() returns is ignored on purpose; callers keep using `model`
  model.learn(total_timesteps=timesteps)
  return None


def evaluate_model(model, 
                   rl_env_name='LunarLander-v2', 
                   n_eval_episodes=25):
  '''
  Evaluates the model for a number of episodes in a specified environment, this environment MUST be the same as the one the model has been trained in.

          Parameters:
                  model (stable_baselines3.ppo.ppo.PPO): The model to evaluate
                  rl_env_name (str): The name of the gym environment where the model has been trained
                  n_eval_episodes (int): The number of episodes for which the model will be evaluated to obtain a mean and standard deviation

          Returns:
                  lower_mean_reward (float): The mean reward minus one standard deviation of the reward (a scalar, not a tensor)
  '''
  eval_env = gym.make(rl_env_name)
  try:
    mean_reward, std_reward = evaluate_policy(model,
                                              eval_env,
                                              n_eval_episodes=n_eval_episodes,
                                              deterministic=True)
  finally:
    # A new environment is created on every call, so release it even if the evaluation fails
    eval_env.close()

  print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

  # Pessimistic score: penalize configurations whose reward varies a lot between episodes
  lower_mean_reward = mean_reward - std_reward
  return lower_mean_reward


def target_function(hyperparams, 
                    timesteps=50000,
                    rl_env_name='LunarLander-v2'):
  '''
  Objective function: builds, trains and scores a model for one hyperparameter value
          Parameters:
                  hyperparams (float): The value forwarded to create_model (used there as the learning rate)
                  timesteps (int): The number of timesteps forwarded to train_model
                  rl_env_name (str): The gym environment name used both to create the model and to evaluate it

          Returns:
                  lower_mean_reward: Whatever evaluate_model returns for the trained model
  '''
  # Build -> train -> score; the trained model is not kept afterwards
  agent = create_model(hyperparams, env_name=rl_env_name)
  train_model(agent, timesteps)
  return evaluate_model(agent, rl_env_name)


# Step 4: Define hyperparameters to tune
First define the bounds

In [None]:
# Value placed in hyperparams_list below.
# NOTE(review): 0.9 lies outside the [0.0001, 0.1] bounds declared below — confirm this is intended
lr = 0.9
# Define here the list of parameters to tune
hyperparams_list = [lr]
# Define the lower bounds of the parameters (one entry per element of hyperparams_list)
lower_bounds = [0.0001]
# Define the upper bounds of the parameters (one entry per element of hyperparams_list)
upper_bounds = [0.1]

Then convert lists to tensors

In [None]:
# Double-precision tensors for BoTorch: a single row holding the configuration, and a
# two-row tensor with the lower bounds in row 0 and the upper bounds in row 1
hyperparams_tensor = torch.tensor([hyperparams_list], dtype=torch.double)
bounds_tensor = torch.tensor([lower_bounds, upper_bounds], dtype=torch.double)

# Step 5: Define functions needed for the Bayesian Optimization Loop

In [None]:
def generate_initial_data(upper_bound, 
                          lower_bound, 
                          n=3):
  '''
  Samples n values uniformly from the hyperparameter's bounded space and evaluates them
          Parameters:
                upper_bound (float): The upper bound of the hyperparameter value
                lower_bound (float): The lower bound of the hyperparameter value
                n (int): The number of initial points to get. Default is 3
          
          Returns:
                train_x (torch.DoubleTensor): A tensor of size (n, 1) (n rows and 1 column) with the initial points
                exact_obj (torch.DoubleTensor): A tensor of size (n, 1) (n rows and 1 column) containing the evaluation of the model with the sampled hyperparameters values
                best_observed_value (float): The largest value in exact_obj
  '''
  # Scale uniform samples from [0, 1) into [lower_bound, upper_bound)
  train_x = torch.rand(n, 1, dtype=torch.double) * (upper_bound - lower_bound) + lower_bound

  # Evaluate one value at a time (each call trains a model). The dtype is forced to double so
  # the observations always match train_x instead of depending on the scalar type returned
  # by target_function
  exact_obj = torch.tensor([[target_function(float(hyp))] for hyp in train_x],
                           dtype=torch.double)

  # Get the best observed value
  best_observed_value = exact_obj.max().item()
  
  return train_x, exact_obj, best_observed_value

In [None]:
from botorch.acquisition.analytic import ExpectedImprovement
from botorch.optim import optimize_acqf
from botorch.utils.transforms import standardize, normalize, unnormalize
from botorch.models import SingleTaskGP
from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
from botorch import fit_gpytorch_model

def compute_acquisition_function(single_model, 
                                 best_init_y,
                                 l_bound=-2.,
                                 h_bound=10.,
                                 resolution=1000):
  
  '''
  Evaluates the Expected Improvement acquisition function on an evenly spaced grid of the bounded space
          Parameters:
                  single_model (botorch.models.gp_regression): A Gaussian Process regression model
                  best_init_y (float): The best lower_bound_reward obtained until the moment, used as best_f
                  l_bound (float): The lower bound of the hyperparameter value
                  h_bound (float): The upper bound of the hyperparameter value
                  resolution (int): The number of discretized points of the hyperparameter value bounded space

          Returns:
                  result_tensor (torch.Tensor): A tensor built from the {resolution} evaluations of the acquisition function, in grid order
  '''
  # Discretize the bounded hyperparameter value space
  grid = torch.linspace(l_bound, 
                        h_bound,
                        steps=resolution)

  # Compute our acquisition function
  EI = ExpectedImprovement(model=single_model, 
                           best_f=best_init_y, 
                           maximize=True)

  # Evaluate the acquisition function point by point; every grid value is wrapped
  # into a 1x1 tensor before being passed to EI
  result = [EI(torch.tensor([point]).unsqueeze(-1)) for point in grid]

  # Convert results list in a tensor
  result_tensor = torch.tensor(result)
  return result_tensor


def print_acquisition_function(acq_fun, 
                               iteration,
                               l_bound=-2.,
                               h_bound=10.,
                               resolution=1000, 
                               suggested=None):
  '''
  Plots the acquisition function given a series of evaluations previously computed
          Parameters:
                  acq_fun (torch.Tensor): A tensor with {resolution} evaluations of the acquisition function
                  iteration (int): The iteration number, shown in the plot title
                  l_bound (float): The lower bound of the x axis (a Python number or a 0-dim tensor)
                  h_bound (float): The upper bound of the x axis (a Python number or a 0-dim tensor)
                  resolution (int): The number of discretized points of the hyperparameter value bounded space
                  suggested: The suggested value of the hyperparameter; when given, suggested[0][0] is marked. Default is None

          Returns:
                  None
  '''

  # Discretize the hyperparameter value space; float() accepts both numbers and 0-dim tensors
  x = torch.linspace(float(l_bound), float(h_bound), steps=resolution).numpy()

  # Plot our data
  data = go.Scatter(x=x, y=acq_fun, line_color="yellow")

  # Axis
  fig = go.Figure(data=data)
  fig.update_layout(title="Expected Improvement acquisition function. Iteration " + str(iteration), xaxis_title="input", yaxis_title="output")

  # Mark the suggested point when there is one, otherwise the grid value that maximizes the
  # acquisition function. argmax yields a single index even when the maximum is tied (e.g. a
  # flat curve), so the vertical line always receives one scalar position.
  if suggested is None:
    marker_x = float(x[int(acq_fun.argmax())])
  else:
    marker_x = float(suggested[0][0])
  fig.add_vline(x=marker_x, line_width=3, line_color="red")
  fig.show()


def compute_predictive_distribution(single_model,
                                    best_init_y,
                                    l_bound=-2.,
                                    h_bound=10., 
                                    resolution=1000):
  '''
  Computes the posterior mean and variance of a Gaussian Process Regression model on an evenly spaced grid
          Parameters:
                  single_model (botorch.models.gp_regression): A Gaussian Process regression model
                  best_init_y (float): Not used by this function; kept so existing callers keep working
                  l_bound (float): The lower bound of the hyperparameter value
                  h_bound (float): The upper bound of the hyperparameter value
                  resolution (int): The number of discretized points of the hyperparameter value bounded space

          Returns:
                  means_tensor (torch.Tensor): A tensor built from the {resolution} posterior means, in grid order
                  variances_tensor (torch.Tensor): A tensor built from the {resolution} posterior variances, in grid order
  '''
  # Discretize the hyperparameter value bounded space
  grid = torch.linspace(l_bound, h_bound, steps=resolution)

  means = []
  variances = []

  # Query the posterior once per grid point and read both statistics from that single result
  for point in grid:
    posterior = single_model.posterior(torch.tensor([point]).unsqueeze(-1))
    means.append(posterior.mean)
    variances.append(posterior.variance)
  
  # Convert results into tensors
  means_tensor = torch.tensor(means)
  variances_tensor = torch.tensor(variances)

  return means_tensor, variances_tensor


def print_predictive_mean(predictive_mean, 
                          predictive_variance,
                          iteration, 
                          l_bound=-2.,
                          h_bound=10.,
                          resolution=1000,
                          suggested=None,
                          old_obs=[],
                          old_values=[]):
  '''
  Plots the function distribution obtained by the Gaussian Process: the predictive mean with a band of one standard deviation
          Parameters:
                  predictive_mean (torch.Tensor): A tensor with the {resolution} means of the discretized points
                  predictive_variance (torch.Tensor): A tensor with the {resolution} variances of the discretized points
                  iteration (int): The iteration number, shown in the plot title
                  l_bound (float): The lower bound of the x axis (a Python number or a 0-dim tensor)
                  h_bound (float): The upper bound of the x axis (a Python number or a 0-dim tensor)
                  resolution (int): The number of discretized points of the hyperparameter value bounded space
                  suggested: The suggested value of the hyperparameter; when given, suggested[0][0] is marked. Default is None
                  old_obs (list): Previously evaluated hyperparameter values, drawn as black markers (never modified here)
                  old_values (list): The objective values matching old_obs (never modified here)

          Returns:
                  None
  '''

  # Discretize the space; float() accepts both numbers and 0-dim tensors
  x = torch.linspace(float(l_bound), float(h_bound), steps=resolution).numpy()

  # The band is drawn at one standard deviation, i.e. the square root of the variance
  predictive_std = np.sqrt(predictive_variance)

  # Create figure
  fig = go.Figure()

  # Plot upper bound of the expected reward (predictive mean + standard deviation)
  fig.add_trace(go.Scatter(x=x, 
                           y=predictive_mean + predictive_std,
                           mode='lines',
                           line=dict(color="#19D3F3",width =0.1),
                           name='upper bound'))
  
  # Plot predictive mean of each point's expected reward
  fig.add_trace(go.Scatter(x=x, 
                           y=predictive_mean,
                           mode='lines',
                           line=dict(color="blue"),
                           fill='tonexty',
                           name='predictive mean'))
  
  # Plot lower bound of the expected reward (predictive mean - standard deviation)
  fig.add_trace(go.Scatter(x=x, 
                           y=predictive_mean - predictive_std,
                           mode='lines',
                           line=dict(color="blue", width =0.1),
                           fill='tonexty',
                           name='lower bound'))
  
  # Axis
  fig.update_layout(title="GP Predictive distribution. Iteration " + str(iteration), xaxis_title="Learning rate", yaxis_title="Expected reward", showlegend=False)

  # Mark the suggested point when there is one, otherwise the grid value with the highest
  # predictive mean. argmax yields a single index even when the maximum is tied, so the
  # vertical line always receives one scalar position.
  if suggested is None:
    marker_x = float(x[int(predictive_mean.argmax())])
  else:
    marker_x = float(suggested[0][0])
  fig.add_vline(x=marker_x, line_width=3, line_color="red")

  # Plot old values
  if len(old_obs) > 0:
    fig.add_trace(go.Scatter(x=old_obs, y=old_values, mode = 'markers', marker_color="black", marker_size=10))

  fig.show()


def visualize_functions(single_model,
                        best_init_y,
                        best_candidate,
                        candidate_acq_fun,
                        iteration,
                        previous_observations,
                        previous_values,
                        bounds,
                        best_candidate_normalized):
  '''
  Draws the GP predictive distribution and then the acquisition function for one iteration
          Parameters:
                  single_model (botorch.models.gp_regression): A Gaussian Process regression model
                  best_init_y (float): The best lower_bound_reward obtained until the moment
                  best_candidate: Accepted but not used by this function
                  candidate_acq_fun: The candidate marked with a vertical line in both plots
                  iteration (int): The iteration number of the optimization process, shown in the titles
                  previous_observations (list): Previously evaluated hyperparameter values, drawn on the predictive plot
                  previous_values (list): The objective values matching previous_observations
                  bounds (torch.Tensor): Bounds used for the x axis of both plots; bounds[0][0] is the lower limit and bounds[1][0] the upper limit
                  best_candidate_normalized: Accepted but not used by this function

          Returns:
                  None

  '''

  # The model is always queried on the [0, 1] interval, while the curves are drawn
  # against the limits taken from `bounds`
  mean_values, variance_values = compute_predictive_distribution(single_model,
                                                                 best_init_y,
                                                                 l_bound=0,
                                                                 h_bound=1)
  print_predictive_mean(mean_values,
                        variance_values,
                        iteration,
                        suggested=candidate_acq_fun,
                        old_obs=previous_observations,
                        old_values=previous_values,
                        l_bound=bounds[0][0],
                        h_bound=bounds[1][0])

  # Same scheme for the acquisition function: evaluate on [0, 1], plot against `bounds`
  acquisition_values = compute_acquisition_function(single_model,
                                                    best_init_y,
                                                    l_bound=0,
                                                    h_bound=1)
  print_acquisition_function(acquisition_values,
                             iteration,
                             suggested=candidate_acq_fun,
                             l_bound=bounds[0][0],
                             h_bound=bounds[1][0])
  

def get_next_points_and_visualize_norm(init_x,
                                       init_y, 
                                       best_init_y, 
                                       normalized_bounds, 
                                       iteration, 
                                       previous_observations,
                                       previous_values,
                                       bounds,
                                       n_points=1,
                                       visualize=False):
  '''
  Fits a Gaussian Process to the data, optimizes Expected Improvement to get the next point and optionally plots both
          Parameters:
                  init_x (torch.Tensor): A tensor of shape {iteration}x1 containing the previous hyperparameters
                  init_y (torch.Tensor): A tensor of shape {iteration}x1 containing the previous rewards of the models trained with init_x hyperparameters values
                  best_init_y (float): Best reward obtained until the moment
                  normalized_bounds (torch.Tensor): Normalized bounds of the hyperparameter values in the form of tensors of shape 2x1 (2 rows, 1 column), first row containing lower bound, second containing upper bound
                  iteration (int): The iteration number of the Bayesian Optimization process
                  previous_observations (list): A list with previous hyperparameter values
                  previous_values (list): A list with previous rewards obtained
                  bounds (torch.Tensor): Actual bounds of the hyperparameter values in the form of tensors of shape 2x1 (2 rows, 1 column), first row containing lower bound, second containing upper bound
                  n_points (int): Number of candidates to obtain for the next iteration. Default is 1
                  visualize (bool): If True, then visualize the GP. Default is False

          Returns:
                  candidates (torch.Tensor): The point(s) returned by the acquisition optimizer, searched within normalized_bounds
  '''
  # Create our model with the points
  single_model = SingleTaskGP(init_x, init_y)

  mll = ExactMarginalLogLikelihood(single_model.likelihood, single_model)
  fit_gpytorch_model(mll)

  # Instantiate the acquisition function given our model
  EI = ExpectedImprovement(model=single_model, best_f=best_init_y, maximize=True)
  
  # Optimize the acquisition function
  candidates, _ = optimize_acqf(acq_function=EI, 
                                bounds=normalized_bounds, 
                                q=n_points,
                                num_restarts=200,
                                raw_samples=512, 
                                options={"batch_limit": 5, "maxiter": 200})

  # The best-candidate lookup is only needed for plotting, so it is done only when visualizing;
  # it relies on an exact match between init_y and best_init_y and would otherwise be able to
  # fail without any plot being requested
  if visualize:
    best_rows = (init_y == best_init_y).nonzero(as_tuple=True)[0]

    # Best candidate as stored in init_x (first matching row, first column)
    best_candidate_normalized = init_x[best_rows][0][0]
    # Map it back with the ACTUAL bounds: unnormalizing with the normalized bounds leaves it unchanged
    best_candidate = unnormalize(best_candidate_normalized, bounds=bounds)

    visualize_functions(single_model, 
                        best_init_y,
                        best_candidate,
                        unnormalize(candidates, bounds=bounds),
                        iteration, previous_observations,
                        previous_values, 
                        bounds, 
                        best_candidate_normalized)

  return candidates

# Step 6: Set experiments' configuration

First let us define functions to save and load checkpoints

In [None]:
def create_experiment_df():
  '''
  Builds the empty table used to record the experiment history
          Parameters:

          Returns:
                  experiment_df (pandas.DataFrame): A Dataframe with no rows and the seven history columns listed below
  '''
  # One column per value recorded at every iteration
  return pd.DataFrame(columns=["method",
                               "experiment",
                               "iteration",
                               "learning_rate",
                               "reward_lower_bound",
                               "best_learning_rate",
                               "best_reward_lower_bound"])


def get_filepath(experiment_name):
  '''
  Builds the location of the csv file that stores an experiment's history.
          Parameters:
                  experiment_name (string): The name of the experiment (without extension)

          Returns:
                  filepath (string): "<experiment_name>.csv" inside the "My Drive" folder under /content/gdrive
  '''
  return "/content/gdrive/My Drive/{}.csv".format(experiment_name)


def update_experiment_history(method, 
                              experiment_number, 
                              iteration,
                              hyp,
                              reward_lower_bound,
                              best_hyp,
                              best_reward_lower_bound,
                              experiment_df,
                              experiment_name):
  '''
  Appends one iteration to the experiment history and saves the result through save_checkpoint
          Parameters:
                  method (int): 0 if Bayesian Optimization, 1 if Random Search
                  experiment_number (int): The id of the experiment
                  iteration (int): The current iteration of the experiment
                  hyp (float): The hyperparameter value selected for this iteration
                  reward_lower_bound (float): The reward's lower bound obtained by the model trained this iteration
                  best_hyp (float): The hyperparameter value that has induced the best reward lower bound
                  best_reward_lower_bound (float): The best reward lower bound obtained until this iteration
                  experiment_df (pandas.DataFrame): The history so far; a new dataframe is built instead of changing this one
                  experiment_name (string): The name of the experiment

          Returns:
                  concatenated_df (pandas.DataFrame): The history with the new row appended and the index renumbered
  '''
  # (column name, value) pairs describing this iteration; their order fixes the column order
  record = (("method", method),
            ("experiment", experiment_number),
            ("iteration", iteration),
            ("learning_rate", hyp),
            ("reward_lower_bound", reward_lower_bound),
            ("best_learning_rate", best_hyp),
            ("best_reward_lower_bound", best_reward_lower_bound))
  column_names = [name for name, _ in record]
  row_values = [value for _, value in record]

  # Single-row dataframe for this iteration
  row_df = pd.DataFrame([row_values], columns=column_names)

  # History followed by the new row
  updated_history = pd.concat([experiment_df, row_df], ignore_index=True)

  # Persist after every iteration so an interrupted run can be resumed
  save_checkpoint(updated_history, experiment_name)

  return updated_history


def save_checkpoint(experiment_df,
                    experiment_name):
  '''
  Writes the experiments history dataframe as a csv file at the path given by get_filepath
          Parameters:
                  experiment_df (pandas.DataFrame): The dataframe containing the experiments history
                  experiment_name (string): The name of the experiment, used to build the file path
          
          Returns:
                  None
  '''
  # index=False: the row index is not written to the file
  experiment_df.to_csv(get_filepath(experiment_name), index=False)


def load_checkpoint(experiment_name,
                    experiment_results,
                    experiment_configurations):
  '''
  Loads a checkpoint of an experiment given its name and works out where to resume from

          Parameters:
                  experiment_name (string): The name of the experiment
                  experiment_results (numpy.array): A numpy array indexed as [method][experiment][iteration]; filled IN PLACE with the best reward lower bound of every stored row
                  experiment_configurations (numpy.array): A numpy array indexed as [method][experiment][iteration]; filled IN PLACE with the best learning rate of every stored row

          Returns:
                  experiment_df (pandas.DataFrame): A dataframe with the experiment history
                  last_bo_experiment (int): The Bayesian Optimization experiment to resume from
                  last_rs_experiment (int): The Random Search experiment to resume from
                  last_bo_iteration (int): The Bayesian Optimization iteration to resume from
                  last_rs_iteration (int): The Random Search iteration to resume from
                  init_x (torch.DoubleTensor): One row per stored Bayesian Optimization learning rate
                  init_y (torch.DoubleTensor): One row per stored Bayesian Optimization reward lower bound
                  best_init_y (float): The largest value in init_y
                  best_rs_hyp (float): The best learning rate in the last Random Search row (0 if there is none)
                  best_rs_r (float): The best reward lower bound in the last Random Search row (0 if there is none)
                  bo_done (bool): True when the last stored row belongs to Random Search

          Raises:
                  IndexError: If the stored history has no rows (there is no last row to resume from)
  '''
  # First we retrieve the dataframe from GDrive
  filepath = get_filepath(experiment_name)
  experiment_df = pd.read_csv(filepath)

  # Replay every stored row into the history arrays that are later used to compare the methods
  for _, row in experiment_df.iterrows():
    # Unpack the columns (positional, in the order the file stores them)
    method, exp, iteration, hyp, rlb, best_hyp, best_rlb = row.values
    # Add them to the experiments arrays
    experiment_results[int(method)][int(exp)][int(iteration)] = best_rlb
    experiment_configurations[int(method)][int(exp)][int(iteration)] = best_hyp

  # The last stored row tells which method was running and how far it got
  last_row = experiment_df.iloc[-1]
  method, exp, iteration, hyp, rlb, best_hyp, best_rlb = last_row
  print(last_row)
  if method == 0:
    # Stopped during Bayesian Optimization: Random Search has not started yet
    bo_done = False
    last_bo_experiment = int(exp)
    last_rs_experiment = 0

    # Plus one because we want to start in the next one
    last_bo_iteration = int(iteration)+1
    last_rs_iteration = 1

  else:
    # Stopped during Random Search: Bayesian Optimization is reported as finished, pointing at
    # the last experiment and iteration slots of the arrays
    bo_done = True
    last_bo_experiment = experiment_configurations.shape[1]-1
    last_rs_experiment = int(exp)

    last_bo_iteration = experiment_configurations.shape[2]-1
    # Plus one because we want to start in the next one
    last_rs_iteration = int(iteration)+1

  # Now lets get the initial data (method 0 = Bayesian Optimization).
  # NOTE(review): this takes the rows of EVERY Bayesian Optimization experiment, not only the
  # one being resumed — confirm that is intended
  bo_experiment_df = experiment_df[experiment_df["method"]==0]
  init_x = torch.DoubleTensor([[float(lr)] for lr in bo_experiment_df.learning_rate.values])
  init_y = torch.DoubleTensor([[float(reward)] for reward in bo_experiment_df.reward_lower_bound.values])
  best_init_y = init_y.max().item()

  # Random Search rows are stored with method 1; filtering on 0 here would return the
  # Bayesian Optimization best as if it were the Random Search best
  rs_experiment_df = experiment_df[experiment_df["method"]==1]
  if rs_experiment_df.empty:
    best_rs_hyp = 0
    best_rs_r = 0
  else:
    best_rs_hyp = rs_experiment_df.iloc[-1]["best_learning_rate"]
    best_rs_r = rs_experiment_df.iloc[-1]["best_reward_lower_bound"]
             
  return experiment_df, last_bo_experiment, last_rs_experiment, last_bo_iteration, last_rs_iteration, init_x, init_y, best_init_y, best_rs_hyp, best_rs_r, bo_done

Now let's set the configuration for the experiments

In [None]:
import numpy as np

# --- Persistence -----------------------------------------------------------
# Base name (no extension) of the file that stores the experiment history
experiment_name = "lunar_lander_learning_rate_low_fidelity"
# When True, the saved experiment history is read back and the run resumes from it
continue_from_checkpoint = True

# --- Experiment sizes ------------------------------------------------------
# How many experiments are run for each method
n_experiments = 25
# Iterations per experiment, not counting the initial random point (iteration 0)
n_iterations = 30
# How many optimization methods are compared
n_methods = 2

# --- Method indices (first axis of the history arrays) ---------------------
bo_method, rs_method = 0, 1  # Bayesian Optimization, Random Search

# --- History arrays --------------------------------------------------------
# Indexed as [method, experiment, iteration]; iteration 0 is the initial point,
# hence the extra slot on the last axis.
experiment_results = np.zeros((n_methods, n_experiments, n_iterations+1))
experiment_configurations = np.zeros_like(experiment_results)

# --- Checkpoint handling ---------------------------------------------------
if not continue_from_checkpoint:
  # Fresh run: start from an empty history and persist it right away
  experiment_df = create_experiment_df()
  save_checkpoint(experiment_df, experiment_name)
else:
  # Resume: load_checkpoint also back-fills the two history arrays from the saved rows
  (experiment_df,
   last_bo_experiment,
   last_rs_experiment,
   last_bo_iteration,
   last_rs_iteration,
   init_x,
   init_y,
   best_init_y,
   best_observed_candidate_rs,
   best_observed_result_rs,
   bo_done) = load_checkpoint(experiment_name,
                              experiment_results,
                              experiment_configurations)

  
  

method                       1.000000
experiment                  20.000000
iteration                    5.000000
learning_rate                0.025019
reward_lower_bound        -164.603528
best_learning_rate           0.088802
best_reward_lower_bound   -155.261101
Name: 1400, dtype: float64


In [None]:
# Run (or resume) the Bayesian Optimization (BO) experiments.
#
# For every experiment: sample one initial learning rate, then repeatedly ask
# BOTorch for the next candidate, evaluate it with target_function, and record
# both the evaluated point and the best point seen so far.
if continue_from_checkpoint:
  # Resume right after the last BO row stored in the checkpoint
  init_experiment = last_bo_experiment
  init_iteration = last_bo_iteration
else:
  init_experiment = 0
  init_iteration = 1
  bo_done = False

if not bo_done:
  for e in range(init_experiment, n_experiments):
    print(f"EXPERIMENT {e}")

    # True only for the experiment that is being resumed from the checkpoint;
    # its initial data and iteration-0 history row already exist.
    resuming_experiment = continue_from_checkpoint and init_experiment == e

    if not resuming_experiment:
      # Sample initial hyperparameter values and evaluate the models obtained with them
      init_x, init_y, best_init_y = generate_initial_data(upper_bounds[0],
                                                          lower_bounds[0],
                                                          1)

    # We normalize the bounds of the hyperparameters as BOTorch assumes this
    normalized_bounds = torch.tensor([[0.0], [1.0]])

    # Normalize the hyperparameter as BOTorch assumes this
    init_x_normalized = normalize(init_x,
                                  bounds=bounds_tensor)

    # Standardize the objective as BOTorch assumes this
    init_y_standardized = standardize(init_y)

    # Obtain the best result among the initial random experiments
    best_init_y_standardized = init_y_standardized.max().item()

    # Per-experiment traces of the normalized candidates / standardized results.
    # NOTE(review): neither list is read anywhere in the visible cells.
    candidates=[]
    results=[]

    best_observed_result_bo = best_init_y
    # Take the learning rate that produced the best reward rather than blindly
    # using row 0: when resuming, init_x holds more than one evaluated point.
    # With a single initial point this is still row 0.
    best_observed_candidate_bo = init_x[init_y.argmax().item()][0].item()

    if not resuming_experiment:
      experiment_df = update_experiment_history(bo_method,
                                                e,
                                                0,
                                                best_observed_candidate_bo, # The learning rate evaluated at iteration 0
                                                best_observed_result_bo, # The reward lower bound of the model
                                                best_observed_candidate_bo, # The best learning rate so far
                                                best_observed_result_bo, # The best reward lower bound so far
                                                experiment_df,
                                                experiment_name)

      # Store the initial point under the BO method index (it used to be
      # written into the Random Search slice, leaving the BO slot at 0).
      experiment_configurations[bo_method,e,0] = best_observed_candidate_bo
      experiment_results[bo_method,e,0] = best_observed_result_bo

    for i in range(init_iteration, n_iterations+1):
      print(f"Number of iterations done: {i}")
      # Get the next points given our actual queries
      normalized_new_candidates = get_next_points_and_visualize_norm(init_x_normalized,
                                                                    init_y_standardized,
                                                                    best_init_y_standardized,
                                                                    normalized_bounds,
                                                                    i,
                                                                    init_x,
                                                                    init_y,
                                                                    bounds_tensor,
                                                                    1,
                                                                    True)

      # Unnormalize the candidate hyperparameter value
      new_candidates = unnormalize(normalized_new_candidates,
                                  bounds=bounds_tensor)

      # Compute the performance of the model. Use the dtype of the existing
      # history so the new reward is not rounded to float32 before being stored.
      new_results = torch.tensor([[target_function(float(new_candidates))]],
                                 dtype=init_y.dtype)


      print(f"New candidates are: {new_candidates}")
      # Update our hyperparameters and rewards history
      init_x = torch.cat([init_x, new_candidates])
      init_y = torch.cat([init_y, new_results])

      # Normalize our updated hyperparameters and rewards history
      init_x_normalized = normalize(init_x, bounds=bounds_tensor)
      init_y_standardized = standardize(init_y)

      # Update the best reward
      best_init_y = init_y.max().item()
      best_init_y_standardized = init_y_standardized.max().item()

      print(f"Best point performs this way: {best_init_y}")
      candidates.append(float(normalized_new_candidates[0][0]))
      # NOTE(review): this standardizes a single scalar on its own, not against
      # the history; confirm that is what is wanted if `results` is ever used.
      results.append(float(standardize(new_results[0][0])))

      if best_observed_result_bo < new_results[0][0]:
        best_observed_result_bo = new_results[0][0].item()
        best_observed_candidate_bo = new_candidates[0][0].item()

      experiment_df = update_experiment_history(bo_method,
                                                e,
                                                i,
                                                new_candidates[0][0].item(), # The learning rate selected for this iteration
                                                new_results[0][0].item(), # The reward lower bound of the model
                                                best_observed_candidate_bo, # The best learning rate so far
                                                best_observed_result_bo, # The best reward lower bound so far
                                                experiment_df,
                                                experiment_name)

      experiment_configurations[bo_method,e,i] = best_observed_candidate_bo
      experiment_results[bo_method,e,i] = best_observed_result_bo
      print('----------------------')
    # Only the resumed experiment starts mid-way; later ones start at iteration 1
    init_iteration = 1

First we try the Bayesian optimization method.

Now we perform a random search

In [None]:
# Run (or resume) the Random Search (RS) experiments.
#
# For every experiment: draw a uniform random learning rate inside the bounds,
# evaluate it with target_function, and record both the evaluated point and the
# best point seen so far.
if continue_from_checkpoint:
  # Resume right after the last RS row stored in the checkpoint
  init_experiment = last_rs_experiment
  init_iteration = last_rs_iteration
else:
  init_experiment = 0
  init_iteration = 1

for e in range(init_experiment, n_experiments):
  # Draw the initial point unless this is the experiment being resumed; the
  # second clause covers a checkpoint that holds no RS progress yet.
  if not (continue_from_checkpoint and init_experiment == e) or (init_experiment==0 and init_iteration==1):
    # Initiate with a random value
    random_value = np.random.random() * (upper_bounds[0] - lower_bounds[0]) + lower_bounds[0]
    best_observed_result_rs = target_function(random_value)
    best_observed_candidate_rs = random_value
    # Update our experiments histories
    experiment_df = update_experiment_history(rs_method,
                                              e,
                                              0,
                                              random_value, # The learning rate evaluated at iteration 0
                                              best_observed_result_rs, # The reward lower bound of the model
                                              best_observed_candidate_rs, # The best learning rate so far
                                              best_observed_result_rs, # The best reward lower bound so far
                                              experiment_df,
                                              experiment_name)
    # Also keep iteration 0 in the in-memory history arrays, so that the
    # comparison and plots see the RS initial point instead of a stale value.
    experiment_configurations[rs_method,e,0] = best_observed_candidate_rs
    experiment_results[rs_method,e,0] = best_observed_result_rs

  # Iterate with random search
  for i in range(init_iteration, n_iterations+1):
    # Get a new random value for the hyperparameter
    random_value = np.random.random() * (upper_bounds[0] - lower_bounds[0]) + lower_bounds[0]
    # Evaluate the model with that hyperparameter value
    rs_obj_fun_result = target_function(random_value)

    # Update best reward and candidate found if necessary
    if best_observed_result_rs < rs_obj_fun_result:
      best_observed_result_rs = rs_obj_fun_result
      best_observed_candidate_rs = random_value

    # Update our experiments histories
    experiment_df = update_experiment_history(rs_method,
                                              e,
                                              i,
                                              random_value, # The learning rate selected for this iteration
                                              rs_obj_fun_result, # The reward lower bound of the model
                                              best_observed_candidate_rs, # The best learning rate so far
                                              best_observed_result_rs, # The best reward lower bound so far
                                              experiment_df,
                                              experiment_name)
    experiment_configurations[rs_method,e,i] = best_observed_candidate_rs
    experiment_results[rs_method,e,i] = best_observed_result_rs

  # Only the resumed experiment starts mid-way; later ones start at iteration 1
  init_iteration = 1



mean_reward=-843.88 +/- 508.62075444661485
mean_reward=-167.75 +/- 59.85503076215683
mean_reward=-120.14 +/- 39.69622483358638
mean_reward=-1068.58 +/- 820.1311088731918
mean_reward=-125.12 +/- 42.949875755516146
mean_reward=-787.78 +/- 337.4077136558841
mean_reward=-591.53 +/- 181.0639050705153
mean_reward=-1098.18 +/- 1028.733192005623
mean_reward=-132.99 +/- 42.25080277017848
mean_reward=-539.93 +/- 140.42339540091606
mean_reward=-131.43 +/- 48.08618706253258
mean_reward=-583.92 +/- 191.0832426225525
mean_reward=-130.73 +/- 34.79257047043687
mean_reward=-592.46 +/- 196.213264137458
mean_reward=-742.77 +/- 490.79419712205345
mean_reward=-122.78 +/- 343.36633717567224
mean_reward=-770.10 +/- 612.6156667569886
mean_reward=-564.37 +/- 150.98958139531538
mean_reward=-133.11 +/- 33.280346199028195
mean_reward=-129.94 +/- 47.72973084465294
mean_reward=-572.94 +/- 163.84654836958197
mean_reward=-131.82 +/- 39.95349666017372
mean_reward=-695.77 +/- 357.94720098263423
mean_reward=-129.38 +/- 

# Step 7: Compare the results

First we give the recommendation as the best observed result

In [None]:
# Highest value stored anywhere in the [method, experiment, iteration] history
best_observed_result = experiment_results.max()
# Positions of every entry equal to that maximum, as one index array per axis
index_set = np.nonzero(experiment_results == best_observed_result)

# print() applies str() to each argument; sep="" reproduces plain concatenation
print("The best observed result is: ", best_observed_result, sep="")
print("The best observed result belong to the : ",
      index_set[0][0],  # method index of the first match
      " method. Its value is ",
      experiment_configurations[index_set][0],  # configuration stored at the first match
      sep="")

The best observed result is: 77.08980140098915
The best observed result belong to the : 1 method. Its value is 0.0132395695725988


And now we plot the results to compare both methods

In [None]:
# Compare both methods: mean best-so-far reward per iteration across
# experiments, with a shaded band around each mean.
mean_bo = np.mean(experiment_results[0,:,:], axis=0)
mean_rs = np.mean(experiment_results[1,:,:], axis=0)
# The band is +/- 0.25 standard deviations across experiments
std_bo = np.std(experiment_results[0,:,:], axis=0) * 0.25
std_rs = np.std(experiment_results[1,:,:], axis=0) * 0.25

# One x value per stored iteration, starting at iteration 0 (the initial random
# point). The history arrays hold n_iterations+1 entries per experiment, so the
# previous 1..n_iterations axis was one point short and shifted the curves.
x = np.arange(mean_bo.shape[0])

# Trace order matters: fill='tonexty' shades towards the trace added just before
bo_ub_results = go.Scatter(x=x, y=mean_bo + std_bo, mode='lines', name="", line_color="green", line_width=0.1)
bo_results = go.Scatter(x=x, y=mean_bo, mode='lines', fill='tonexty', line_color="green", name="Bayesian Optimization")
bo_lb_results = go.Scatter(x=x, y=mean_bo - std_bo, mode='lines', fill='tonexty', name="", line_color="green", line_width=0.1)

rs_ub_results = go.Scatter(x=x, y=mean_rs + std_rs, mode='lines', name="", line_color="red", line_width=0.1)
rs_results = go.Scatter(x=x, y=mean_rs, mode='lines', fill='tonexty', line_color="red", name="Random Search")
rs_lb_results = go.Scatter(x=x, y=mean_rs - std_rs, mode='lines', fill='tonexty', name="", line_color="red", line_width=0.1)

fig = go.Figure()
fig.add_trace(bo_ub_results)
fig.add_trace(bo_results)
fig.add_trace(bo_lb_results)
fig.add_trace(rs_ub_results)
fig.add_trace(rs_results)
fig.add_trace(rs_lb_results)
fig.update_layout(title="Performance comparison between BO and RS", xaxis_title="Iterations", yaxis_title="Reward lower bound")
fig.show()