Copyright 2021-2023 Lawrence Livermore National Security, LLC and other MuyGPyS
Project Developers. See the top-level COPYRIGHT file for details.

SPDX-License-Identifier: MIT

# Anisotropic tutorial with optimization loop chassis

This notebook walks through four experimental treatments using the optimization loop chassis and the 2D Univariate Sampler used in the [Anisotropic Metric Tutorial](../docs/examples/anisotropic_tutorial.ipynb). The goal is to recover the response on the held-out test data by training a simple anisotropic `MuyGPS` model on the perturbed training data, with the `nu` smoothness hyperparameter known and the two `distance scaling` hyperparameters to be learned. In each iteration of the optimization loop, we scale the training features using the learned length scale hyperparameters, update the nearest neighbors lookup using sklearn, update the objective function, and run Bayesian optimization using the numpy math backend. We start by:
1. Sampling a 2D surface from a conventional GP
2. Creating a duplicate of the `MuyGPS` object used in `sampler`, to be used for training and inference
3. Executing four treatments using `optimize_from_tensors_mini_batch`

In [None]:
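# Optional: uncomment to drop any already-imported MuyGPyS modules and set the
# backend environment variables before the imports below run.
# import sys
# for m in list(sys.modules):  # iterate over a copy; popping from the live dict while iterating raises RuntimeError
#     if m.startswith("Muy"):
#         sys.modules.pop(m)
# %env MUYGPYS_BACKEND=numpy
# %env MUYGPYS_FTYPE=64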


In [None]:
import numpy as np

from docs.examples.utils import UnivariateSampler2D

from MuyGPyS.gp import MuyGPS
from MuyGPyS.gp.distortion import AnisotropicDistortion, l2
from MuyGPyS.gp.hyperparameter import ScalarHyperparameter
from MuyGPyS.gp.kernels import Matern
from MuyGPyS.gp.noise import HomoscedasticNoise
from MuyGPyS.optimize.experimental.chassis import optimize_from_tensors_mini_batch


# Seed numpy's global RNG before anything is sampled (presumably the source of
# randomness for the sampler and the "log_sample" draws below - TODO confirm).
np.random.seed(0)

# Observations per dimension
points_per_dim = 60
# Train/test data split
train_step = 13
# Assume no noise in truth
nugget_noise = HomoscedasticNoise(1e-14)
# Noise to perturb train
measurement_noise = HomoscedasticNoise(1e-7)
# HP smoothness
sim_nu = ScalarHyperparameter(1.5)
# HP distance scaling dim 0
sim_length_scale0 = ScalarHyperparameter(0.1)
# HP distance scaling dim 1
sim_length_scale1 = ScalarHyperparameter(0.5)

# Ground-truth anisotropic Matern kernel handed to the sampler.
sim_metric = AnisotropicDistortion(
    l2,
    length_scale0=sim_length_scale0,
    length_scale1=sim_length_scale1,
)
sim_kernel = Matern(nu=sim_nu, metric=sim_metric)
sampler = UnivariateSampler2D(
    points_per_dim=points_per_dim,
    train_step=train_step,
    kernel=sim_kernel,
    eps=nugget_noise,
    measurement_eps=measurement_noise,
)
train_features, test_features = sampler.features()
train_responses, test_responses = sampler.sample()
# TODO sampler.plot_sample()

# Model to be trained: same kernel family and known `nu`, but the two length
# scales are created with the "log_sample" setting over (0.01, 1.0) instead of
# the simulation values.
exp_length_scale0 = ScalarHyperparameter("log_sample", (0.01, 1.0))
exp_length_scale1 = ScalarHyperparameter("log_sample", (0.01, 1.0))
exp_metric = AnisotropicDistortion(
    l2,
    length_scale0=exp_length_scale0,
    length_scale1=exp_length_scale1,
)
exp_kernel = Matern(nu=sim_nu, metric=exp_metric)
muygps = MuyGPS(kernel=exp_kernel, eps=measurement_noise)


Treatment #1 (baseline) - brute force workflow following the [Anisotropic Metric Tutorial](../docs/examples/anisotropic_tutorial.ipynb), using a single optimization step with a single initial probe point

In [None]:
# Baseline: the batch covers the entire training set, so the epoch count
# computed below is exactly 1.
train_count = sampler.train_count
batch_count = sampler.train_count
# Floor division keeps the epoch count in exact integer arithmetic.
num_epochs = train_count // batch_count
print(f"train size {train_count}, batch size {batch_count}, num epochs {num_epochs}")
muygps_optloop = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nn_count=30,
    batch_count=batch_count,
    train_count=train_count,
    num_epochs=num_epochs,
    # NOTE(review): keep_state/probe_previous presumably only matter across
    # epochs, so they should be moot for this single-epoch run - confirm.
    keep_state=False,
    probe_previous=False,
    batch_features=None,
    loss_method="lool",
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=False,
    # NOTE(review): the remaining arguments look like settings forwarded to
    # the Bayesian optimizer - confirm against the chassis.
    random_state=1,
    init_points=5,
    n_iter=20,
    allow_duplicate_points=True,
)
# Look up the learned length-scale mapping once instead of repeating the
# attribute chain for every printed value.
length_scales = muygps_optloop.kernel.distortion_fn.length_scale
print("BayesianOptimization finds an optimal:")
print(f"\tlength_scale0 is {length_scales['length_scale0']()}")
print(f"\tlength_scale1 is {length_scales['length_scale1']()}")
print(f"sigma_sq is {muygps_optloop.sigma_sq()[0]}")


Treatment #2a (opt loop) - each loop iteration creates a new optimizer instance, initializes it with a new objective function, and does not probe previous points

In [None]:
# Mini-batch run: the epoch count is the number of whole batches of
# `batch_count` points that fit in the training set.
train_count = sampler.train_count
batch_count = 55
# Floor division keeps the epoch count in exact integer arithmetic.
num_epochs = train_count // batch_count
print(f"train size {train_count}, batch size {batch_count}, num epochs {num_epochs}")
muygps_optloop = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nn_count=30,
    batch_count=batch_count,
    train_count=train_count,
    num_epochs=num_epochs,
    # This treatment: a new optimizer instance per loop iteration, and
    # previously probed points are not probed again.
    keep_state=False,
    probe_previous=False,
    batch_features=None,
    loss_method="lool",
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=False,
    # NOTE(review): the remaining arguments look like settings forwarded to
    # the Bayesian optimizer - confirm against the chassis.
    random_state=1,
    init_points=5,
    n_iter=20,
    allow_duplicate_points=True,
)
# Look up the learned length-scale mapping once instead of repeating the
# attribute chain for every printed value.
length_scales = muygps_optloop.kernel.distortion_fn.length_scale
print("BayesianOptimization finds an optimal:")
print(f"\tlength_scale0 is {length_scales['length_scale0']()}")
print(f"\tlength_scale1 is {length_scales['length_scale1']()}")
print(f"sigma_sq is {muygps_optloop.sigma_sq()[0]}")


Treatment #2b (opt loop) - each loop iteration creates a new optimizer instance, initializes it with a new objective function, and probes previous points

In [None]:
# Mini-batch run: the epoch count is the number of whole batches of
# `batch_count` points that fit in the training set.
batch_count = 55
train_count = sampler.train_count
# Floor division keeps the epoch count in exact integer arithmetic.
num_epochs = train_count // batch_count
print(f"batch size {batch_count}, train size {train_count}, num epochs {num_epochs}")
muygps_optloop = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nn_count=30,
    batch_count=batch_count,
    train_count=train_count,
    num_epochs=num_epochs,
    # This treatment: a new optimizer instance per loop iteration, but
    # previously probed points are probed again.
    keep_state=False,
    probe_previous=True,
    batch_features=None,
    loss_method="lool",
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=False,
    # NOTE(review): the remaining arguments look like settings forwarded to
    # the Bayesian optimizer - confirm against the chassis.
    random_state=1,
    init_points=5,
    n_iter=20,
    allow_duplicate_points=True,
)
# Look up the learned length-scale mapping once instead of repeating the
# attribute chain for every printed value.
length_scales = muygps_optloop.kernel.distortion_fn.length_scale
print("BayesianOptimization finds an optimal:")
print(f"\tlength_scale0 is {length_scales['length_scale0']()}")
print(f"\tlength_scale1 is {length_scales['length_scale1']()}")
print(f"sigma_sq is {muygps_optloop.sigma_sq()[0]}")


Treatment #2c (opt loop) - each loop iteration reuses the optimizer instance, sets `BayesianOptimization.TargetSpace.target_function` to the new objective function, and does not probe previous points

In [None]:
# Full-batch run repeated for a fixed number of epochs: every epoch uses a
# batch as large as the whole training set.
batch_count = sampler.train_count
train_count = sampler.train_count
num_epochs = 5
print(f"batch size {batch_count}, train size {train_count}, num epochs {num_epochs}")
muygps_optloop = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nn_count=30,
    batch_count=batch_count,
    train_count=train_count,
    num_epochs=num_epochs,
    # This treatment: the optimizer instance is reused across loop
    # iterations, and previously probed points are not probed again.
    keep_state=True,
    probe_previous=False,
    batch_features=None,
    loss_method="lool",
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=False,
    # NOTE(review): the remaining arguments look like settings forwarded to
    # the Bayesian optimizer - confirm against the chassis.
    random_state=1,
    init_points=5,
    n_iter=20,
    allow_duplicate_points=True,
)
# Look up the learned length-scale mapping once instead of repeating the
# attribute chain for every printed value.
length_scales = muygps_optloop.kernel.distortion_fn.length_scale
print("BayesianOptimization finds an optimal:")
print(f"\tlength_scale0 is {length_scales['length_scale0']()}")
print(f"\tlength_scale1 is {length_scales['length_scale1']()}")
print(f"sigma_sq is {muygps_optloop.sigma_sq()[0]}")
