Copyright 2021-2023 Lawrence Livermore National Security, LLC and other MuyGPyS
Project Developers. See the top-level COPYRIGHT file for details.

SPDX-License-Identifier: MIT

# Optimization loop chassis test

This notebook walks through the `MuyGPyS` univariate regression tutorial using the experimental optimization loop chassis. The goal is to recover the response on the held-out test data by training a univariate `MuyGPS` model on the perturbed training data with `length_scale` and `measurement_noise` known, while the `nu` smoothness hyperparameter is to be learned.
1. Sample a curve from a conventional GP
2. Construct nearest neighbor lookups
3. Create a `MuyGPS` object with the same kernel form as the one used in `sampler`, to be used for training and inference
4. Call `optimize_from_tensors_mini_batch` to sample batches of data, construct tensors, and run Bayesian optimization using the numpy math backend

In [None]:
import numpy as np

from utils import UnivariateSampler, print_results

from MuyGPyS.gp import MuyGPS
from MuyGPyS.gp.distortion import IsotropicDistortion, l2
from MuyGPyS.gp.hyperparameter import ScalarHyperparameter
from MuyGPyS.gp.kernels import Matern
from MuyGPyS.gp.noise import HomoscedasticNoise
from MuyGPyS.gp.tensors import make_predict_tensors
from MuyGPyS.optimize.experimental.chassis import optimize_from_tensors_mini_batch
from MuyGPyS.neighbors import NN_Wrapper


# Seed NumPy's global generator first so the cell is repeatable on a fresh kernel.
np.random.seed(0)

# Arguments forwarded to `UnivariateSampler` below.
lb, ub = -10.0, 10.0
data_count, train_step = 5001, 10

# Noise terms and the hyperparameters that are treated as known.
nugget_noise = HomoscedasticNoise(1e-14)
measurement_noise = HomoscedasticNoise(1e-5)
sim_length_scale = ScalarHyperparameter(1.0)
sim_nu = ScalarHyperparameter(2.0)

# Kernel used to simulate the data: Matern with fixed `nu` and length scale.
sim_kernel = Matern(
    nu=sim_nu,
    metric=IsotropicDistortion(l2, length_scale=sim_length_scale),
)
sampler = UnivariateSampler(
    lb=lb,
    ub=ub,
    data_count=data_count,
    train_step=train_step,
    kernel=sim_kernel,
    eps=nugget_noise,
    measurement_eps=measurement_noise,
)
train_features, test_features = sampler.features()
train_responses, test_responses = sampler.sample()
# Optional plot of the sample:
# sampler.plot_sample()

# Nearest-neighbor lookup built over the training features only.
nn_count = 30
nbrs_lookup = NN_Wrapper(
    train_features,
    nn_count,
    nn_method="exact",
    algorithm="ball_tree",
)

# Model to be trained: same kernel form and known length scale as the sampler,
# but `nu` is the hyperparameter to be learned, with bounds (0.1, 5.0).
# NOTE(review): if "log_sample" draws its starting value from NumPy's global
# RNG, the position of this statement relative to the sampler calls affects
# reproducibility -- keep it after them.
exp_nu = ScalarHyperparameter("log_sample", (0.1, 5.0))
train_kernel = Matern(
    nu=exp_nu,
    metric=IsotropicDistortion(l2, length_scale=sim_length_scale),
)
muygps = MuyGPS(kernel=train_kernel, eps=measurement_noise)

Validate using all training data.

In [None]:
# Settings shared by both full-batch runs. Keeping them in one place means the
# two runs below can only differ in `loss_method`.
# A single epoch with `batch_count == train_count` uses every training point.
# NOTE(review): `random_state`, `init_points` and `n_iter` are presumably
# forwarded to the Bayesian optimizer -- confirm against the chassis signature.
full_batch_kwargs = dict(
    batch_count=sampler.train_count,
    train_count=sampler.train_count,
    num_epochs=1,
    batch_features=None,
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=True,
    random_state=1,
    init_points=5,
    n_iter=20,
)

# Run 1: optimize with the "mse" loss.
muygps_bayes_mse = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nbrs_lookup,
    loss_method="mse",
    **full_batch_kwargs,
)
print(
    f"\n`loss_method` 'mse' `nu` {muygps_bayes_mse.kernel.nu()} `sigma_sq` {muygps_bayes_mse.sigma_sq()}"
)

# Run 2: identical settings, but with the "lool" loss.
muygps_bayes_lool = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nbrs_lookup,
    loss_method="lool",
    **full_batch_kwargs,
)
print(
    f"\n`loss_method` 'lool' `nu` {muygps_bayes_lool.kernel.nu()} `sigma_sq` {muygps_bayes_lool.sigma_sq()}"
)

Test the optimization loop chassis using a sample batch size of 100.

In [None]:
# Mini-batch configuration: 100 samples per batch.
batch_count = 100
train_count = sampler.train_count
# int() truncates, so a trailing partial batch does not add an extra epoch.
num_epochs = int(sampler.train_count / batch_count)

# Settings shared by both mini-batch runs. Keeping them in one place means the
# two runs below can only differ in `loss_method`.
# NOTE(review): `random_state`, `init_points` and `n_iter` are presumably
# forwarded to the Bayesian optimizer -- confirm against the chassis signature.
mini_batch_kwargs = dict(
    batch_count=batch_count,
    train_count=train_count,
    num_epochs=num_epochs,
    batch_features=None,
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=True,
    random_state=1,
    init_points=5,
    n_iter=20,
)

# Run 1: optimize with the "mse" loss.
# This rebinds `muygps_bayes_mse` from the full-batch cell, so later cells see
# the mini-batch result.
muygps_bayes_mse = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nbrs_lookup,
    loss_method="mse",
    **mini_batch_kwargs,
)
print(
    f"\n`loss_method` 'mse' `nu` {muygps_bayes_mse.kernel.nu()} `sigma_sq` {muygps_bayes_mse.sigma_sq()}"
)

# Run 2: identical settings, but with the "lool" loss (rebinds `muygps_bayes_lool`).
muygps_bayes_lool = optimize_from_tensors_mini_batch(
    muygps,
    train_features,
    train_responses,
    nbrs_lookup,
    loss_method="lool",
    **mini_batch_kwargs,
)
print(
    f"\n`loss_method` 'lool' `nu` {muygps_bayes_lool.kernel.nu()} `sigma_sq` {muygps_bayes_lool.sigma_sq()}"
)

Predict the response of test data.
1. Find the indices of the nearest neighbors of test elements
2. Make distance difference and target tensors for test data
3. Create kernel tensors and find posterior means and variances associated with each test prediction
4. Evaluate prediction performance in terms of RMSE, mean diagonal posterior variance, the mean 95% confidence interval size, and the coverage, which ideally should be near 95%. 

In [None]:
# Number of held-out points; the unpacking also requires a 2-D feature matrix.
test_count, _ = test_features.shape
indices = np.arange(test_count)

# Step 1: neighbors of every test point, looked up among the training features.
# The second return value is not used.
test_nn_indices, _ = nbrs_lookup.get_nns(test_features)

# Step 2: difference and target tensors for the test batch.
test_tensors = make_predict_tensors(
    indices,
    test_nn_indices,
    test_features,
    train_features,
    train_responses,
)
test_crosswise_diffs, test_pairwise_diffs, test_nn_targets = test_tensors

# Step 3: kernel tensors, then posterior means and variances.
# `muygps_bayes_mse` is whichever model the most recently run optimization
# cell bound to that name.
bayes_Kcross = muygps_bayes_mse.kernel(test_crosswise_diffs)
bayes_K = muygps_bayes_mse.kernel(test_pairwise_diffs)
bayes_predictions = muygps_bayes_mse.posterior_mean(
    bayes_K,
    bayes_Kcross,
    test_nn_targets,
)
bayes_variances = muygps_bayes_mse.posterior_variance(bayes_K, bayes_Kcross)

# Step 4: 95% interval half-widths (1.96 standard deviations) and the fraction
# of test responses that fall inside them.
bayes_confidence_intervals = 1.96 * np.sqrt(bayes_variances)
abs_errors = np.abs(test_responses - bayes_predictions)
inside_interval = abs_errors < bayes_confidence_intervals
bayes_coverage = np.count_nonzero(inside_interval) / test_count

print_results(
    "bayes mse",
    test_responses,
    bayes_predictions,
    bayes_variances,
    bayes_confidence_intervals,
    bayes_coverage,
)
# Optional plot of the predictions:
# sampler.plot_results(bayes_predictions, bayes_confidence_intervals)
