Copyright 2021-2023 Lawrence Livermore National Security, LLC and other MuyGPyS
Project Developers. See the top-level COPYRIGHT file for details.

SPDX-License-Identifier: MIT

# Univariate regression with optimization loop chassis

This notebook walks through the [Univariate Regression Tutorial](../docs/examples/univariate_regression_tutorial.ipynb) using the experimental optimization loop chassis. The goal is to recover the response on the held-out test data by training a univariate `MuyGPS` model on the perturbed training data with `length_scale` and `measurement_noise` known, while the `nu` smoothness hyperparameter is to be learned.
1. Sample a curve from a conventional GP
2. Construct nearest neighbor lookups
3. Create a duplicate of the `MuyGPS` object used in `sampler`, to be used for training and inference
4. Call `optimize_from_tensors_mini_batch` to sample batches of data, construct tensors, and run Bayesian optimization using the numpy math backend

In [None]:
import sys

# Evict every already-imported module whose name starts with "Muy" from the
# module cache (presumably so the `%env` settings below apply on re-import).
# Iterate over a snapshot of the keys: popping entries while iterating the
# live `sys.modules` view raises
# "RuntimeError: dictionary changed size during iteration".
for m in list(sys.modules):
    if m.startswith("Muy"):
        sys.modules.pop(m)
# IPython `%env` magics: set MUYGPYS_BACKEND to numpy and MUYGPYS_FTYPE to 64
# (presumably 64-bit floats).
# NOTE(review): these likely need to be set before MuyGPyS is imported — confirm.
%env MUYGPYS_BACKEND=numpy
%env MUYGPYS_FTYPE=64

In [None]:
import numpy as np

from docs.examples.utils import UnivariateSampler

from MuyGPyS.gp import MuyGPS
from MuyGPyS.gp.distortion import IsotropicDistortion, l2
from MuyGPyS.gp.hyperparameter import ScalarHyperparameter
from MuyGPyS.gp.kernels import Matern
from MuyGPyS.gp.noise import HomoscedasticNoise
from MuyGPyS.optimize.experimental.chassis import optimize_from_tensors_mini_batch


# Seed NumPy's global RNG before any sampling takes place.
np.random.seed(0)

# Simulation settings: problem size, noise terms and the "true" kernel
# hyperparameters used to draw the curve.
data_count, train_step = 5001, 10
nugget_noise = HomoscedasticNoise(1e-14)
measurement_noise = HomoscedasticNoise(1e-7)
sim_length_scale = ScalarHyperparameter(0.05)
sim_nu = ScalarHyperparameter(2.0)

# Assemble the Matern kernel piece by piece, then hand it to the sampler.
sim_metric = IsotropicDistortion(l2, length_scale=sim_length_scale)
sim_kernel = Matern(nu=sim_nu, metric=sim_metric)
sampler = UnivariateSampler(
    data_count=data_count,
    train_step=train_step,
    kernel=sim_kernel,
    eps=nugget_noise,
    measurement_eps=measurement_noise,
)

# Pull the train/test split of features and responses, then plot the sample.
train_features, test_features = sampler.features()
train_responses, test_responses = sampler.sample()
sampler.plot_sample()

# Model to be trained: same length scale and measurement noise as the
# simulation, but `nu` is left as a "log_sample" hyperparameter bounded by
# (0.1, 5.0).
exp_nu = ScalarHyperparameter("log_sample", (0.1, 5.0))
exp_metric = IsotropicDistortion(l2, length_scale=sim_length_scale)
exp_kernel = Matern(nu=exp_nu, metric=exp_metric)
muygps = MuyGPS(kernel=exp_kernel, eps=measurement_noise)

Validate using all training data.

In [None]:
# Single-epoch run in which the batch covers the entire training set
# (batch_count == train_count).
full_batch_options = dict(
    nn_count=30,
    batch_count=sampler.train_count,
    train_count=sampler.train_count,
    length_scaled=False,
    num_epochs=1,
    batch_features=None,
    loss_method="lool",
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=True,
    random_state=1,
    init_points=5,
    n_iter=20,
    allow_duplicate_points=True,
)
muygps_bayes_lool = optimize_from_tensors_mini_batch(
    muygps, train_features, train_responses, **full_batch_options
)
print(f"\n`loss_method` 'lool' `nu` {muygps_bayes_lool.kernel.nu()} `sigma_sq` {muygps_bayes_lool.sigma_sq()}")


Test the optimization loop chassis using a sample batch size of 100.

In [None]:
# Mini-batch run: batches of 100 samples, with as many epochs as whole
# batches fit into the training set.
batch_count = 100
train_count = sampler.train_count
num_epochs = int(train_count / batch_count)

mini_batch_options = dict(
    nn_count=30,
    batch_count=batch_count,
    train_count=train_count,
    length_scaled=False,
    num_epochs=num_epochs,
    batch_features=None,
    loss_method="lool",
    obj_method="loo_crossval",
    sigma_method="analytic",
    verbose=True,
    random_state=1,
    init_points=5,
    n_iter=20,
    allow_duplicate_points=True,
)
muygps_bayes_lool = optimize_from_tensors_mini_batch(
    muygps, train_features, train_responses, **mini_batch_options
)
print(f"\n`loss_method` 'lool' `nu` {muygps_bayes_lool.kernel.nu()} `sigma_sq` {muygps_bayes_lool.sigma_sq()}")
