In [1]:
# import pandas as pd
# import numpy as np
import collections

import math

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc

import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

import os
import math
from dataclasses import dataclass

import torch
from botorch.acquisition import qExpectedImprovement
from botorch.fit import fit_gpytorch_mll
from botorch.generation import MaxPosteriorSampling
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from botorch.test_functions import Ackley
from botorch.utils.transforms import unnormalize
from torch.quasirandom import SobolEngine

import gpytorch
from gpytorch.constraints import Interval
from gpytorch.kernels import MaternKernel, ScaleKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.priors import HorseshoePrior


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float
SMOKE_TEST = os.environ.get("SMOKE_TEST")

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
fun = Ackley(dim=20, negate=True).to(dtype=dtype, device=device)
fun.bounds[0, :].fill_(-5)
fun.bounds[1, :].fill_(10)
dim = fun.dim
lb, ub = fun.bounds
print(dim)

batch_size = 4
n_init = 2 * dim
max_cholesky_size = float("inf")  # Always use Cholesky

NUM_RESTARTS = 10 if not SMOKE_TEST else 2
RAW_SAMPLES = 512 if not SMOKE_TEST else 4
N_CANDIDATES = min(5000, max(2000, 200 * dim)) if not SMOKE_TEST else 4

print(NUM_RESTARTS, RAW_SAMPLES, N_CANDIDATES)

20
10 512 4000


In [3]:
def eval_objective(x):
    """This is a helper function we use to unnormalize and evalaute a point"""
    return fun(unnormalize(x, fun.bounds))

def get_initial_points(dim, n_pts, seed=0):
    sobol = SobolEngine(dimension=dim, scramble=True, seed=seed)
    X_init = sobol.draw(n=n_pts).to(dtype=dtype, device=device)
    return X_init

### GP-EI

In [9]:
torch.manual_seed(0)

X_ei = get_initial_points(dim, n_init)
Y_ei = torch.tensor(
    [eval_objective(x) for x in X_ei], dtype=dtype, device=device
).unsqueeze(-1)

while len(Y_ei) < 400:
    train_Y = (Y_ei - Y_ei.mean()) / Y_ei.std()
    likelihood = GaussianLikelihood(noise_constraint=Interval(1e-8, 1e-3))
    model = SingleTaskGP(X_ei, train_Y, likelihood=likelihood)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_mll(mll)

    # Create a batch
    ei = qExpectedImprovement(model, train_Y.max())
    candidate, acq_value = optimize_acqf(
        ei,
        bounds=torch.stack(
            [
                torch.zeros(dim, dtype=dtype, device=device),
                torch.ones(dim, dtype=dtype, device=device),
            ]
        ),
        q=batch_size,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,
    )

    Y_next = torch.tensor(
        [eval_objective(x) for x in candidate], dtype=dtype, device=device
    ).unsqueeze(-1)

    # Append data
    X_ei = torch.cat((X_ei, candidate), axis=0)
    Y_ei = torch.cat((Y_ei, Y_next), axis=0)
    # print(X_ei.size(), Y_ei.size())

    # Print current status
    print(f"{len(X_ei)}) Best value: {Y_ei.max().item():.2e}")

qExpectedImprovement(
  (model): SingleTaskGP(
    (likelihood): GaussianLikelihood(
      (noise_covar): HomoskedasticNoise(
        (raw_noise_constraint): Interval(1.000E-08, 1.000E-03)
      )
    )
    (mean_module): ConstantMean()
    (covar_module): ScaleKernel(
      (base_kernel): MaternKernel(
        (lengthscale_prior): GammaPrior()
        (raw_lengthscale_constraint): Positive()
      )
      (outputscale_prior): GammaPrior()
      (raw_outputscale_constraint): Positive()
    )
  )
  (objective): IdentityMCObjective()
)


## Sparse GP

In [5]:
# from typing import Optional

# from botorch import acquisition
# from botorch.posteriors import Posterior
# from botorch.acquisition import AnalyticAcquisitionFunction
# from botorch.models import SingleTaskGP
# from botorch.models.model import Model
# from botorch.models.transforms import Warp
# from botorch.models.transforms.outcome import OutcomeTransform
# from botorch.optim import optimize_acqf
# from botorch.optim.utils import _filter_kwargs
# from botorch.sampling import SobolQMCNormalSampler

# import gpytorch
# from gpytorch.models import ApproximateGP
# from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy
# from torch import Tensor


# class SVGP(ApproximateGP):
#     def __init__(self, inducing_points: Tensor, covar_module: Optional[gpytorch.kernels.Kernel] = None):
#         variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
#         variational_strategy = VariationalStrategy(self, 
#                                                    inducing_points, 
#                                                    variational_distribution,
#                                                    learn_inducing_locations=True)
#         super(SVGP, self).__init__(variational_strategy)
#         self.mean_module = gpytorch.means.ConstantMean()
#         if covar_module is None:
#             self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
#         else:
#             self.covar_module = covar_module

#     def forward(self, x):
#         mean_x = self.mean_module(x)
#         covar_x = self.covar_module(x)
#         return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

#     def posterior(self, x: Tensor):
#         self.eval()
#         return self.forward(x)
    


In [6]:
# import gpytorch
# from botorch.models.approximate_gp import ApproximateGPyTorchModel
# from gpytorch.models import ApproximateGP
# from gpytorch.variational import CholeskyVariationalDistribution
# from gpytorch.variational import VariationalStrategy
# from botorch.posteriors.gpytorch import GPyTorchPosterior
# import torch

# class GPModel(ApproximateGP):
#     def __init__(self, inducing_points, likelihood):
#         variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0) )
#         variational_strategy = VariationalStrategy(
#             self,
#             inducing_points,
#             variational_distribution,
#             learn_inducing_locations=True
#             )
#         super(GPModel, self).__init__(variational_strategy)
#         self.mean_module = gpytorch.means.ConstantMean()
#         self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
#         self.num_outputs = 1
#         self.likelihood = likelihood 

#     def forward(self, x):
#         mean_x = self.mean_module(x)
#         covar_x = self.covar_module(x)
#         return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

#     def posterior(
#             self, X, output_indices=None, observation_noise=False, *args, **kwargs
#         ) -> GPyTorchPosterior:
#             self.eval()  # make sure model is in eval mode
#             # self.model.eval()
#             self.likelihood.eval()
#             dist = self.likelihood(self(X)) 

#             return GPyTorchPosterior(mvn=dist)


In [None]:
# from gpytorch.mlls import PredictiveLogLikelihood 

# torch.manual_seed(0)

# X_sparse = get_initial_points(dim, n_init)
# Y_sparse = torch.tensor(
#     [eval_objective(x) for x in X_sparse], dtype=dtype, device=device
# ).unsqueeze(-1)

# likelihood = GaussianLikelihood(noise_constraint=Interval(1e-8, 1e-3))
# model = GPModel(X_sparse, likelihood=likelihood)
# optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

# while len(Y_sparse) < 400:
#     train_Y = (Y_sparse - Y_sparse.mean()) / Y_sparse.std()

#     model = GPModel(X_sparse, likelihood=likelihood)
#     mll = PredictiveLogLikelihood(model.likelihood, model, num_data=X_sparse.shape[0])
    
#     pred = model(X_sparse)
#     loss = -mll(pred, train_Y)
#     optimizer.zero_grad()
#     loss.mean().backward()
#     torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
#     optimizer.step()

#     # Create a batch
#     ei = qExpectedImprovement(model, train_Y.max())
#     candidate, acq_value = optimize_acqf(
#         ei,
#         bounds=torch.stack(
#             [
#                 torch.zeros(dim, dtype=dtype, device=device),
#                 torch.ones(dim, dtype=dtype, device=device),
#             ]
#         ),
#         q=batch_size,
#         num_restarts=NUM_RESTARTS,
#         raw_samples=RAW_SAMPLES,
#     )

#     Y_next = torch.tensor(
#         [eval_objective(x) for x in candidate], dtype=dtype, device=device
#     ).unsqueeze(-1)

#     # Append data
#     X_sparse = torch.cat((X_sparse, candidate), axis=0)
#     Y_sparse = torch.cat((Y_sparse, Y_next), axis=0)
#     # print(X_ei.size(), Y_ei.size())

#     # Print current status
#     print(f"{len(X_sparse)}) Best value: {Y_sparse.max().item():.2e}")

In [8]:
from sklearn.cluster import MiniBatchKMeans
from gpytorch.kernels import ScaleKernel, RBFKernel, InducingPointKernel, MaternKernel, Kernel


torch.manual_seed(0)

X_sparse = get_initial_points(dim, n_init)
Y_sparse = torch.tensor(
    [eval_objective(x) for x in X_sparse], dtype=dtype, device=device
).unsqueeze(-1)


n_inducing_points = 5
kmeans = MiniBatchKMeans(
    n_clusters=n_inducing_points,
    batch_size=min(10000, X_sparse.shape[0]),
)
kmeans.fit(X_sparse.cpu().numpy())
inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())

likelihood = GaussianLikelihood(noise_constraint=Interval(1e-8, 1e-3))
base_covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
covar_module = InducingPointKernel(base_covar_module, inducing_points=inducing_points, likelihood=likelihood)

while len(Y_sparse) < 400:
    train_Y = (Y_sparse - Y_sparse.mean()) / Y_sparse.std()
    
    model = SingleTaskGP(X_sparse, train_Y, covar_module=covar_module, likelihood=likelihood)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_mll(mll)
    
    # Create a batch
    ei = qExpectedImprovement(model, train_Y.max())
    candidate, acq_value = optimize_acqf(
        ei,
        bounds=torch.stack(
            [
                torch.zeros(dim, dtype=dtype, device=device),
                torch.ones(dim, dtype=dtype, device=device),
            ]
        ),
        q=batch_size,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,
    )

    Y_next = torch.tensor(
        [eval_objective(x) for x in candidate], dtype=dtype, device=device
    ).unsqueeze(-1)

    # Append data
    X_sparse = torch.cat((X_sparse, candidate), axis=0)
    Y_sparse = torch.cat((Y_sparse, Y_next), axis=0)
    # print(X_ei.size(), Y_ei.size())

    # Print current status
    print(f"{len(X_sparse)}) Best value: {Y_sparse.max().item():.2e}")

44) Best value: -5.14e+00
48) Best value: -5.14e+00
52) Best value: -5.14e+00
56) Best value: -5.14e+00
60) Best value: -5.14e+00
64) Best value: -5.14e+00
68) Best value: -5.14e+00
72) Best value: -5.14e+00
76) Best value: -5.14e+00
80) Best value: -5.14e+00
84) Best value: -5.14e+00
88) Best value: -5.14e+00
92) Best value: -5.14e+00
96) Best value: -5.14e+00
100) Best value: -5.14e+00
104) Best value: -5.14e+00
108) Best value: -5.14e+00
112) Best value: -5.14e+00
116) Best value: -5.14e+00
120) Best value: -5.14e+00
124) Best value: -5.14e+00
128) Best value: -5.14e+00
132) Best value: -5.14e+00
136) Best value: -5.14e+00
140) Best value: -5.14e+00
144) Best value: -5.14e+00
148) Best value: -5.14e+00
152) Best value: -5.14e+00
156) Best value: -5.14e+00
160) Best value: -5.14e+00
164) Best value: -5.14e+00
168) Best value: -5.14e+00
172) Best value: -5.14e+00
176) Best value: -5.14e+00


ModelFittingError: All attempts to fit the model have failed. For more information, try enabling botorch.settings.debug mode.

In [None]:
# from botorch.models import SingleTaskVariationalGP
# from gpytorch.mlls import VariationalELBO

# torch.manual_seed(0)

# X_sparse = get_initial_points(dim, n_init)
# Y_sparse = torch.tensor(
#     [eval_objective(x) for x in X_sparse], dtype=dtype, device=device
# ).unsqueeze(-1)

# likelihood = GaussianLikelihood(noise_constraint=Interval(1e-8, 1e-3))

# while len(Y_sparse) < 400:
#     train_Y = (Y_sparse - Y_sparse.mean()) / Y_sparse.std()

#     model = SingleTaskVariationalGP(X_sparse, train_Y, likelihood=likelihood)
#     # mll = VariationalELBO(model.likelihood, model.model, num_data=X_sparse.shape[0])
#     mll = ExactMarginalLogLikelihood(model.likelihood, model.model)
#     fit_gpytorch_mll(mll)
    
#     # Create a batch
#     ei = qExpectedImprovement(model, train_Y.max())
#     candidate, acq_value = optimize_acqf(
#         ei,
#         bounds=torch.stack(
#             [
#                 torch.zeros(dim, dtype=dtype, device=device),
#                 torch.ones(dim, dtype=dtype, device=device),
#             ]
#         ),
#         q=batch_size,
#         num_restarts=NUM_RESTARTS,
#         raw_samples=RAW_SAMPLES,
#     )

#     Y_next = torch.tensor(
#         [eval_objective(x) for x in candidate], dtype=dtype, device=device
#     ).unsqueeze(-1)

#     # Append data
#     X_sparse = torch.cat((X_sparse, candidate), axis=0)
#     Y_sparse = torch.cat((Y_sparse, Y_next), axis=0)
#     # print(X_ei.size(), Y_ei.size())

#     # Print current status
#     print(f"{len(X_sparse)}) Best value: {Y_sparse.max().item():.2e}")

### NP-MC

In [8]:
import glob
import numpy as np
import torch
from math import pi
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

class My_Ackley(Dataset):
    def __init__(self, train_x, train_y):
        self.train_x = train_x
        self.train_y = train_y

        # Generate data
        self.data = []
        self.data.append((self.train_x, self.train_y))

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)

In [9]:
# X_np = get_initial_points(dim, n_init)
# Y_np = torch.tensor(
#     [eval_objective(x) for x in X_np], dtype=dtype, device=device
# ).unsqueeze(-1)

# ackley_dataset = My_Ackley(train_x = X_np, train_y = Y_np)
# data_loader = DataLoader(ackley_dataset, batch_size=1, shuffle=True)

# for _, i in enumerate(data_loader):
#     print(i[0].shape)
#     print(i[1].shape)
#     break


In [10]:
## bo
from torch.utils.data import DataLoader
from neural_process import NeuralProcess
from training import NeuralProcessTrainer
from botorch.sampling.normal import SobolQMCNormalSampler
from utils import context_target_split
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.sampling.stochastic_samplers import StochasticSampler
from botorch.acquisition.objective import IdentityMCObjective

## NP model
x_dim = 20
y_dim = 1
r_dim = 50  # Dimension of representation of context points
z_dim = 50  # Dimension of sampled latent variable
h_dim = 50  # Dimension of hidden layers in encoder and decoder
neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim)
optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4)

train_batch_size = 1

torch.manual_seed(0)

X_np = get_initial_points(dim, n_init)
Y_np = torch.tensor(
    [eval_objective(x) for x in X_np], dtype=dtype, device=device
).unsqueeze(-1)

sampler = StochasticSampler(torch.Size([batch_size*2]), seed=1234)
obj = IdentityMCObjective()

while len(Y_np) < 400:
    best_f = Y_np.max() 
    train_Y = (Y_np - Y_np.mean()) / Y_np.std()
    ackley_dataset = My_Ackley(train_x = X_np, train_y = train_Y)
    data_loader = DataLoader(ackley_dataset, batch_size=train_batch_size, shuffle=True)

    num_context = X_np.shape[0] // 4
    num_target = X_np.shape[0] // 4
    np_trainer = NeuralProcessTrainer(device, neuralprocess, optimizer,
                                    num_context_range=(num_context, num_context),
                                    num_extra_target_range=(num_target, num_target), 
                                    print_freq=200)

    neuralprocess.training = True
    np_trainer.train(data_loader, 200)

    # Create a batch
    neuralprocess.training = False
    for batch in data_loader:
        break
    x, y = batch
    x_context, y_context, _, _ = context_target_split(x[0:1], y[0:1], 
                                                    num_context, 
                                                    num_target)
    neuralprocess.set_context_for_posterior(x_context, y_context)

    ei = qExpectedImprovement(neuralprocess, best_f, sampler, obj)
    candidate, acq_value = optimize_acqf(
        ei,
        bounds=torch.stack(
            [
                torch.zeros(dim, dtype=dtype, device=device),
                torch.ones(dim, dtype=dtype, device=device),
            ]
        ),
        q=batch_size*2, # The number of candidates
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES, # The number of samples for initialization.
    )
    # break
    Y_next = torch.tensor(
        [eval_objective(x) for x in candidate], dtype=dtype, device=device
    ).unsqueeze(-1)

    # Append data
    X_np = torch.cat((X_np, candidate), axis=0)
    Y_np = torch.cat((Y_np, Y_next), axis=0)

    # Print current status
    print(f"{len(X_np)}) Best value: {Y_np.max().item():.2e}")

iteration 200, loss 28.024
48) Best value: -1.24e+01
iteration 200, loss 22.455
56) Best value: -1.17e+01
iteration 200, loss 18.247
64) Best value: -1.17e+01
iteration 200, loss 15.737
72) Best value: -1.17e+01
iteration 200, loss 14.828
80) Best value: -1.17e+01
iteration 200, loss 24.107
88) Best value: -1.17e+01
iteration 200, loss 16.874
96) Best value: -1.17e+01
iteration 200, loss 19.728
104) Best value: -1.17e+01
iteration 200, loss 31.477
112) Best value: -1.17e+01
iteration 200, loss 23.219
120) Best value: -1.17e+01
iteration 200, loss 22.382
128) Best value: -1.17e+01
iteration 200, loss 13.183
136) Best value: -1.17e+01
iteration 200, loss 5.858
144) Best value: -1.16e+01
iteration 200, loss 21.742
152) Best value: -1.16e+01
iteration 200, loss 21.042
160) Best value: -1.16e+01
iteration 200, loss 13.620
168) Best value: -1.16e+01
iteration 200, loss -0.981
176) Best value: -1.12e+01
iteration 200, loss 3.853
184) Best value: -1.12e+01
iteration 200, loss 6.343
192) Best v

## PLOT

In [9]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc

%matplotlib inline

names = ["GP-qEI", "Sparse-GP-qEI", "NP-qEI-stochasticsampler-bc4"] # , "EI", "Sobol"
runs = [Y_ei, Y_sparse, Y_np] # , Y_ei, Y_Sobol
fig, ax = plt.subplots(figsize=(8, 6))

for name, run in zip(names, runs):
    fx = np.maximum.accumulate(run.cpu())
    plt.plot(fx, marker="", lw=3)

plt.plot([0, len(Y_ei)], [fun.optimal_value, fun.optimal_value], "k--", lw=3)
plt.xlabel("Function value", fontsize=18)
plt.xlabel("Number of evaluations", fontsize=18)
plt.title("20D Ackley", fontsize=24)
plt.xlim([0, len(Y_ei)])
plt.ylim([-15, 1])

plt.grid(True)
plt.tight_layout()
plt.legend(
    names + ["Global optimal value"],
    loc="lower center",
    bbox_to_anchor=(0, -0.08, 1, 1),
    bbox_transform=plt.gcf().transFigure,
    ncol=4,
    fontsize=16,
)
plt.show()

NameError: name 'Y_np' is not defined

## MULTIVARIATE

In [12]:
# import torch
# from models import Encoder, MuSigmaEncoder, Decoder

In [13]:
# import torch
# from models import Encoder, MuSigmaEncoder, Decoder
# from torch import nn
# from torch.distributions import Normal
# from utils import img_mask_to_np_input
# from botorch.posteriors.deterministic import DeterministicPosterior

# #
# import itertools
# import warnings
# from abc import ABC
# from copy import deepcopy
# from typing import Any, List, Optional, Tuple, TYPE_CHECKING, Union

# import torch
# from botorch.acquisition.objective import PosteriorTransform
# from botorch.models.utils import (
#     _make_X_full,
#     add_output_dim,
#     gpt_posterior_settings,
#     mod_batch_shape,
#     multioutput_to_batch_mode_transform,
# )

# import gpytorch
# from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
# from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
# from torch import Tensor

# from botorch.posteriors.transformed import TransformedPosterior  # pragma: no cover

# from botorch.posteriors.torch import TorchPosterior
# from botorch.posteriors.gpytorch import GPyTorchPosterior

# from gpytorch.kernels import ScaleKernel
# from gpytorch.kernels import RBFKernel

# from gpytorch.likelihoods import Likelihood

# ## bo
# from torch.utils.data import DataLoader
# from neural_process import NeuralProcess
# from training import NeuralProcessTrainer
# from botorch.sampling.normal import SobolQMCNormalSampler
# from utils import context_target_split
# from botorch.acquisition.monte_carlo import qExpectedImprovement
# from botorch.sampling.stochastic_samplers import StochasticSampler
# from botorch.acquisition.objective import IdentityMCObjective

In [14]:
# class NeuralProcess1(nn.Module):
#     """
#     Implements Neural Process for functions of arbitrary dimensions.

#     Parameters
#     ----------
#     x_dim : int
#         Dimension of x values.

#     y_dim : int
#         Dimension of y values.

#     r_dim : int
#         Dimension of output representation r.

#     z_dim : int
#         Dimension of latent variable z.

#     h_dim : int
#         Dimension of hidden layer in encoder and decoder.
#     """
    
#     def __init__(self, x_dim, y_dim, r_dim, z_dim, h_dim):
#         super(NeuralProcess1, self).__init__()
#         self.x_dim = x_dim
#         self.y_dim = y_dim
#         self.r_dim = r_dim
#         self.z_dim = z_dim
#         self.h_dim = h_dim

#         # Initialize networks
#         self.xy_to_r = Encoder(x_dim, y_dim, h_dim, r_dim)
#         self.r_to_mu_sigma = MuSigmaEncoder(r_dim, z_dim)
#         self.xz_to_y = Decoder(x_dim, z_dim, h_dim, y_dim)

#         self._num_outputs = 1

#         ##
#         self.mu_context = None
#         self.sigma_context = None
#         self.q_context = None
#         self.z_sample = None

#         self.likelihood = Likelihood

#     def aggregate(self, r_i):
#         """
#         Aggregates representations for every (x_i, y_i) pair into a single
#         representation.

#         Parameters
#         ----------
#         r_i : torch.Tensor
#             Shape (batch_size, num_points, r_dim)
#         """
#         return torch.mean(r_i, dim=1)

#     def xy_to_mu_sigma(self, x, y):
#         """
#         Maps (x, y) pairs into the mu and sigma parameters defining the normal
#         distribution of the latent variables z.

#         Parameters
#         ----------
#         x : torch.Tensor
#             Shape (batch_size, num_points, x_dim)

#         y : torch.Tensor
#             Shape (batch_size, num_points, y_dim)
#         """
#         batch_size, num_points, _ = x.size()
#         # Flatten tensors, as encoder expects one dimensional inputs
#         x_flat = x.view(batch_size * num_points, self.x_dim)
#         y_flat = y.contiguous().view(batch_size * num_points, self.y_dim)
#         # Encode each point into a representation r_i
#         r_i_flat = self.xy_to_r(x_flat, y_flat)
#         # Reshape tensors into batches
#         r_i = r_i_flat.view(batch_size, num_points, self.r_dim)
#         # Aggregate representations r_i into a single representation r
#         r = self.aggregate(r_i)
#         # Return parameters of distribution
#         return self.r_to_mu_sigma(r)

#     def forward(self, x_context, y_context, x_target, y_target=None):
#         """
#         Given context pairs (x_context, y_context) and target points x_target,
#         returns a distribution over target points y_target.

#         Parameters
#         ----------
#         x_context : torch.Tensor
#             Shape (batch_size, num_context, x_dim). Note that x_context is a
#             subset of x_target.

#         y_context : torch.Tensor
#             Shape (batch_size, num_context, y_dim)

#         x_target : torch.Tensor
#             Shape (batch_size, num_target, x_dim)

#         y_target : torch.Tensor or None
#             Shape (batch_size, num_target, y_dim). Only used during training.

#         Note
#         ----
#         We follow the convention given in "Empirical Evaluation of Neural
#         Process Objectives" where context is a subset of target points. This was
#         shown to work best empirically.
#         """
#         # Infer quantities from tensor dimensions
#         batch_size, num_context, x_dim = x_context.size()
#         _, num_target, _ = x_target.size()
#         _, _, y_dim = y_context.size()

#         if self.training:
#             # Encode target and context (context needs to be encoded to
#             # calculate kl term)
#             mu_target, sigma_target = self.xy_to_mu_sigma(x_target, y_target)
#             mu_context, sigma_context = self.xy_to_mu_sigma(x_context, y_context)
#             # Sample from encoded distribution using reparameterization trick
#             ## change to mvn
#             # mu_target = mu_target.squeeze(0)
#             # sigma_target = sigma_target.squeeze(0)
#             # mu_context = mu_context.squeeze(0)
#             # sigma_context = sigma_context.squeeze(0)
#             q_target = Normal(mu_target, sigma_target)
#             q_context = Normal(mu_context, sigma_context)
#             z_sample = q_target.rsample()
#             # Get parameters of output distribution
#             y_pred_mu, y_pred_sigma = self.xz_to_y(x_target, z_sample)
#             # y_pred_mu = y_pred_mu.squeeze(0)
#             # y_pred_sigma = y_pred_sigma.squeeze(0)
#             p_y_pred = MultivariateNormal(y_pred_mu, y_pred_sigma)

#             return p_y_pred, q_target, q_context
#         else:
#             # At testing time, encode only context
#             mu_context, sigma_context = self.xy_to_mu_sigma(x_context, y_context)
#             # Sample from distribution based on context
#             ## change to mvn
#             q_context = Normal(mu_context, sigma_context)
#             z_sample = q_context.rsample()
#             # Predict target points based on context
#             y_pred_mu, y_pred_sigma = self.xz_to_y(x_target, z_sample)
#             ## change to mvn
#             p_y_pred = MultivariateNormal(y_pred_mu, y_pred_sigma)

#             return p_y_pred
        
#     def num_outputs(self):
#         r"""The number of outputs of the model."""
#         return self._num_outputs
    
#     def set_context_for_posterior(self, x_context, y_context):
#         # At testing time, encode only context
#         self.mu_context, self.sigma_context = self.xy_to_mu_sigma(x_context, y_context)
#         # Sample from distribution based on context
#         ## change to mvn
#         # self.mu_context = self.mu_context.squeeze(0)
#         # self.sigma_context = self.sigma_context.squeeze(0)
#         self.q_context = Normal(self.mu_context, self.sigma_context)
#         self.z_sample = self.q_context.rsample()

#     def posterior(self, X, posterior_transform=None):
#         # # At testing time, encode only context
#         # mu_context, sigma_context = self.xy_to_mu_sigma(x_context, y_context)
#         # # Sample from distribution based on context
#         # q_context = Normal(mu_context, sigma_context)
#         # z_sample = q_context.rsample()
#         # Predict target points based on context
#         y_pred_mu, y_pred_sigma = self.xz_to_y(X, self.z_sample)
#         # y_pred_mu = y_pred_mu.squeeze(0).squeeze(-1)
#         # y_pred_sigma = y_pred_sigma.squeeze(0).squeeze(-1)
#         ## change to mvn
#         p_y_pred = MultivariateNormal(y_pred_mu, y_pred_sigma)
#         posterior = TorchPosterior(p_y_pred)
#         # p_y_pred = MultivariateNormal(y_pred_mu, y_pred_sigma)
#         # posterior = GPyTorchPosterior(p_y_pred)
#         return p_y_pred

In [15]:
# ## NP model
# x_dim = 20
# y_dim = 1
# r_dim = 50  # Dimension of representation of context points
# z_dim = 50  # Dimension of sampled latent variable
# h_dim = 50  # Dimension of hidden layers in encoder and decoder
# f = NeuralProcess1(x_dim, y_dim, r_dim, z_dim, h_dim)
# optimizer = torch.optim.Adam(f.parameters(), lr=3e-4)


# ##

# train_x = get_initial_points(dim, n_init)
# train_y = torch.tensor(
#     [eval_objective(x) for x in train_x], dtype=dtype, device=device
# ).unsqueeze(-1)
# ackley_dataset = My_Ackley(train_x = train_x, train_y = train_y)
# train_batch_size = 1
# data_loader = DataLoader(ackley_dataset, batch_size=train_batch_size, shuffle=True)

# num_context = train_x.shape[0] // 4
# num_target = train_x.shape[0] // 4
# np_trainer = NeuralProcessTrainer(device, f, optimizer,
#                                 num_context_range=(num_context, num_context),
#                                 num_extra_target_range=(num_target, num_target), 
#                                 print_freq=100)

# f.training = True
# np_trainer.train(data_loader, 200)

In [16]:
# # Create a batch
# f.training = False
# for batch in data_loader:
#     break
# x, y = batch
# x_context, y_context, _, _ = context_target_split(x[0:1], y[0:1], 
#                                                 num_context, 
#                                                 num_target)
# f.set_context_for_posterior(x_context, y_context)

In [17]:
# test_x = torch.rand([1,40,20])
# # model.posterior(test_x)
# print(f.posterior(test_x))

In [18]:
# sampler = StochasticSampler(torch.Size([batch_size*2]), seed=1234)
# obj = IdentityMCObjective()

# ei = qExpectedImprovement(f, train_y.max(), sampler, obj)
# candidate, acq_value = optimize_acqf(
#     ei,
#     bounds=torch.stack(
#         [
#             torch.zeros(dim, dtype=dtype, device=device),
#             torch.ones(dim, dtype=dtype, device=device),
#         ]
#     ),
#     q=batch_size*2, # The number of candidates
#     num_restarts=NUM_RESTARTS,
#     raw_samples=RAW_SAMPLES, # The number of samples for initialization.
# )

# Y_next = torch.tensor(
#     [eval_objective(x) for x in candidate], dtype=dtype, device=device
# ).unsqueeze(-1)

In [19]:
# print(candidate.shape)