Copyright 2023-2023 Lawrence Livermore National Security, LLC and other MuyGPyS
Project Developers. See the top-level COPYRIGHT file for details.

SPDX-License-Identifier: MIT

# Shear Kernel Tutorial

This notebook demonstrates how to use the specialized lensing shear kernel (hard-coded to RBF at the moment).

⚠️ _Note that this is still an experimental feature._ ⚠️

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.colors import LogNorm
from MuyGPyS._src.gp.tensors import _crosswise_differences, _pairwise_differences
from MuyGPyS.gp import MuyGPS
from MuyGPyS.gp.deformation import Isotropy, l2, F2
from MuyGPyS.gp.hyperparameter import Parameter
from MuyGPyS.gp.kernels.experimental import ShearKernel
from MuyGPyS.gp.tensors import make_predict_tensors
from MuyGPyS.neighbors import NN_Wrapper
from MuyGPyS.gp.noise import HomoscedasticNoise

This is required to import the implementation from Bob Armstrong's original repository.
Must set `shear_kernel_dir = "path/to/local/shear_kernel/"` for things to run properly.

In [None]:
import importlib.util
import sys
import os
import torch

# Location of the local checkout of the original shear_kernel repository.
# The first path is tried as-is; if it is not a directory, fall back to the
# alternate relative location.
shear_kernel_dir = "../../../projects/shear_kernel/"
if not os.path.isdir(shear_kernel_dir):
    shear_kernel_dir = "../../shear_kernel/"

# Load analytic_kernel.py straight from that directory and register it in
# sys.modules so the regular import statement below can resolve it.
# os.path.join is used so that a directory given without a trailing slash
# still produces a valid file path.
spec_analytic = importlib.util.spec_from_file_location(
    "analytic_kernel", os.path.join(shear_kernel_dir, "analytic_kernel.py")
)
bar = importlib.util.module_from_spec(spec_analytic)
sys.modules["analytic_kernel"] = bar
spec_analytic.loader.exec_module(bar)
from analytic_kernel import shear_kernel

We will set a random seed here for consistency when building docs.
In practice we would not fix a seed.

In [None]:
np.random.seed(0)

## Kernel Implementation Comparisons

Here we will compare the analytic implementation of the kernel function to the `MuyGPyS` implementation, using some simple data.

Here we build some simple data, which is meant to represent a grid of sky coordinates.

In [None]:
# Grid resolution and the extent of the unit square it covers.
n = 25  # number of galaxies on a side
xmin, xmax = 0, 1
ymin, ymax = 0, 1

# Evenly spaced coordinates along each axis, expanded to a full 2D grid.
xx = np.linspace(xmin, xmax, n)
yy = np.linspace(ymin, ymax, n)
x, y = np.meshgrid(xx, yy)

# One (x, y) row per grid point.
features = np.vstack([x.flatten(), y.flatten()]).T
data_count = features.shape[0]

# Pairwise differences between all grid points, and the kernel length scale
# used throughout the notebook.
diffs = _pairwise_differences(features)
length_scale = 0.5

Use an Isotropic distance functor.

In [None]:
dist_fn = Isotropy(
    metric=F2,
    length_scale=Parameter(length_scale),
)

Here we construct a shear value kernel (partial differential components of RBF), as well as the original RBF kernel using Bob's implementation.

In [None]:
def original_shear(X1, X2=None, length_scale=1.0):
    """Assemble a dense block matrix from the reference ``shear_kernel``.

    ``shear_kernel`` is evaluated once for every pair of rows drawn from
    ``X1`` and ``X2``; entry ``[a, b]`` (``a``, ``b`` in 0..2) of each result
    is written to row ``a * n1 + i`` and column ``b * n2 + j`` of the output.

    Args:
        X1: 2D array whose rows each unpack into two coordinates.
        X2: Second array of the same form. Defaults to ``X1``.
        length_scale: Forwarded to ``shear_kernel`` as its ``b`` argument.

    Returns:
        Array of shape ``(3 * n1, 3 * n2)``.
    """
    if X2 is None:
        X2 = X1
    rows, _ = X1.shape
    cols, _ = X2.shape
    # Start from NaN so that any entry left unwritten is easy to spot.
    out = np.full((3 * rows, 3 * cols), np.nan)
    for r, (x_r, y_r) in enumerate(X1):
        for c, (x_c, y_c) in enumerate(X2):
            block = shear_kernel(x_r, y_r, x_c, y_c, b=length_scale)
            for p in range(3):
                for q in range(3):
                    out[p * rows + r, q * cols + c] = block[p, q]

    return out

### Pairwise kernels (`Kin`)
This code computes the `Kin` kernels.

In [None]:
Kin_analytic = original_shear(features, length_scale=length_scale)

Here we do the same using the MuyGPyS implementation. Note the increased efficiency.

In [None]:
Kin_muygps = ShearKernel(deformation=dist_fn)(diffs)

`Kin_muygps` is a more generalized tensor, so we need to flatten it to a conforming shape.

In [None]:
Kin_flat = Kin_muygps.reshape(data_count * 3, data_count * 3)

In [None]:
print(f"Kin_analytic.shape = {Kin_analytic.shape}")
print(f"Kin_muygps.shape = {Kin_muygps.shape}")
print(f"Kin_flat.shape = {Kin_flat.shape}")

Do the two implementations agree?

In [None]:
np.allclose(Kin_analytic, Kin_flat)

In [None]:
Kin_residual = np.abs(Kin_analytic - Kin_flat)
print(f"Kin residual max: {np.max(Kin_residual)}, min: {np.min(Kin_residual)}, mean : {np.mean(Kin_residual)}")

Plot results of the baseline and MuyGPyS implementations. 

In [None]:
# Side-by-side view: analytic kernel, MuyGPyS kernel, and their residual
# (the residual uses a log color scale).
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
left, middle, right = axes

left.imshow(Kin_analytic)
left.set_title("original shear kernel")

middle.imshow(Kin_flat)
middle.set_title("MuyGPyS shear kernel")

im = right.imshow(Kin_residual, norm=LogNorm())
right.set_title("Residual")
fig.colorbar(im, ax=right)

plt.show()

### Cross-Covariance (`Kcross`)
Now we perform a similar analysis for the cross-covariance.

In [None]:
# Split the grid points into two disjoint sets at a fixed index and compute
# the crosswise differences between them.
split = 200
X1, X2 = features[:split], features[split:]
n1, _ = X1.shape
n2, _ = X2.shape
crosswise_diffs = _crosswise_differences(X1, X2)
print(X1.shape, X2.shape, crosswise_diffs.shape)

In [None]:
Kcross_analytic = original_shear(X1, X2, length_scale=length_scale)

In [None]:
Kcross_muygps = ShearKernel(deformation=dist_fn)(crosswise_diffs, adjust=False)

In [None]:
Kcross_flat = Kcross_muygps.reshape(n1 * 3, n2 * 3)

In [None]:
print(f"Kcross_analytic.shape = {Kcross_analytic.shape}")
print(f"Kcross_muygps.shape = {Kcross_muygps.shape}")
print(f"Kcross_flat.shape = {Kcross_flat.shape}")

In [None]:
np.allclose(Kcross_analytic, Kcross_flat)

In [None]:
Kcross_residual = np.abs(Kcross_analytic - Kcross_flat)
print(f"Kcross residual max: {np.max(Kcross_residual)}, min: {np.min(Kcross_residual)}, mean : {np.mean(Kcross_residual)}")

Now we visualize the comparison.

In [None]:
# Side-by-side view: analytic cross-covariance, MuyGPyS cross-covariance, and
# their residual (the residual uses a log color scale).
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
left, middle, right = axes

left.imshow(Kcross_analytic)
left.set_title("original shear kernel")

middle.imshow(Kcross_flat)
middle.set_title("MuyGPyS shear kernel")

im = right.imshow(Kcross_residual, norm=LogNorm())
right.set_title("Residual")
fig.colorbar(im, ax=right)

plt.show()

Runtime comparison of the two implementations (Change `False` to `True` to run):

In [None]:
if False:
    %timeit original_shear(features)
    %timeit ShearKernel(deformation=dist_fn)(diffs)

## Posterior Mean Comparison

Now we will test the `posterior_mean` of the analytic and muygps implementations.
First, we set up an arbitrary target array.
Targets should be square matrices like a grid of a swath of sky.
Ultimately, the target array will have shape `(625,3)`, given `n=25` above.

Set noise level

In [None]:
noise_prior = 1e-4

Define the target matrices.
Initially was run with arbitrary targets, now as of 1/25/24 can sample targets from GP.

In [None]:
def targets_from_GP(features, n, ls):
    """Draw one set of targets from a Gaussian process with the shear kernel.

    The kernel is evaluated on the pairwise differences of ``features``,
    flattened to a ``(3 * n**2, 3 * n**2)`` matrix, and a standard-normal
    vector is multiplied by its Cholesky factor.

    Args:
        features: Feature array passed to ``_pairwise_differences``.
        n: Grid side length; the flattened covariance has ``3 * n**2`` rows.
        ls: Length scale of the isotropic deformation.

    Returns:
        Array of shape ``(n**2, 3)`` holding the sampled targets.
    """
    kernel = ShearKernel(
        deformation=Isotropy(F2, length_scale=Parameter(ls)),
    )

    size = 3 * n**2
    cov = 1.0 * kernel(_pairwise_differences(features), adjust=False)
    # A tiny diagonal term is added before factorizing (presumably to keep
    # the Cholesky decomposition numerically stable).
    cov_flat = cov.reshape(size, size) + 1e-10 * np.identity(size)

    draw = np.random.normal(0, 1, size)
    chol = np.linalg.cholesky(cov_flat)
    sample = np.dot(chol, draw)

    return sample.reshape(3, n**2).swapaxes(0, 1)

def arbitrary_targets(n):
    """Build deterministic, normalized targets from a product grid.

    The base image is the outer product of ``linspace(1, 10, n)`` with
    itself. Three channels are stacked: the image, the image rotated with
    ``np.rot90(..., k=3)``, and the image rotated with ``np.rot90``. The
    stack is then rescaled to the range [0, 1].

    Args:
        n: Number of grid points per side.

    Returns:
        Array of shape ``(3, n * n)`` with values in [0, 1].
    """
    ticks = np.linspace(1, 10, n)
    grid_x, grid_y = np.meshgrid(ticks, ticks, indexing="ij")
    base = grid_x * grid_y

    channels = [
        base.flatten(),
        np.rot90(base, k=3).flatten(),
        np.rot90(base).flatten(),
    ]
    stacked = np.vstack(channels)

    lowest = np.min(stacked)
    highest = np.max(stacked)
    return (stacked - lowest) / (highest - lowest)

In [None]:
targets = targets_from_GP(features, n, length_scale)

Here we create a train/test split in the dataset.
Modify the `train_ratio` to specify the proportion of data to hold out for training.

In [None]:
train_ratio = 0.2

In [None]:
# Build a boolean train mask by shuffling all indices, cutting the shuffled
# order into `interval_count` equal-length chunks, and picking one index from
# each chunk. Everything that is not selected becomes test data.
rng = np.random.default_rng(seed=1)
interval_count = int(data_count * train_ratio)
interval = data_count // interval_count
sfl = rng.permutation(np.arange(data_count))
train_mask = np.zeros(data_count, dtype=bool)
for i in range(interval_count):
    # Draw from the seeded generator created above (not the global NumPy
    # state) so the split depends only on this cell's seed.
    idx = rng.choice(sfl[i * interval : (i + 1) * interval])
    train_mask[idx] = True
test_mask = np.invert(train_mask)
train_count = np.count_nonzero(train_mask)
test_count = np.count_nonzero(test_mask)

In [None]:
train_targets = targets[train_mask, :]
test_targets = targets[test_mask, :]
train_features = features[train_mask, :]
test_features = features[test_mask, :]

Let's visualize the train/test datasets.

In [None]:
def make_im(vec, mask):
    """Scatter masked values onto a square image for plotting.

    Args:
        vec: Values for the positions where ``mask`` is True, in flattened
            order.
        mask: 1D boolean array covering the flattened square grid.

    Returns:
        2D square float array holding ``vec`` at the masked positions and
        ``-inf`` everywhere else.
    """
    # Derive the side length from the mask itself instead of relying on a
    # notebook-global grid size.
    side = int(round(np.sqrt(len(mask))))
    ret = np.full(len(mask), -np.inf)
    ret[mask] = vec
    return ret.reshape(side, side)

In [None]:
# Top row: training targets; bottom row: test targets. One column per target
# component.
fig, ax = plt.subplots(2, 3, figsize=(10, 7))
# Raw string: "\k" is not a valid escape sequence in a regular string literal.
for col, title in enumerate((r"$\kappa$", "g1", "g2")):
    ax[0, col].imshow(make_im(train_targets[:, col], train_mask))
    ax[0, col].set_title(title, fontsize=15)
    ax[1, col].imshow(make_im(test_targets[:, col], test_mask))
ax[0, 0].set_ylabel("train", fontsize=15)
ax[1, 0].set_ylabel("test", fontsize=15)
plt.show()

Explicitly define the target matrices.
Also add leading unitary dimension to `targets_muygpys` for things to work.

In [None]:
# Flatten component-major: all values of the first component, then the
# second, then the third.
train_targets_flat = train_targets.swapaxes(0, 1).reshape(-1)
test_targets_flat = test_targets.swapaxes(0, 1).reshape(-1)
print(train_targets_flat.shape, test_targets_flat.shape)

Having defined the `K*` matrices above, compare posterior means, sans-optimization. 
This comes from (3.4) in Muyskens et al. (2021), which looks like:
$$ \hat{Y}(X^*|X) = K_{\theta}(X^*,X)(K_{\theta}(X,X)+\epsilon)^{-1}Y(X) $$
where $\epsilon = \sigma^2 I$ with $\sigma^2$ being the noise variance.

Analytic: for the analytic implementation, I'll do things with the full "flattened" difference tensors.

In [None]:
Kin_analytic = original_shear(train_features, train_features, length_scale=length_scale)
Kcross_analytic = original_shear(test_features, train_features, length_scale=length_scale)

In [None]:
print(Kcross_analytic.shape, Kin_analytic.shape, train_targets_flat.shape)

In [None]:
def conventional_mean(Kin, Kcross, targets, noise=noise_prior):
    """Compute the dense GP posterior mean from flattened kernel matrices.

    Solves ``(Kin + noise * I) w = targets`` and returns ``Kcross @ w``
    reshaped from a flat vector into one row per test point.

    Args:
        Kin: Square kernel matrix of the training data.
        Kcross: Cross-covariance matrix; its row count is three times the
            number of test points.
        targets: Flattened training targets.
        noise: Value added to the diagonal of ``Kin``.

    Returns:
        Array of shape ``(test_count, 3)``.
    """
    regularized = Kin + noise * np.eye(Kin.shape[0])
    weights = np.linalg.solve(regularized, targets)
    n_test = int(Kcross.shape[0] / 3)
    flat_mean = Kcross @ weights
    return flat_mean.reshape(3, n_test).swapaxes(0, 1)

In [None]:
posterior_mean_analytic = conventional_mean(
    Kin_analytic,
    Kcross_analytic,
    train_targets_flat,
)

Here we'll compare to a flattened `MuyGPyS` implementation without nearest neighbors sparsification.

In [None]:
# create MuyGPS object
# Create the MuyGPS object. The noise level reuses `noise_prior` so that the
# MuyGPyS model and the conventional solves share a single noise setting.
shear_model = MuyGPS(
    kernel=ShearKernel(
        deformation=Isotropy(
            F2,
            length_scale=Parameter(length_scale),
        ),
    ),
    noise=HomoscedasticNoise(noise_prior),
)

Create flat solve using MuyGPyS functions.
This should be very close to the analytic solution.

In [None]:
# Train/train kernel, flattened to a square matrix.
pairwise_diffs = _pairwise_differences(train_features)
Kin_muygps = shear_model.kernel(pairwise_diffs, adjust=False)
Kin_flat = Kin_muygps.reshape(3 * train_count, 3 * train_count)

# Test/train cross-covariance, flattened to a rectangular matrix.
crosswise_diffs = _crosswise_differences(test_features, train_features)
Kcross_muygps = shear_model.kernel(crosswise_diffs, adjust=False)
Kcross_flat = Kcross_muygps.reshape(3 * test_count, 3 * train_count)

In [None]:
print(Kin_muygps.shape, Kcross_muygps.shape, Kin_flat.shape, Kcross_flat.shape)

Check that the flattened kernel tensors agree with the analytic tensors (should pass if the above passed).

In [None]:
print(
    np.allclose(Kin_analytic, Kin_flat),
    np.allclose(Kcross_analytic, Kcross_flat),
)

Plotting code.

In [None]:
def show_im(vec, mask, ax):
    """Draw masked values as a log-scaled square image with its own colorbar.

    Args:
        vec: Values for the positions where ``mask`` is True.
        mask: 1D boolean array covering the flattened square grid.
        ax: Matplotlib axes to draw on.
    """
    # Derive the side length from the mask instead of a notebook-global size.
    side = int(round(np.sqrt(len(mask))))
    mat = np.full(len(mask), -np.inf)
    mat[mask] = vec
    im = ax.imshow(mat.reshape(side, side), norm=LogNorm())
    # Attach the colorbar to the figure that owns `ax`, not to whichever
    # global `fig` happens to exist when this function is called.
    ax.figure.colorbar(im, ax=ax)

def compare_predictions(truth, first, second, fname, sname, fontsize=12):
    """Plot truth, two sets of predictions, and their absolute residuals.

    Produces a 6x3 grid of images, one column per target component. The rows
    are: truth, first prediction, |truth - first|, second prediction,
    |truth - second|, and |first - second|. All images are laid out using
    the notebook-level ``test_mask``.

    Args:
        truth: Reference targets, indexed as ``[:, component]``.
        first: Predictions of the first model, same layout as ``truth``.
        second: Predictions of the second model, same layout as ``truth``.
        fname: Label for the first model.
        sname: Label for the second model.
        fontsize: Font size of the row labels.
    """
    # A tiny offset keeps exact-zero residuals strictly positive, since the
    # residual panels are drawn by show_im with a LogNorm color scale.
    f_residual = np.abs(truth - first) + 1e-15
    s_residual = np.abs(truth - second) + 1e-15
    fs_residual = np.abs(first - second) + 1e-15

    fig, ax = plt.subplots(6, 3, figsize=(10, 18))

    for axis in ax.flat:
        axis.set_xticks([])
        axis.set_yticks([])

    # Raw string: "\k" is not a valid escape sequence in a regular literal.
    for col, title in enumerate((r"$\kappa$", "g1", "g2")):
        ax[0, col].set_title(title)

    row_labels = (
        "Truth",
        f"{fname} Mean",
        f"|truth - {fname}|",
        f"{sname} Mean",
        f"|truth - {sname}|",
        f"|{fname} - {sname}|",
    )
    for row, label in enumerate(row_labels):
        ax[row, 0].set_ylabel(label, fontsize=fontsize)

    for col in range(3):
        # truth
        ax[0, col].imshow(make_im(truth[:, col], test_mask))
        # first model and its residual
        ax[1, col].imshow(make_im(first[:, col], test_mask))
        show_im(f_residual[:, col], test_mask, ax=ax[2, col])
        # second model and its residual
        ax[3, col].imshow(make_im(second[:, col], test_mask))
        show_im(s_residual[:, col], test_mask, ax=ax[4, col])
        # residual between the two models
        show_im(fs_residual[:, col], test_mask, ax=ax[5, col])

    plt.show()

Now we compute the flattened `MuyGPyS` conventional solution.

In [None]:
posterior_mean_flat = conventional_mean(
    Kin_flat,
    Kcross_flat,
    train_targets_flat,
)

In [None]:
print(posterior_mean_flat.shape, posterior_mean_analytic.shape)

And finally, visually compare the predictions.
The flat and analytic solutions should be very close, up to ~1e-10 or so.

In [None]:
compare_predictions(test_targets, posterior_mean_flat, posterior_mean_analytic, "flat", "analytic")

## MuyGPyS workflow

Here we'll compare an nn-sparsified MuyGPyS workflow to the conventional GP using the analytic kernel.
The two approaches should converge (up to ~1e-9 precision) when `nn_count == train_count`.
As `nn_count` decreases, the `MuyGPyS` workflow will get faster but will correspondingly drift from the conventional predictions.
The two solutions should still remain visually similar, however.

In [None]:
def get_nn_tensors(nn_count=50):
    """Build the prediction tensors for every test point.

    When ``nn_count`` equals ``train_count`` every test point is assigned all
    training indices in their natural order; otherwise an exact
    nearest-neighbor lookup over ``train_features`` is used.

    Args:
        nn_count: Number of neighbors per test point.

    Returns:
        Tuple ``(crosswise_diffs, pairwise_diffs, x0_features,
        x0_nn_features, nn_targets)``. ``nn_targets`` has its last two axes
        swapped relative to the output of ``make_predict_tensors``.
        ``x0_features`` is the first test point with a leading axis added and
        ``x0_nn_features`` holds that point's neighbor features.
    """
    indices = np.arange(test_count)

    if nn_count != train_count:
        nbrs_lookup = NN_Wrapper(train_features, nn_count, nn_method='exact', algorithm='ball_tree')
        nn_indices, _ = nbrs_lookup.get_nns(test_features)
    else:
        all_train = [np.arange(train_count) for _ in range(test_count)]
        nn_indices = np.array(all_train)

    crosswise_diffs, pairwise_diffs, nn_targets = make_predict_tensors(
        indices,
        nn_indices,
        test_features,
        train_features,
        train_targets,
    )
    nn_targets = nn_targets.swapaxes(-2, -1)

    first = indices[0]
    x0_features = test_features[first][None, ...]
    x0_nn_features = train_features[nn_indices[0]]

    return crosswise_diffs, pairwise_diffs, x0_features, x0_nn_features, nn_targets

### This section consistency checks the various MuyGPyS tensors when `nn_count == train_count`

In [None]:
crosswise_diffs, pairwise_diffs, x0_features, x0_nn_features, x0_nn_targets = get_nn_tensors(nn_count=train_count)

Check that the `nn_targets` agree.

In [None]:
x0_targets_flat = x0_nn_targets.reshape(test_count, 3 * train_count)

In [None]:
print(x0_nn_targets.shape, x0_targets_flat.shape)

In [None]:
# Compare the flattened training targets against the neighbor targets of
# every test point, not only the first one.
np.all([
    np.allclose(train_targets_flat, x0_targets_flat[i])
    for i in range(test_count)
])

Check that the `Kin`s agree.

In [None]:
Kin_test = shear_model.kernel(pairwise_diffs)

In [None]:
print(Kin_test.shape, Kin_flat.shape)

In [None]:
# For every test point, flatten its MuyGPyS kernel block to a square
# (3 * train_count, 3 * train_count) matrix and compare it with the analytic
# train/train kernel.
np.all([
    np.allclose(
        Kin_analytic,
        Kin_test[i].reshape(3 * train_count, 3 * train_count),
    ) for i in range(test_count)
])

Check that the `Kcross`es agree.

In [None]:
Kcross_test = shear_model.kernel(crosswise_diffs)

In [None]:
print(Kcross_test.shape, Kcross_analytic.shape)

In [None]:
# For every test point i, take the (3, 3 * train_count) slice of the analytic
# cross-covariance belonging to that point and compare it with the MuyGPyS
# block, reshaped and transposed to the same layout.
np.all([
    np.allclose(
        np.squeeze(Kcross_analytic.reshape(3, test_count, 3 * train_count)[:, i, :]),
        Kcross_test[i].reshape(3 * train_count, 3).swapaxes(-2, -1),
    ) for i in range(test_count)
])

Here we check to see if the resulting means agree.

In [None]:
# Posterior mean of each test point computed separately from the analytic
# kernels: the train/train kernel is shared, and the cross-covariance is the
# (3, 3 * train_count) slice belonging to test point i.
x0_analytic = np.array([
    conventional_mean(
        Kin_analytic,
        np.squeeze(Kcross_analytic.reshape(3, test_count, 3 * train_count)[:, i, :]),
        train_targets_flat,
    ) for i in range(test_count)
])

In [None]:
# The same per-point posterior mean, computed from the MuyGPyS tensors: each
# test point uses its own kernel block, cross-covariance block, and flattened
# neighbor targets.
x0_test = np.array([
    conventional_mean(
        Kin_test[i].reshape(3 * train_count, 3 * train_count),
        Kcross_test[i].reshape(3 * train_count, 3).swapaxes(-2, -1),
        x0_targets_flat[i],
    ) for i in range(test_count)
])

In [None]:
np.allclose(x0_analytic, x0_test)

Here we define the full `MuyGPyS` workflow.
There are a lot of unnecessary internal checks and prints that were used for debugging.

In [None]:
def muygps_mean_workflow(nn_count=50):
    """Compute the MuyGPyS posterior mean and print consistency diagnostics.

    Builds the nearest-neighbor tensors, evaluates the shear kernel on them,
    and computes the posterior mean with ``shear_model``. For the first test
    point it also recomputes the prediction with ``conventional_mean``, from
    both the flattened MuyGPyS tensors and the analytic kernel, and prints
    shapes and agreement checks.

    Args:
        nn_count: Number of nearest neighbors per test point.

    Returns:
        The posterior mean returned by ``shear_model.posterior_mean``.
    """
    crosswise_diffs, pairwise_diffs, x0_features, x0_nn_features, nn_targets = get_nn_tensors(nn_count=nn_count)

    Kcross = shear_model.kernel(crosswise_diffs)
    Kin = shear_model.kernel(pairwise_diffs)

    print(pairwise_diffs.shape, Kin.shape)

    # Flattened per-test-point views of the MuyGPyS tensors.
    Kin_flat = Kin.reshape(test_count, 3 * nn_count, 3 * nn_count)
    Kcross_flat = Kcross.reshape(test_count, 3 * nn_count, 3)
    nn_targets_flat = nn_targets.reshape(test_count, 3 * nn_count)

    # Analytic kernels for the first test point and its neighbors.
    Kin_an = original_shear(
        x0_nn_features,
        length_scale=length_scale,
    )
    Kcross_an = original_shear(
        x0_features,
        x0_nn_features,
        length_scale=length_scale,
    )

    # here we are consistency checking the tensors of each implementation
    print(f"Kin.shape = {Kin.shape}")
    print(f"Kcross.shape = {Kcross.shape}")
    print(f"nn_targets.shape = {nn_targets.shape}")
    print("----------")
    print(f"Kin_flat.shape = {Kin_flat.shape}")
    print(f"Kcross_flat.shape = {Kcross_flat.shape}")
    print(f"nn_targets_flat.shape = {nn_targets_flat.shape}")
    print("----------")
    print(f"Kin_an.shape = {Kin_an.shape}")
    print(f"Kcross_an.shape = {Kcross_an.shape}")
    print("----------")
    print("----------")
    print(f"Kin_flat[0] == Kin_an? {np.allclose(Kin_flat[0], Kin_an)}")
    print(f"Kcross_flat[0] == Kcross_an? {np.allclose(Kcross_flat[0], Kcross_an.swapaxes(-2, -1))}")

    mean = shear_model.posterior_mean(Kin, Kcross, nn_targets)

    # Spot check: recompute the prediction for the first test point with the
    # conventional solve, using each implementation's tensors.
    mean_flat = np.squeeze(conventional_mean(
        Kin_flat[0],
        Kcross_flat[0].swapaxes(-2, -1),
        nn_targets_flat[0],
    ))
    mean_an = np.squeeze(conventional_mean(
        Kin_an,
        Kcross_an,
        nn_targets_flat[0],
    ))

    print("----------")
    print("----------")
    print(f"mean.shape = {mean.shape}")
    print(f"mean_flat.shape = {mean_flat.shape}")
    print(f"mean_an.shape = {mean_an.shape}")
    print("----------")
    print(f"mean_flat == mean_an? {np.allclose(mean_flat, mean_an)}")
    print(f"mean[0] == mean_flat? {np.allclose(mean[0], mean_flat)}")
    print(f"mean[0] == mean_an? {np.allclose(mean[0], mean_an)}")

    print("----------")
    print("----------")
    # Print the same prediction (first test point, all three components)
    # from each solution. The conventional means are laid out as
    # (test_count, 3), so row 0 is the counterpart of mean[0].
    print(mean[0])
    print(posterior_mean_analytic[0])
    print(posterior_mean_flat[0])

    return mean

Here we compute the MuyGPs posterior mean.
If `nn_count == train_count`, the results should agree with the analytic/flat solutions.
Smaller `nn_count`s will drift (as expected).

In [None]:
posterior_mean_muygps = muygps_mean_workflow(nn_count=50)

Check numerically if things are close.

In [None]:
np.allclose(posterior_mean_analytic, posterior_mean_muygps)

Check the mean error

In [None]:
np.mean(np.abs(posterior_mean_analytic - posterior_mean_muygps))

In [None]:
print(posterior_mean_analytic.shape, posterior_mean_muygps.shape)

Finally, we compare the `MuyGPyS` predictions to the conventional GP predictions.

In [None]:
compare_predictions(test_targets, posterior_mean_muygps, posterior_mean_analytic, "MuyGPyS", "Analytic")

# Posterior Variance Tests

Want to do similar analysis as above for the "conventional" posterior variance, which is defined as
$$ \mathrm{Var}(\hat{Y}(X^*|X)) = K_{\theta}(X^*,X^*) - K_{\theta}(X^*,X)[K_{\theta}(X,X) + \epsilon]^{-1}K_{\theta}(X,X^*) $$

In [None]:
def conventional_variance(Kin, Kcross, Kin_test, noise=noise_prior):
    """Compute the dense GP posterior covariance from flattened kernels.

    Evaluates ``Kin_test - Kcross @ (Kin + noise * I)^{-1} @ Kcross.T``.

    Args:
        Kin: Square kernel matrix of the training data.
        Kcross: Cross-covariance matrix between test and training data.
        Kin_test: Square kernel matrix of the test data.
        noise: Value added to the diagonal of ``Kin``.

    Returns:
        Matrix with the same shape as ``Kin_test``.
    """
    regularized = Kin + noise * np.eye(Kin.shape[0])
    explained = Kcross @ np.linalg.solve(regularized, Kcross.T)
    return Kin_test - explained

Construct the `K*` tensors.
Using the `train` and `test` tensors from above.

In [None]:
# Analytic kernels needed for the conventional variance:
# K(X, X) on the training data, K(X*, X) between test and training data,
# and K(X*, X*) on the test data.
Kin_an = original_shear(train_features, length_scale=length_scale)
Kcross_an = original_shear(test_features, train_features, length_scale=length_scale)
Kin_test_an = original_shear(test_features, length_scale=length_scale)

print("----------")
print("Kin_an.shape = ", Kin_an.shape)
print("Kcross_an.shape = ", Kcross_an.shape)
print("Kin_test_an.shape = ", Kin_test_an.shape)
print("----------")

In [None]:
nn_count = 50

# Train/train kernel, flattened to a square matrix.
pairwise_diffs = _pairwise_differences(train_features)
Kin_muygps = shear_model.kernel(pairwise_diffs, adjust=False)
Kin_muygps_flat = Kin_muygps.reshape(3 * train_count, 3 * train_count)

# Test/train cross-covariance, flattened to a rectangular matrix.
crosswise_diffs = _crosswise_differences(test_features, train_features)
Kcross_muygps = shear_model.kernel(crosswise_diffs, adjust=False)
Kcross_muygps_flat = Kcross_muygps.reshape(3 * test_count, 3 * train_count)

# Test/test kernel, flattened to a square matrix.
pairwise_diffs_test = _pairwise_differences(test_features)
Kin_test_muygps = shear_model.kernel(pairwise_diffs_test, adjust=False)
Kin_test_muygps_flat = Kin_test_muygps.reshape(3 * test_count, 3 * test_count)

# Note that the shapes printed below are those of the flattened matrices.
print("----------")
print("Kin_muygps.shape = ", Kin_muygps_flat.shape)
print("Kcross_muygps.shape = ", Kcross_muygps_flat.shape)
print("Kin_test_muygps.shape = ", Kin_test_muygps_flat.shape)
print("----------")


In [None]:
conventional_var_analytic_flat = conventional_variance(
        Kin_an, 
        Kcross_an, 
        Kin_test_an,
    )
conventional_var_muygps_flat = conventional_variance(
        Kin_muygps_flat, 
        Kcross_muygps_flat, 
        Kin_test_muygps_flat,
    )

In [None]:
# Compare the conventional variance computed from the analytic kernels with
# the one computed from the flattened MuyGPyS kernels.
residual = np.abs(conventional_var_analytic_flat - conventional_var_muygps_flat)

fig, ax = plt.subplots(1, 3, figsize=(14, 4))
left, middle, right = ax

left.set_title("Analytic Variance")
left.imshow(conventional_var_analytic_flat)

middle.set_title("Flat MuyGPyS Variance")
middle.imshow(conventional_var_muygps_flat)

right.set_title("|Analytic - Flat MuyGPyS|")
im = right.imshow(residual)
fig.colorbar(im, ax=right)

print("Min Resid = ", np.min(residual), ", Max Resid = ", np.max(residual), ", Avg Residual = ", np.mean(residual))


Need to unflatten the variances and extract the diagonals from each "block".

In [None]:
# View the flat (3 * test_count, 3 * test_count) covariance as a
# (3, test_count, 3, test_count) tensor, so that [i, :, j, :] is the block
# coupling component i with component j. The size comes from `test_count`
# rather than a hard-coded 500, so the cell follows the train/test split.
conventional_var_analytic_unflat = conventional_var_analytic_flat.reshape(
    3, test_count, 3, test_count
)

Make sure things are correct and that reshaping is consistent.
First, construct a `posterior_var` in the analytic framework that should be the analog of `shear_model.posterior_variance`.

In [None]:
# Keep only the diagonal of each (component i, component j) block, giving one
# 3x3 matrix per test point. Sized by `test_count` instead of a hard-coded
# 500 so the cell follows the train/test split.
posterior_var_an = np.zeros((test_count, 3, 3))
for i in range(3):
    for j in range(3):
        posterior_var_an[:, i, j] = np.diagonal(conventional_var_analytic_unflat[i, :, j, :])

print(posterior_var_an.shape)


In [None]:
# reconstruct the filled block version
block_posterior_var_an = np.zeros((3,500,3,500))
for i in range(3):
    for j in range(3):
        block_posterior_var_an[i, :, j, :] = (np.diag(posterior_var_an[:,i,j]))
print(block_posterior_var_an.shape)

The diagonals in each block of the residual *should* be zero if everything was reshaped consistently.

In [None]:
# Side length of the flattened covariance, derived from the actual test-set
# size instead of a hard-coded 1500.
flat_side = 3 * test_count
block_posterior_var_an_flat = block_posterior_var_an.reshape(flat_side, flat_side)

fig, ax = plt.subplots(1, 3, figsize=(14, 4))

ax[0].imshow(conventional_var_analytic_flat, norm=LogNorm())
ax[0].set_title("Flat Analytic")

ax[1].imshow(block_posterior_var_an_flat, norm=LogNorm())
ax[1].set_title("Block Posterior Var An")

resid = np.abs(conventional_var_analytic_flat - block_posterior_var_an_flat)
print("----------")
print("Min Resid = ", np.min(resid))
print("----------")
im = ax[2].imshow(resid, norm=LogNorm())
fig.colorbar(im, ax=ax[2])
ax[2].set_title("Residual")

In [None]:
# Collect the diagonal of every (i, j) block of the residual. Block
# boundaries are derived from `test_count` instead of a hard-coded 500.
resid_diags = np.zeros((test_count, 3, 3))
for i in range(3):
    for j in range(3):
        block = resid[
            test_count * i : test_count * (i + 1),
            test_count * j : test_count * (j + 1),
        ]
        resid_diags[:, i, j] = np.diagonal(block)

print(
    "Min=", np.min(resid_diags),
    ", Max=",
    np.max(resid_diags),
    ", Avg=",
    np.mean(resid_diags))

These are very tiny. 
Is this just numerical junk that I can consider acceptably close to zero (ie, that the diagonals have been properly extracted)?

Now compare the analytic versus the `MuyGPS.posterior_variance`. 
Create similar function to the above `muygps_mean_workflow`.

In [None]:
def muygps_variance_workflow(nn_count = 50):
    """Compare the MuyGPyS posterior variance with the conventional one.

    The conventional variance is computed from the analytic kernels, and the
    diagonal of each component block is extracted to give one 3x3 matrix per
    test point. The MuyGPyS variance is computed with
    ``shear_model.posterior_variance`` on the nearest-neighbor tensors.
    Shapes and residual statistics are printed.

    Args:
        nn_count: Number of nearest neighbors per test point.

    Returns:
        The posterior variance returned by
        ``shear_model.posterior_variance``.
    """
    train_features = features[train_mask, :]
    test_features = features[test_mask, :]
    # Size of the test set; used in place of a hard-coded 500 so that the
    # workflow follows the train/test split.
    n_test = test_features.shape[0]

    # analytic version for comparison
    Kin_an = original_shear(
        train_features,
        length_scale=length_scale,
    )
    Kcross_an = original_shear(
        test_features,
        train_features,
        length_scale=length_scale,
    )
    # K(X*, X*) on the test data
    Kin_test_an = original_shear(
        test_features,
        length_scale=length_scale
        )

    print("----------")
    print("Kin_an.shape = ", Kin_an.shape)
    print("Kcross_an.shape = ", Kcross_an.shape)
    print("Kin_test_an.shape = ", Kin_test_an.shape)
    print("----------")

    conventional_var_analytic_flat = conventional_variance(
        Kin_an,
        Kcross_an,
        Kin_test_an,
    )

    # Keep the diagonal of each (component i, component j) block.
    posterior_var_an = np.zeros((n_test, 3, 3))
    for i in range(3):
        for j in range(3):
            block = conventional_var_analytic_flat[
                n_test * i : n_test * (i + 1),
                n_test * j : n_test * (j + 1),
            ]
            posterior_var_an[:, i, j] = np.diagonal(block)

    #---------------------------
    # MuyGPyS version
    #---------------------------

    # Construct the same matrices as in the mean workflow
    crosswise_diffs, pairwise_diffs, _, _, _ = get_nn_tensors(nn_count=nn_count)

    Kcross_muygps = shear_model.kernel(crosswise_diffs)
    Kin_muygps = shear_model.kernel(pairwise_diffs)

    print("----------")
    print("Kin_muygps.shape = ", Kin_muygps.shape)
    print("Kcross_muygps.shape = ", Kcross_muygps.shape)

    posterior_var_muygps = shear_model.posterior_variance(Kin_muygps, Kcross_muygps)

    print("----------")
    print("----------")
    print("muygps_variance.shape = ", posterior_var_muygps.shape)
    print("analytic_variance.shape = ", posterior_var_an.shape)

    print("----------")
    print("----------")
    print("muygps and analytic close?", np.allclose(posterior_var_an, posterior_var_muygps))

    var_residual = np.abs(posterior_var_an - posterior_var_muygps)
    print("----------")
    print("Min Resid = ", np.min(var_residual), ", Max Resid = ", np.max(var_residual), ", Avg Residual = ", np.mean(var_residual))
    print("----------")

    return posterior_var_muygps

In [None]:
posterior_var_muygps = muygps_variance_workflow(nn_count=train_count)

Compare the `posterior_var_muygps` to the `block_posterior_var_an`.
Initially looks like the residuals vary dramatically, but the shapes are the same for the `analytic` and `muygps` variance. 
Check visually if things are organized in a consistent way.

In [None]:
# convert to diagonal matrices
block_posterior_var_muygps = np.zeros((3,500,3,500))
for i in range(3):
    for j in range(3):
        block_posterior_var_muygps[i,:,j,:] = np.diag(posterior_var_muygps[:,i,j])


In [None]:
# Side length of the flattened block matrices, derived from the actual
# test-set size instead of a hard-coded 1500.
flat_side = 3 * test_count
block_an_flat = block_posterior_var_an.reshape(flat_side, flat_side)
block_muygps_flat = block_posterior_var_muygps.reshape(flat_side, flat_side)

fig, ax = plt.subplots(1, 3, figsize=(14, 4))

ax[0].imshow(block_an_flat, norm=LogNorm())
ax[0].set_title("Analytic")

ax[1].imshow(block_muygps_flat, norm=LogNorm())
ax[1].set_title("MuyGPyS")

var_residual = np.abs(block_an_flat - block_muygps_flat)
print("----------")
print("Max Resid = ", np.max(var_residual))
print("----------")
im = ax[2].imshow(var_residual, norm=LogNorm())
fig.colorbar(im, ax=ax[2])
ax[2].set_title("Residual")



# Optimization Test

The next step is to test the optimizer on the mock data generated above.
Recall that the mock data are sampled with a GP given a `length_scale`, which means that if we run hyperparameter optimization, we *should* recover this length scale if the optimizer is working properly.

First, create the train tensors.

In [None]:
from MuyGPyS.gp.tensors import crosswise_tensor, pairwise_tensor
from MuyGPyS.optimize.batch import sample_batch
from MuyGPyS.optimize import Bayes_optimize
from MuyGPyS.optimize.loss import lool_fn, looph_fn, mse_fn
from MuyGPyS.gp.hyperparameter import AnalyticScale

In [None]:
print(train_targets.shape, train_features.shape)


In [None]:
# Model to optimize. The length scale is given a value plus a two-element
# list (presumably the optimization bounds; confirm against the Parameter
# documentation).
shear_model = MuyGPS(
    kernel=ShearKernel(
        deformation=Isotropy(
            F2,
            length_scale=Parameter(0.5, [0.3, 0.7]),
        ),
    ),
    noise=HomoscedasticNoise(1e-7),
    scale=AnalyticScale(),
)

train_features_count = train_features.shape[0]

# Exact nearest-neighbor lookup over the training features.
nn_count = 50
nbrs_lookup = NN_Wrapper(train_features, nn_count, nn_method='exact', algorithm='ball_tree')

# Sample a batch of training points together with their neighbors.
batch_count = 500
batch_indices, batch_nn_indices = sample_batch(
    nbrs_lookup, batch_count, train_features_count
)

# Difference tensors for the batch: each batch point against its neighbors
# (crosswise) and the neighbors against each other (pairwise).
batch_crosswise_diffs = crosswise_tensor(
    train_features,
    train_features,
    batch_indices,
    batch_nn_indices,
)
batch_pairwise_diffs = pairwise_tensor(train_features, batch_nn_indices)



In [None]:
# Targets of the batch points, and of their neighbors with the last two axes
# swapped.
batch_targets = train_targets[batch_indices]
batch_nn_targets = train_targets[batch_nn_indices].swapaxes(-2, -1)

See if the optimization correctly predicts the length scale with the `mse` loss fn.

In [None]:
# Run Bayesian optimization of the model's hyperparameters on the sampled
# batch, using the mean-squared-error loss. The sampled targets were drawn
# with `length_scale`, so the optimizer is expected to recover a value close
# to it.
shear_mse_optimized = Bayes_optimize(
    shear_model,
    batch_targets,
    batch_nn_targets,
    batch_crosswise_diffs,
    batch_pairwise_diffs,
    train_targets,
    loss_fn=mse_fn,
    verbose=True,
    init_points=5,
    n_iter=20,
)

In [None]:
# Nearest training neighbors of every test point.
test_features_count = test_features.shape[0]
indices = np.arange(test_features_count)
test_nn_indices, _ = nbrs_lookup.get_nns(test_features)

# Difference tensors and neighbor targets used for prediction.
test_crosswise_diffs, test_pairwise_diffs, test_nn_targets = make_predict_tensors(
    indices,
    test_nn_indices,
    test_features,
    train_features,
    train_targets,
)

# Swap the last two axes of the neighbor targets, as done for the batch
# targets used during optimization.
test_nn_targets = test_nn_targets.swapaxes(-2, -1)

In [None]:
# Evaluate the optimized model's kernel on the test tensors and compute its
# posterior mean.
Kcross = shear_mse_optimized.kernel(test_crosswise_diffs)
Kin = shear_mse_optimized.kernel(test_pairwise_diffs)
posterior_mean_muygps_optimized = shear_mse_optimized.posterior_mean(Kin, Kcross, test_nn_targets)

In [None]:
compare_predictions(test_targets, posterior_mean_muygps_optimized, posterior_mean_analytic, "Optimized MuyGPyS", "Analytic")