Copyright 2023-2023 Lawrence Livermore National Security, LLC and other MuyGPyS
Project Developers. See the top-level COPYRIGHT file for details.

SPDX-License-Identifier: MIT

# Shear Kernel Tutorial

This notebook demonstrates how to use the specialized lensing shear kernel (hard-coded to RBF at the moment).

⚠️ _Note that this is still an experimental feature._ ⚠️

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.colors import LogNorm
from MuyGPyS._src.gp.tensors import _crosswise_differences, _pairwise_differences
from MuyGPyS.gp import MuyGPS
from MuyGPyS.gp.deformation import Isotropy, l2, F2
from MuyGPyS.gp.hyperparameter import Parameter
from MuyGPyS.gp.kernels.experimental import ShearKernel
from MuyGPyS.gp.tensors import make_predict_tensors
from MuyGPyS.neighbors import NN_Wrapper
from MuyGPyS.gp.noise import HomoscedasticNoise

This is required to import the implementation from Bob Armstrong's original repository.
Must set `shear_kernel_dir = "path/to/local/shear_kernel/"` for things to run properly.

In [None]:
import importlib.util
import sys
# NOTE(review): torch is not referenced in any visible cell; presumably it is
# needed by analytic_kernel.py itself — confirm before removing.
import torch
# introduce a variable for path/to/shear_kernel
shear_kernel_dir = "../../../projects/shear_kernel/"
# Build an import spec that points at analytic_kernel.py inside shear_kernel_dir.
spec_analytic = importlib.util.spec_from_file_location("analytic_kernel", shear_kernel_dir + "analytic_kernel.py") 
bar = importlib.util.module_from_spec(spec_analytic)
# Register the module under "analytic_kernel" before executing it, so that the
# `from analytic_kernel import ...` statement below can resolve it.
sys.modules["analytic_kernel"] = bar
spec_analytic.loader.exec_module(bar)
from analytic_kernel import shear_kernel

We will set a random seed here for consistency when building docs.
In practice we would not fix a seed.

In [None]:
np.random.seed(0)

Here we build some simple data, which is meant to represent a grid of sky coordinates.

In [None]:
# Side length of the square grid; the grid holds n * n galaxies in total.
n = 25
xmin, xmax = 0, 1
ymin, ymax = 0, 1

# Evenly spaced coordinates along each axis.
xx = np.linspace(xmin, xmax, num=n)
yy = np.linspace(ymin, ymax, num=n)

# Expand the two axes into a full grid and store one (x, y) row per grid point.
x, y = np.meshgrid(xx, yy)
features = np.vstack((x.ravel(), y.ravel())).T
data_count = len(features)
# Pairwise difference tensor of the features, used as the kernel input below.
diffs = _pairwise_differences(features)
length_scale = 0.5

Use an Isotropic distance functor.

In [None]:
# Isotropic deformation using the F2 metric and the length scale chosen above.
dist_fn = Isotropy(metric=F2, length_scale=Parameter(length_scale))

Here we construct a shear value kernel (partial differential components of RBF), as well as the original RBF kernel using Bob's implementation.

In [None]:
def original_shear(X1, X2=None, length_scale=1.0):
    """Assemble the dense shear covariance matrix with the reference kernel.

    Evaluates `shear_kernel` on every pair of points drawn from `X1` and `X2`
    and scatters each result, indexed as `[a, b]` for `a, b` in `0..2`, into a
    3-by-3 arrangement of `(n1, n2)` blocks.

    Args:
        X1: Two-dimensional array; each row is unpacked as an `(x, y)` pair.
        X2: Two-dimensional array of `(x, y)` rows. Defaults to `X1`.
        length_scale: Forwarded to `shear_kernel` as its `b` argument.

    Returns:
        A `(3 * n1, 3 * n2)` NumPy array whose entry
        `[a * n1 + i, b * n2 + j]` holds component `[a, b]` of the kernel
        evaluated at `X1[i]` and `X2[j]`.
    """
    if X2 is None:
        X2 = X1
    rows, _ = X1.shape
    cols, _ = X2.shape
    # Start from NaN so any entry the loops fail to write stays visible.
    cov = np.full((3 * rows, 3 * cols), np.nan)
    for i, (x1, y1) in enumerate(X1):
        for j, (x2, y2) in enumerate(X2):
            block = shear_kernel(x1, y1, x2, y2, b=length_scale)
            # Scatter the 3x3 component block into the block-structured output.
            for a, b in np.ndindex(3, 3):
                cov[a * rows + i, b * cols + j] = block[a, b]
    return cov

In [None]:
Kin_analytic = original_shear(features, length_scale=length_scale)

Here we do the same using the MuyGPyS implementation. Note the increased efficiency.

In [None]:
Kin_muygps = ShearKernel(deformation=dist_fn)(diffs)

`Kin_muygps` is a more generalized tensor, so we need to flatten it to a conforming shape.

In [None]:
Kin_muygps_flat = Kin_muygps.reshape(data_count * 3, data_count * 3)

In [None]:
print(f"Kin_analytic.shape = {Kin_analytic.shape}")
print(f"Kin_muygps.shape = {Kin_muygps.shape}")
print(f"Kin_muygps_flat.shape = {Kin_muygps_flat.shape}")

Do the two implementations agree?

In [None]:
np.allclose(Kin_analytic, Kin_muygps_flat)

In [None]:
residual = np.abs(Kin_analytic - Kin_muygps_flat)
print(f"residual max: {np.max(residual)}, min: {np.min(residual)}, mean : {np.mean(residual)}")

Plot results of the baseline and internal implementations. 

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
# Left and middle panels: the two dense kernel matrices side by side.
for axis, title, matrix in zip(
    axes,
    ("original shear kernel", "MuyGPyS shear kernel"),
    (Kin_analytic, Kin_muygps_flat),
):
    axis.set_title(title)
    axis.imshow(matrix)
# Right panel: absolute difference on a logarithmic color scale.
axes[2].set_title("Residual")
im = axes[2].imshow(residual, norm=LogNorm())
fig.colorbar(im, ax=axes[2])
plt.show()

Now we perform a similar analysis for the cross-covariance.

In [None]:
# Split the grid points into two disjoint sets at row `split`.
split = 200
X1, X2 = features[:split], features[split:]
n1, n2 = len(X1), len(X2)
# Crosswise difference tensor between the two sets, used as the kernel input.
crosswise_diffs = _crosswise_differences(X1, X2)
print(X1.shape, X2.shape, crosswise_diffs.shape)

In [None]:
Kcross_analytic = original_shear(X1, X2, length_scale=length_scale)

In [None]:
Kcross_muygps = ShearKernel(deformation=dist_fn)(crosswise_diffs, adjust=False)

In [None]:
Kcross_muygps_flat = Kcross_muygps.reshape(n1 * 3, n2 * 3)

In [None]:
print(f"Kcross_analytic.shape = {Kcross_analytic.shape}")
print(f"Kcross_muygps.shape = {Kcross_muygps.shape}")
print(f"Kcross_muygps_flat.shape = {Kcross_muygps_flat.shape}")

In [None]:
np.allclose(Kcross_analytic, Kcross_muygps_flat)

In [None]:
cross_residual = np.abs(Kcross_analytic - Kcross_muygps_flat)
print(f"residual max: {np.max(cross_residual)}, min: {np.min(cross_residual)}, mean : {np.mean(cross_residual)}")

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
# Left and middle panels: the two cross-covariance matrices side by side.
for axis, title, matrix in zip(
    axes,
    ("original shear kernel", "MuyGPyS shear kernel"),
    (Kcross_analytic, Kcross_muygps_flat),
):
    axis.set_title(title)
    axis.imshow(matrix)
# Right panel: absolute difference on a logarithmic color scale.
axes[2].set_title("Residual")
im = axes[2].imshow(cross_residual, norm=LogNorm())
fig.colorbar(im, ax=axes[2])
plt.show()

Runtime comparison of the two implementations:

In [None]:
# Benchmark cell, switched off by the constant-false guard; change the guard to
# True to time both implementations. `%timeit` is an IPython magic, so this
# only runs inside a notebook/IPython session.
# NOTE(review): presumably disabled to keep the docs build fast — confirm.
if False:
    %timeit original_shear(features)
    %timeit ShearKernel(deformation=dist_fn)(diffs)

Now we will test the `posterior_mean` of the analytic and muygps implementations.
First, we set up an arbitrary target array.
Targets should be square matrices like a grid of a swath of sky.
Ultimately, the target array will have shape `(625,3)`, given `n=25` above.

In [None]:
# Coordinate grids over [1, 10] in matrix ("ij") index order.
targets_grid_x, targets_grid_y = np.meshgrid(
    np.linspace(1, 10, n), np.linspace(1, 10, n), indexing="ij"
)
# Elementwise product of the two coordinate grids.
targets_grid = np.multiply(targets_grid_x, targets_grid_y)

In [None]:
# One row per target component: the flattened grid, followed by its
# rotations with k=3 and k=1 quarter turns.
targets = np.vstack(
    [
        targets_grid.ravel(),
        np.rot90(targets_grid, k=3).ravel(),
        np.rot90(targets_grid, k=1).ravel(),
    ]
)

In [None]:
print(np.hstack(targets).shape)

Normalize the data to a unit hypercube.

In [None]:
# Min-max scale with the global extrema, so all three rows share one scale.
targets_norm = (targets - targets.min()) / (targets.max() - targets.min())
print(targets_norm.min(), targets_norm.max(), targets_norm.shape)

In [None]:
# Show each of the three target components as an n-by-n image, one per row.
fig, ax = plt.subplots(3, 1, figsize=(4, 12))
# The raw string keeps the LaTeX "\kappa" intact without relying on the
# invalid "\k" escape, which newer Python versions warn about.
for row, label in enumerate((r"$\kappa$", "g1", "g2")):
    ax[row].imshow(targets[row, :].reshape(n, n))
    ax[row].set_ylabel(label, fontsize=15)
# Render explicitly, as the other plotting cells do.
plt.show()

Explicitly define the target matrices.
Also add a leading singleton dimension to `targets_muygps` for things to work.

In [None]:
# The analytic path consumes the normalized targets as they are.
targets_analytic = targets_norm
# The MuyGPyS copy gets an extra length-1 axis in front.
targets_muygps = np.expand_dims(targets_norm, axis=0)
print(targets_analytic.shape, targets_muygps.shape)

Having defined the `K*` matrices above, compare posterior means, sans-optimization. 
This comes from (3.4) in Muyskens et al. (2021), which looks like:
$$ \hat{Y}(X^*|X) = K_{\theta}(X^*,X)(K_{\theta}(X,X)+\epsilon)^{-1}Y(X) $$
where $\epsilon = \sigma^2 I$ with $\sigma^2$ being the noise variance.

Analytic: for the analytic implementation, I'll do things with the full "flattened" difference tensors.

In [None]:
# Cross-covariance of the full feature set against itself. `original_shear`
# substitutes X1 for a missing X2, so this repeats the evaluation that produced
# `Kin_analytic` (same inputs and length scale); it is recomputed here under the
# cross-covariance name used by the posterior-mean formula.
Kcross_analytic_full = original_shear(features, features, length_scale=length_scale)

In [None]:
print(Kcross_analytic_full.shape, Kin_analytic.shape, targets_analytic.shape)

In [None]:
# Posterior mean K(X*, X) (K(X, X) + sigma^2 I)^{-1} Y(X) with noise variance
# sigma^2 = 1e-4. The linear system is solved directly rather than forming the
# explicit inverse, which is both cheaper and numerically more stable.
# `np.hstack` concatenates the three target rows into one length-3*n**2 vector,
# and the result is folded back into one row per component.
posterior_mean_analytic = np.matmul(
    Kcross_analytic_full,
    np.linalg.solve(
        Kin_analytic + 1e-4 * np.identity(n**2 * 3),
        np.hstack(targets_analytic),
    ),
).reshape(3, n**2)

In [None]:
posterior_mean_analytic.shape

MuyGPyS implementation using nearest neighbors.

In [None]:
# MuyGPS model pairing the shear kernel (isotropic F2 deformation with the
# length scale chosen above) with homoscedastic noise of 1e-4.
shear_model = MuyGPS(
    kernel=ShearKernel(
        deformation=Isotropy(metric=F2, length_scale=Parameter(length_scale)),
    ),
    noise=HomoscedasticNoise(1e-4),
)

In [None]:
# Neighbor count handed to the nearest-neighbor lookup built over all features.
nn_count = 50
nbrs_lookup = NN_Wrapper(features, nn_count, nn_method='exact', algorithm='ball_tree')

features_count = features.shape[0]

# Every feature row is used as a prediction location.
indices = np.arange(features_count)
# NOTE(review): the query set is the indexed set itself, so each point is
# presumably returned as its own nearest neighbor — confirm whether
# `get_batch_nns(indices)` (non-self neighbors) was intended here.
test_nn_indices, _ = nbrs_lookup.get_nns(features)

# Note that this rebinds `crosswise_diffs`, replacing the value computed for
# the X1/X2 split earlier in the notebook.
(
    crosswise_diffs,
    pairwise_diffs,
    nn_targets,
) = make_predict_tensors(
    indices,
    test_nn_indices,
    features,
    features,
    # Targets are passed with their two axes swapped relative to `targets_norm`.
    targets_norm.swapaxes(0,1),
)

# Swap the last two axes of the neighbor targets before the posterior mean.
# NOTE(review): presumably the layout ShearKernel's posterior mean expects — confirm.
nn_targets= nn_targets.swapaxes(-2, -1)

Kcross = shear_model.kernel(crosswise_diffs)
K = shear_model.kernel(pairwise_diffs)

# Swap the first two axes of the result so that it can be indexed like
# `posterior_mean_analytic` in the comparisons below.
posterior_mean_muygps = shear_model.posterior_mean(K, Kcross, nn_targets).swapaxes(0,1)

Check numerically if things are close.

In [None]:
np.allclose(posterior_mean_analytic, posterior_mean_muygps)

In [None]:
# Absolute difference between the two posterior means, per component.
posterior_residual = np.abs(posterior_mean_muygps - posterior_mean_analytic)

fig, ax = plt.subplots(3, 3, figsize=(10, 10))

# Columns: MuyGPyS result, analytic result, and their residual.
ax[0, 0].set_title("MuyGPyS Posterior Mean")
ax[0, 1].set_title("Analytic Posterior Mean")
ax[0, 2].set_title("Residual")

# One row per component. The raw string keeps the LaTeX "\kappa" intact without
# relying on the invalid "\k" escape, which newer Python versions warn about.
for row, label in enumerate((r"$\kappa$", "g1", "g2")):
    ax[row, 0].set_ylabel(label, fontsize=15)
    ax[row, 0].imshow(posterior_mean_muygps[row, :].reshape(n, n))
    ax[row, 1].imshow(posterior_mean_analytic[row, :].reshape(n, n))
    # Residuals are shown on a logarithmic color scale with their own colorbar.
    im = ax[row, 2].imshow(
        posterior_residual[row, :].reshape(n, n), norm=LogNorm()
    )
    fig.colorbar(im, ax=ax[row, 2])
plt.show()

The residual between the two posterior means is very large.
Check whether the MuyGPyS posterior mean is even close to the initial targets.

In [None]:
muygps_residual = np.abs(posterior_mean_muygps - targets_norm)
print(muygps_residual.shape)

In [None]:
fig, ax = plt.subplots(3, 3, figsize=(10, 10))

# Columns: normalized targets, MuyGPyS result, and their residual.
ax[0, 0].set_title("Truth")
ax[0, 1].set_title("MuyGPyS Posterior Mean")
ax[0, 2].set_title("Residual")

# The kappa residual keeps the default linear color scale, while g1 and g2 use
# a logarithmic one.
# NOTE(review): presumably because the kappa residual can be exactly zero,
# which a log scale cannot display — confirm.
residual_norms = (None, LogNorm(), LogNorm())

# One row per component. The raw string keeps the LaTeX "\kappa" intact without
# relying on the invalid "\k" escape, which newer Python versions warn about.
for row, label in enumerate((r"$\kappa$", "g1", "g2")):
    ax[row, 0].set_ylabel(label, fontsize=15)
    ax[row, 0].imshow(targets_norm[row, :].reshape(n, n))
    ax[row, 1].imshow(posterior_mean_muygps[row, :].reshape(n, n))
    im = ax[row, 2].imshow(
        muygps_residual[row, :].reshape(n, n), norm=residual_norms[row]
    )
    fig.colorbar(im, ax=ax[row, 2])
plt.show()

print("Are the predictions close? ", np.allclose(targets_norm, posterior_mean_muygps), f" min: {np.min(muygps_residual)}, max: {np.max(muygps_residual)}")

In [None]:
np.allclose(posterior_mean_muygps[0,], targets_norm[0,])

The Kappa prediction looks good, but the other predictions... don't.
How do the analytic ones look?

In [None]:
# Absolute difference between the analytic posterior mean and the targets.
analytic_residual = np.abs(posterior_mean_analytic - targets_norm)

fig, ax = plt.subplots(3, 3, figsize=(10, 10))

# Columns: normalized targets, analytic result, and their residual.
ax[0, 0].set_title("Truth")
ax[0, 1].set_title("Analytic Posterior Mean")
ax[0, 2].set_title("Residual")

# One row per component. The raw string keeps the LaTeX "\kappa" intact without
# relying on the invalid "\k" escape, which newer Python versions warn about.
for row, label in enumerate((r"$\kappa$", "g1", "g2")):
    ax[row, 0].set_ylabel(label, fontsize=15)
    ax[row, 0].imshow(targets_norm[row, :].reshape(n, n))
    ax[row, 1].imshow(posterior_mean_analytic[row, :].reshape(n, n))
    # Residuals are shown on a logarithmic color scale with their own colorbar.
    im = ax[row, 2].imshow(
        analytic_residual[row, :].reshape(n, n), norm=LogNorm()
    )
    fig.colorbar(im, ax=ax[row, 2])
plt.show()

print("Are the predictions close? ", np.allclose(targets_norm, posterior_mean_analytic), f" min: {np.min(analytic_residual)}, max: {np.max(analytic_residual)}")