Copyright 2023-2023 Lawrence Livermore National Security, LLC and other MuyGPyS
Project Developers. See the top-level COPYRIGHT file for details.

SPDX-License-Identifier: MIT

# Shear Kernel Tutorial

This notebook demonstrates how to use the specialized lensing shear kernel (hard-coded to RBF at the moment).

⚠️ _Note that this is still an experimental feature._ ⚠️

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.colors import LogNorm
from MuyGPyS._src.gp.tensors import _crosswise_differences, _pairwise_differences
from MuyGPyS.gp import MuyGPS
from MuyGPyS.gp.deformation import Isotropy, l2, F2
from MuyGPyS.gp.hyperparameter import Parameter
from MuyGPyS.gp.kernels.experimental import ShearKernel

This is required to import the implementation from Bob Armstrong's original repository.
Must set `shear_kernel_dir = "path/to/local/shear_kernel/"` for things to run properly.

In [None]:
import importlib.util
import sys
import torch
# Location of the local checkout of the reference shear_kernel repository.
# Keep the trailing slash: file names are appended by plain string concatenation.
shear_kernel_dir = "../../../projects/shear_kernel/"
# Build an import spec for analytic_kernel.py directly from its file path.
spec_analytic = importlib.util.spec_from_file_location("analytic_kernel", shear_kernel_dir + "analytic_kernel.py") 
bar = importlib.util.module_from_spec(spec_analytic)
# Register the module under its name before executing it, so that the
# `from analytic_kernel import ...` statement below resolves to this module.
sys.modules["analytic_kernel"] = bar
spec_analytic.loader.exec_module(bar)
from analytic_kernel import shear_kernel

We will set a random seed here for consistency when building docs.
In practice we would not fix a seed.

In [None]:
np.random.seed(0)

Here we build some simple data, which is meant to represent a grid of sky coordinates.

In [None]:
# Regular n x n grid of sky coordinates on [xmin, xmax] x [ymin, ymax].
n = 25  # number of galaxies on a side
xmin, xmax = 0, 1
ymin, ymax = 0, 1
length_scale = 0.5

xx = np.linspace(xmin, xmax, n)
yy = np.linspace(ymin, ymax, n)
x, y = np.meshgrid(xx, yy)

# One row per grid point; the two columns are the x and y coordinates.
features = np.column_stack((x.ravel(), y.ravel()))
data_count = len(features)
diffs = _pairwise_differences(features)

Use an Isotropic distance functor.

In [None]:
# Isotropic deformation with a single length scale; reused by the
# `ShearKernel(deformation=dist_fn)` evaluations in the cells below.
dist_fn = Isotropy(
    metric=F2,
    length_scale=Parameter(length_scale),
)

Here we construct a shear value kernel (partial differential components of RBF), as well as the original RBF kernel using Bob's implementation.

In [None]:
def original_shear(X1, X2=None, length_scale=1.0):
    """Assemble a dense shear covariance from the reference `shear_kernel`.

    `shear_kernel` is evaluated once for every pair of rows drawn from `X1`
    and `X2`, and its `[a, b]` entries for `a, b` in `0..2` are scattered into
    a block-structured matrix.

    Args:
        X1: 2D array whose rows each unpack into two coordinates.
        X2: Optional second array of the same form. When omitted, `X1` is
            paired with itself.
        length_scale: Forwarded to `shear_kernel` as its `b` keyword.

    Returns:
        A `(3 * n1, 3 * n2)` array, where `n1` and `n2` are the row counts of
        `X1` and `X2`. Entry `[a * n1 + i, b * n2 + j]` holds component
        `[a, b]` of the kernel evaluated on row `i` of `X1` and row `j` of
        `X2`. The array starts out filled with NaN.
    """
    if X2 is None:
        X2 = X1
    n1, _ = X1.shape
    n2, _ = X2.shape
    # NaN-initialized so that any entry left unwritten is easy to spot.
    vals = np.full((3 * n1, 3 * n2), np.nan)
    for row, (row_x, row_y) in enumerate(X1):
        for col, (col_x, col_y) in enumerate(X2):
            block = shear_kernel(row_x, row_y, col_x, col_y, b=length_scale)
            # Component-major layout: all rows of component 0, then 1, then 2.
            for a, b in np.ndindex(3, 3):
                vals[a * n1 + row, b * n2 + col] = block[a, b]
    return vals

In [None]:
Kin_analytic = original_shear(features, length_scale=length_scale)

Here we do the same using the MuyGPyS implementation. Note the increased efficiency.

In [None]:
Kin_muygps = ShearKernel(deformation=dist_fn)(diffs)

`Kin_muygps` is a more generalized tensor, so we need to flatten it to a conforming shape.

In [None]:
Kin_muygps_flat = Kin_muygps.reshape(data_count * 3, data_count * 3)

In [None]:
print(f"Kin_analytic.shape = {Kin_analytic.shape}")
print(f"Kin_muygps.shape = {Kin_muygps.shape}")
print(f"Kin_muygps_flat.shape = {Kin_muygps_flat.shape}")

Do the two implementations agree?

In [None]:
np.allclose(Kin_analytic, Kin_muygps_flat)

In [None]:
residual = np.abs(Kin_analytic - Kin_muygps_flat)
print(f"residual max: {np.max(residual)}, min: {np.min(residual)}, mean : {np.mean(residual)}")

Plot results of the baseline and internal implementations. 

In [None]:
# Side by side: reference kernel, MuyGPyS kernel, and their absolute
# difference drawn on a logarithmic color scale.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
ax_ref, ax_new, ax_res = axes

ax_ref.imshow(Kin_analytic)
ax_ref.set_title("original shear kernel")

ax_new.imshow(Kin_muygps_flat)
ax_new.set_title("MuyGPyS shear kernel")

im = ax_res.imshow(residual, norm=LogNorm())
ax_res.set_title("Residual")
fig.colorbar(im, ax=ax_res)

plt.show()

Now we perform a similar analysis for the cross-covariance.

In [None]:
# Split the grid points into two disjoint sets and form the differences
# between every point of the first set and every point of the second.
split = 200
X1, X2 = features[:split], features[split:]
n1, n2 = len(X1), len(X2)
crosswise_diffs = _crosswise_differences(X1, X2)
print(X1.shape, X2.shape, crosswise_diffs.shape)

In [None]:
Kcross_analytic = original_shear(X1, X2, length_scale=length_scale)

In [None]:
Kcross_muygps = ShearKernel(deformation=dist_fn)(crosswise_diffs, adjust=False)

In [None]:
Kcross_muygps_flat = Kcross_muygps.reshape(n1 * 3, n2 * 3)

In [None]:
print(f"Kcross_analytic.shape = {Kcross_analytic.shape}")
print(f"Kcross_muygps.shape = {Kcross_muygps.shape}")
print(f"Kcross_muygps_flat.shape = {Kcross_muygps_flat.shape}")

In [None]:
np.allclose(Kcross_analytic, Kcross_muygps_flat)

In [None]:
cross_residual = np.abs(Kcross_analytic - Kcross_muygps_flat)
print(f"residual max: {np.max(cross_residual)}, min: {np.min(cross_residual)}, mean : {np.mean(cross_residual)}")

In [None]:
# Same three-panel comparison as for the pairwise kernel, now for the
# cross-covariance between the two subsets.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for axis, title, matrix in zip(
    axes,
    ("original shear kernel", "MuyGPyS shear kernel"),
    (Kcross_analytic, Kcross_muygps_flat),
):
    axis.set_title(title)
    axis.imshow(matrix)
axes[2].set_title("Residual")
im = axes[2].imshow(cross_residual, norm=LogNorm())
fig.colorbar(im, ax=axes[2])
plt.show()

Runtime comparison of the two implementations:

In [None]:
# Timing is switched off; change the condition to True to run the benchmark.
# Note that `original_shear` is timed here with its default `length_scale`.
if False:
    %timeit original_shear(features)
    %timeit ShearKernel(deformation=dist_fn)(diffs)

# Test

For a more formal test, use the data generated with galsim.
This will provide a "proper" set of features and responses.
Essentially we want to emulate the procedure with exactly the data that are used.

In [None]:
from MuyGPyS.gp import MuyGPS

In [None]:
# Saved bundle inside the shear_kernel checkout. It unpacks into the raw
# features and responses plus the scale/mean values used to standardize them.
foo = shear_kernel_dir + "gregs_work/nbs/kernel_check.pt"
# NOTE(review): torch.load unpickles the file, so only point this at a trusted local file.
unscaled_features, unscaled_responses, scale_responses, mean_responses, scale_features, mean_features = torch.load(foo)

# normalize the data
features = ( np.array(unscaled_features) - np.array(mean_features) ) / np.array(scale_features)
train_count, feature_count = unscaled_features.shape
# NOTE(review): reshape reinterprets the existing memory order; it does not
# transpose. The final plotting cell indexes `unscaled_responses[0, :]` as one
# full map, which suggests a (3, train_count) layout. If that is the case, this
# reshape interleaves the three components and a transpose is needed instead
# - TODO confirm against `unscaled_responses.shape`.
responses = ( np.array(unscaled_responses).reshape(train_count, 3) - np.array(mean_responses) ) /np.array(scale_responses) # these are the targets

In [None]:
unscaled_responses.shape

Explicitly define the target matrices.
Also add a leading singleton dimension to `targets_muygps` for things to work.

In [None]:
# Two layouts of the same standardized responses: the analytic path keeps the
# `responses` array as is, while the MuyGPyS path gets a leading axis of
# length one and has its last two axes swapped.
targets_analytic = responses
targets_muygps = responses[None,:].swapaxes(-2, -1)
print(targets_analytic.shape, targets_muygps.shape)

We have already determined that the two implementations agree. 
I will recreate the `K*_analytic` and `K*_muygps` matrices using this data.
The `K*` tensors also need a leading singleton dimension for things to play nice.

In [None]:
# Analytic
# Train and test points are the same set here, so both calls see `features`.
Kin_analytic = original_shear(features, length_scale=length_scale)
Kcross_analytic = original_shear(features, features, length_scale=length_scale)

# MuyGPyS
pairwise_diffs = _pairwise_differences(features)
crosswise_diffs = _crosswise_differences(features, features)

Kin_muygps = ShearKernel(deformation=dist_fn)(pairwise_diffs)
Kcross_muygps = ShearKernel(deformation=dist_fn)(crosswise_diffs, adjust=False)
# Size the flattened views from the loaded data instead of a literal 961, so
# the cell keeps working if the data file changes size.
Kin_muygps_flat = Kin_muygps.reshape(3 * train_count, 3 * train_count)
Kcross_muygps_flat = Kcross_muygps.reshape(3 * train_count, 3 * train_count)


In [None]:
print(Kin_analytic.shape, Kin_muygps.shape, Kin_muygps_flat.shape)
print(Kcross_analytic.shape, Kcross_muygps.shape, Kcross_muygps_flat.shape)

Check to make sure the `K*` themselves are the same.

In [None]:
np.allclose(Kin_analytic, Kin_muygps_flat, atol = 1e-6)

In [None]:
# Three panels for the galsim features: reference kernel, MuyGPyS kernel, and
# the absolute difference between them on a logarithmic color scale.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
ax_ref, ax_new, ax_res = axes

ax_ref.imshow(Kin_analytic)
ax_ref.set_title("original shear kernel")

ax_new.imshow(Kin_muygps_flat)
ax_new.set_title("MuyGPyS shear kernel")

im = ax_res.imshow(np.abs(Kin_analytic - Kin_muygps_flat), norm=LogNorm())
ax_res.set_title("Residual")
fig.colorbar(im, ax=ax_res)

plt.show()

In [None]:
np.allclose(Kcross_analytic, Kcross_muygps_flat, atol = 1e-6)

Having defined the `K*` matrices, compare posterior means, sans-optimization. 
This comes from (3.4) in Muyskens et al. (2021), which looks like:
$$ \hat{Y}(X^*|X) \approx K_{\theta}(X^*,X)K_{\theta}(X,X)^{-1}Y(X) $$


In [None]:
# Posterior mean K(X*, X) K(X, X)^{-1} Y(X) using the reference kernels.
#
# `original_shear` orders kernel rows and columns component-major (index
# a * n + i), so the (n, 3) targets are transposed before flattening, and the
# flat result is unflattened as (3, n) and transposed back to (n, 3). A plain
# C-order reshape of the (n, 3) targets would interleave the components and
# pair them with the wrong kernel columns.
#
# `np.linalg.solve` applies the inverse without forming it explicitly.
posterior_mean_analytic = (
    Kcross_analytic
    @ np.linalg.solve(Kin_analytic, targets_analytic.T.reshape(-1, 1))
).reshape(3, -1).T * np.array(scale_responses) + np.array(mean_responses)

We also want the posterior mean from MuyGPyS.
To do this, we will create a `muygps` object using the `dist_fn` above.

In [None]:
# MuyGPS model wrapping a shear kernel. The isotropic deformation is built
# fresh here, with the same metric and length scale that `dist_fn` was given.
shear_model = MuyGPS(
    kernel=ShearKernel(
        deformation=Isotropy(
            metric=F2,
            length_scale=Parameter(length_scale),
        ),
    ),
)


In [None]:
# Posterior mean from MuyGPyS, mapped back to the original response units.
# The row count comes from the loaded data instead of a literal 961.
# NOTE(review): this reshape treats the returned values as observation-major.
# The kernels above are flattened component-major, so
# `.reshape(3, train_count).T` may be what is needed here - TODO confirm
# against the shape that `posterior_mean` actually returns.
posterior_mean_muygps = shear_model.posterior_mean(
    Kin_muygps[None, :], Kcross_muygps[None, :], targets_muygps
).reshape(train_count, 3) * np.array(scale_responses) + np.array(mean_responses)

Now we check if things are the same.

In [None]:
np.allclose(posterior_mean_muygps, posterior_mean_analytic, atol=1e-6)

In [None]:
# One row per response component; columns are truth, analytic posterior mean,
# MuyGPyS posterior mean, and the absolute difference between the two means.
# The grid side is derived from the data (train_count is side * side) instead
# of a literal 31.
side = int(round(np.sqrt(train_count)))
fig, ax = plt.subplots(3, 4, figsize=(10, 10))

ax[0, 0].set_title("Truth")
ax[0, 1].set_title("Posterior Mean Analytic")
ax[0, 2].set_title("Posterior Mean MuyGPS")
ax[0, 3].set_title('|Analytic - MuyGPS|')

for comp in range(3):
    analytic_map = posterior_mean_analytic[:, comp].reshape(side, side)
    muygps_map = posterior_mean_muygps[:, comp].reshape(side, side)

    # The truth is indexed component-first, the posterior means component-last.
    ax[comp, 0].imshow(unscaled_responses[comp, :].reshape(side, side))
    ax[comp, 1].imshow(analytic_map)
    ax[comp, 2].imshow(muygps_map)

    diff_im = ax[comp, 3].imshow(np.abs(analytic_map - muygps_map), norm=LogNorm())
    fig.colorbar(diff_im, shrink=0.75, ax=ax[comp, 3])

plt.show()



These don't look anywhere close to each other.
I'm not sure whether I've messed up on:
* (a) The actual math for forming the posterior mean
* (b) The dimensionality and organization of all of the tensors
* (c) Something entirely trivial that either isn't clicking or that I am overlooking

Given that the posterior mean is returning weird things anyway, I do not expect things to be the same necessarily, but I am sure I'm doing something either slightly or very wrong.