In [None]:
import mogpjax as mpx
import jax.numpy as jnp
import matplotlib.pyplot as plt
import jax.random as jr
import pandas as pd 

# Root JAX PRNG key, built from a fixed seed so that random draws derived from
# it are reproducible across runs.
SEED = 123
key = jr.PRNGKey(SEED)

## Synthetic data generation

We generate synthetic data according to
$$\begin{align}
y_1(x) & = -\frac{\sin(10\pi(x+1))}{2x+1} - x^4 + \varepsilon_1 \\
y_2(x) & = \cos^2(y_1(x)) + \sin(3x) + \varepsilon_2 \\
y_3(x) & = y_2(x)y_1^2(x) + 3x + \varepsilon_3\,,
\end{align}$$
where $\varepsilon_1, \varepsilon_2, \varepsilon_3 \sim \mathcal{N}(0, 0.05^2)$ (i.e. Gaussian noise with standard deviation $0.05$), as per <strong data-cite="requeima2019gaussian"></strong>.

In [None]:
n = 100
noise = 0.05

f1 = lambda x: -jnp.sin(10*jnp.pi*(x+1))/(2*x + 1)-jnp.power(x, 4)
f2 = lambda x: jnp.square(jnp.cos(f1(x)))+jnp.sin(3*x)
f3 = lambda x: jnp.square(f1(x))*f2(x) + 3*x 

# Evenly spaced inputs on [0, 1] shared by all three outputs.
x = jnp.linspace(0, 1, n)

# A JAX PRNG key must never be used twice. Split off a fresh subkey *before*
# every draw instead of sampling with `key` and then splitting that same key,
# so the three noise vectors come from distinct, unused keys.
key, subkey = jr.split(key)
y1 = f1(x) + jr.normal(subkey, shape=(n,)) * noise
key, subkey = jr.split(key)
y2 = f2(x) + jr.normal(subkey, shape=(n,)) * noise
key, subkey = jr.split(key)
y3 = f3(x) + jr.normal(subkey, shape=(n,)) * noise

# Overlay the three noisy synthetic outputs on a shared input axis.
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
for series, series_label in ((y1, r'$y_1$'), (y2, r'$y_2$'), (y3, r'$y_3$')):
    ax.plot(x, series, label=series_label)
# Label the axes and title the figure so it can be read on its own.
ax.set(xlabel=r'$x$', ylabel='output value', title='Noisy synthetic outputs')
ax.legend(loc='best')
# Hide the top and right frame lines.
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)

## Jura Dataset

The Jura data from <strong data-cite="goovaerts1997geostatistics"></strong> contains 259 spatial coordinates. At each coordinate, we observe a 3-dimensional vector containing nickel, zinc, and cadmium measurements. At test time, we have 100 further coordinates at which only nickel and zinc are measured, and the task is to predict cadmium at those coordinates conditional on these observations.

In [None]:
# Short aliases for selected raw column names; other columns keep their names.
colnames = {"Xloc": "x", "Yloc": "y", "Landuse": "land", "Rock": "rock"}


def _load_jura(path):
    """Read a whitespace-delimited Jura file indexed by its (x, y) coordinates.

    Columns listed in ``colnames`` are renamed to their aliases; all other
    columns are left untouched. Returns a new DataFrame with a two-level
    ("x", "y") index.
    """
    return (
        pd.read_csv(path, sep=r'\s+')
        .rename(columns=colnames)
        .set_index(["x", "y"])
    )


# Both files go through the same loader, so their schemas cannot drift apart.
train = _load_jura('data/jura/prediction.dat')
test = _load_jura('data/jura/validation.dat')

# Training frame: all three metals at the training sites, followed by the Ni/Zn
# readings at the test sites. The appended rows have no "Cd" column, so the
# concatenation leaves their Cd entries missing.
observed_at_test = test.loc[:, ["Ni", "Zn"]]
train = pd.concat([train.loc[:, ["Ni", "Zn", "Cd"]], observed_at_test])
# Held-out target: cadmium at the test sites.
test = test.loc[:, ["Cd"]]

train.head()