In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

## Simulate data

In [None]:
# Dense 1-D input grid on [0, 10], stored as a column vector (n_samples, 1).
X = np.linspace(0, 10, 1000)[:, np.newaxis]
# Noise-free target f(x) = x * sin(x), flattened to a 1-D array.
y = (X * np.sin(X)).ravel()

In [None]:
# Draw the noise-free target on the current axes using the explicit Axes API.
ax = plt.gca()
ax.plot(X, y, linestyle="dotted", label=r"$f(x) = x \sin(x)$")
ax.legend()
# Assign to `_` so the cell does not echo the return value.
_ = ax.set(xlabel="$x$", ylabel="$f(x)$", title="True generative process")

In [None]:
# Seeded generator so the same training points are selected on every run.
rng = np.random.RandomState(1)
# Pick 6 distinct positions on the grid to serve as training observations.
training_indices = rng.choice(np.arange(len(y)), replace=False, size=6)
X_train = X[training_indices]
y_train = y[training_indices]

In [None]:
# Standard deviation of the zero-mean Gaussian noise added to the targets.
noise_std = 0.75
# NOTE: draws from the shared `rng`, so re-running this cell alone yields new noise.
gaussian_noise = rng.normal(loc=0.0, scale=noise_std, size=y_train.shape)
y_train_noisy = y_train + gaussian_noise

## Define GP Kernel

In [None]:
# Covariance function: a constant amplitude factor (the leading 1) times an RBF
# kernel whose length scale starts at 1.0 and is bounded to [1e-2, 1e2].
kernel = 1 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
# n_restarts_optimizer=9 restarts the hyperparameter optimiser from randomly
# drawn starting points; random_state pins those draws so that
# Restart & Run All reproduces the same fitted kernel.
gaussian_process = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=9,
    random_state=0,
)

In [None]:
# 100 sorted inputs spanning [0, 10], the same domain as the simulated data.
# The previous `np.arange(100) / 0.1` produced 0..990 with spacing 10, far
# beyond the RBF length scale of 1.0, so neighbouring points were uncorrelated
# and the sampled "functions" looked like white noise.
x_sort = np.linspace(start=0, stop=10, num=100).reshape(-1, 1)
# Draw 10 sample functions from the GP evaluated at x_sort; the transpose puts
# one sampled function per row so the samples can be iterated over directly.
y_samples = gaussian_process.sample_y(x_sort, n_samples=10).T

In [None]:
# One curve per sampled function. These were drawn before the model is fitted,
# so with the cells run in order they are samples from the GP prior.
for y_i in y_samples:
    plt.plot(x_sort, y_i)
# Label the figure so it stands on its own when the notebook is skimmed.
plt.xlabel("$x$")
plt.ylabel("$f(x)$")
_ = plt.title("Samples drawn from the GP prior")

## Fit GP (noise-free targets)

In [None]:
# Fit on the noise-free targets; fit() returns the estimator itself, so the
# optimised kernel can be displayed directly as the cell output.
gaussian_process.fit(X_train, y_train).kernel_

In [None]:
# Predictive mean and standard deviation of the noise-free fit on the full grid X.
mean_prediction, std_prediction = gaussian_process.predict(X, return_std=True)

In [None]:
# Compare the true function, the training points, and the GP prediction.
ax = plt.gca()
ax.plot(X, y, linestyle="dotted", label=r"$f(x) = x \sin(x)$")
ax.scatter(X_train, y_train, label="Observations")
ax.plot(X, mean_prediction, label="Mean prediction")

# Band of +/- 1.96 predictive standard deviations around the mean.
half_width = 1.96 * std_prediction
ax.fill_between(
    X.ravel(),
    mean_prediction - half_width,
    mean_prediction + half_width,
    alpha=0.5,
    label=r"95% confidence interval",
)
ax.legend()
_ = ax.set(
    xlabel="$x$",
    ylabel="$f(x)$",
    title="Gaussian process regression on noise-free dataset",
)

In [None]:
# Draw 10 sample functions from the fitted (noise-free) GP on the grid X;
# transposed so that each row holds one sampled function.
gp_samples = gaussian_process.sample_y(X, n_samples=10).T

In [None]:
# One curve per function sampled from the GP fitted on noise-free targets.
for gp_sample in gp_samples:
    plt.plot(X, gp_sample)
# Label the figure so it stands on its own when the notebook is skimmed.
plt.xlabel("$x$")
plt.ylabel("$f(x)$")
_ = plt.title("Samples drawn from the GP fitted on the noise-free dataset")

## Fit GP (noisy targets)

In [None]:
# alpha is set to the variance of the noise injected into y_train_noisy so the
# model accounts for observation noise. random_state pins the random starting
# points used by the 9 optimiser restarts, making the fit reproducible.
gaussian_process_noise = GaussianProcessRegressor(
    kernel=kernel,
    alpha=noise_std**2,
    n_restarts_optimizer=9,
    random_state=0,
)
gaussian_process_noise.fit(X_train, y_train_noisy)
# Last expression: display the kernel with its optimised hyperparameters.
gaussian_process_noise.kernel_

In [None]:
# Predictive mean and standard deviation of the noisy fit on the full grid X.
# NOTE: this rebinds mean_prediction / std_prediction from the noise-free fit,
# so re-running the earlier noise-free plot after this cell would show these values.
mean_prediction, std_prediction = gaussian_process_noise.predict(X, return_std=True)

In [None]:
# Compare the true function, the noisy observations, and the GP prediction.
ax = plt.gca()
ax.plot(X, y, linestyle="dotted", label=r"$f(x) = x \sin(x)$")

# Observations drawn as markers with error bars of one noise standard deviation.
ax.errorbar(
    X_train,
    y_train_noisy,
    yerr=noise_std,
    linestyle="None",
    color="tab:blue",
    marker=".",
    markersize=10,
    label="Observations",
)
ax.plot(X, mean_prediction, label="Mean prediction")

# Band of +/- 1.96 predictive standard deviations around the mean.
half_width = 1.96 * std_prediction
ax.fill_between(
    X.ravel(),
    mean_prediction - half_width,
    mean_prediction + half_width,
    color="tab:orange",
    alpha=0.5,
    label=r"95% confidence interval",
)
ax.legend()
_ = ax.set(
    xlabel="$x$",
    ylabel="$f(x)$",
    title="Gaussian process regression on a noisy dataset",
)

In [None]:
# Draw 10 sample functions from the GP fitted on noisy targets, evaluated on X;
# transposed so that each row holds one sampled function.
gp_noise_samples = gaussian_process_noise.sample_y(X, n_samples=10).T

In [None]:
# One curve per function sampled from the GP fitted on noisy targets.
for gp_noise_sample in gp_noise_samples:
    plt.plot(X, gp_noise_sample)
# Label the figure so it stands on its own when the notebook is skimmed.
plt.xlabel("$x$")
plt.ylabel("$f(x)$")
_ = plt.title("Samples drawn from the GP fitted on the noisy dataset")

Source: [scikit-learn Gaussian process regression example](https://scikit-learn.org/stable/auto_examples/gaussian_process/plot_gpr_noisy_targets.html#sphx-glr-auto-examples-gaussian-process-plot-gpr-noisy-targets-py)