# Lesson 1 : Generate Gaussian Processes & Get prediction in 1D / 2D

Below are the packages that will be used in this lesson.

The cell just below prevents the output area from scrolling when plots are created within the IPython notebook.

In [1]:
%%javascript
// Replace the output area's scroll check with a function that always
// answers false, so that cell outputs (e.g. the large figures produced
// below) are not placed in a scrolling box.
IPython.OutputArea.prototype._should_scroll = function(lines){
    // `lines` is ignored: never ask for scrolling
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
# Classical packages for manipulating
# arrays, for plotting and for interactive plots.
import pylab as plt
from matplotlib import gridspec
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact
import itertools

from treegp import AnisotropicRBF, eval_kernel
import treegp

## Exercise 1): Play with Gaussian random field realizations (1D)

In [3]:
#################################################################
# EXERCISE 1: Play with gaussian random fields realization (1D) #
#################################################################

# function to generate a 1D gaussian random field for a 
# given scikit-learn kernel. 
def make_1d_grf(kernel, noise=None, seed=42, N_points=40):
    """Draw one realization of a zero-mean 1D Gaussian random field.

    Parameters
    ----------
    kernel : callable
        Called with the ``(N_points, 1)`` coordinate array; its result is
        used as the covariance matrix of the field.
    noise : float or None
        Standard deviation of the Gaussian noise added to the field.
        ``None`` means no noise is added.
    seed : int
        Seed given to ``np.random.seed`` (the global numpy RNG is reseeded).
    N_points : int
        Number of points drawn uniformly in [-10, 10].

    Returns
    -------
    ``(x, y)`` when ``noise`` is None, otherwise ``(x, y, y_err)`` where
    ``y_err`` is a constant array equal to ``noise``.
    """
    # reproducible draws: everything below uses the global numpy RNG
    np.random.seed(seed)

    # random positions, stored as a column vector
    positions = np.random.uniform(-10, 10, N_points).reshape((N_points, 1))

    # covariance between all pairs of positions, then one field draw
    covariance = kernel(positions)
    field = np.random.multivariate_normal(np.zeros(N_points), covariance)

    if noise is None:
        return positions, field

    # noisy case: perturb the field and report the per-point error
    field += np.random.normal(scale=noise, size=N_points)
    return positions, field, noise * np.ones_like(field)

# Function to interactively plot Gaussian random field realizations.
@interact(n_real = widgets.IntSlider(value=5, min=1, max=20, step=1,
          description='N real:', disabled=False,
          continuous_update=False, orientation='horizontal',
          readout=True,
          readout_format='d'),
          # raw string: '\s' is not a valid escape sequence in a plain literal
          sigma = widgets.FloatSlider(value=1., min=0.01, max=5.0, step=0.01, description=r'$\sigma$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'), 
          l = widgets.FloatSlider(value=1, min=0.01, max=10.0, step=0.01, description='$l$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          kernel = widgets.Dropdown(options=['RBF', 'Matern'],
                                  value='RBF',
                                  description='Kernel:',
                                  disabled=False,))
def plot_generate_grf(n_real, sigma, l, kernel):
    """Plot 1D Gaussian random field realizations and the kernel used.

    Left panel: ``n_real`` noisy realizations of 80 points each (seeds
    62, 63, ...) drawn with ``make_1d_grf``; the noise level is 1% of
    ``sigma``. Right panel: the kernel evaluated against the origin as a
    function of the separation along the first coordinate.

    Parameters
    ----------
    n_real : int
        Number of realizations to draw.
    sigma : float
        Amplitude; the kernel is scaled by ``sigma**2``.
    l : float
        Value passed as the single argument of the kernel (labelled
        ``l`` on the slider).
    kernel : str
        Kernel name inserted in the kernel string ('RBF' or 'Matern'
        in the dropdown).
    """
    # The kernel does not depend on the realization index, so it is built
    # once here instead of being re-evaluated at every loop iteration.
    gp_kernel = eval_kernel("%f * %s(%f)"%((sigma**2, kernel, l)))

    gs = gridspec.GridSpec(1, 2, width_ratios=[1.5, 1])
    plt.figure(figsize=(20,8))

    # --- left panel: the realizations ---
    plt.subplot(gs[0])
    for i in range(n_real):
        # a different seed per realization gives a different draw
        x, y, y_err = make_1d_grf(gp_kernel, noise = sigma*0.01,
                                  seed=62+i, N_points=80)
        plt.scatter(x, y, label = 'data')
        plt.errorbar(x, y, linestyle='', yerr=y_err,
                     alpha=0.7,marker='.',zorder=0)
    # zero line, i.e. the mean of the generated fields
    plt.plot([-10,10], [0, 0],'k--')
    plt.xlim(-10,10)
    plt.ylim(-8.,8.)
    plt.xlabel('X', fontsize=20)
    plt.ylabel('Y', fontsize=20)
    plt.title('Number of realizations: %i'%(n_real), fontsize=16)

    # --- right panel: the correlation function ---
    plt.subplot(gs[1])
    distance = np.linspace(0, 8, 30)
    # points (d, 0) compared with the origin: column 0 of the kernel matrix
    coord = np.array([distance, np.zeros_like(distance)]).T
    pcf = gp_kernel(coord, Y=np.zeros_like(coord))[:,0]
    plt.plot(distance, pcf, 'k', lw=3)
    plt.ylim(0, 25)
    plt.xlim(0, 8)
    plt.ylabel(r'$\xi(|x_i-x_j|)$', fontsize=20)
    plt.xlabel('$|x_i-x_j|$', fontsize=20)
    plt.title('Used correlation function (%s)'%(kernel), fontsize=16)
    

interactive(children=(IntSlider(value=5, continuous_update=False, description='N real:', max=20, min=1), Float…

## Exercise 2): Play with Gaussian random field realizations (2D)

In [4]:
################################################################
# EXERCISE 2: Play with gaussian random fields realization (2D)#
################################################################

# Functions to build the 2D correlation-length matrix and to generate
# a 2D gaussian random field for a given scikit-learn kernel.

def get_correlation_length_matrix(size, g1, g2):
    """Build the 2x2 correlation-length matrix from a size and two shears.

    Parameters
    ----------
    size : float
        Length along the first principal axis (it enters squared).
    g1, g2 : float
        Shear-like components. A component whose absolute value exceeds 1
        is replaced by 0; the combined modulus itself is not clamped.

    Returns
    -------
    numpy.ndarray
        The matrix ``R.T . diag(size**2, (size*q)**2) . R`` with
        ``q = (1 - g) / (1 + g)``, ``g = sqrt(g1**2 + g2**2)`` and ``R``
        built from the angle ``0.5 * arctan2(g2, g1)``.
    """
    # out-of-range components are silently reset
    g1 = 0 if abs(g1) > 1 else g1
    g2 = 0 if abs(g2) > 1 else g2

    modulus = np.sqrt(g1**2 + g2**2)
    axis_ratio = (1 - modulus) / (1 + modulus)
    angle = 0.5 * np.arctan2(g2, g1)

    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rotation = np.array([[cos_a, sin_a],
                         [-sin_a, cos_a]])
    # squared lengths along the two principal axes
    principal = np.array([[size**2, 0],
                          [0, (size * axis_ratio)**2]])
    return np.dot(rotation.T, principal.dot(rotation))

def make_2d_grf(kernel, noise=None, seed=42, N_points=40):
    """Draw one realization of a zero-mean 2D Gaussian random field.

    Parameters
    ----------
    kernel : callable
        Called with the ``(N_points, 2)`` coordinate array; its result is
        used as the covariance matrix of the field.
    noise : float or None
        Standard deviation of the Gaussian noise added to the field.
        ``None`` means no noise is added.
    seed : int
        Seed given to ``np.random.seed`` (the global numpy RNG is reseeded).
    N_points : int
        Number of points; each coordinate is uniform in [-10, 10].

    Returns
    -------
    ``(x, y)`` when ``noise`` is None, otherwise ``(x, y, y_err)`` where
    ``y_err`` is a constant array equal to ``noise``.
    """
    # reproducible draws: everything below uses the global numpy RNG
    np.random.seed(seed)

    # first coordinate is drawn entirely before the second one
    axes = [np.random.uniform(-10, 10, N_points) for _ in range(2)]
    positions = np.array(axes).T

    # covariance between all pairs of positions, then one field draw
    covariance = kernel(positions)
    field = np.random.multivariate_normal(np.zeros(N_points), covariance)

    if noise is None:
        return positions, field

    # noisy case: perturb the field and report the per-point error
    field += np.random.normal(scale=noise, size=N_points)
    return positions, field, noise * np.ones_like(field)

# Function to interactively plot a 2D Gaussian random field realization.
# raw string below: '\s' is not a valid escape sequence in a plain literal
@interact(sigma = widgets.FloatSlider(value=2., min=0.01, max=5.0, step=0.01, description=r'$\sigma$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'), 
          size = widgets.FloatSlider(value=1, min=0.01, max=10.0, step=0.01, description='$l$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          g1 = widgets.FloatSlider(value=0, min=-0.99, max=0.99, step=0.01, description='$g_1$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          g2 = widgets.FloatSlider(value=0, min=-0.99, max=0.99, step=0.01, description='$g_2$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          kernel = widgets.Dropdown(options=['AnisotropicRBF'],
                                  value='AnisotropicRBF',
                                  description='Kernel:',
                                  disabled=False,))
def plot_generate_grf_2d(sigma, size, g1, g2, kernel):
    """Plot one 2D Gaussian random field realization and the kernel used.

    Left panel: 500 points drawn with ``make_2d_grf`` (seed 42, noise
    equal to 1% of ``sigma``), coloured by the field value. Right panel:
    the kernel evaluated against the origin on a 25x25 grid covering
    [-5, 5] in both directions.

    Parameters
    ----------
    sigma : float
        Amplitude; the kernel is scaled by ``sigma**2``.
    size, g1, g2 : float
        Passed to ``get_correlation_length_matrix``; the inverse of the
        resulting matrix is given to ``AnisotropicRBF`` as ``invLam``.
    kernel : str
        Only used in the title of the right panel; the kernel string is
        always built with ``AnisotropicRBF``.
    """
    L = get_correlation_length_matrix(size, g1, g2)
    inv_L = np.linalg.inv(L)
    gs = gridspec.GridSpec(1, 2, width_ratios=[1.5, 1])

    plt.figure(figsize=(20,8))

    # --- left panel: the realization ---
    plt.subplot(gs[0])
    # the kernel is described as a string and turned into an object by
    # eval_kernel; the matrix is embedded through its repr
    kernel_str = "%f"%(sigma**2) + " * AnisotropicRBF(invLam={0!r})".format(inv_L)
    gp_kernel = eval_kernel(kernel_str)
    # the returned per-point error is not drawn on this plot
    points, y, _ = make_2d_grf(gp_kernel, noise=sigma*0.01, seed=42, N_points=500)

    plt.scatter(points[:,0], points[:,1], c=y, s=80, cmap=plt.cm.seismic, vmin=-5, vmax=5)
    cb = plt.colorbar()
    cb.set_label('Y', fontsize=20)
    plt.xlabel(r'$\theta_{X}$', fontsize=20)
    plt.ylabel(r'$\theta_{Y}$', fontsize=20)
    plt.title('Generated 2D Gaussian random fields', fontsize=20)

    # --- right panel: the 2D correlation function ---
    plt.subplot(gs[1])
    pixel_squareroot = 25
    npixels = pixel_squareroot**2
    grid = np.linspace(-5, 5, pixel_squareroot)
    x1, x2 = np.meshgrid(grid, grid)
    coord = np.array([x1.reshape(npixels), x2.reshape(npixels)]).T
    # every grid point compared with the origin: column 0 of the kernel matrix
    pcf = gp_kernel(coord, Y=np.zeros_like(coord))[:,0]
    pcf = pcf.reshape((pixel_squareroot, pixel_squareroot))

    plt.imshow(pcf, extent=[grid[0], grid[-1], grid[0], grid[-1]],
               origin='lower', cmap=plt.cm.Blues, vmin=0, vmax=5)
    cb = plt.colorbar()
    cb.set_label(r'$\xi (\Delta \theta_{X}, \Delta \theta_{Y})$', fontsize=20)
    # raw strings: '\D' is not a valid escape sequence in a plain literal
    plt.ylabel(r'$\Delta \theta_{Y} = \theta_{Y,i} - \theta_{Y,j}$', fontsize=20)
    plt.xlabel(r'$\Delta \theta_{X} = \theta_{X,i} - \theta_{X,j}$', fontsize=20)
    plt.title('Used correlation function (%s)'%(kernel), fontsize=16)

interactive(children=(FloatSlider(value=2.0, continuous_update=False, description='$\\sigma$:', max=5.0, min=0…

## Exercise 3): GP prediction and what are the best hyperparameters and kernel (1D)

In [6]:
###################################################################################
# EXERCISE 3: GP prediction and what are the best hyperparameters and kernel (1D) #
###################################################################################


def gp_regression(x, new_x, y, kernel, y_err=None):
    """Run a treegp Gaussian process interpolation and predict at new points.

    Parameters
    ----------
    x : array
        Coordinates of the training points.
    new_x : array
        Coordinates at which the prediction is requested.
    y : array
        Values at the training points.
    kernel : str
        Kernel description handed to ``treegp.GPInterpolation``.
    y_err : array or None
        Errors on ``y``. When None, a constant error of 1e-10 is used.

    Returns
    -------
    gp, y_predict, y_std
        The ``GPInterpolation`` object, the predicted values and the
        square root of the diagonal of the predicted covariance.
    """
    # no errors supplied: fall back to a tiny constant error
    errors = np.ones_like(y) * 1e-10 if y_err is None else y_err

    # NOTE(review): optimizer='none' presumably means the kernel
    # hyperparameters are used as given -- confirm against treegp docs.
    settings = dict(kernel=kernel, optimizer='none',
                    normalize=False, white_noise=0., p0=[3000., 0., 0.],
                    n_neighbors=4, average_fits=None, nbins=20,
                    min_sep=None, max_sep=None)
    gp = treegp.GPInterpolation(**settings)
    gp.initialize(x, y, y_err=errors)

    y_predict, y_cov = gp.predict(new_x, return_cov=True)
    # 1-sigma uncertainty taken from the covariance diagonal
    return gp, y_predict, np.sqrt(np.diag(y_cov))


# Load the 1D data set; columns 0, 1, 2 are used as x, y and the error on y.
data = np.loadtxt('data/data_1d_grf.txt')
# x is kept as a column vector (one row per sample)
x = data[:, 0].reshape((-1, 1))
y, y_err = data[:, 1], data[:, 2]

# raw string below: '\s' is not a valid escape sequence in a plain literal
@interact(sigma = widgets.FloatSlider(value=2., min=0.01, max=5.0, step=0.01, description=r'$\sigma$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'), 
          l = widgets.FloatSlider(value=1., min=0.01, max=10.0, step=0.01, description='$l$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          kernel = widgets.Dropdown(options=['RBF', 'Matern'],
                                  value='RBF',
                                  description='Kernel:',
                                  disabled=False,))
def plot_predict_1D(sigma, l, kernel):
    """Plot the 1D GP prediction for the chosen kernel and hyperparameters.

    Uses the module-level arrays ``x``, ``y`` and ``y_err`` as training
    data. Left panel: data with error bars, the prediction on 400 points
    in [-24, 24] and its +/- 1 standard deviation band. Right panel: the
    kernel held by the GP object, evaluated against the origin as a
    function of separation.

    Parameters
    ----------
    sigma : float
        Amplitude; the kernel is scaled by ``sigma**2``.
    l : float
        Value passed as the single argument of the kernel (labelled
        ``l`` on the slider).
    kernel : str
        Kernel name inserted in the kernel string ('RBF' or 'Matern'
        in the dropdown).
    """
    new_x = np.linspace(-24,24, 400).reshape((400,1))
    # the kernel is handed to gp_regression as a string
    kernel_str = "%f * %s(%f)"%((sigma**2, kernel, l))
    gp, y_pred, y_std = gp_regression(x, new_x, y, kernel_str, y_err=y_err)

    gs = gridspec.GridSpec(1, 2, width_ratios=[1.5, 1])
    plt.figure(figsize=(20,8))

    # --- left panel: data and prediction ---
    plt.subplot(gs[0])

    # Data
    plt.scatter(x, y, c='b', label = 'data')
    plt.errorbar(x, y, linestyle='', yerr=y_err, ecolor='b',
                 alpha=0.7,marker='.',zorder=0)

    # GP prediction with its 1-sigma band
    plt.plot(new_x, y_pred, 'r', lw =3, label = 'GP prediction')
    plt.fill_between(new_x.T[0], y_pred-y_std, y_pred+y_std, color='r', alpha=0.3)

    # zero line for reference
    plt.plot(new_x, np.zeros_like(new_x),'k--')
    plt.xlim(-24,24)
    plt.ylim(-3.,3.)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.xlabel('X', fontsize=20)
    plt.ylabel('Y', fontsize=20)
    plt.legend(fontsize=18)

    # --- right panel: the correlation function ---
    plt.subplot(gs[1])
    distance = np.linspace(0, 8, 60)
    # points (d, 0) compared with the origin: column 0 of the kernel matrix
    coord = np.array([distance, np.zeros_like(distance)]).T
    pcf = gp.kernel(coord, Y=np.zeros_like(coord))[:,0]
    plt.plot(distance, pcf, 'k', lw=3)
    plt.ylim(0, 10)
    plt.xlim(0, 8)
    plt.ylabel(r'$\xi(|x_i-x_j|)$', fontsize=20)
    plt.xlabel('$|x_i-x_j|$', fontsize=20)
    plt.title('Used correlation function (%s)'%(kernel), fontsize=16)

interactive(children=(FloatSlider(value=2.0, continuous_update=False, description='$\\sigma$:', max=5.0, min=0…

## Exercise 4): GP prediction and what are the best hyperparameters (2D)

In [7]:
########################################################################
# EXERCISE 4: GP prediction and what are the best hyperparameters (2D) #
########################################################################

# Load the 2D data set; columns 0-3 are used as theta_x, theta_y,
# the field value and its error.
data = np.loadtxt('data/data_2d_grf.txt')
theta_x, theta_y = data[:, 0], data[:, 1]
# coordinates stacked as one (theta_x, theta_y) row per sample
XXX = np.array((theta_x, theta_y)).transpose()
YYY, YYY_err = data[:, 2], data[:, 3]

# Function to interactively plot the 2D data, the GP prediction and the kernel used.
# raw string below: '\s' is not a valid escape sequence in a plain literal
@interact(sigma = widgets.FloatSlider(value=2., min=0.01, max=5.0, step=0.01, description=r'$\sigma$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'), 
          size = widgets.FloatSlider(value=1, min=0.01, max=10.0, step=0.01, description='$l$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          g1 = widgets.FloatSlider(value=0, min=-0.99, max=0.99, step=0.01, description='$g_1$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          g2 = widgets.FloatSlider(value=0, min=-0.99, max=0.99, step=0.01, description='$g_2$:',
          disabled=False,
          continuous_update=False,
          orientation='horizontal',
          readout=True,
          readout_format='.2f'),
          kernel = widgets.Dropdown(options=['AnisotropicRBF'],
                                  value='AnisotropicRBF',
                                  description='Kernel:',
                                  disabled=False,))
def plot_predict_2D(sigma, size, g1, g2, kernel):
    """Plot the 2D data, the GP prediction and the kernel used.

    Uses the module-level arrays ``XXX``, ``YYY`` and ``YYY_err`` as
    training data. Panels, left to right: the data coloured by value,
    the GP prediction on a 31x31 grid covering [-10, 10] in both
    directions, and the kernel held by the GP object evaluated against
    the origin on a 25x25 grid covering [-5, 5].

    Parameters
    ----------
    sigma : float
        Amplitude; the kernel is scaled by ``sigma**2``.
    size, g1, g2 : float
        Passed to ``get_correlation_length_matrix``; the inverse of the
        resulting matrix is given to ``AnisotropicRBF`` as ``invLam``.
    kernel : str
        Only used in the title of the last panel; the kernel string is
        always built with ``AnisotropicRBF``.
    """
    gs = gridspec.GridSpec(1, 3, width_ratios=[1.3, 1.3, 1])

    plt.figure(figsize=(22,6))
    plt.subplots_adjust(wspace=0.2)

    # --- panel 1: the data ---
    plt.subplot(gs[0])

    plt.scatter(XXX[:,0], XXX[:,1], c=YYY, s=80, cmap=plt.cm.seismic, vmin=-5, vmax=5)
    cb = plt.colorbar()
    cb.set_label('Y', fontsize=20)
    plt.xlabel(r'$\theta_{X}$', fontsize=20)
    plt.ylabel(r'$\theta_{Y}$', fontsize=20)
    plt.title('Data', fontsize=20)

    # the kernel is handed to gp_regression as a string; the matrix is
    # embedded through its repr
    L = get_correlation_length_matrix(size, g1, g2)
    inv_L = np.linalg.inv(L)
    kernel_str = "%f"%(sigma**2) + " * AnisotropicRBF(invLam={0!r})".format(inv_L)

    # --- panel 2: the GP prediction on a regular grid ---
    plt.subplot(gs[1])

    NPOINT = 31
    axis = np.linspace(-10,10, NPOINT)
    grid_x, grid_y = np.meshgrid(axis, axis)
    pred_coord = np.array([grid_x.reshape(NPOINT**2), grid_y.reshape(NPOINT**2)]).T
    # the returned standard deviation is not drawn on this plot
    gp, y_pred, _ = gp_regression(XXX, pred_coord, YYY, kernel_str, y_err=YYY_err)

    y_pred = y_pred.reshape((NPOINT, NPOINT))
    plt.imshow(y_pred, extent=[-10, 10, -10, 10],
               origin='lower', cmap=plt.cm.seismic, vmin=-5, vmax=5)

    cb = plt.colorbar()
    cb.set_label('Y', fontsize=20)
    plt.xlabel(r'$\theta_{X}$', fontsize=20)
    plt.ylabel(r'$\theta_{Y}$', fontsize=20)
    plt.title('Gaussian Process prediction', fontsize=20)

    # --- panel 3: the 2D correlation function ---
    plt.subplot(gs[2])
    pixel_squareroot = 25
    npixels = pixel_squareroot**2
    sep = np.linspace(-5, 5, pixel_squareroot)
    x1, x2 = np.meshgrid(sep, sep)
    sep_coord = np.array([x1.reshape(npixels), x2.reshape(npixels)]).T
    # every grid point compared with the origin: column 0 of the kernel matrix
    pcf = gp.kernel(sep_coord, Y=np.zeros_like(sep_coord))[:,0]
    pcf = pcf.reshape((pixel_squareroot, pixel_squareroot))

    plt.imshow(pcf, extent=[sep[0], sep[-1], sep[0], sep[-1]],
               origin='lower', cmap=plt.cm.Blues, vmin=0, vmax=5)
    cb = plt.colorbar()
    cb.set_label(r'$\xi (\Delta \theta_{X}, \Delta \theta_{Y})$', fontsize=20)
    # raw strings: '\D' is not a valid escape sequence in a plain literal
    plt.ylabel(r'$\Delta \theta_{Y} = \theta_{Y,i} - \theta_{Y,j}$', fontsize=20)
    plt.xlabel(r'$\Delta \theta_{X} = \theta_{X,i} - \theta_{X,j}$', fontsize=20)
    plt.title('Used correlation function \n (%s)'%(kernel), fontsize=16)

interactive(children=(FloatSlider(value=2.0, continuous_update=False, description='$\\sigma$:', max=5.0, min=0…