# Setup of Noteboook

The follwing code clones the github repository with course files. 
Subsequently it imports all libraries and custom modules needed for this notebook

In [1]:
!git clone https://github.com/DataHow/analytics-course-scripts.git
#!pip install --upgrade scipy==1.7.3

Cloning into 'analytics-course-scripts'...
remote: Enumerating objects: 240, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 240 (delta 0), reused 1 (delta 0), pack-reused 236[K
Receiving objects: 100% (240/240), 2.04 MiB | 7.48 MiB/s, done.
Resolving deltas: 100% (134/134), done.


In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Gaussian Processes

In this script, we are investigating the main idea behind using Gaussian Processes and how conditional probability can be used to make predictions from data.
The idea is that we have a function that it is unknown to the use (originator function below). We sample the function in different "experiments" and we get noisy data in return.
We will use such data to construct a Gaussian Process.

## Create originator function

Here we first create an originator function that's nonlinear and with modifyable parameters.

In [3]:
# Fix seed
np.random.seed(42)
# Function parameters
x = np.array(range(1,7));
beta0 = 6;
beta1 = -3;
beta2 = 0.5;
sigma = 0.2;
# Originator function
def f(x, beta0=6, beta1=-3, beta2=0.5, sigma=0.2):
    return np.matmul(np.vstack((np.ones(len(x)), np.array(x), np.array(x) **2)).T,np.array([beta0,beta1,beta2])) + sigma*np.random.normal(0,1,len(x))

In [4]:
# Plot originator function with generated data
xgrid = np.arange(0,7,0.1)
fig = px.line(x=xgrid,y=f(xgrid,sigma=0),color=px.Constant("Originator function"), title = "Generated function we try to learn",labels=dict(x="x", y="y", color="Legend"))
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
fig.show()

### Generate data from the function

Now we can generate teh data from that function. Define the number of experiments, i.e. how many times the underlying process is sampled at the different values of x. To generate samples we slightly vary the underlying parameters.

In [5]:
# Generate data
no_exp = 25;

In [6]:
# Plot generated experiments
Y = np.zeros((no_exp,len(x)))
fig = px.scatter()
for n in range(no_exp):
    Y[n,:] = f(x,float(np.random.normal(size=1)*0.5+beta0),float(np.random.normal(size=1)*0.1+beta1),float(np.random.normal(size=1)*0.01+beta2))
    fig.add_trace(go.Scatter(x=x,y=Y[n,:],mode='lines',line=dict(color = 'rgb(49,130,189)')))
fig.update_layout(showlegend=False,title="Generated data",xaxis_title="x",yaxis_title="y")
fig.show()


## Conditional probability of next step

### Plot correlations

Here, we show as an example how the data at x=1 are correlated to those at x=2.
Below, we compute the mean of the function at each sampling point and the corresponding coviance/correlation matrices.


In [7]:
# Select x-axis step
select_x_step = 1
# Select y-axis step
select_y_step = 2

In [8]:
# Plot correlation between selected steps
fig = px.scatter(x=Y[:,select_x_step],y=Y[:,select_y_step], title = "Correlation between step x = "+str(select_x_step)+" and x = "+str(select_y_step)+" with correlation being "+str(np.round(np.corrcoef(Y[:,select_x_step],Y[:,select_y_step])[0,1],3)),labels=dict(x="Y(x = "+str(select_x_step)+")", y="Y(x = "+str(select_y_step)+")"))
fig.show()

Here we compute and visualize the mean, covariance, and correlation for all the steps.

In [9]:
Y_mu = np.mean(Y,axis=0)
Y_cov = np.cov(Y,rowvar=False)
Y_corr = np.corrcoef(Y,rowvar=False)
print('Mean: \n',pd.DataFrame(Y_mu).T)
print('\nCovariance: \n',pd.DataFrame(Y_cov))
print('\nCorrelation: \n',pd.DataFrame(Y_corr))

Mean: 
          0         1         2         3         4         5
0  3.41856  2.076548  1.638223  2.084442  3.640273  6.228154

Covariance: 
           0         1         2         3         4         5
0  0.226924  0.196476  0.184843  0.189247  0.196882  0.213099
1  0.196476  0.249935  0.215806  0.257875  0.273509  0.275973
2  0.184843  0.215806  0.237602  0.255222  0.250451  0.263694
3  0.189247  0.257875  0.255222  0.388584  0.371904  0.398079
4  0.196882  0.273509  0.250451  0.371904  0.454542  0.432896
5  0.213099  0.275973  0.263694  0.398079  0.432896  0.487167

Correlation: 
           0         1         2         3         4         5
0  1.000000  0.825002  0.796044  0.637304  0.613025  0.640918
1  0.825002  1.000000  0.885572  0.827471  0.811468  0.790887
2  0.796044  0.885572  1.000000  0.839945  0.762099  0.775062
3  0.637304  0.827471  0.839945  1.000000  0.884914  0.914929
4  0.613025  0.811468  0.762099  0.884914  1.000000  0.919935
5  0.640918  0.790887  0.775062  

In [12]:
# Assume Multivariate normal distribution for the data with mean and covariance specified above
Ym = np.random.multivariate_normal(Y_mu,Y_cov,100000)

### Using the correlations

If we define the correlation structure and suppose that the underlying probability density function is a multivariate normal distribution, we can sample from such distribution and generate new data.
Then we can plot all the pairs of data at x=1 and x=2 from such distribution.

In [13]:
# Define number of samples to generate from multivariate distribution
num_samples = 50
# Animate results
use_anim = False

In [14]:
if use_anim:
    trace1 = go.Scatter(x=[select_x_step,select_y_step],y=[Ym[n,select_x_step],Ym[n,select_y_step]],mode='lines')
    frames = [ dict(data = dict( type = 'scatter', x=[1,2],y=[Ym[k,1],Ym[k,2]])) for k in range(num_samples)]
    layout = go.Layout(showlegend=False, hovermode='x unified', updatemenus=[dict(type='buttons', showactive=False, y=1.05, x=1.15, xanchor='right', yanchor='top', pad=dict(t=0, r=10), buttons=[dict(label='Play simulations', method='animate',args=[None, dict(frame=dict(duration=50, redraw=False), transition=dict(duration=0), fromcurrent=True, mode='immediate')])] ) ])
    layout.update(xaxis =dict(range=[select_x_step-1,select_y_step+1]),yaxis =dict(range=[min(Ym[:,select_x_step]),max(Ym[:,select_x_step])]))
    fig = go.Figure(data=trace1, frames=frames, layout=layout)
    fig.update_layout(showlegend=False,title="Generated data",xaxis_title="x",yaxis_title="y")
    fig.show()
else:
    fig = px.scatter()
    for n in range(num_samples):
        fig.add_trace(go.Scatter(x=[select_x_step,select_y_step],y=[Ym[n,select_x_step],Ym[n,select_y_step]],mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
    fig.update_xaxes(range=[select_x_step-1,select_y_step+1])
    fig.update_layout(showlegend=False,title="Generated data",xaxis_title="x",yaxis_title="y")
    fig.show()

## Conditional probability given single observation

What is the distribution of $f(x)$ at $x=2$ assuming that $f(x=1) = 3$?
This is a typical conditional probability problem, where we have to "conditionally" sample from the original distribution.

Hence, we look at conditional probability $P(A | B)$ where $A: x=$ `a_step` and $B: f(x=$ `b_step` $)=$ `b_value`


In [17]:
# Probability of f(x) at step x equal to
select_a_step = 2 
# Given the observation of f(x) equal to
select_b_value = 3
# at the observation step x equal to
select_b_step = 1
# tolerance region for conditional probability calculation
tol = 0.01

In [18]:
# Match samples from the distribution that specify observations above
rows = np.logical_and(Ym[:,select_b_step] > select_b_value -tol,Ym[:,select_b_step] > select_b_value +tol)

In [20]:
# Plot histogram
fig = px.histogram(Ym[rows,select_a_step],histnorm='probability density')
fig.update_layout(bargap=0.01,showlegend=False,title="Conditional Probability",xaxis_title="P(x="+str(select_a_step)+"|f(x="+str(select_b_step)+")="+str(select_b_value)+")",yaxis_title="Probability density")
fig.show()

Compute conditional probability exactly

In [21]:
mu_step = np.mean(Ym[rows,select_a_step])
std_step = np.std(Ym[rows,select_a_step])
print(mu_step)
print(std_step)

2.609794131227229
0.2711337357695473


Hece, we can show conditional probability of the function given our observation

In [24]:
# Select number of simulations to plot
num_simulations = 1

In [25]:
fig = px.scatter()
for n in range(num_simulations):
    fig.add_trace(go.Scatter(x=[select_b_step,select_a_step],y=[select_b_value,Ym[rows,select_a_step][n]],mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[select_a_step,select_a_step],y=[mu_step-2*std_step,mu_step+2*std_step],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[select_a_step,select_a_step],y=[mu_step,mu_step],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[select_b_step,select_b_step],y=[select_b_value,select_b_value],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.update_layout(showlegend=False,title="Conditional Probability given observations",xaxis_title="x",yaxis_title="y")
fig.update_xaxes(range=[select_x_step-1,select_y_step+1])
fig.show()

## Conditional probability given observation for all steps


### Histogram of f(x) at day 2,3,4,5 given Y(x = 1) = 3

It is possible to repeat the exercize above, where we fixed the value of $f(x=1) = 3$. However, this time, we will check the probability of measuring $f(x)$ at $x = 2, 3, 4, 5$.
As shown in the correlation matrix above, values at these positions are correlated to those at $x = 1$, although the correlation descreses getting far away from $x = 1$, and so the corresponding uncertainty increases.

In [26]:
# Observation of f(x) equal to
select_b_value = 3
# Observation step x equal to
select_b_step = 1
# tolerance region for conditional probability calculation
tol = 0.01
# Compute conditional probabilities for remaining steps
step_1 = 2
step_2 = 3
step_3 = 4
step_4 = 5

In [27]:
rows = np.logical_and(Ym[:,select_b_step] > select_b_value -tol,Ym[:,select_b_step] > select_b_value +tol)

fig = go.Figure()
fig.add_trace(go.Histogram(x=Ym[rows,step_1],histnorm='probability density',name="x = "+str(step_1)))
fig.add_trace(go.Histogram(x=Ym[rows,step_2],histnorm='probability density',name="x = "+str(step_2)))
fig.add_trace(go.Histogram(x=Ym[rows,step_3],histnorm='probability density',name="x = "+str(step_3)))
fig.add_trace(go.Histogram(x=Ym[rows,step_4],histnorm='probability density',name="x = "+str(step_4)))

# Overlay both histograms
fig.update_layout(barmode='overlay')
# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.update_layout(bargap=0.01,showlegend=True,title="Conditional Probability at all steps",xaxis_title="P(x="+str(step_1)+","+str(step_2)+","+str(step_3)+","+str(step_4)+" | f(x="+str(select_b_step)+")="+str(select_b_value)+")",yaxis_title="Probability density")
fig.show()

In [28]:
mu_1 = np.mean(Ym[rows,step_1])
mu_2 = np.mean(Ym[rows,step_2])
mu_3 = np.mean(Ym[rows,step_3])
mu_4 = np.mean(Ym[rows,step_4])
std_1 = np.std(Ym[rows,step_1])
std_2 = np.std(Ym[rows,step_2])
std_3 = np.std(Ym[rows,step_3])
std_4 = np.std(Ym[rows,step_4])
df_sum = pd.DataFrame(columns = ["steps","means"])
df_sum["steps"]= [step_1,step_2,step_3,step_4]
df_sum["means"]= [mu_1,mu_2,mu_3,mu_4]
print('\nMeans: \n',df_sum.to_string(index=False))
Y_corr = np.corrcoef(Ym[rows,step_1:step_4+1],rowvar=False)
print('\nCorrelations: \n',pd.DataFrame(Y_corr,columns=[step_1,step_2,step_3,step_4],index=[step_1,step_2,step_3,step_4]))



Means: 
  steps    means
     2 2.609794
     3 3.236773
     4 4.864705
     5 7.461276

Correlations: 
           2         3         4         5
2  1.000000  0.568646  0.379674  0.438809
3  0.568646  1.000000  0.713312  0.797981
4  0.379674  0.713312  1.000000  0.814839
5  0.438809  0.797981  0.814839  1.000000


### Visualizing corresponding function realizations.

In [29]:
# Select number of simulations to plot
num_simulations = 100

In [30]:
fig = px.scatter()
for n in range(num_simulations):
    fig.add_trace(go.Scatter(x=[select_b_step,step_1,step_2,step_3,step_4],y=np.concatenate((np.array(select_b_value),Ym[rows][:,[step_1,step_2,step_3,step_4]][n]),axis=None),mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[select_b_step,select_b_step],y=[select_b_value,select_b_value],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1-2*std_1,mu_1+2*std_1],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1,mu_1],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2-2*std_2,mu_2+2*std_2],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2,mu_2],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3-2*std_3,mu_3+2*std_3],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3,mu_3],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_4,step_4],y=[mu_4-2*std_4,mu_4+2*std_4],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_4,step_4],y=[mu_4,mu_4],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.update_layout(showlegend=False,title="Conditional Probability given observations",xaxis_title="x",yaxis_title="y")
fig.update_xaxes(range=[select_b_step-1,step_4+1])
fig.show()

## Conditional probability given multiple observations

Let's suppose to fix the starting point at $f(x=1) = 3$ and that at day $4$, $f(x=4) = 2$. What is the corresponding distribution of the Ys?

In [31]:
# Define the observations
obs_1_value = 3
obs_1_step = 1
# Define additional observation
obs_2_value = 5
obs_2_step = 4
# Remaining steps to do predictions at
step_1 = 2
step_2 = 3
step_3 = 5

# Specify tolerance
tol=0.01

In [32]:
rows = np.logical_and(np.logical_and(Ym[:,obs_1_step] > obs_1_value -tol,Ym[:,obs_1_step] > obs_1_value +tol),np.logical_and(Ym[:,obs_2_step] > obs_2_value -tol,Ym[:,obs_2_step] > obs_2_value +tol))

fig = go.Figure()
fig.add_trace(go.Histogram(x=Ym[rows,step_1],histnorm='probability density',name="x = "+str(step_1)))
fig.add_trace(go.Histogram(x=Ym[rows,step_2],histnorm='probability density',name="x = "+str(step_2)))
fig.add_trace(go.Histogram(x=Ym[rows,step_3],histnorm='probability density',name="x = "+str(step_3)))
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.update_layout(bargap=0.01,showlegend=True,title="Conditional Probability at all steps",xaxis_title="P(x="+str(step_1)+","+str(step_2)+","+str(step_3)+","+str(step_4)+" | f(x="+str(obs_1_step)+")="+str(obs_1_value)+", f(x="+str(obs_2_step)+")="+str(obs_2_value)+")",yaxis_title="Probability density")
fig.show()

In [33]:
mu_1 = np.mean(Ym[rows,step_1])
mu_2 = np.mean(Ym[rows,step_2])
mu_3 = np.mean(Ym[rows,step_3])
std_1 = np.std(Ym[rows,step_1])
std_2 = np.std(Ym[rows,step_2])
std_3 = np.std(Ym[rows,step_3])
df_sum = pd.DataFrame(columns = ["steps","means"])
df_sum["steps"]= [step_1,step_2,step_3]
df_sum["means"]= [mu_1,mu_2,mu_3]
print('\nMeans: \n',df_sum.to_string(index=False))
Y_corr = np.corrcoef(Ym[rows][:,[step_1,step_2,step_3]],rowvar=False)
print('\nCorrelations: \n',pd.DataFrame(Y_corr,columns=[step_1,step_2,step_3],index=[step_1,step_2,step_3]))



Means: 
  steps    means
     2 2.713373
     3 3.514485
     5 7.848037

Correlations: 
           2         3         5
2  1.000000  0.567701  0.405662
3  0.567701  1.000000  0.687699
5  0.405662  0.687699  1.000000


In [34]:
# Select number of simulations to plot
num_simulations = 100

In [35]:
fig = px.scatter()
for n in range(num_simulations):
    fig.add_trace(go.Scatter(x=[obs_1_step,step_1,step_2,obs_2_step,step_3],y=np.concatenate((obs_1_value,Ym[rows][:,[step_1,step_2]][n],obs_2_value,Ym[rows][:,[step_3]][n]),axis=None),mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[obs_1_step,obs_1_step],y=[obs_1_value,obs_1_value],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[obs_2_step,obs_2_step],y=[obs_2_value,obs_2_value],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1-2*std_1,mu_1+2*std_1],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1,mu_1],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2-2*std_2,mu_2+2*std_2],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2,mu_2],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3-2*std_3,mu_3+2*std_3],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3,mu_3],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.update_layout(showlegend=False,title="Conditional Probability given observations",xaxis_title="x",yaxis_title="y")
fig.update_xaxes(range=[obs_1_step-1,step_3+1])
fig.show()

# Kernels in Gaussian Processes

In this section, we will try to learn an unknown function ("originator function" below) using few samples.

The key point is the definition of a the kernel function, which is defining the similarity among data. The key parameter is sigma, which define the so-called similarity scale.

In the following, we will show how these simple concepts can be put together to predict the falue of the unknown function away from the data, and which is the behavior of the Guassian Process model in responce to changes in the model parameters.


## Create originator function

Similarly as above, we define a function which we try to learn

In [399]:
# Define observations
x = np.zeros(3);
x[0] = 2; # position of first observation
x[1] = 5; # position of second observation
x[2] = 8; # position of third observation
# Fix seed
np.random.seed(10)
# Function parameters
beta0 = 6
beta1 = -3
beta2 = 0.5
beta3 = -0.25
eps = 0
# Originator function
def f(x, beta0=6, beta1=-3, beta2=0.5, beta3=-0.025,eps=0):
    return np.matmul(np.vstack((np.ones(len(x)), np.array(x), np.array(x) **2,np.array(x) **3)).T,np.array([beta0,beta1,beta2,beta3])) + eps*np.random.normal(0,1,len(x))

y=f(x)    

In [400]:
# Plot originator function with generated data
xgrid = np.arange(0,12,0.1)
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
fig.update_layout(showlegend=True,title="Generating function we try to learn",xaxis_title="x",yaxis_title="y")
fig.show()

### Create a kernel function


The user can select different values of the similarity scale (sigma).
For better understanding, we are plotting the value of the similarity (kernel) function centered on the second observation (i.e., the kernel is $= 1$ in $x(2)$).

In [401]:
# Select point around which we plot kernel
x_kernel_base = x[1] # x=5
# Kernel parameters
sigma = 2
# Kernel function
def kernel_fcn(xi,xj,sigma=2): 
    return np.exp(-0.5*((np.array(xi)-np.array(xj))/sigma)**2) # This is rbf / normal distribution kernel
    #return np.exp(- sigma*np.abs((np.array(xi) - np.array(xj)))) # This is laplacian kernel
    #return (np.array(xi)*np.array(xj)+1)**sigma # This is polynomial kernel
    

In [402]:
# Plot originator function with generated data
xgrid = np.arange(0,12,0.1)
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=xgrid,y=kernel_fcn(xgrid,x_kernel_base,sigma),mode='lines',marker=dict(size=10),name="Kernel function"),secondary_y=True)
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
fig.update_layout(showlegend=True,title="Generating function with kernel",xaxis_title="x",yaxis_title="y")
fig.update_yaxes(title_text="Kernel fuc", secondary_y=True)
fig.show()

Compute the kernel matrix

In [403]:
no_data = len(x)
k = np.zeros((no_data,no_data));
for i in range(no_data):
    k[i,:] = kernel_fcn(x[i],x,sigma)
K = pd.DataFrame(k)
K.index = x
K.columns = x
print("Kernel given observations: \n",K)

Kernel given observations: 
           2.0       5.0       8.0
2.0  1.000000  0.324652  0.011109
5.0  0.324652  1.000000  0.324652
8.0  0.011109  0.324652  1.000000


### Add new observation point

The user can define a new observation point, where we want to predict the function.
The new observation will be added in the first position of the new kernel matrix, which is now 4x4, and it is calculated below.


In [404]:
#Define a new observation
new_obs_point = 6

In [405]:
# Update kernel
x_pred = np.array(new_obs_point)
x_new = np.append(x,x_pred)
k_pred = np.zeros((len(x)+1,len(x)+1))
# Copy old kernel
k_pred[0:len(x),0:len(x)] = k
# Compute kernel for new observations
k_pred[:,len(x)] =  kernel_fcn(x_pred,x_new,sigma)
k_pred[len(x),:] =  kernel_fcn(x_pred,x_new,sigma)
# Visualize kernel
K_pred = pd.DataFrame(k_pred); K_pred.index = x_new ; K_pred.columns = x_new
print("\nKernel given new observations: \n",K_pred)


Kernel given new observations: 
           2.0       5.0       8.0       6.0
2.0  1.000000  0.324652  0.011109  0.135335
5.0  0.324652  1.000000  0.324652  0.882497
8.0  0.011109  0.324652  1.000000  0.606531
6.0  0.135335  0.882497  0.606531  1.000000


### Create the covariance matrix

A second parameter is needed, which is the variance of the observation. This parameter is defining the prior knowldge.

In [406]:
sigma_y = sigma
cov_pred = sigma_y*k_pred
print(cov_pred)

[[2.         0.64930493 0.02221799 0.27067057]
 [0.64930493 2.         0.64930493 1.76499381]
 [0.02221799 0.64930493 2.         1.21306132]
 [0.27067057 1.76499381 1.21306132 2.        ]]


### New point prediction

In gaussian processes, the prediction of a new point $x^*$ having taget value $f^* = f(x^*)$ which is equal to $y^*$ in the noise free case we are dealing with now. Case with measurment error is described below.

Given the training and testing datasets defined above, the **joint distribution** of Gaussian process functions over the training and testing datasets ($f$ and $f^∗$, respectively) is given by:

\begin{align}\begin{bmatrix} f \\ f^* \end{bmatrix} \sim \mathcal{N} \left( 0,\begin{bmatrix} K(x,x) & K(x,x^*)\\ K(x^*,x) & K(x^*,x^*) \end{bmatrix} \right)\end{align}



However, when making a new prediction, what we are actually interested (as visualized above) is the conditional distribution of the possible predictions $f^*$ given our train and test data and train targets $x,x^*,f$. This is given by:

\begin{align} f^* | x,x^*,f \sim \mathcal{N}\left(\bar{f}(x^*),Cov(\bar{f}(x^*))\right)\end{align}

where the mean is:
\begin{align} \bar{f}(x^*) = K(x^*,x) K(x,x)^{-1} f\end{align}

and the covariance is:
\begin{align} Cov(\bar{f}(x^*)) = K(x^*,x^*) - K(x^*,x) K(x,x)^{-1} K(x,x^*)\end{align}

### Create multivariate distribution


Create the conditional multivariate distribution for the new observation point, given the other observations.

In [407]:
def mvncond2(joint_mean, joint_cov, index, obs_diff):
    # joint_mean is a zero vector
    # joint_cov covariance matrix containing new obs
    # index pointing to a new obs in covariance matrix
    # obs_diff = obs_f - obs_mu de-meaned predictions
    i = index
    n = joint_cov.shape[0]
    kxx = joint_cov[0:i,0:i]
    kxsx = joint_cov[i,0:i]
    s12s22 = np.matmul(kxsx,np.linalg.inv(kxx))
    cond_mean =joint_mean[i] +  np.matmul(s12s22,(obs_diff-joint_mean[i:n]))
    cond_cov = joint_cov[i,i] - np.matmul(s12s22,np.transpose(kxsx))
    return cond_mean, cond_cov
   

In [408]:
joint_mean = np.zeros(len(x)+1)
joint_cov = cov_pred
obs_f = y
obs_mu = np.mean(y)
cond_mean,cond_cov = mvncond2(joint_mean,joint_cov,3,obs_f - obs_mu)
new_obs_mu = cond_mean + obs_mu
print('Conditional mean of new obs: ',new_obs_mu)
new_obs_std = np.sqrt(cond_cov)
print('Conditional std of new obs: ',new_obs_std)

Conditional mean of new obs:  0.4506748781905998
Conditional std of new obs:  0.4269103110623952


In [409]:
# Plot originator function with generated data
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu-2*new_obs_std,new_obs_mu+2*new_obs_std],mode='markers+lines',line=dict(color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))
fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu,new_obs_mu],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[3]),name = "Conditional Mean"))
fig.update_layout(showlegend=True,title="Conditional prediction at a new point x="+str(new_obs_point),xaxis_title="x",yaxis_title="y")
fig.show()

### Compute predictions for all x

Here we repeat the exact same process as above for all the x-axis points.

In [410]:
# Loop through all points and compute the conditional mean and std
xgrid_mu = []
xgrid_std = []
for new_obs_point in xgrid:
    # Update kernel
    x_pred = np.array(new_obs_point)
    x_new = np.append(x,x_pred)
    k_pred = np.zeros((len(x)+1,len(x)+1))
    # Copy old kernel
    k_pred[0:len(x),0:len(x)] = k
    # Compute kernel for new observations
    k_pred[:,len(x)] =  kernel_fcn(x_pred,x_new,sigma)
    k_pred[len(x),:] =  kernel_fcn(x_pred,x_new,sigma)
    # Compute joint distribution
    joint_mean = np.zeros(len(x)+1)
    joint_cov = sigma_y*k_pred
    obs_f = y
    obs_mu = np.mean(y)
    # Compute conditional distribution
    cond_mean,cond_cov = mvncond2(joint_mean,joint_cov,3,obs_f - obs_mu)
    xgrid_mu.append(cond_mean + obs_mu)
    xgrid_std.append(np.sqrt(cond_cov))


In [411]:
# Plot the distribution for all points
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
#fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu-2*new_obs_std,new_obs_mu+2*new_obs_std],mode='markers+lines',line=dict(color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))
fig.add_trace(go.Scatter(x=xgrid,y=xgrid_mu,mode='lines',line=dict(width=3,color = px.colors.qualitative.G10[3]),name = "Conditional Mean"))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)-2*np.array(xgrid_std),mode='lines',line=dict(width=1,color = px.colors.qualitative.G10[3]),showlegend=False))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)+2*np.array(xgrid_std),mode='lines',fill='tonexty',line=dict(width=1,color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))

fig.update_layout(showlegend=True,title="Conditional prediction at all points",xaxis_title="x",yaxis_title="y")
fig.show()

### Effect of error in measurment

In this section, we simulate the effect of measurement error on the model.
We suppose of sampling twice the function is 6 regular points.

In [413]:
# Fix seed
np.random.seed(12)
# Generate observations
x_ind = np.arange(2,12,2)
# Specify number of measurments at the observation points
num_measurements = 2
x = np.repeat(x_ind,num_measurements)
# Predict values with measurement noise
eps = 0.1
y = f(x,eps=eps)
y

array([1.84729858, 1.73185741, 0.42424395, 0.22992644, 0.67531428,
       0.44652787, 1.20051271, 1.18797723, 0.91930181, 1.28718194])

In [414]:
# Plot originator function with generated data
xgrid = np.arange(0,12,0.1)
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=y,mode='markers',marker=dict(size=10),name="Data with measurment error"))
fig.update_layout(showlegend=True,title="Generating function we try to learn",xaxis_title="x",yaxis_title="y")
fig.show()

We compute the kernel

In [415]:
no_data = len(x)
k = np.zeros((no_data,no_data));
for i in range(no_data):
    k[i,:] = kernel_fcn(x[i],x,sigma)
K = pd.DataFrame(k)
K.index = x
K.columns = x
print("Kernel given observations: \n",K)

Kernel given observations: 
           2         2         4         4         6         6         8   \
2   1.000000  1.000000  0.606531  0.606531  0.135335  0.135335  0.011109   
2   1.000000  1.000000  0.606531  0.606531  0.135335  0.135335  0.011109   
4   0.606531  0.606531  1.000000  1.000000  0.606531  0.606531  0.135335   
4   0.606531  0.606531  1.000000  1.000000  0.606531  0.606531  0.135335   
6   0.135335  0.135335  0.606531  0.606531  1.000000  1.000000  0.606531   
6   0.135335  0.135335  0.606531  0.606531  1.000000  1.000000  0.606531   
8   0.011109  0.011109  0.135335  0.135335  0.606531  0.606531  1.000000   
8   0.011109  0.011109  0.135335  0.135335  0.606531  0.606531  1.000000   
10  0.000335  0.000335  0.011109  0.011109  0.135335  0.135335  0.606531   
10  0.000335  0.000335  0.011109  0.011109  0.135335  0.135335  0.606531   

          8         10        10  
2   0.011109  0.000335  0.000335  
2   0.011109  0.000335  0.000335  
4   0.135335  0.011109  0.011

And create predictions

In [417]:
# Loop through all points and compute the conditional mean and std
xgrid_mu = []
xgrid_std = []
for new_obs_point in xgrid:
    num_data = len(x)
    # Update kernel
    x_pred = np.array(new_obs_point)
    x_new = np.append(x,x_pred)
    k_pred = np.zeros((num_data+1,num_data+1))
    # Copy old kernel
    k_pred[0:num_data,0:num_data] = k
    # Compute kernel for new observations
    k_pred[:,num_data] =  kernel_fcn(x_pred,x_new,sigma)
    k_pred[num_data,:] =  kernel_fcn(x_pred,x_new,sigma)
    # Compute joint distribution
    joint_mean = np.zeros(num_data+1)
    # IMPORTANT! we add measurment noise to the covariance matrix
    joint_cov = sigma_y*k_pred + np.diag(np.diag(eps*np.ones((num_data+1,num_data+1))))
    obs_f = y
    obs_mu = np.mean(y)
    # Compute conditional distribution
    cond_mean,cond_cov = mvncond2(joint_mean,joint_cov,num_data,obs_f - obs_mu)
    xgrid_mu.append(cond_mean + obs_mu)
    xgrid_std.append(np.sqrt(cond_cov))


In [418]:
# Plot the distribution for all points
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=y,mode='markers',marker=dict(size=10),name="Generated data"))
#fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu-2*new_obs_std,new_obs_mu+2*new_obs_std],mode='markers+lines',line=dict(color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))
fig.add_trace(go.Scatter(x=xgrid,y=xgrid_mu,mode='lines',line=dict(width=3,color = px.colors.qualitative.G10[3]),name = "Conditional Mean"))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)-2*np.array(xgrid_std),mode='lines',line=dict(width=1,color = px.colors.qualitative.G10[3]),showlegend=False))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)+2*np.array(xgrid_std),mode='lines',fill='tonexty',line=dict(width=1,color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))

fig.update_layout(showlegend=True,title="Conditional prediction at all points",xaxis_title="x",yaxis_title="y")
fig.show()

# Task: 

