# Setup of Noteboook

The follwing code clones the github repository with course files. 
Subsequently it imports all libraries and custom modules needed for this notebook

In [1]:
!git clone https://github.com/DataHow/analytics-course-scripts.git

Cloning into 'analytics-course-scripts'...
remote: Enumerating objects: 356, done.[K
remote: Counting objects: 100% (120/120), done.[K
remote: Compressing objects: 100% (82/82), done.[K
remote: Total 356 (delta 77), reused 78 (delta 38), pack-reused 236[K
Receiving objects: 100% (356/356), 5.59 MiB | 27.15 MiB/s, done.
Resolving deltas: 100% (211/211), done.


In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Gaussian Processes

In this script, we are investigating the main idea behind using Gaussian Processes and how conditional probability can be used to make predictions from data.
The idea is that we have a function that it is unknown to the use (originator function below). We sample the function in different "experiments" and we get noisy data in return.
We will use such data to construct a Gaussian Process.

## Create originator function

Here we first create an originator function that's nonlinear and with modifyable parameters.

In [26]:
""" Function parameters """
XPOINTS = np.array(range(1,7));
beta0 = 6;
beta1 = -3;
beta2 = 0.5;
sigma = 0.2;
""" Function randomness """
SEED = 42
np.random.seed(SEED)
""" Originator function definition """
def f(x, beta0=6, beta1=-3, beta2=0.5, sigma=0.2):
    return np.matmul(np.vstack((np.ones(len(x)), np.array(x), np.array(x) **2)).T,np.array([beta0,beta1,beta2])) + sigma*np.random.normal(0,1,len(x))

In [27]:
# Plot originator function with generated data
XGRID = np.arange(0,7,0.1)
fig = px.line(x=XGRID,y=f(XGRID,beta0,beta1,beta2,sigma=0),color=px.Constant("Originator function"), title = "Generated function we try to learn",labels=dict(x="x", y="y", color="Legend"))
fig.add_trace(go.Scatter(x=XPOINTS,y=f(XPOINTS),mode='markers',marker=dict(size=10),name="Generated data"))
fig.show()

### Generate data from the function

Now we can generate teh data from that function. Define the number of experiments, i.e. how many times the underlying process is sampled at the different values of x. To generate samples we slightly vary the underlying parameters.

In [30]:
""" Generate data"""
NO_EXP = 25;

In [31]:
# Plot generated experiments
np.random.seed(SEED)
YEXP = np.zeros((NO_EXP,len(XPOINTS)))
fig = px.scatter()
for n in range(NO_EXP):
    YEXP[n,:] = f(XPOINTS,float(np.random.normal(size=1)*0.5+beta0),float(np.random.normal(size=1)*0.1+beta1),float(np.random.normal(size=1)*0.01+beta2))
    fig.add_trace(go.Scatter(x=XPOINTS,y=YEXP[n,:],mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.update_layout(showlegend=False,title="Generated data",xaxis_title="x",yaxis_title="y")
fig.update_layout(width=1000)
fig.show()


## Conditional probability of next step

### Plot correlations

Here, we show as an example how the data at x=1 are correlated to those at x=2.
Below, we compute the mean of the function at each sampling point and the corresponding coviance/correlation matrices.


In [56]:
""" Select steps to plot on axis """
STEP_XAXIS = 1
STEP_YAXIS = 2

In [58]:
# Plot correlation between selected steps
fig = px.scatter(x=YEXP[:,STEP_XAXIS-1],y=YEXP[:,STEP_YAXIS-1], 
                 title = "Correlation between step x = "+str(STEP_XAXIS)+" and x = "+str(STEP_YAXIS)+" with correlation being "+str(np.round(np.corrcoef(YEXP[:,STEP_XAXIS-1],YEXP[:,STEP_YAXIS-1])[0,1],3)),
                 labels=dict(x="Y(x = "+str(STEP_XAXIS)+")", y="Y(x = "+str(STEP_YAXIS)+")"),
                 marginal_x="box", marginal_y="box",)
fig.update_layout(width=1000,height=700)
fig.show()

Here we compute and visualize the mean, covariance, and correlation for all the steps.

In [59]:
Y_mu = np.mean(YEXP,axis=0)
Y_cov = np.cov(YEXP,rowvar=False)
Y_corr = np.corrcoef(YEXP,rowvar=False)
print('Mean: \n',pd.DataFrame(Y_mu).T)
print('\nCovariance: \n',pd.DataFrame(Y_cov))
print('\nCorrelation: \n',pd.DataFrame(Y_corr))

Mean: 
           0         1         2        3         4         5
0  3.513084  1.966933  1.477335  1.93705  3.367365  5.907705

Covariance: 
           0         1         2         3         4         5
0  0.194905  0.179594  0.193249  0.198866  0.149583  0.153221
1  0.179594  0.246583  0.226285  0.232950  0.206985  0.213588
2  0.193249  0.226285  0.314380  0.278909  0.287660  0.326630
3  0.198866  0.232950  0.278909  0.342012  0.318072  0.396732
4  0.149583  0.206985  0.287660  0.318072  0.384331  0.460696
5  0.153221  0.213588  0.326630  0.396732  0.460696  0.672466

Correlation: 
           0         1         2         3         4         5
0  1.000000  0.819216  0.780691  0.770245  0.546535  0.423225
1  0.819216  1.000000  0.812732  0.802160  0.672364  0.524518
2  0.780691  0.812732  1.000000  0.850579  0.827558  0.710385
3  0.770245  0.802160  0.850579  1.000000  0.877308  0.827260
4  0.546535  0.672364  0.827558  0.877308  1.000000  0.906204
5  0.423225  0.524518  0.710385  

In [60]:
# Assume Multivariate normal distribution for the data with mean and covariance specified above
YMN = np.random.multivariate_normal(Y_mu,Y_cov,100000)

### Using the correlations

If we define the correlation structure and suppose that the underlying probability density function is a multivariate normal distribution, we can sample from such distribution and generate new data.
Then we can plot all the pairs of data at x=1 and x=2 from such distribution.

In [76]:
""" Define number of samples to generate from multivariate distribution """
NUM_SAMPLES = 50
""" Select steps to plot on axis """
STEP_XAXIS = 1
STEP_YAXIS = 2

In [77]:
fig = px.scatter()
for n in range(NUM_SAMPLES):
    fig.add_trace(go.Scatter(x=[STEP_XAXIS,STEP_YAXIS],y=[YMN[n,STEP_XAXIS-1],YMN[n,STEP_YAXIS-1]],mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.update_xaxes(range=[STEP_XAXIS-1,STEP_YAXIS+1])
fig.update_layout(showlegend=False,title="Generated data",xaxis_title="x",yaxis_title="y")
fig.update_layout(width=1000,height=500)
fig.show()


## Conditional probability given single observation

What is the distribution of $f(x)$ at $x=2$ assuming that $f(x=1) = 3$?
This is a typical conditional probability problem, where we have to "conditionally" sample from the original distribution.

Hence, we look at conditional probability $P(B | A)$ where $B: x=$ `STEP_B` and $A: f(x=$ `STEP_A` $)=$ `VALUE_A`


In [82]:
""" Step at which we want to find out conditional probability """
STEP_B = 2
""" Step & Value at which we condition on / we have observed """
STEP_A = 1
VALUE_A= 3
""" tolerance region for observed value range """
TOL = 0.01

In [91]:
# Match samples from the distribution that specify observations above
rows = np.logical_and(YMN[:,STEP_A-1] > VALUE_A - TOL,YMN[:,STEP_A-1] > VALUE_A + TOL)
# Compute conditional probability exactly
mu_step = np.mean(YMN[rows,STEP_B-1])
std_step = np.std(YMN[rows,STEP_B-1])
print('\n Conditional Mean: ',mu_step)
print('\n Conditional Deviation: ',std_step,'\n')
# Plot histogram
fig = px.histogram(YMN[rows,STEP_B-1],histnorm='probability density')
fig.update_layout(bargap=0.01,showlegend=False,title="Conditional Probability",xaxis_title="P(x="+str(STEP_B)+"|f(x="+str(STEP_A)+")="+str(VALUE_A)+")",yaxis_title="Probability density")
fig.add_vline(mu_step)
fig.add_vline(mu_step-std_step,line=dict(color='orange'))
fig.add_vline(mu_step+std_step,line=dict(color='orange'))
fig.add_vline(mu_step-2*std_step,line=dict(color='orange',dash='dash'))
fig.add_vline(mu_step+2*std_step,line=dict(color='orange',dash='dash'))
fig.show()



 Conditional Mean:  2.064403818554492

 Conditional Deviation:  0.4390817539291218 



Hece, we can show conditional probability of the function given our observation

In [95]:
""" Select number of simulations to plot """
NUM_SIMULATIONS = 20

In [96]:
fig = px.scatter()
for n in range(NUM_SIMULATIONS):
    fig.add_trace(go.Scatter(x=[STEP_A,STEP_B],y=[VALUE_A,YMN[rows,STEP_B-1][n]],mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[STEP_B,STEP_B],y=[mu_step-2*std_step,mu_step+2*std_step],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[STEP_B,STEP_B],y=[mu_step,mu_step],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[STEP_A,STEP_A],y=[VALUE_A,VALUE_A],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.update_layout(showlegend=False,title="Conditional Probability given observations",xaxis_title="x",yaxis_title="y")
fig.update_xaxes(range=[STEP_A-1,STEP_B+1])
fig.show()

## Conditional probability given observation for all steps


### Histogram of f(x) at day 2,3,4,5 given Y(x = 1) = 3

It is possible to repeat the exercize above, where we fixed the value of $f(x=1) = 3$. However, this time, we will check the probability of measuring $f(x)$ at $x = 2, 3, 4, 5$.
As shown in the correlation matrix above, values at these positions are correlated to those at $x = 1$, although the correlation descreses getting far away from $x = 1$, and so the corresponding uncertainty increases.

In [106]:
""" Step & Value at which we condition on / we have observed """
STEP_A = 1
VALUE_A= 3
""" tolerance region for observed value range """
TOL = 0.01
""" Compute conditional probabilities for remaining steps """
STEPS_OTHER = [2,3,4,5,6]


In [134]:
rows = np.logical_and(YMN[:,STEP_A-1] > VALUE_A - TOL,YMN[:,STEP_A-1] > VALUE_A + TOL)
mu_OTHER = []
std_OTHER = []
for i in STEPS_OTHER:
    mu_OTHER.append(np.mean(YMN[rows,i-1]))
    std_OTHER.append(np.std(YMN[rows,i-1]))
df_sum = pd.DataFrame(columns = ["steps","means"])
df_sum["steps"]= STEPS_OTHER
df_sum["means"]= mu_OTHER
print('\nMeans: \n',df_sum.to_string(index=False))
Y_corr = np.corrcoef(YMN[rows,(STEPS_OTHER[0]-1):(STEPS_OTHER[-1])].shape,rowvar=True)
print('\nCorrelations: \n',pd.DataFrame(Y_corr,columns=STEPS_OTHER,index=STEPS_OTHER))



Means: 
  steps    means
     2 2.064404
     3 1.582587
     4 2.043541
     5 3.447588
     6 5.990337

Correlations: 
      2    3    4    5    6
2  1.0  1.0  1.0  1.0  1.0
3  1.0  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0  1.0
5  1.0  1.0  1.0  1.0  1.0
6  1.0  1.0  1.0  1.0  1.0


In [103]:

mu_B = np.mean(YMN[rows,STEP_B])
mu_C = np.mean(YMN[rows,STEP_C])
mu_D = np.mean(YMN[rows,STEP_D])
mu_E = np.mean(YMN[rows,STEP_E])
mu_F = np.mean(YMN[rows,STEP_F])
std_B = np.std(YMN[rows,STEP_B])
std_C = np.std(YMN[rows,STEP_C])
std_D = np.std(YMN[rows,STEP_D])
std_E = np.std(YMN[rows,STEP_E])
std_F = np.std(YMN[rows,STEP_F])
std_F = np.std(YMN[rows,STEP_F])
df_sum = pd.DataFrame(columns = ["steps","means"])
df_sum["steps"]= [STEP_B,STEP_C,STEP_D,STEP_E,STEP_F]
df_sum["means"]= [mu_B,mu_C,mu_D,mu_E,mu_F]
print('\nMeans: \n',df_sum.to_string(index=False))
Y_corr = np.corrcoef(YMN[rows,[STEP_B-1,STEP_C-1,STEP_D-1,STEP_E-1,STEP_F-1]],rowvar=False)
print('\nCorrelations: \n',pd.DataFrame(Y_corr,columns=[step_1,step_2,step_3,step_4],index=[step_1,step_2,step_3,step_4]))

fig = go.Figure()
fig.add_trace(go.Histogram(x=YMN[rows,STEP_B-1],histnorm='probability density',name="x = "+str(STEP_B)))
fig.add_trace(go.Histogram(x=YMN[rows,STEP_C-1],histnorm='probability density',name="x = "+str(STEP_C)))
fig.add_trace(go.Histogram(x=YMN[rows,STEP_D-1],histnorm='probability density',name="x = "+str(STEP_D)))
fig.add_trace(go.Histogram(x=YMN[rows,STEP_E-1],histnorm='probability density',name="x = "+str(STEP_E)))
fig.add_trace(go.Histogram(x=YMN[rows,STEP_F-1],histnorm='probability density',name="x = "+str(STEP_F)))
# Overlay both histograms
fig.update_layout(barmode='overlay')
# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.update_layout(bargap=0.01,showlegend=True,title="Conditional Probability at all steps",xaxis_title="P(x="+str(STEP_B)+","+str(STEP_C)+","+str(STEP_D)+","+str(STEP_E)+","+str(STEP_F)+" | f(x="+str(STEP_A)+")="+str(VALUE_A)+")",yaxis_title="Probability density")
fig.show()

NameError: ignored

### Visualizing corresponding function realizations.

In [None]:
# Select number of simulations to plot
num_simulations = 100

In [None]:
fig = px.scatter()
for n in range(num_simulations):
    fig.add_trace(go.Scatter(x=[select_b_step,step_1,step_2,step_3,step_4],y=np.concatenate((np.array(select_b_value),Ym[rows][:,[step_1,step_2,step_3,step_4]][n]),axis=None),mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[select_b_step,select_b_step],y=[select_b_value,select_b_value],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1-2*std_1,mu_1+2*std_1],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1,mu_1],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2-2*std_2,mu_2+2*std_2],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2,mu_2],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3-2*std_3,mu_3+2*std_3],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3,mu_3],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_4,step_4],y=[mu_4-2*std_4,mu_4+2*std_4],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_4,step_4],y=[mu_4,mu_4],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.update_layout(showlegend=False,title="Conditional Probability given observations",xaxis_title="x",yaxis_title="y")
fig.update_xaxes(range=[select_b_step-1,step_4+1])
fig.show()

## Conditional probability given multiple observations

Let's suppose to fix the starting point at $f(x=1) = 3$ and that at day $4$, $f(x=4) = 2$. What is the corresponding distribution of the Ys?

In [None]:
# Define the observations
obs_1_value = 3
obs_1_step = 1
# Define additional observation
obs_2_value = 5
obs_2_step = 4
# Remaining steps to do predictions at
step_1 = 2
step_2 = 3
step_3 = 5

# Specify tolerance
tol=0.01

In [None]:
rows = np.logical_and(np.logical_and(Ym[:,obs_1_step] > obs_1_value -tol,Ym[:,obs_1_step] > obs_1_value +tol),np.logical_and(Ym[:,obs_2_step] > obs_2_value -tol,Ym[:,obs_2_step] > obs_2_value +tol))

fig = go.Figure()
fig.add_trace(go.Histogram(x=Ym[rows,step_1],histnorm='probability density',name="x = "+str(step_1)))
fig.add_trace(go.Histogram(x=Ym[rows,step_2],histnorm='probability density',name="x = "+str(step_2)))
fig.add_trace(go.Histogram(x=Ym[rows,step_3],histnorm='probability density',name="x = "+str(step_3)))
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.update_layout(bargap=0.01,showlegend=True,title="Conditional Probability at all steps",xaxis_title="P(x="+str(step_1)+","+str(step_2)+","+str(step_3)+","+str(step_4)+" | f(x="+str(obs_1_step)+")="+str(obs_1_value)+", f(x="+str(obs_2_step)+")="+str(obs_2_value)+")",yaxis_title="Probability density")
fig.show()

In [None]:
mu_1 = np.mean(Ym[rows,step_1])
mu_2 = np.mean(Ym[rows,step_2])
mu_3 = np.mean(Ym[rows,step_3])
std_1 = np.std(Ym[rows,step_1])
std_2 = np.std(Ym[rows,step_2])
std_3 = np.std(Ym[rows,step_3])
df_sum = pd.DataFrame(columns = ["steps","means"])
df_sum["steps"]= [step_1,step_2,step_3]
df_sum["means"]= [mu_1,mu_2,mu_3]
print('\nMeans: \n',df_sum.to_string(index=False))
Y_corr = np.corrcoef(Ym[rows][:,[step_1,step_2,step_3]],rowvar=False)
print('\nCorrelations: \n',pd.DataFrame(Y_corr,columns=[step_1,step_2,step_3],index=[step_1,step_2,step_3]))



Means: 
  steps    means
     2 2.713373
     3 3.514485
     5 7.848037

Correlations: 
           2         3         5
2  1.000000  0.567701  0.405662
3  0.567701  1.000000  0.687699
5  0.405662  0.687699  1.000000


In [None]:
# Select number of simulations to plot
num_simulations = 100

In [None]:
fig = px.scatter()
for n in range(num_simulations):
    fig.add_trace(go.Scatter(x=[obs_1_step,step_1,step_2,obs_2_step,step_3],y=np.concatenate((obs_1_value,Ym[rows][:,[step_1,step_2]][n],obs_2_value,Ym[rows][:,[step_3]][n]),axis=None),mode='lines+markers',line=dict(color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[obs_1_step,obs_1_step],y=[obs_1_value,obs_1_value],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[obs_2_step,obs_2_step],y=[obs_2_value,obs_2_value],mode='markers',marker=dict(size=10,color = 'rgb(49,130,189)')))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1-2*std_1,mu_1+2*std_1],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_1,step_1],y=[mu_1,mu_1],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2-2*std_2,mu_2+2*std_2],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_2,step_2],y=[mu_2,mu_2],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3-2*std_3,mu_3+2*std_3],mode='lines+markers',line=dict(color = px.colors.qualitative.G10[1])))
fig.add_trace(go.Scatter(x=[step_3,step_3],y=[mu_3,mu_3],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[1])))
fig.update_layout(showlegend=False,title="Conditional Probability given observations",xaxis_title="x",yaxis_title="y")
fig.update_xaxes(range=[obs_1_step-1,step_3+1])
fig.show()

# Kernels in Gaussian Processes

In this section, we will try to learn an unknown function ("originator function" below) using few samples.

The key point is the definition of a the kernel function, which is defining the similarity among data. The key parameter is sigma, which define the so-called similarity scale.

In the following, we will show how these simple concepts can be put together to predict the falue of the unknown function away from the data, and which is the behavior of the Guassian Process model in responce to changes in the model parameters.


## Create originator function

Similarly as above, we define a function which we try to learn

In [None]:
# Define observations
x = np.zeros(3);
x[0] = 2; # position of first observation
x[1] = 5; # position of second observation
x[2] = 8; # position of third observation
# Fix seed
np.random.seed(10)
# Function parameters
beta0 = 6
beta1 = -3
beta2 = 0.5
beta3 = -0.25
eps = 0
# Originator function
def f(x, beta0=6, beta1=-3, beta2=0.5, beta3=-0.025,eps=0):
    return np.matmul(np.vstack((np.ones(len(x)), np.array(x), np.array(x) **2,np.array(x) **3)).T,np.array([beta0,beta1,beta2,beta3])) + eps*np.random.normal(0,1,len(x))

y=f(x)    

In [None]:
# Plot originator function with generated data
xgrid = np.arange(0,12,0.1)
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
fig.update_layout(showlegend=True,title="Generating function we try to learn",xaxis_title="x",yaxis_title="y")
fig.show()

### Create a kernel function


The user can select different values of the similarity scale (sigma).
For better understanding, we are plotting the value of the similarity (kernel) function centered on the second observation (i.e., the kernel is $= 1$ in $x(2)$).

In [None]:
# Select point around which we plot kernel
x_kernel_base = x[1] # x=5
# Kernel parameters
sigma = 2
# Kernel function
def kernel_fcn(xi,xj,sigma=2): 
    return np.exp(-0.5*((np.array(xi)-np.array(xj))/sigma)**2) # This is rbf / normal distribution kernel
    #return np.exp(- sigma*np.abs((np.array(xi) - np.array(xj)))) # This is laplacian kernel
    #return (np.array(xi)*np.array(xj)+1)**sigma # This is polynomial kernel
    

In [None]:
# Plot originator function with generated data
xgrid = np.arange(0,12,0.1)
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=xgrid,y=kernel_fcn(xgrid,x_kernel_base,sigma),mode='lines',marker=dict(size=10),name="Kernel function"),secondary_y=True)
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
fig.update_layout(showlegend=True,title="Generating function with kernel",xaxis_title="x",yaxis_title="y")
fig.update_yaxes(title_text="Kernel fuc", secondary_y=True)
fig.show()

Compute the kernel matrix

In [None]:
no_data = len(x)
k = np.zeros((no_data,no_data));
for i in range(no_data):
    k[i,:] = kernel_fcn(x[i],x,sigma)
K = pd.DataFrame(k)
K.index = x
K.columns = x
print("Kernel given observations: \n",K)

Kernel given observations: 
           2.0       5.0       8.0
2.0  1.000000  0.324652  0.011109
5.0  0.324652  1.000000  0.324652
8.0  0.011109  0.324652  1.000000


### Add new observation point

The user can define a new observation point, where we want to predict the function.
The new observation will be added in the first position of the new kernel matrix, which is now 4x4, and it is calculated below.


In [None]:
#Define a new observation
new_obs_point = 6

In [None]:
# Update kernel
x_pred = np.array(new_obs_point)
x_new = np.append(x,x_pred)
k_pred = np.zeros((len(x)+1,len(x)+1))
# Copy old kernel
k_pred[0:len(x),0:len(x)] = k
# Compute kernel for new observations
k_pred[:,len(x)] =  kernel_fcn(x_pred,x_new,sigma)
k_pred[len(x),:] =  kernel_fcn(x_pred,x_new,sigma)
# Visualize kernel
K_pred = pd.DataFrame(k_pred); K_pred.index = x_new ; K_pred.columns = x_new
print("\nKernel given new observations: \n",K_pred)


Kernel given new observations: 
           2.0       5.0       8.0       6.0
2.0  1.000000  0.324652  0.011109  0.135335
5.0  0.324652  1.000000  0.324652  0.882497
8.0  0.011109  0.324652  1.000000  0.606531
6.0  0.135335  0.882497  0.606531  1.000000


### Create the covariance matrix

A second parameter is needed, which is the variance of the observation. This parameter is defining the prior knowldge.

In [None]:
sigma_y = sigma
cov_pred = sigma_y*k_pred
print(cov_pred)

[[2.         0.64930493 0.02221799 0.27067057]
 [0.64930493 2.         0.64930493 1.76499381]
 [0.02221799 0.64930493 2.         1.21306132]
 [0.27067057 1.76499381 1.21306132 2.        ]]


### New point prediction

In gaussian processes, the prediction of a new point $x^*$ having taget value $f^* = f(x^*)$ which is equal to $y^*$ in the noise free case we are dealing with now. Case with measurment error is described below.

Given the training and testing datasets defined above, the **joint distribution** of Gaussian process functions over the training and testing datasets ($f$ and $f^∗$, respectively) is given by:

\begin{align}\begin{bmatrix} f \\ f^* \end{bmatrix} \sim \mathcal{N} \left( 0,\begin{bmatrix} K(x,x) & K(x,x^*)\\ K(x^*,x) & K(x^*,x^*) \end{bmatrix} \right)\end{align}



However, when making a new prediction, what we are actually interested (as visualized above) is the conditional distribution of the possible predictions $f^*$ given our train and test data and train targets $x,x^*,f$. This is given by:

\begin{align} f^* | x,x^*,f \sim \mathcal{N}\left(\bar{f}(x^*),Cov(\bar{f}(x^*))\right)\end{align}

where the mean is:
\begin{align} \bar{f}(x^*) = K(x^*,x) K(x,x)^{-1} f\end{align}

and the covariance is:
\begin{align} Cov(\bar{f}(x^*)) = K(x^*,x^*) - K(x^*,x) K(x,x)^{-1} K(x,x^*)\end{align}

### Create multivariate distribution


Create the conditional multivariate distribution for the new observation point, given the other observations.

In [None]:
def mvncond2(joint_mean, joint_cov, index, obs_diff):
    # joint_mean is a zero vector
    # joint_cov covariance matrix containing new obs
    # index pointing to a new obs in covariance matrix
    # obs_diff = obs_f - obs_mu de-meaned predictions
    i = index
    n = joint_cov.shape[0]
    kxx = joint_cov[0:i,0:i]
    kxsx = joint_cov[i,0:i]
    s12s22 = np.matmul(kxsx,np.linalg.inv(kxx))
    cond_mean =joint_mean[i] +  np.matmul(s12s22,(obs_diff-joint_mean[i:n]))
    cond_cov = joint_cov[i,i] - np.matmul(s12s22,np.transpose(kxsx))
    return cond_mean, cond_cov
   

In [None]:
joint_mean = np.zeros(len(x)+1)
joint_cov = cov_pred
obs_f = y
obs_mu = np.mean(y)
cond_mean,cond_cov = mvncond2(joint_mean,joint_cov,3,obs_f - obs_mu)
new_obs_mu = cond_mean + obs_mu
print('Conditional mean of new obs: ',new_obs_mu)
new_obs_std = np.sqrt(cond_cov)
print('Conditional std of new obs: ',new_obs_std)

Conditional mean of new obs:  0.4506748781905998
Conditional std of new obs:  0.4269103110623952


In [None]:
# Plot originator function with generated data
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu-2*new_obs_std,new_obs_mu+2*new_obs_std],mode='markers+lines',line=dict(color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))
fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu,new_obs_mu],mode='markers',marker=dict(size=10,color = px.colors.qualitative.G10[3]),name = "Conditional Mean"))
fig.update_layout(showlegend=True,title="Conditional prediction at a new point x="+str(new_obs_point),xaxis_title="x",yaxis_title="y")
fig.show()

### Compute predictions for all x

Here we repeat the exact same process as above for all the x-axis points.

In [None]:
# Loop through all points and compute the conditional mean and std
xgrid_mu = []
xgrid_std = []
for new_obs_point in xgrid:
    # Update kernel
    x_pred = np.array(new_obs_point)
    x_new = np.append(x,x_pred)
    k_pred = np.zeros((len(x)+1,len(x)+1))
    # Copy old kernel
    k_pred[0:len(x),0:len(x)] = k
    # Compute kernel for new observations
    k_pred[:,len(x)] =  kernel_fcn(x_pred,x_new,sigma)
    k_pred[len(x),:] =  kernel_fcn(x_pred,x_new,sigma)
    # Compute joint distribution
    joint_mean = np.zeros(len(x)+1)
    joint_cov = sigma_y*k_pred
    obs_f = y
    obs_mu = np.mean(y)
    # Compute conditional distribution
    cond_mean,cond_cov = mvncond2(joint_mean,joint_cov,3,obs_f - obs_mu)
    xgrid_mu.append(cond_mean + obs_mu)
    xgrid_std.append(np.sqrt(cond_cov))


In [None]:
# Plot the distribution for all points
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=f(x),mode='markers',marker=dict(size=10),name="Generated data"))
#fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu-2*new_obs_std,new_obs_mu+2*new_obs_std],mode='markers+lines',line=dict(color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))
fig.add_trace(go.Scatter(x=xgrid,y=xgrid_mu,mode='lines',line=dict(width=3,color = px.colors.qualitative.G10[3]),name = "Conditional Mean"))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)-2*np.array(xgrid_std),mode='lines',line=dict(width=1,color = px.colors.qualitative.G10[3]),showlegend=False))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)+2*np.array(xgrid_std),mode='lines',fill='tonexty',line=dict(width=1,color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))

fig.update_layout(showlegend=True,title="Conditional prediction at all points",xaxis_title="x",yaxis_title="y")
fig.show()

### Effect of error in measurment

In this section, we simulate the effect of measurement error on the model.
We suppose of sampling twice the function is 6 regular points.

In [None]:
# Fix seed
np.random.seed(12)
# Generate observations
x_ind = np.arange(2,12,2)
# Specify number of measurments at the observation points
num_measurements = 2
x = np.repeat(x_ind,num_measurements)
# Predict values with measurement noise
eps = 0.1
y = f(x,eps=eps)
y

array([1.84729858, 1.73185741, 0.42424395, 0.22992644, 0.67531428,
       0.44652787, 1.20051271, 1.18797723, 0.91930181, 1.28718194])

In [None]:
# Plot originator function with generated data
xgrid = np.arange(0,12,0.1)
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=y,mode='markers',marker=dict(size=10),name="Data with measurment error"))
fig.update_layout(showlegend=True,title="Generating function we try to learn",xaxis_title="x",yaxis_title="y")
fig.show()

We compute the kernel

In [None]:
no_data = len(x)
k = np.zeros((no_data,no_data));
for i in range(no_data):
    k[i,:] = kernel_fcn(x[i],x,sigma)
K = pd.DataFrame(k)
K.index = x
K.columns = x
print("Kernel given observations: \n",K)

Kernel given observations: 
           2         2         4         4         6         6         8   \
2   1.000000  1.000000  0.606531  0.606531  0.135335  0.135335  0.011109   
2   1.000000  1.000000  0.606531  0.606531  0.135335  0.135335  0.011109   
4   0.606531  0.606531  1.000000  1.000000  0.606531  0.606531  0.135335   
4   0.606531  0.606531  1.000000  1.000000  0.606531  0.606531  0.135335   
6   0.135335  0.135335  0.606531  0.606531  1.000000  1.000000  0.606531   
6   0.135335  0.135335  0.606531  0.606531  1.000000  1.000000  0.606531   
8   0.011109  0.011109  0.135335  0.135335  0.606531  0.606531  1.000000   
8   0.011109  0.011109  0.135335  0.135335  0.606531  0.606531  1.000000   
10  0.000335  0.000335  0.011109  0.011109  0.135335  0.135335  0.606531   
10  0.000335  0.000335  0.011109  0.011109  0.135335  0.135335  0.606531   

          8         10        10  
2   0.011109  0.000335  0.000335  
2   0.011109  0.000335  0.000335  
4   0.135335  0.011109  0.011

And create predictions

In [None]:
# Loop through all points and compute the conditional mean and std
xgrid_mu = []
xgrid_std = []
for new_obs_point in xgrid:
    num_data = len(x)
    # Update kernel
    x_pred = np.array(new_obs_point)
    x_new = np.append(x,x_pred)
    k_pred = np.zeros((num_data+1,num_data+1))
    # Copy old kernel
    k_pred[0:num_data,0:num_data] = k
    # Compute kernel for new observations
    k_pred[:,num_data] =  kernel_fcn(x_pred,x_new,sigma)
    k_pred[num_data,:] =  kernel_fcn(x_pred,x_new,sigma)
    # Compute joint distribution
    joint_mean = np.zeros(num_data+1)
    # IMPORTANT! we add measurment noise to the covariance matrix
    joint_cov = sigma_y*k_pred + np.diag(np.diag(eps*np.ones((num_data+1,num_data+1))))
    obs_f = y
    obs_mu = np.mean(y)
    # Compute conditional distribution
    cond_mean,cond_cov = mvncond2(joint_mean,joint_cov,num_data,obs_f - obs_mu)
    xgrid_mu.append(cond_mean + obs_mu)
    xgrid_std.append(np.sqrt(cond_cov))


In [None]:
# Plot the distribution for all points
fig = go.Figure()
fig.add_trace(go.Scatter(x=xgrid,y=f(xgrid),mode='lines',name="Originator function"))
fig.add_trace(go.Scatter(x=x,y=y,mode='markers',marker=dict(size=10),name="Generated data"))
#fig.add_trace(go.Scatter(x=[new_obs_point,new_obs_point],y=[new_obs_mu-2*new_obs_std,new_obs_mu+2*new_obs_std],mode='markers+lines',line=dict(color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))
fig.add_trace(go.Scatter(x=xgrid,y=xgrid_mu,mode='lines',line=dict(width=3,color = px.colors.qualitative.G10[3]),name = "Conditional Mean"))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)-2*np.array(xgrid_std),mode='lines',line=dict(width=1,color = px.colors.qualitative.G10[3]),showlegend=False))
fig.add_trace(go.Scatter(x=xgrid,y=np.array(xgrid_mu)+2*np.array(xgrid_std),mode='lines',fill='tonexty',line=dict(width=1,color = px.colors.qualitative.G10[3]),name = "Conditional Deviation"))

fig.update_layout(showlegend=True,title="Conditional prediction at all points",xaxis_title="x",yaxis_title="y")
fig.show()

# Task: 

