In [1]:
using Zygote
using LinearAlgebra, Distributions, Random, Statistics, SpecialFunctions
using Plots, JLD

pyplot()

Plots.PyPlotBackend()

In [2]:
include("F:/Tue/Thesis/codes/GP_CVI/Training_FITC.jl")#remember to change the path to "Training_FITC.jl"
include("F:/Tue/Thesis/codes/GP_CVI/FITC.jl") # remember to change the path to "FITC.jl"

FITC_online_update (generic function with 2 methods)

### Create function and observation

In [3]:
Random.seed!(1209) ;
true_process(x) = sin(x[1]^2) + 5*cos(x[2]); #x is 2-elements vector

N_obser = 100; #number of observations
x_train = rand(1:0.01:100,(2,N_obser)); #training points
y_train = Vector{Float64}(undef, N_obser); #training function values
for i=1:N_obser
    y_train[i] = true_process(x_train[:,i])
end

U = 50
x_induc = rand(1:0.01:100,(2,50)); #inducing point


In [4]:
#hyper-parameters for GP
σ, l = 5,[7,4];
#Now, find the info of the posterior of inducing points
μ_u, Σ_u = FITC_post_indu(x_train, x_induc, y_train, σ, l)

#there is actually a training step here, but at the moment just skip it. Remember that the ouput of the training must 
#include the optimal values of 3 following things: μ_u, Σ_u and Xu

([4.354843661668971, 0.18562413482039172, 0.8915038786114581, 2.967542769426658, 0.9324177190945193, 3.601150587264511, 4.901066101428669, -2.4610164797002723, -1.2970715549660847, -0.831010714100934  …  8.797027975393773, -2.946099125622936, -2.149311412717421, -2.183671248310092, 0.0022169590171483367, 2.9725040421072593, 3.4210578772921276, -1.5053756675538852, 1.122382711337553, -1.8072105998684538], [3.7642617614273317 0.008470552407627684 … 2.1475722406321934e-8 -3.838164129975099e-8; 0.008470552407626931 24.988763361100652 … -1.255706720827434e-10 -1.9625859987434427e-9; … ; 2.147572240632136e-8 -1.2557067208268046e-10 … 15.683631608765852 0.02943863032130478; -3.838164129975067e-8 -1.9625859987434973e-9 … 0.029438630321304837 14.87873378189952])

In [5]:
#Now perform prediction on new points
x_test = rand(1:100,(2,1));
μ_predict, σ_predict = FITC_predict(x_induc,x_test,μ_u,Σ_u,σ,l);
println("New points:",x_test)
println("Predicted valued:", μ_predict)
println("uncertainty:",sqrt(σ_predict))
println("True value:",true_process(x_test))

New points:[88; 92]
Predicted valued:[3.633211825654834]
uncertainty:[2.8562098599663455]
True value:-3.106334033293108


#### Basically everthing works just fine. we get the wrong result because all parameters are not optimized yet. 
#### Now, we will perform optimization, using Gradient Descent.

## Optimization

In [6]:
neg_llh(x_train,x_induc,y_train,σ,l)

205.97752571702296

In [12]:
Xu_op, σ_op, l_op, μ_u, Σ_u, llh_val = train_FITC(x_train, x_induc, y_train, σ, l, 1e-2, 70000)


([52.76612850828854 63.43898452839426 … 36.675863854079175 7.450957252466448; 81.01105061794325 94.24966097102498 … 12.396556024163415 38.54289674765451], 3.4111036695287935, [145.271496359267, 1.2816824339682302], [4.270367688499968, 5.694431458646791, -0.4188347086873273, 1.6786972705726677, 2.864598525971304, 1.2148522202025487, 5.730649047042886, -3.722479698106484, -1.6811037358943275, -0.7577389351144169  …  -0.13285160214259045, -3.8377988061862585, -4.856327153523058, -1.9971732783390188, -4.575402428792395, 5.329588923524865, 2.0746165036601663, -4.902454672131508, 4.679514051204174, 1.5725033939257607], [0.1061282977060283 1.3720818817881807e-6 … 2.705588121391091e-32 -8.014295518798196e-26; 1.3720818818359573e-6 0.3118477509527564 … -1.0290800768726288e-37 3.0482658403161476e-31; … ; 2.705588121389862e-32 -1.0290800768283937e-37 … 0.0572883498038507 -2.919132008855884e-7; -8.014295518853373e-26 3.0482658402969323e-31 … -2.919132008881375e-7 1.2458470316411656], 109.211699700

In [None]:
save("F:\\Tue\\Thesis\\codes\\GP_CVI\\optimal_params.jld","params",[Xu_op, σ_op, l_op]); #store optimal values

In [74]:
μ_u, Σ_u = FITC_post_indu(x_train, Xu_op, y_train, σ_op, l_op);

In [75]:

μ_predict_new, σ_predict_new = FITC_predict(Xu_op,x_test,μ_u,Σ_u,σ_op,l_op);
println("New points:",x_test)
println("Predicted valued (after optimizing):", μ_predict_new[1])
println("uncertainty (after optimizaing):",sqrt(σ_predict_new)[1])
println("True value:",true_process(x_test))

New points:[88; 92]
Predicted valued (after optimizing):-3.2106004318026278
uncertainty (after optimizaing):1.9708950826170808
True value:-3.106334033293108


### Test updating functions (still working on this)
We create a set of new test points and predict the function values of them. There are some points outside the training region, thus the uncertainty of those points might be large, and we need to update them. To do so, we need a threshold for the uncertainty. 

In [205]:

X_test = rand(100:200,(2,100)); # create a set of new points, wherein some points lie outside the region of training points
threshold_uncertainty = 2.0; #threshold for uncertainty

test_values = []; # this stores the values of new function values
true_values = []; # this stores true values
test_uncertainty = []; # this stores the uncertainty at different test points

update_count = 0

for i=1:size(X_test,2)
    predicted_value, predicted_uncertainty = FITC_predict(Xu_op,X_test[:,i],μ_u,Σ_u,σ_op,l_op);
    true_val = true_process(X_test[:,i])
    append!(test_values,predicted_value[1]);
    append!(test_uncertainty,sqrt(predicted_uncertainty)[1]);
    append!(true_values,true_val);
    
    # Now if the new uncertainty is greater than the threshold, we will observe the new function value and update
    # the posterior info p(f_u|f), i.e. update μ_u and Σ_u
    if sqrt(predicted_uncertainty)[1] > threshold_uncertainty #abs(sqrt(predicted_uncertainty)[1] - σ_op) < 1e-2 || sqrt(predicted_uncertainty)[1] > σ_op 
        f_new = true_process(X_test[:,i]); #get the new observation
        x_train = hcat(x_train,X_test[:,i]);
        Xu_op = hcat(Xu_op,X_test[:,i]);
        append!(y_train,f_new);
        μ_u, Σ_u = FITC_post_indu(x_train, Xu_op, y_train, σ_op, l_op);
        
    #elseif sqrt(predicted_uncertainty)[1] > threshold_uncertainty && sqrt(predicted_uncertainty)[1] < σ_op
        #f_new = true_process(X_test[:,i]); #get the new observation
        
        #update our belief about inducing points
        #μ_u, Σ_u = FITC_online_update(Xu_op,X_test[:,i],f_new,μ_u,Σ_u,σ_op,l_op)
        
        update_count += 1
    end
end



In [206]:
diff_val = abs.(true_values - test_values)

100-element Array{Float64,1}:
 2.474868061867437
 0.201941477483091
 1.0715506645922135
 0.9587175861356214
 1.127031879216437
 0.5268618559463937
 0.135475877484875
 1.9497488630660107
 0.3941929037723928
 0.20674221716040941
 1.1412096560671003
 0.8453835697479537
 0.7604759027378805
 ⋮
 0.03692034556406654
 1.0403744306083667
 0.8984257993747139
 0.5876540235105954
 0.005254005178541021
 0.5882811743657914
 0.39973262273917687
 0.8509354188283371
 1.1327028510966848
 0.3133777611092561
 0.08110759899041198
 2.669430450651639

In [207]:
length(findall(x->x<1, diff_val))

69

In [151]:
true_values[98]

3.8236255993877153

In [131]:
test_values[98]

2.768327270920563

In [208]:
test_uncertainty

100-element Array{Any,1}:
 1.3914362333614345
 1.2470814175215736
 0.438280721912232
 0.8091338261269616
 1.5646344025008045
 1.1918472374616498
 1.330239904517321
 0.45331825977953194
 2.0706121993379973
 0.06506002710245219
 0.9730122219758361
 0.7985670080269375
 0.6495383644937344
 ⋮
 1.107053328847957
 0.7730360741295981
 0.9506320443536189
 0.7421244623933039
 0.39052845620235443
 1.5375727310885408
 0.7385124396144505
 1.237764396136931
 1.384443673945532
 0.6858066655124615
 1.516278844271956
 1.2449698692166247

In [209]:
μ_u

117-element Array{Float64,1}:
  4.270367112520697
  5.696256517343923
 -0.4188347086873479
  1.6786972693249576
  2.864598526004346
  1.2148522202007168
  5.733125528311544
 -3.722479698106123
 -1.6811037358943812
 -0.7797711651301357
  3.965582103446725
 -3.564054721312357
  4.215350208131009
  ⋮
 -0.8971937819425866
 -4.213009699518236
  4.8845243705139865
  2.740637553243134
 -5.506260212223151
  5.5523984175902665
 -0.5497662402749242
  2.246625784043753
 -0.3535050019277455
 -4.1502243174982585
 -4.933057495295901
 -2.916001619225517

In [210]:
a = [1,5,0,-3]

4-element Array{Int64,1}:
  1
  5
  0
 -3

In [211]:
b = [2,1,2,3]

4-element Array{Int64,1}:
 2
 1
 2
 3

In [215]:
a.<b

4-element BitArray{1}:
 1
 0
 1
 1

In [217]:
length(findall(x->x==0,a .< b)) >= 1

true

In [225]:
c = vcat(a',b')

2×4 Array{Int64,2}:
 1  5  0  -3
 2  1  2   3

5