63: WIP: add SVD functionality r=odunbar a=odunbar

Resolves #57. We add the ability for the Gaussian process to learn a non-diagonal covariance matrix for the observational noise by applying an SVD.

Resolves #58. We add functionality to toggle whether we wish to learn the observational noise, and add (mathematically correct) default values when we do not.

We also have 2 examples:
- [x] Gaussian process plot. We train a GP on some training points in 2D space, and plot the mean and variance (compared with the underlying model and observational noise) in the untransformed space.
- [x] Noise learning test. We train a GP with known noise, once with `learn_noise = false` and once with `learn_noise = true`, and compare the learned `WhiteKernel` parameters to the true unlearned parameters.

Co-authored with @bielim

Co-authored-by: odunbar <odunbar@caltech.edu>
Co-authored-by: Melanie Bieli <melanie.bieli@bluewin.ch>
Co-authored-by: bielim <bielim@users.noreply.github.com>
Co-authored-by: odunbar <47412152+odunbar@users.noreply.github.com>
Showing 14 changed files with 1,384 additions and 339 deletions.
@@ -0,0 +1,132 @@
# Import modules
using Random
using Distributions
using Statistics
using LinearAlgebra
using CalibrateEmulateSample.GPEmulator

###############################################################################
#                                                                             #
# This example shows how to fit a Gaussian Process (GP) regression model     #
# using GPEmulator and demonstrates how the GPObj's built-in Singular        #
# Value Decomposition (SVD) decorrelates the training data and maps them     #
# into a space where the covariance of the observational noise is the        #
# identity.                                                                   #
#                                                                             #
# When training a GP, the hyperparameters of its kernel (or covariance       #
# function) are optimized with respect to the given input-output pairs.      #
# When training a separate GP for each output component (as is usually       #
# done for multidimensional output, rather than fitting one multi-output     #
# model), one assumes that the output components are independent.            #
# The SVD transformation ensures that this is a valid assumption.            #
#                                                                             #
# The decorrelation by the SVD is done automatically when instantiating      #
# a `GPObj`, but the user can choose to have the `predict` function          #
# return its predictions in the original space (by setting                   #
# transform_to_real=true)                                                     #
#                                                                             #
# Training points: {(x_i, y_i)} (i=1,...,n), where x_i ∈ ℝ² and y_i ∈ ℝ²     #
# The y_i are assumed to be related to the x_i by:                           #
# y_i = G(x_i) + η, where G is a map from ℝ² to ℝ², and η is noise drawn     #
# from a 2D normal distribution with known mean μ and covariance Σ           #
# Two Gaussian Process models are fit, one that predicts y_i[1] from x_i     #
# and one that predicts y_i[2] from x_i.                                     #
#                                                                             #
# The GPEmulator module can be used as a standalone module to fit            #
# Gaussian Process regression models, but it was originally designed as      #
# the "Emulate" step of the "Calibrate-Emulate-Sample" framework             #
# developed at CliMA.                                                         #
# Further reading on Calibrate-Emulate-Sample:                               #
#                     Cleary et al., 2020                                     #
#                     https://arxiv.org/abs/2001.03689                        #
#                                                                             #
###############################################################################
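
# (Usage sketch, illustration only: `predict` is not called in this example,
# but -- assuming the `GPEmulator.predict` interface used in the plotting
# example added in this commit -- predictions can be mapped back to the
# original, correlated space like so:)
# gp_mean, gp_cov = GPEmulator.predict(gpobj, new_inputs, transform_to_real=true)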

rng_seed = 41
Random.seed!(rng_seed)

gppackage = GPJL()
pred_type = YType()

# Generate training data (x-y pairs, where x ∈ ℝᵖ, y ∈ ℝᵈ)
# x = [x1, x2]: inputs/predictors/features/parameters
# y = [y1, y2]: outputs/predictands/targets/observables
# The observables y are related to the parameters x by:
# y = G(x1, x2) + η,
# where G(x1, x2) := [sin(x1) + cos(x2), sin(x1) - cos(x2)], and η ~ N(0, Σ)
n = 200  # number of training points
p = 2    # input dim
d = 2    # output dim

X = 2.0 * π * rand(n, p)

# G(x1, x2)
g1x = sin.(X[:, 1]) .+ cos.(X[:, 2])
g2x = sin.(X[:, 1]) .- cos.(X[:, 2])
gx = [g1x g2x]

# Add noise η
μ = zeros(d)
Σ = 0.1 * [[0.8, 0.2] [0.2, 0.5]]  # d x d
noise_samples = rand(MvNormal(μ, Σ), n)'

# y = G(x) + η
Y = gx .+ noise_samples
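
# (Illustration only -- not part of the GPEmulator API. Decorrelating the
# noise samples by hand with the SVD of Σ should give a covariance close to
# the identity; this is the transformation the header above says the GPObj
# applies internally. The names F_svd and white_noise are local helpers.)
F_svd = svd(Σ)
white_noise = (Diagonal(1.0 ./ sqrt.(F_svd.S)) * F_svd.Vt * noise_samples')'
println("Covariance of SVD-decorrelated noise (should be ≈ I):")
println(cov(white_noise, dims=1))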

# Fit 2D Gaussian Process regression model
# (To be precise: We fit two models, one that predicts y1 from x1 and x2, and
# one that predicts y2 from x1 and x2)
# Setting GPkernel=nothing leads to the creation of a default squared
# exponential kernel.
# Setting noise_learn=true leads to the addition of white noise to the kernel,
# whose hyperparameter -- the signal variance, or "noise" -- will be learned
# in the training phase via an optimization procedure.
# Because of the SVD transformation applied to the output, we expect the
# learned noise to be close to 1.
gpobj1 = GPObj(X, Y, gppackage, GPkernel=nothing, obs_noise_cov=Σ,
               normalized=true, noise_learn=true, prediction_type=pred_type)

println("\n-----------")
println("Results of training Gaussian Process models with noise_learn=true")
println("-----------")

println("\nKernel of the GP trained to predict y1 from x=(x1, x2):")
println(gpobj1.models[1].kernel)
println("Learned noise parameter, σ_1:")
learned_σ_1 = sqrt(gpobj1.models[1].kernel.kright.σ2)
println("σ_1 = $learned_σ_1")
# Check if the learned noise is approximately 1
@assert(isapprox(learned_σ_1, 1.0; atol=0.1))

println("\nKernel of the GP trained to predict y2 from x=(x1, x2):")
println(gpobj1.models[2].kernel)
println("Learned noise parameter, σ_2:")
learned_σ_2 = sqrt(gpobj1.models[2].kernel.kright.σ2)
println("σ_2 = $learned_σ_2")
# Check if the learned noise is approximately 1
@assert(isapprox(learned_σ_2, 1.0; atol=0.1))
println("------------------------------------------------------------------\n")

# For comparison: When noise_learn is set to false, the observational noise
# is set to 1.0 and is not learned/optimized during the training. But thanks
# to the SVD, 1.0 is the correct value to use.
gpobj2 = GPObj(X, Y, gppackage, GPkernel=nothing, obs_noise_cov=Σ,
               normalized=true, noise_learn=false, prediction_type=pred_type)

println("\n-----------")
println("Results of training Gaussian Process models with noise_learn=false")
println("-----------")

println("\nKernel of the GP trained to predict y1 from x=(x1, x2):")
# Note: In contrast to the kernels of the gpobj1 models, these ones do not
# have a white noise ("Noise") kernel component
println(gpobj2.models[1].kernel)
# logNoise is given as log(sqrt(noise)), so noise = exp(logNoise)^2
obs_noise_1 = exp(gpobj2.models[1].logNoise.value)^2
println("Observational noise: $obs_noise_1")

println("\nKernel of the GP trained to predict y2 from x=(x1, x2):")
println(gpobj2.models[2].kernel)
# logNoise is given as log(sqrt(noise)), so noise = exp(logNoise)^2
obs_noise_2 = exp(gpobj2.models[2].logNoise.value)^2
println("Observational noise: $obs_noise_2")
println("------------------------------------------------------------------") |
@@ -0,0 +1,170 @@
# Import modules
using Random
using Distributions
using Statistics
using Plots; pyplot(size=(1500, 700))
Plots.scalefontsizes(1.3)
using LinearAlgebra
using CalibrateEmulateSample.GPEmulator

###############################################################################
#                                                                             #
# This example shows how to fit a Gaussian Process regression model          #
# using GPEmulator, and how to plot the mean and variance                    #
#                                                                             #
# Training points: {(x_i, y_i)} (i=1,...,n), where x_i ∈ ℝ² and y_i ∈ ℝ²     #
# The y_i are assumed to be related to the x_i by:                           #
# y_i = G(x_i) + η, where G is a map from ℝ² to ℝ², and η is noise drawn     #
# from a 2D normal distribution with known mean μ and covariance Σ           #
#                                                                             #
# Two Gaussian Process models are fit, one that predicts y_i[1] from x_i     #
# and one that predicts y_i[2] from x_i. Fitting separate models for         #
# y_i[1] and y_i[2] requires the outputs to be independent - since this      #
# cannot be assumed to be the case a priori, the training data are           #
# decorrelated by performing a Singular Value Decomposition (SVD) on the     #
# noise covariance Σ, and each model is then trained in the decorrelated     #
# space.                                                                      #
#                                                                             #
# The decorrelation is done automatically when instantiating a `GPObj`,      #
# but the user can choose to have the `predict` function return its          #
# predictions in the original space (by setting transform_to_real=true)      #
#                                                                             #
# The GPEmulator module can be used as a standalone module to fit            #
# Gaussian Process regression models, but it was originally designed as      #
# the "Emulate" step of the "Calibrate-Emulate-Sample" framework             #
# developed at CliMA.                                                         #
# Further reading on Calibrate-Emulate-Sample:                               #
#                     Cleary et al., 2020                                     #
#                     https://arxiv.org/abs/2001.03689                        #
#                                                                             #
###############################################################################

function meshgrid(vx::AbstractVector{T}, vy::AbstractVector{T}) where T
    m, n = length(vy), length(vx)
    gx = reshape(repeat(vx, inner = m, outer = 1), m, n)
    gy = reshape(repeat(vy, inner = 1, outer = n), m, n)

    return gx, gy
end
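
# (Tiny sanity check of meshgrid -- illustration only, assuming the
# definition above: x varies along columns, y varies along rows.)
gx_demo, gy_demo = meshgrid([1.0, 2.0, 3.0], [10.0, 20.0])
@assert gx_demo == [1.0 2.0 3.0; 1.0 2.0 3.0]
@assert gy_demo == [10.0 10.0 10.0; 20.0 20.0 20.0]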

rng_seed = 41
Random.seed!(rng_seed)

gppackage = GPJL()
pred_type = YType()

# Generate training data (x-y pairs, where x ∈ ℝᵖ, y ∈ ℝᵈ)
# x = [x1, x2]: inputs/predictors/features/parameters
# y = [y1, y2]: outputs/predictands/targets/observables
# The observables y are related to the parameters x by:
# y = G(x1, x2) + η,
# where G(x1, x2) := [sin(x1) + cos(x2), sin(x1) - cos(x2)], and η ~ N(0, Σ)
n = 50  # number of training points
p = 2   # input dim
d = 2   # output dim

X = 2.0 * π * rand(n, p)

# G(x1, x2)
g1x = sin.(X[:, 1]) .+ cos.(X[:, 2])
g2x = sin.(X[:, 1]) .- cos.(X[:, 2])
gx = [g1x g2x]

# Add noise η
μ = zeros(d)
Σ = 0.1 * [[0.8, 0.0] [0.0, 0.5]]  # d x d
noise_samples = rand(MvNormal(μ, Σ), n)'

# y = G(x) + η
Y = gx .+ noise_samples

# Fit 2D Gaussian Process regression model
# (To be precise: We fit two models, one that predicts y1 from x1 and x2,
# and one that predicts y2 from x1 and x2)
# Setting GPkernel=nothing leads to the creation of a default squared
# exponential kernel.
# Setting noise_learn=true leads to the addition of white noise to the
# kernel
gpobj = GPObj(X, Y, gppackage, GPkernel=nothing, obs_noise_cov=Σ,
              normalized=true, noise_learn=true, prediction_type=pred_type)

# Plot mean and variance of the predicted observables y1 and y2
# For this, we generate test points on an x1-x2 grid.
n_pts = 200
x1 = range(0.0, stop=2*π, length=n_pts)
x2 = range(0.0, stop=2*π, length=n_pts)
X1, X2 = meshgrid(x1, x2)
# Input for predict has to be of size N_samples x input_dim
inputs = hcat(X1[:], X2[:])
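# (Sanity check, illustration only: for this grid, predict should receive
# an N_samples x input_dim array, i.e. 40000 x 2.)
@assert size(inputs) == (n_pts * n_pts, p)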

font = Plots.font("Helvetica", 18)
fontdict = Dict(:guidefont=>font, :xtickfont=>font, :ytickfont=>font,
                :legendfont=>font)
for y_i in 1:d
    # Predict on the grid points (note that `predict` returns the full
    # covariance matrices, not just the variance -- gp_cov is a vector
    # of covariance matrices)
    gp_mean, gp_cov = GPEmulator.predict(gpobj,
                                         inputs,
                                         transform_to_real=true)
    # Reshape gp_cov to size N_samples x output_dim
    gp_var_temp = [diag(gp_cov[j]) for j in 1:length(gp_cov)]  # (40000,)
    gp_var = vcat([x' for x in gp_var_temp]...)  # 40000 x 2

    mean_grid = reshape(gp_mean[:, y_i], n_pts, n_pts)
    p1 = plot(x1, x2, mean_grid, st=:surface, camera=(-30, 30), c=:cividis,
              xlabel="x1", ylabel="x2", zlabel="mean of y"*string(y_i),
              zguidefontrotation=90)

    var_grid = reshape(gp_var[:, y_i], n_pts, n_pts)
    p2 = plot(x1, x2, var_grid, st=:surface, camera=(-30, 30), c=:cividis,
              xlabel="x1", ylabel="x2", zlabel="var of y"*string(y_i),
              zguidefontrotation=90)

    plot(p1, p2, layout=(1, 2), legend=false)
    savefig("GP_test_y"*string(y_i)*"_predictions.png")
end

# Plot the true components of G(x1, x2)
g1_true = sin.(inputs[:, 1]) .+ cos.(inputs[:, 2])
g1_true_grid = reshape(g1_true, n_pts, n_pts)
p3 = plot(x1, x2, g1_true_grid, st=:surface, camera=(-30, 30), c=:cividis,
          xlabel="x1", ylabel="x2", zlabel="sin(x1) + cos(x2)",
          zguidefontrotation=90)
savefig("GP_test_true_g1.png")

g2_true = sin.(inputs[:, 1]) .- cos.(inputs[:, 2])
g2_true_grid = reshape(g2_true, n_pts, n_pts)
p4 = plot(x1, x2, g2_true_grid, st=:surface, camera=(-30, 30), c=:cividis,
          xlabel="x1", ylabel="x2", zlabel="sin(x1) - cos(x2)",
          zguidefontrotation=90)
g_true_grids = [g1_true_grid, g2_true_grid]
savefig("GP_test_true_g2.png")

# Plot the difference between the truth and the mean of the predictions
for y_i in 1:d
    # Predict on the grid points (note that `predict` returns the full
    # covariance matrices, not just the variance -- gp_cov is a vector
    # of covariance matrices)
    gp_mean, gp_cov = GPEmulator.predict(gpobj,
                                         inputs,
                                         transform_to_real=true)
    # Reshape gp_cov to size N_samples x output_dim
    gp_var_temp = [diag(gp_cov[j]) for j in 1:length(gp_cov)]  # (40000,)
    gp_var = vcat([x' for x in gp_var_temp]...)  # 40000 x 2

    mean_grid = reshape(gp_mean[:, y_i], n_pts, n_pts)
    var_grid = reshape(gp_var[:, y_i], n_pts, n_pts)
    # Compute and plot 1/variance * (truth - prediction)^2
    zlabel = "1/var * (true_y"*string(y_i)*" - predicted_y"*string(y_i)*")^2"
    p5 = plot(x1, x2, 1.0 ./ var_grid .* (g_true_grids[y_i] .- mean_grid).^2,
              st=:surface, camera=(-30, 30), c=:magma, zlabel=zlabel,
              xlabel="x1", ylabel="x2",
              zguidefontrotation=90)

    savefig("GP_test_y"*string(y_i)*"_difference_truth_prediction.png")
end

Plots.scalefontsizes(1/1.3)