In [None]:
include("emulation_functions.jl")
using LinearAlgebra
using Random
using NCDatasets
using NetCDF
using LaTeXStrings
using BenchmarkTools
using Suppressor
using StatsBase
using Turing
using ScikitLearn
using CSV
using JLD2
using DataFrames
using Dates

In [None]:
# Selects how the emulator is obtained further down in this notebook:
# `true` loads the saved fit from "emulator_GPP_US_Me2.jld2", while `false`
# re-fits the PCA and the Gaussian processes and writes that file.
load_from_file = true

In [None]:
# --- Import the parameter ensemble and the simulated GPP ---

# Parameter samples; the text file has one ensemble member per row.
fname_params = "sobol_sequence_ensemble_1300_QMC_modex_para_0_normalized.txt"
df = CSV.read(fname_params, DataFrame, header=false)

# `s` keeps the table exactly as read (rows = ensemble members), i.e. before the
# all-missing columns are removed; it is used below for the parameter scatter plot.
s = Matrix(df);

# Drop the columns that contain nothing but `missing` (empty trailing columns in
# the file), then transpose so that `df` becomes a plain numeric matrix with one
# row per parameter and one column per ensemble member.
df = df[!, map(x->!all(ismissing, x), eachcol(df))]
df = permutedims(Matrix(df))

# Simulated GPP. The raw arrays are read with NetCDF.jl (`ncread`); the same file is
# also opened with NCDatasets (`Dataset`) to obtain the time variable as `t`.
fname_GPP = "US_Me2_GPP_g0001_g1300_2003_2007_res05.nc"
GPP = ncread(fname_GPP, "GPP");
time = ncread(fname_GPP, "time")
ELM_Me2 = Dataset(fname_GPP)
t = ELM_Me2["time"]

# Fractional year used as the x-axis of the time-series plot.
# NOTE(review): dividing by 365 assumes every year has 365 days -- confirm this
# matches the calendar of the "time" variable.
time1 = year.(t) + dayofyear.(t)/365

# Remove the leading singleton dimension, then collect the indices of the columns
# (ensemble members) of GPP1 that are not identically zero.
GPP1 = dropdims(GPP, dims=1)
id = findall([any(x -> x != 0, col) for col in eachcol(GPP1)]);

Separate test and train data

In [None]:
# Reproducible random split of the members 1:1300: 1000 are drawn without
# replacement for training and the remaining ones form the test candidates.
Random.seed!(11);
indices_train0 = StatsBase.sample(1:1300, 1000, replace=false)
indices_test0 = setdiff(1:1300, indices_train0)

# Keep only the members listed in `id` (columns of GPP1 that are not all zero).
indices_train = intersect(indices_train0, id)
indices_test = intersect(indices_test0, id)
println(size(indices_test))

# First two columns of the parameter table for the retained training members.
scatter(
    s[indices_train, 1],
    s[indices_train, 2],
    xlabel="p1",
    ylabel="p2",
    title="1000 training set",
    legend=false,
    grid=false,
    framestyle=:box,
)

In [None]:
# Output matrices are N_time points x N_members; parameter matrices hold one
# column per member. Everything is converted to Float64.

# Training set
y_train = map(Float64, GPP[1, :, indices_train])
p_train = map(Float64, df[:, indices_train])
p_train_norm = p_train  # the parameter values are already normalized; used as-is

# Test set
y_test = map(Float64, GPP[1, :, indices_test])
p_test = map(Float64, df[:, indices_test]);

# Full ensemble
y_all = map(Float64, GPP[1, :, :])
p_all = map(Float64, df);

In [None]:
# Sanity check: print the dimensions of the test parameter matrix.
println(size(p_test))

Define covariance and mean functions for GP

In [None]:
# Mean and covariance functions handed to the GP constructor during training.

# Zero mean function.
mZero = GaussianProcesses.MeanZero()

# SEArd kernel with one length-scale entry per row of p_train (i.e. per parameter).
# Per the GaussianProcesses.jl documentation the hyperparameters are given on the
# log scale, so 0.0 corresponds to unit length scales and unit signal std.
kern = GaussianProcesses.SEArd(fill(0.0, size(p_train, 1)), 0.0)

In [None]:
# Obtain the emulator components (PCA fit, one GP per principal component, and the
# standardization constants of the PC scores) either from disk or by training.
if load_from_file
    @load "emulator_GPP_US_Me2.jld2" array_GP pca_fit μ_z σ_z
else
    # PCA on the training outputs; each column of y_train is one observation.
    # At most 12 principal components are retained.
    pca_fit = MultivariateStats.fit(PCA, y_train; pratio=1.0, maxoutdim=12)
    Z_train = project(pca_fit, y_train)
    Z_test = project(pca_fit, y_test)

    # Standardize each latent variable (row of Z_train) over the training members.
    μ_z = mean(Z_train, dims=2)
    σ_z = std(Z_train, dims=2)
    Z_train_std = (Z_train .- μ_z) ./ σ_z

    # Fit one GP per principal component.
    # NOTE(review): the same `kern` object is passed to every GP; if `optimize!`
    # updates it in place, each GP starts from the previous optimum -- confirm
    # this is intended. The deepcopy below keeps the stored models independent.
    array_GP = Array{GPE}(undef, size(pca_fit, 2))
    for j in eachindex(array_GP)
        gp = GaussianProcesses.GP(p_train_norm, Z_train_std[j, :], mZero, kern)
        # Optimize the hyperparameters (noise is held fixed)
        optimize!(gp, domean = true, kern = true, noise = false, f_tol = 1e-40, g_tol = 1e-14, iterations = 2000)
        array_GP[j] = deepcopy(gp)
    end

    # Persist the trained components so later runs can set load_from_file = true.
    jldsave("emulator_GPP_US_Me2.jld2"; array_GP, pca_fit, μ_z, σ_z)
end

# Bundle everything the `emulator` function needs into a single NamedTuple.
p_emulator = (GPs = array_GP, T_PCA = pca_fit, μ_z = μ_z, σ_z = σ_z)


In [None]:
# Construct emulator predictions for the test parameter sets and time the call.
# `emulator` is not defined in this notebook (presumably it comes from
# emulation_functions.jl); N_PCA is passed as its third argument, presumably the
# number of principal components to use -- confirm against that file.
N_PCA = 12
@time GPP_emulator = emulator(p_test', p_emulator, N_PCA);

In [None]:
# Four-panel evaluation figure comparing ELM output with the emulator on the test set.
# The ELM arrays are multiplied by 3600*24 before plotting (presumably a per-second
# to per-day conversion, matching the d^{-1} in the axis label -- confirm).

# RMSE between Y and the emulator output for every number of components J <= K,
# where K is the second dimension of the PCA transform stored in p_emulator.
function rms_error_emul(Y, p, p_emulator)
    return [rmsd(Y, emulator(p', p_emulator, J)) for J = 1:size(p_emulator.T_PCA, 2)]
end

# (a) Mean over the test members at each time point: ELM vs. emulator.
y_test_m = mean(y_test, dims=2)
GPP_emulator_m = mean(GPP_emulator, dims=2)
p1 = plot(
    xlabel="year",
    ylabel=L"\mathrm{GPP (gC\, m^{-2}\, d^{-1})}",
    legend=true,
    title="",
    grid=false,
    framestyle=:box,
    foreground_color_border=:black,
    foreground_color_axis=:black,
    tick_direction=:in,
    tickfontsize=8,
    foreground_color_text=:black,
    labelfontsize=8,
    fontfamily="Computer Modern",
)
plot!(p1, time1, y_test_m*3600*24, label="ELM Output", linewidth=2, linecolor=:red)
plot!(p1, time1, GPP_emulator_m, label="Emulator Output", linewidth=1, linecolor=:blue)
annotate!(2003, 20, text("(a)", 9, :left), fontfamily="Computer Modern")

# (b) Scatter of the same member-mean series, ELM against emulator.
p2 = scatter(
    y_test_m*3600*24,
    GPP_emulator_m,
    label = "",
    xlabel="ELM Output",
    ylabel="Emulator Output",
    marker=:circle,
    markerstrokecolor=:black,
    markercolor=:magenta,
    marker_alpha=0.3,
    grid=false,
    framestyle=:box,
    foreground_color_border=:black,
    foreground_color_axis=:black,
    tick_direction=:in,
    tickfontsize=8,
    foreground_color_text=:black,
    fontfamily="Computer Modern",
    labelfontsize=8,
    xguidefontcolor = :magenta,
    yguidefontcolor = :magenta,
)
annotate!(1, 19, text("(b)", 9, :left), fontfamily="Computer Modern")

# (c) Emulation error and PCA reconstruction error vs. number of components.
xtickmarks = [1:12]
p3 = plot(
    rms_error_emul(y_test*3600*24, p_test, p_emulator),
    grid=false,
    framestyle=:box,
    foreground_color_border=:black,
    foreground_color_axis=:black,
    tick_direction=:in,
    tickfontsize=8,
    foreground_color_text=:black,
    fontfamily="Computer Modern",
    marker=:circle,
    label="Emulation error",
    legendfontsize=8,
    xticks=(1:12, string.(1:12)),
    linewidth=5,
    linecolor=:red,
    markersize=6,
    markerstrokecolor=:red,
    markercolor=:red,
    title="",
    xlabel="Number of Principal Components",
    ylabel="RMSE",
    labelfontsize=8,
    xguidefontcolor = :red,
    yguidefontcolor=:blue,
)
plot!(
    rms_error(pca_fit, y_test)*3600*24,
    fontfamily="Computer Modern",
    marker=:circle,
    label="Reconstruction error",
    xticks=(1:12, string.(1:12)),
    linewidth=5,
    linecolor=:blue,
    markersize=6,
    markerstrokecolor=:blue,
    markercolor=:blue,
)
annotate!(2, 1.6, text("(c)", 9, :left), fontfamily="Computer Modern")

# (d) Mean over time for each test member: ELM against emulator (12 components).
emu_test = emulator(p_test', p_emulator, 12)
mGPP_emu_test = mean(emu_test, dims=1)
mGPP_y_test = mean(y_test, dims=1)
p4 = scatter(
    mGPP_y_test*3600*24,
    mGPP_emu_test,
    label = "",
    xlabel="ELM Output",
    ylabel="Emulator Output",
    marker=:circle,
    markerstrokecolor=:black,
    markercolor=:magenta,
    marker_alpha=0.3,
    grid=false,
    framestyle=:box,
    foreground_color_border=:black,
    foreground_color_axis=:black,
    tick_direction=:in,
    tickfontsize=8,
    foreground_color_text=:black,
    fontfamily="Computer Modern",
    labelfontsize=8,
    xguidefontcolor = :magenta,
    yguidefontcolor = :magenta,
)
annotate!(2, 15, text("(d)", 9, :left), fontfamily="Computer Modern")

# Assemble the 2x2 layout and write it to disk.
savefig(plot(p1, p2, p3, p4, layout=(2,2)), "fig3_emulator_evaluation.pdf")