In [6]:
using JLD2
using PyPlot
using StatsBase # Statistics
using Distributions

# using ScikitLearn # machine learning package
# @sk_import gaussian_process : GaussianProcessRegressor
# @sk_import gaussian_process.kernels : Matern

using PyCall
# @pyimport torch
@pyimport gpytorch
@pyimport botorch

# config plot settings:
#   - switch matplotlib to its "ggplot" style sheet
#   - wrap matplotlib's `rcParams` in a `PyDict` so that the assignment below is
#     written through to the Python-side dictionary (a plain property access would
#     hand back a converted copy, per the PyPlot.jl README)
#   - set the default font size used in figures to 16
PyPlot.matplotlib.style.use("ggplot")
rcParams = PyPlot.PyDict(PyPlot.matplotlib."rcParams")
rcParams["font.size"] = 16;

In [2]:
# read the stored variables `X`, `henry_y` and `gcmc_y` from the JLD2 file located in
# the current working directory and bind them under the same names in this scope
@load joinpath(pwd(), "targets_and_normalized_features.jld2") X henry_y gcmc_y

3-element Vector{Symbol}:
 :X
 :henry_y
 :gcmc_y

In [3]:
# sanity check: display the botorch `SingleTaskGP` class handle reached through PyCall
botorch.models.SingleTaskGP

PyObject <class 'botorch.models.gp_regression.SingleTaskGP'>

In [None]:
# construct and fit GP model
#
# `@pyimport` only binds the Python *modules* `gpytorch` and `botorch`; the classes and
# functions inside them are not in Julia scope, so they are reached through their
# module paths (the same paths as the Python imports listed in the next cell).
#
# NOTE(review): `ids_acquired` and `y_acquired` are not defined in any cell shown above
#               this one — they must exist before this cell is run.
# NOTE(review): botorch models presumably need torch tensors as training data, while
#               PyCall hands Julia arrays over as NumPy arrays — confirm whether an
#               explicit conversion through `torch` is required here.
model = botorch.models.SingleTaskGP(X[ids_acquired, :], y_acquired)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)
botorch.fit_gpytorch_model(mll)

In [None]:
# from botorch.models import FixedNoiseGP, SingleTaskGP
# from gpytorch.kernels import ScaleKernel
# from gpytorch.mlls import ExactMarginalLogLikelihood
# from botorch import fit_gpytorch_model
# from scipy.stats import norm
# from botorch.acquisition.analytic import ExpectedImprovement

In [7]:
# Gaussian likelihood object for a GP with two output tasks
# (presumably one task per fidelity, i.e. low and high — TODO confirm)
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)

PyObject MultitaskGaussianLikelihood(
  (raw_task_noises_constraint): GreaterThan(1.000E-04)
  (raw_noise_constraint): GreaterThan(1.000E-04)
)

In [None]:
# model = MultitaskGPModel(train_x, train_y, likelihood)

## Multi-fidelity BO Function

In [None]:
### procedure:
# 1. select initial COF identifiers set to train GP
# 2. initialize and normalize array of initial target data
# 3. iterate through budgeted number of BO runs
#    a. construct GP model with kernel
#    b. fit GP to current data for acquired COFs => gives ŷ(x)
#    c. construct acquisition function A(x)
#    d. determine which COF to acquire next => evaluate argmax(A(x))
#       i. for EI track if this is an exploitation or an exploration
#    e. append COF identified in step 3.d to list of acquired COFs
# 4. update final set of acquired COF data and normalize
# 5. return the IDs for the set of acquired COFs
###
"""
    run_bayesian_optimization(X, y_lf, y_hf, nb_iterations, nb_COFs_initialization;
                              <keyword arguments>)

Run a Bayesian-optimization search over the rows of `X`: starting from an initial set
of COFs, repeatedly fit a GP surrogate to the low-fidelity targets of the acquired
COFs, score every COF with the chosen acquisition function, and acquire the
best-scoring COF that has not been acquired yet.

`Matern` and `GaussianProcessRegressor` are not defined by this function and must be
in scope when it is called (the commented-out `@sk_import` lines at the top of the
file name them).

# Arguments
- `X`: feature matrix (one row per COF)
- `y_lf`: low-fidelity target vector
- `y_hf`: high-fidelity target vector (accepted, but not used yet: the high-fidelity
          surrogate is constructed without being fitted)
- `nb_iterations`: maximum number of BO iterations (experiment budget)
- `nb_COFs_initialization`: number of COFs used to train the initial GP

# Keywords
- `which_acquisition`: which acquisition function to implement (`:EI` or `:UCB`)
- `store_explore_exploit_terms`: whether or not to keep track of the explore and exploit
                                 terms from the acquisition for the acquired material at
                                 each iteration
- `sample_gp`: whether or not to store sample GP functions (the samples are collected
               but not returned yet)
- `initialize_with`: specify which materials to initialize the search with (a copy is
                     taken; the caller's array is not modified). If `nothing`, the
                     initial COFs are drawn at random without replacement.
- `kwargs`: dictionary of optional keyword arguments; `kwargs[:β]` (default `2.0`) is
            the weight on the uncertainty term of `:UCB`

# Returns
- `ids_acquired`: indices of the acquired COFs, in order of acquisition
  (`nb_iterations + 1` entries)
- `explore_exploit_balance`: one `[exploit_term, explore_term]` pair per BO iteration if
  `store_explore_exploit_terms` is `true`, otherwise an empty vector
"""
function run_bayesian_optimization(X, y_lf, y_hf, nb_iterations::Int, 
                                   nb_COFs_initialization::Int;
                                   which_acquisition::Symbol=:EI,
                                   store_explore_exploit_terms::Bool=false,
                                   sample_gp::Bool=false,
                                   initialize_with::Union{Array{Int, 1}, Nothing}=nothing,
                                   kwargs::Dict{Symbol, Any}=Dict{Symbol, Any}())
    nb_COFs = size(X, 1)

    # quick checks
    @assert nb_COFs_initialization >= 1 "At least one COF is needed to train the initial GP."
    @assert nb_iterations > nb_COFs_initialization "More initializations than itterations not allowed."
    @assert which_acquisition in (:EI, :UCB) "Acquisition function not supported:\t $(which_acquisition)"
    # the search ends with nb_iterations + 1 distinct COFs, so that many must exist
    @assert nb_iterations + 1 <= nb_COFs "Budget exceeds the number of COFs:\t $(nb_COFs)"

    # explore-exploit terms of each acquired COF; always defined so that the
    # return statement is valid even when nothing is stored
    explore_exploit_balance = Vector{Vector{Float64}}()

    # sampled GP functions, one entry per iteration when `sample_gp` is set
    # NOTE(review): collected but not returned — the return signature is left untouched
    store_sample_y = []

    ###
    #  initialize array to track fidelities:
    #    low fidelity  => 0
    #    high fidelity => 1
    #  NOTE(review): written below but neither read nor returned yet
    ###
    fidelity_query = zeros(nb_iterations)

    # initialize Normal Distribution for EI
    normal = Normal()

    # weight on the uncertainty term of UCB
    # NOTE(review): the default of 2.0 is a placeholder — confirm the intended value
    β = get(kwargs, :β, 2.0)

    ###
    #  1. randomly select COF IDs for training initial GP
    #     or use COFs passed into function
    ###
    if isnothing(initialize_with)
        ids_acquired = StatsBase.sample(1:nb_COFs, nb_COFs_initialization, replace=false)
    else
        # initialize using a specified set of indices;
        # copy so that push! below does not grow the caller's array
        ids_acquired = copy(initialize_with)
    end
    @assert length(unique(ids_acquired)) == nb_COFs_initialization

    ###
    #  3. iterate through budgeted number of BO runs
    ###
    for i in nb_COFs_initialization:nb_iterations
        ###
        #  a-b. construct and fit GP model for both fidelities
        ###
        kernel_lf = Matern(nu=2.5, length_scale=0.25)
        model_lf = GaussianProcessRegressor(kernel=kernel_lf, normalize_y=true,
                                            n_restarts_optimizer=5)
        model_lf.fit(X[ids_acquired, :], y_lf[ids_acquired])

        # NOTE(review): the high-fidelity surrogate is only constructed; fitting it
        #               requires tracking which COFs have high-fidelity data
        kernel_hf = Matern(nu=2.5, length_scale=0.25)
        model_hf = GaussianProcessRegressor(kernel=kernel_hf, normalize_y=true,
                                            n_restarts_optimizer=5)

        if sample_gp # Currently not working
            push!(store_sample_y, model_lf.sample_y(X))
        end

        ŷ_lf, σ_lf = model_lf.predict(X, return_std=true)

        ###
        #  c. setup acquisition function
        #  NOTE(review): computed from the low-fidelity surrogate only; a
        #                fidelity-aware acquisition is still to be designed
        ###
        y_max = maximum(y_lf[ids_acquired])
        exploit_term, explore_term = _acquisition_terms(which_acquisition, ŷ_lf, σ_lf,
                                                        y_max, normal, β)
        acquisition_values = exploit_term .+ explore_term

        ###
        #  d. determine which COF to acquire next
        #     and with which fidelity
        ###
        for id_max_aquisition in sortperm(acquisition_values, rev=true)
            # skip COFs that were already acquired
            id_max_aquisition in ids_acquired && continue

            ###
            #  e. acquire this COF (i.e. update list)
            ###
            push!(ids_acquired, id_max_aquisition)

            ###
            #  Determine at which Fidelity the COF will be evaluated
            #  NOTE(review): no selection rule exists yet; every query is recorded
            #                as high fidelity (1)
            ###
            fidelity_query[i] = 1

            if store_explore_exploit_terms
                # store explore and exploit terms
                push!(explore_exploit_balance,
                      [exploit_term[id_max_aquisition], explore_term[id_max_aquisition]])
            end
            break
        end
        # quick check
        @assert length(ids_acquired) == i + 1
    end

    # quick check (remember to account for final COF to be acquired)
    @assert length(ids_acquired) == nb_iterations + 1 "length(ids_acquired) = $(length(ids_acquired))"

    ###
    #  5. return the IDs for the set of acquired COFs
    ###
    return ids_acquired, explore_exploit_balance
end

# Split the acquisition value of every candidate into its (exploit, explore) parts;
# the acquisition value itself is the element-wise sum of the two.
#   :UCB => (ŷ, β σ)
#   :EI  => ((ŷ - y_max) Φ(z), σ ϕ(z))  with  z = (ŷ - y_max) / σ
function _acquisition_terms(which_acquisition::Symbol, ŷ, σ, y_max, normal, β)
    if which_acquisition == :UCB
        return ŷ, β .* σ
    end
    improvement = ŷ .- y_max
    # where σ is zero, push z to ±Inf instead of computing 0/0 = NaN
    z = map((Δ, s) -> s > 0 ? Δ / s : (Δ > 0 ? Inf : -Inf), improvement, σ)
    exploit_term = improvement .* cdf.(normal, z)
    explore_term = σ .* pdf.(normal, z)
    return exploit_term, explore_term
end