## System Description
1. We have a set of COFs from a database. Each COF is characterized by a feature vector $$x_{COF} \in X \subset R^d$$ were d=14.


2. We have **two different types** of simulations to calculate **the same material property $S_{Xe/Kr}$**. Therefore, we have a Single-Task/Objective (find the material with the optimal selevtivity), Multi-Fidelity problem. 
    1. low-fidelity  = Henry coefficient calculation - MC integration - cost=1
    2. high-fidelity = GCMC mixture simulation - 80:20 (Kr:Xe) at 298 K and 1.0 bar - cost=30


3. We will initialize the system with *two* COFs at both fidelities in order to initialize the Covariance Matrix.
    - The fist COF will be the one closest to the center of the normalized feature space
    - The second COF will be chosen at random


4. Each surrogate model will **only train on data acquired at its level of fidelity** (Heterotopic data). $$X_{lf} \neq X_{hf} \subset X$$
    1. We could use the augmented EI acquisition function from [here](https://link.springer.com/content/pdf/10.1007/s00158-005-0587-0.pdf)
    2. We could use a naive implementation of the [misoKG](https://papers.nips.cc/paper/2017/file/df1f1d20ee86704251795841e6a9405a-Paper.pdf) acquisition function
    3. **OR** we could use the acquisition function from the [tutorial](https://botorch.org/tutorials/discrete_multi_fidelity_bo)


5. **kernel model**: 
    1.  We need a Gaussian Process (GP) that will give a *correlated output for each fidelity* i.e. we need a vector-valued kernel
    2. Given the *cost aware* acquisition function, we anticipate the number of training points at each fidelity *will not* be equal (asymmetric scenario) $$n_{lf} > n_{hf}$$
        - perhaps we can force the symmetric case, $n_{lf} = n_{hf} = n$, if we can include `missing` or `empty` entries in the training sets.

In [1]:
import torch
import gpytorch
from botorch.models import SingleTaskMultiFidelityGP
from botorch.models.transforms.outcome import Standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch import fit_gpytorch_model
from scipy.stats import norm
import math 
import numpy as np
import matplotlib.pyplot as plt
import h5py
import os

In [2]:
###
#  Load Data
###
f = h5py.File("targets_and_normalized_features.jld2", "r")
# feature matrix
X = torch.from_numpy(np.transpose(f["X"][:]))
# simulation data
y = [torch.from_numpy(np.transpose(f["henry_y"][:])), 
     torch.from_numpy(np.transpose(f["gcmc_y"][:]))]
# associated simulation costs
cost = [torch.from_numpy(np.transpose(f["henry_total_elapsed_time"][:])), 
        torch.from_numpy(np.transpose(f["gcmc_elapsed_time"][:]))]

print("raw data - \n\tX:", X.shape)
for f in range(2):
    print("\tfidelity:", f)
    print("\t\ty:", y[f].shape)
    print("\t\tcost: ", cost[f].shape)

raw data - 
	X: torch.Size([608, 14])
	fidelity: 0
		y: torch.Size([608])
		cost:  torch.Size([608])
	fidelity: 1
		y: torch.Size([608])
		cost:  torch.Size([608])


## Helper Functions

#### Construct Initial Inputs

In [3]:
# find COF closest to the center of feature space
def get_initializing_COF(X):
    # center of feature space
    feature_center = np.ones(X.shape[1]) * 0.5
    # max possible distance between normalized features
    min_dist = np.inf
    min_id = 0 # dummy id 
    for i in range(nb_COFs):
        dist = sum((X[i] - feature_center) * (X[i] - feature_center)).item()
        if dist < min_dist:
            min_dist = dist
            min_id = i
    return min_id

In [4]:
# construct feature matrix of acquired points
def build_X_train(ids_acquired, fidelity_acquired):
    return torch.cat((X[ids_acquired, :], fidelity_acquired), dim=1)

# construct output vector for acquired points
def build_y_train(ids_acquired, fidelity_acquired):
    train_y = torch.tensor((), dtype=torch.float64).new_zeros((ids_acquired.shape[0], 1))
    for i, fid in enumerate(fidelity_acquired):
        train_y[i][0] = y[fid][ids_acquired[i]]
    return train_y

# construct vector to track accumulated cost of acquired points
def build_cost(ids_acquired, fidelity_acquired):
    costs_acquired = torch.tensor((), dtype=torch.float64).new_zeros((ids_acquired.shape[0], 1))
    for i, fid in enumerate(fidelity_acquired):
        costs_acquired[i][0] = cost[fid][ids_acquired[i]]
    return costs_acquired

#### Surrogate Model

In [5]:
def train_surrogate_model(X_train, y_train):
    model = SingleTaskMultiFidelityGP(
        X_train, 
        y_train, 
        outcome_transform=Standardize(m=1), # m is the output dimension
        data_fidelity=X_train.shape[1] - 1
    )   
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)
    return mll, model

#### Acquisition Function

In [6]:
# calculate posterior mean and variance
def mu_sigma(model, X, fidelity):
    f = torch.tensor((), dtype=torch.float64).new_ones((nb_COFs, 1)) * fidelity
    X_f = torch.cat((X, f), dim=1) # last col is associated fidelity
    f_posterior = model.posterior(X_f)
    return f_posterior.mean.squeeze().detach().numpy(), f_posterior.variance.squeeze().detach().numpy()

# get the current "effective best solution" 
def get_y_max(ids_acquired, fidelity_acquired, desired_fidelity):
    y_max = torch.tensor((), dtype=torch.float64).new_zeros(1)
    for i, fid in enumerate(fidelity_acquired):
        if (fid == desired_fidelity) & (y[fid][ids_acquired[i]] > y_max):
            y_max = y[fid][ids_acquired[i]]
    return y_max.item()

In [7]:
def augment_1(model, X, fidelity, ids_acquired, fidelity_acquired):
    # get posterior at each fidelity
    f_mu, f_sigma = mu_sigma(model, X, fidelity) 
    hf_mu, hf_sigma = mu_sigma(model, X, 1)
    # get "best linear predictor" at each fidelity
    f_y_max = get_y_max(ids_acquired, fidelity_acquired, fidelity)
    hf_y_max = get_y_max(ids_acquired, fidelity_acquired, 1)
    # calculate correlation
    a1 = ((f_mu - f_y_max) * (hf_mu - hf_y_max)) / (f_sigma * hf_sigma)
    return a1

def augment_2(model, X, fidelity, ids_acquired, fidelity_acquired):
    # variance of random error associated with system at given fidelity
    noise_var = model.likelihood.noise.detach().numpy() # not 100% on this
    
    f_mu, f_sigma = mu_sigma(model, X, fidelity)
    f_y_max = get_y_max(ids_acquired, fidelity_acquired, fidelity)
    
    cov = ((f_mu - f_y_max)*(f_mu - f_y_max))/(f_sigma * f_sigma)
    
    a2 = 1 #- (np.sqrt(noise_var) / np.sqrt(cov + noise_var))
    return a2

# calculate the cost ratio
def cost_ratio(fidelity, fidelity_acquired, costs_acquired):
    avg_cost_lf = torch.mean(costs_acquired[fidelity_acquired == fidelity]).item()
    avg_cost_hf = torch.mean(costs_acquired[fidelity_acquired == 1]).item()
    return avg_cost_hf/ avg_cost_lf

# Expected Imrovement function, only use hf
def EI_hf(model, X, ids_acquired, fidelity_acquired):
    hf_mu, hf_sigma = mu_sigma(model, X, 1)
    y_max = get_y_max(ids_acquired, fidelity_acquired, 1)
    
    z = (hf_mu - y_max) / hf_sigma
    explore_term = hf_sigma * norm.pdf(z)
    exploit_term = (hf_mu - y_max) * norm.cdf(z)
    ei = explore_term + exploit_term
    return np.maximum(ei, np.zeros(nb_COFs))

# 1. We're going to get a sorted list of propoed acquisition values at each fidelity 
# 2. Then, we will determine which value is highest and from which fidelity it was derived
# 3. Add that data point and associated fidelity to the training data
# 4. Retrian model and repreat process
def acquisition(model, X, fidelity, ids_acquired, fidelity_acquired, costs_acquired):
    # expected improvement
    ei = EI_hf(model, X, ids_acquired, fidelity_acquired)
    
    # augmenting functions
    a1 = augment_1(model, X, fidelity, ids_acquired, fidelity_acquired)
    a2 = augment_2(model, X, fidelity, ids_acquired, fidelity_acquired)
    a3 = cost_ratio(fidelity, fidelity_acquired, costs_acquired)
    acquisition_values = ei * a1 * a2 * a3
    
    return acquisition_values.argsort()[::-1], acquisition_values # sort in descending order

## Run MFBO

In [8]:
###
#  construct initial inputs
###
nb_COFs = X.shape[0] # total number of COFs data points 
nb_iterations = 12   # BO budget

ids_acquired = torch.ones(1, dtype=int) * get_initializing_COF(X)
fidelity_acquired = torch.ones((1, 1), dtype=int) # start with high-fidelity
costs_acquired = build_cost(ids_acquired, fidelity_acquired)

X_train = build_X_train(ids_acquired, fidelity_acquired)
y_train = build_y_train(ids_acquired, fidelity_acquired)

print("Initialization - \n")
print("\tid acquired = ", ids_acquired.item())
print("\tfidelity acquired = ", fidelity_acquired.item())
print("\tcosts acquired = ", costs_acquired.item(), " [min]")

print("\tTraining data:\n")
print("\t\t X train shape = ", X_train.shape)
print("\t\t y train shape = ", y_train.shape)
print("\t\t training feature vector = \n", X_train[0])

Initialization - 

	id acquired =  25
	fidelity acquired =  1
	costs acquired =  399.7576660990715  [min]
	Training data:

		 X train shape =  torch.Size([1, 15])
		 y train shape =  torch.Size([1, 1])
		 training feature vector = 
 tensor([0.1500, 0.4533, 0.1088, 0.5523, 0.4387, 0.1463, 0.3480, 0.2643, 0.0000,
        0.1769, 0.2237, 0.0000, 0.0000, 0.3471, 1.0000], dtype=torch.float64)


In [9]:
###
#  Evaluate acquisition function at each fidelity
###
for i in range(1, nb_iterations):
    mll, model = train_surrogate_model(X_train, y_train)
    
    ###
    #  Acquisition Function
    ###
    lf_acquisition_sorted, lf_acq = acquisition(model, X, 0, ids_acquired, fidelity_acquired, costs_acquired)
    hf_acquisition_sorted, hf_acq = acquisition(model, X, 1, ids_acquired, fidelity_acquired, costs_acquired)
    
    if 0 not in fidelity_acquired: # if we don't have any low-fidelity points force the choice
        for id_ in lf_acquisition_sorted:
            if not id_.item() in ids_acquired:
                lf_id_max_aquisition = id_.item()
                break
        ids_acquired = torch.cat((ids_acquired, torch.ones(1, dtype=int) * lf_id_max_aquisition))
        fidelity_acquired = torch.cat((fidelity_acquired, torch.zeros((1, 1), dtype=int)))
    else:
        for id_ in lf_acquisition_sorted:
            if not id_ in ids_acquired:
                lf_id_max_aquisition = id_.item()
                break
        for id_ in hf_acquisition_sorted:
            if not id_ in ids_acquired:
                hf_id_max_aquisition = id_.item()
                break
        if lf_acq[lf_id_max_aquisition] > hf_acq[hf_id_max_aquisition]:
            ids_acquired = torch.cat((ids_acquired, torch.ones(1, dtype=int) * lf_id_max_aquisition))
            fidelity_acquired = torch.cat((fidelity_acquired, torch.zeros((1, 1), dtype=int)))
        else:
            ids_acquired = torch.cat((ids_acquired, torch.ones(1, dtype=int) * hf_id_max_aquisition))
            fidelity_acquired = torch.cat((fidelity_acquired, torch.ones((1, 1), dtype=int)))
    
    # update training sets and cost
    X_train = build_X_train(ids_acquired, fidelity_acquired)
    y_train = build_y_train(ids_acquired, fidelity_acquired)
    costs_acquired = build_cost(ids_acquired, fidelity_acquired)

In [10]:
ids_acquired

tensor([ 25, 607, 409,  71,  73, 257, 414, 309,  46, 311, 595,  69])

In [11]:
fidelity_acquired

tensor([[1],
        [0],
        [1],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1]])

In [12]:
costs_acquired

tensor([[399.7577],
        [ 13.7481],
        [214.4359],
        [  2.7140],
        [ 50.0721],
        [  8.9267],
        [ 13.7461],
        [ 11.9421],
        [ 12.7020],
        [ 16.2924],
        [ 29.0854],
        [144.5020]], dtype=torch.float64)