## System Description
1. We have a set of COFs from a database. Each COF is characterized by a feature vector $$x_{COF} \in X \subset R^d$$ were d=14.


2. We have **two different types** of simulations to calculate **the same material property $S_{Xe/Kr}$**. Therefore, we have a Single-Task/Objective (find the material with the optimal selevtivity), Multi-Fidelity problem. 
    1. low-fidelity  = Henry coefficient calculation - MC integration - cost=1
    2. high-fidelity = GCMC mixture simulation - 80:20 (Kr:Xe) at 298 K and 1.0 bar - cost=30


3. We will initialize the system with *two* COFs at both fidelities in order to initialize the Covariance Matrix.
    - The fist COF will be the one closest to the center of the normalized feature space
    - The second COF will be chosen at random


4. Each surrogate model will **only train on data acquired at its level of fidelity** (Heterotopic data). $$X_{lf} \neq X_{hf} \subset X$$
    1. We are using the augmented EI acquisition function from [here](https://link.springer.com/content/pdf/10.1007/s00158-005-0587-0.pdf)


5. **kernel model**: 
    1.  We need a Gaussian Process (GP) that will give a *correlated output for each fidelity* i.e. we need a vector-valued kernel
    2. Given the *cost aware* acquisition function, we anticipate the number of training points at each fidelity *will not* be equal (asymmetric scenario) $$n_{lf} > n_{hf}$$
        - perhaps we can force the symmetric case, $n_{lf} = n_{hf} = n$, if we can include `missing` or `empty` entries in the training sets.

### Strategy
1. Implement SingleTaskMultiFidelity Gp
2. Get augmented EI working


In [1]:
import torch
import gpytorch
from botorch.models import SingleTaskMultiFidelityGP
from botorch.models.transforms.outcome import Standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch import fit_gpytorch_model

from scipy.stats import norm
import math 
import numpy as np
import h5py
import matplotlib.pyplot as plt
import os

#### load data

In [2]:
f = h5py.File("targets_and_normalized_features.jld2", "r")
# feature matrix
X = torch.from_numpy(np.transpose(f["X"][:]))
# simulation data
henry_y = torch.from_numpy(np.transpose(f["henry_y"][:]))
gcmc_y  = torch.from_numpy(np.transpose(f["gcmc_y"][:]))
# associated simulation costs
henry_cost = torch.from_numpy(np.transpose(f["henry_total_elapsed_time"][:]))
gcmc_cost  = torch.from_numpy(np.transpose(f["gcmc_elapsed_time"][:]))

print("raw data - \nX:", X.shape)
print("henry_y:", henry_y.shape)
print("gcmc_y: ", gcmc_y.shape)

raw data - 
X: torch.Size([608, 14])
henry_y: torch.Size([608])
gcmc_y:  torch.Size([608])


#### Construct initial imputs

In [3]:
###
# helper functions
###
# find COF closest to the center of feature space
def get_initializing_COF(X):
    # center of feature space
    feature_center = np.ones(X.shape[1]) * 0.5
    # max possible distance between normalized features
    min_dist = X.shape[1] * np.sqrt(2)
    min_id = 0 # dummy id 
    for i in range(0, nb_COFs - 1):
        dist = np.sqrt(sum((X[i] - feature_center) * (X[i] - feature_center)).item())
        if dist < min_dist:
            min_dist = dist
            min_id = i
    return min_id

# construct feature matrix of acquired points
def build_X_train(ids_acquired, fidelity_acquired):
    return torch.cat((X[ids_acquired, :], fidelity_acquired), dim=1)

# construct output vector for acquired points
def build_y_train(ids_acquired, fidelity_acquired):
    train_y = torch.tensor((), dtype=torch.float64).new_zeros((ids_acquired.shape[0], 1))
    for i, fid in enumerate(fidelity_acquired):
        if fid == 0:
            train_y[i][0] = henry_y[ids_acquired[i]]
        else:
            train_y[i][0] = gcmc_y[ids_acquired[i]]
    return train_y

# construct vector to track accumulated cost of acquired points
def build_cost(ids_acquired, fidelity_acquired):
    costs_acquired = torch.tensor((), dtype=torch.float64).new_zeros((ids_acquired.shape[0], 1))
    for i, fid in enumerate(fidelity_acquired):
        if fid == 0:
            costs_acquired[i][0] = henry_cost[ids_acquired[i]]
        else:
            costs_acquired[i][0] = gcmc_cost[ids_acquired[i]]
    return costs_acquired

###
#  construct initial inputs
###
nb_COFs = X.shape[0] # total number of COFs data points 
nb_COFs_initialization = 1

ids_acquired = np.ones(nb_COFs_initialization, dtype=int) * get_initializing_COF(X)
fidelity_acquired = torch.ones((nb_COFs_initialization, 1), dtype=int) # start with high-fidelity
costs_acquired = build_cost(ids_acquired, fidelity_acquired)

X_train = build_X_train(ids_acquired, fidelity_acquired)
y_train = build_y_train(ids_acquired, fidelity_acquired)

## Run MFBO

#### train surrogate model

In [4]:
def train_surrogate_model(X_train, y_train):
    model = SingleTaskMultiFidelityGP(
        X_train, 
        y_train, 
        outcome_transform=Standardize(m=1), # m is the output dimension
        data_fidelity=X_train.shape[1] - 1
    )   
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)
    return mll, model

mll, model = train_surrogate_model(X_train, y_train)

### Acquisition Function

In [5]:
# calculate posterior mean and variance
def mu_sigma(model, X, fidelity):
    f = torch.tensor((), dtype=torch.float64).new_ones((nb_COFs, 1)) * fidelity
    X_f = torch.cat((X, f), dim=1) # last col is associated fidelity
    f_posterior = model.posterior(X_f)
    return f_posterior.mean.squeeze().detach().numpy(), f_posterior.variance.squeeze().detach().numpy()

# get the current "effective best solution" 
def get_y_max(ids_acquired, fidelity_acquired, desired_fidelity):
    y_max = torch.tensor((), dtype=torch.float64).new_zeros(1)
    for i, fid in enumerate(fidelity_acquired):
        if (fid == 1 & desired_fidelity == 1):
            if gcmc_y[ids_acquired[i]] > y_max:
                y_max = gcmc_y[ids_acquired[i]]
        elif (fid == 0 & desired_fidelity == 0):
            if henry_y[ids_acquired[i]] > y_max:
                y_max = henry_y[ids_acquired[i]]
    return y_max.item()

# Expected Imrovement function
def EI_hf(model, X, ids_acquired, fidelity_acquired):
    mu_hf, sigma_hf = mu_sigma(model, X, 1) # only use hf
    y_max = get_y_max(ids_acquired, fidelity_acquired, 1)
    
    z = (mu_hf - y_max) / sigma_hf
    explore_term = sigma_hf * norm.pdf(z)
    exploit_term = (mu_hf - y_max) * norm.cdf(z)
    ei = explore_term + exploit_term
    return np.maximum(ei, np.zeros(nb_COFs))

In [7]:
def augment_1(model, X, ids_acquired, fidelity_acquired):
    # get posterior at each fidelity
    lf_mu, lf_sigma = mu_sigma(model, X, 0)
    hf_mu, hf_sigma = mu_sigma(model, X, 1)
    # get "best linear predictor" at each fidelity
    lf_y_max = get_y_max(ids_acquired, fidelity_acquired, 0)
    hf_y_max = get_y_max(ids_acquired, fidelity_acquired, 1)
    # calculate covariance
    a1 = ((lf_mu - lf_y_max) * (hf_mu - hf_y_max)) / (lf_sigma * hf_sigma)
    return a1

In [8]:
def augment_2(model, X, ids_acquired, fidelity_acquired):
    return 0.0

In [9]:
# calculate the cost ratio (α3)
def cost_ratio(fidelity_acquired, costs_acquired):
    avg_cost_hf = np.mean(costs_acquired[fidelity_acquired == 1])
    avg_cost_lf = np.mean(costs_acquired[fidelity_acquired == 0])
    return avg_cost_hf / avg_cost_lf

In [11]:
ei = EI_hf(model, X, ids_acquired, fidelity_acquired)
ei.shape

(608,)

In [12]:
def acquisiiton(model, X, ids_acquired, fidelity_acquired, costs_acquired):
    # expected improvement
    ei = EI_hf(model, X, ids_acquired, fidelity_acquired)
    
    # augmenting functions
    a1 = augment_1(model, X, ids_acquired, fidelity_acquired)
    a2 = augment_2(model, X, ids_acquired, fidelity_acquired)
    a3 = cost_ratio(fidelity_acquired, costs_acquired)
    
    acquisition_values = ei * a1 * a2 * a3
    return acquisition_values.argsort(descending=True)

ids_sorted_by_aquisition = acquisiiton(model, X, ids_acquired, fidelity_acquired, costs_acquired)

TypeError: mean() received an invalid combination of arguments - got (dtype=NoneType, out=NoneType, axis=NoneType, ), but expected one of:
 * (*, torch.dtype dtype)
 * (tuple of ints dim, bool keepdim, *, torch.dtype dtype)
 * (tuple of names dim, bool keepdim, *, torch.dtype dtype)
