# Exploration of Cornell-MOE 

In [1]:
import sys

In [2]:
# Custom package environment: put the SimNIBS and gmsh-sdk locations at the front
# of the module search path. Entries are written in final lookup order (first =
# highest priority), i.e. the reverse of three successive insert(0, ...) calls.
sys.path[:0] = [
    '/home/jjeyachandra/simnibs_2.1.2/Python_modules/src',
    '/home/jjeyachandra/simnibs_2.1.2/miniconda2/envs/simnibs_env/lib/python2.7/site-packages',
    '/KIMEL/tigrlab/projects/jjeyachandra/gmsh-sdk/lib/',
]

In [3]:
import numpy as np
import os

In [4]:
# Make the Cornell-MOE checkout importable by placing it first on the search path
sys.path[:0] = ['/projects/jjeyachandra/Cornell-MOE/']

In [5]:
from examples import synthetic_functions

# Define the objective function to optimise, using the Hartmann3 class from the
# `synthetic_functions` example module imported above
objective_func = synthetic_functions.Hartmann3()

In [12]:
from moe.optimal_learning.python.cpp_wrappers.domain import TensorProductDomain as cTensorProductDomain
from moe.optimal_learning.python.python_version.domain import TensorProductDomain
from moe.optimal_learning.python.geometry_utils import ClosedInterval

# Build the n-dimensional search domain twice -- once with the pure-Python
# implementation and once with the C++ wrapper -- each from its own freshly
# built list of closed intervals.
def _closed_intervals(bounds):
    """Return a new list holding ClosedInterval(row[0], row[1]) for every row of ``bounds``."""
    intervals = []
    for row in bounds:
        intervals.append(ClosedInterval(row[0], row[1]))
    return intervals

search_domain = TensorProductDomain(_closed_intervals(objective_func._search_domain))
cpp_search_domain = cTensorProductDomain(_closed_intervals(objective_func._search_domain))

In [15]:
# Pre-allocate the initial design: one row per starting point, one column per dimension
n_init = objective_func._num_init_pts
init_pts = np.zeros((n_init, objective_func._dim))
# Fill every entry with points drawn by the (Python) search domain
init_pts[:, :] = search_domain.generate_uniform_random_points_in_domain(n_init)
print(init_pts)

[[ 0.00127755  0.64758549  0.51351195]
 [ 0.39874241  0.28674006  0.84441703]
 [ 0.75829803  0.8475034   0.05003889]]


In [16]:
# Noise-less evaluation of the objective at each initial point; every result is
# reduced to a single scalar with .sum()
true_values = []
for pt in init_pts:
    true_values.append(objective_func.evaluate_true(pt).sum())
observations = np.array(true_values)
print(observations)

[ -1.62839345e+01  -1.03606579e+01  -9.12807754e-03]


In [17]:
from moe.optimal_learning.python.data_containers import HistoricalData, SamplePoint

# Collect the initial observations in a HistoricalData container (no derivative
# observations) and display its JSON payload
init_data = HistoricalData(dim=objective_func._dim, num_derivatives=0)
initial_samples = []
for coords, value in zip(init_pts, observations):
    # (point, value, variance) -- the variance is fixed at 0 for these samples
    initial_samples.append((coords, value, 0))
init_data.append_sample_points(initial_samples)
init_data.json_payload()

{'points_sampled': [{'point': [0.0012775507773065131,
    0.64758549275119104,
    0.51351194737167627],
   'value': [-16.283934540806573],
   'value_var': 0.0},
  {'point': [0.39874241402977839, 0.28674005964724192, 0.84441703184238515],
   'value': [-10.360657862424077],
   'value_var': 0.0},
  {'point': [0.75829802875039043, 0.84750340439225014, 0.050038886074943847],
   'value': [-0.0091280775395779015],
   'value_var': 0.0}]}

Setting up the log-likelihood-based fit of the $\mathcal{GP}$ model (hyperparameters are sampled with MCMC)

In [20]:
from moe.optimal_learning.python.cpp_wrappers.log_likelihood_mcmc import GaussianProcessLogLikelihoodMCMC
from moe.optimal_learning.python.default_priors import DefaultPrior
# Author's working notes on what the prior covers (unconfirmed -- TODO verify
# against the DefaultPrior implementation):
#   l  : D length scales
#   mu : len(y) number of means
#   v  : number of noise values
#   t  : covariance amplitude
#   Dimensions  = D + (len(y)) + t
#   Noise terms = len(y)
n_prior_dims = objective_func._dim + 2
prior = DefaultPrior(n_dims=n_prior_dims, num_noise=1)

# Settings for the MCMC-based GP log-likelihood model built on the initial data
mcmc_settings = dict(
    historical_data=init_data,
    derivatives=[],
    prior=prior,
    chain_length=1000,
    burnin_steps=2000,
    # Author's note: n_hypers is a lower limit on the number of MCMC chains to use
    n_hypers=2**4,
    noisy=False,
)
gp_ll = GaussianProcessLogLikelihoodMCMC(**mcmc_settings)
gp_ll.train()

In [26]:
from moe.optimal_learning.python.python_version.optimization import GradientDescentOptimizer, GradientDescentParameters
from moe.optimal_learning.python.cpp_wrappers.optimization import GradientDescentOptimizer as cGDOpt
from moe.optimal_learning.python.cpp_wrappers.optimization import GradientDescentParameters as cGDParams
# Gradient-descent settings for the C++ optimizer, collected in one mapping so
# individual values are easy to tweak before the parameter object is built
sgd_settings = {
    'num_multistarts': 1,
    'max_num_steps': 6,
    'max_num_restarts': 1,
    'num_steps_averaged': 3,
    'gamma': 0.0,
    'pre_mult': 1.0,
    'max_relative_change': 0.2,
    'tolerance': 1.0e-10,
}
sgd_params = cGDParams(**sgd_settings)

In [92]:
from moe.optimal_learning.python.cpp_wrappers.expected_improvement import ExpectedImprovement
from moe.optimal_learning.python.cpp_wrappers.expected_improvement import multistart_expected_improvement_optimization as meio

def gen_sample_from_qei(gp, search_domain, sgd_params, num_samples, num_mc=1e4, lhc_iter=2e4,
                        max_num_threads=8):
    """Suggest a batch of points by maximising q-EI with multistart gradient descent.

    Parameters
    ----------
    gp : Gaussian-process model handed to ``ExpectedImprovement`` as ``gaussian_process``.
    search_domain : domain object passed to the C++ gradient-descent optimizer.
    sgd_params : gradient-descent parameter object for that optimizer.
    num_samples : number of points requested from the multistart optimisation.
    num_mc : number of Monte-Carlo iterations for the EI estimate. Coerced to ``int``
        so that float values such as the default ``1e4`` are accepted.
    lhc_iter : coerced to ``int`` and passed as the last positional argument of the
        optimizer constructor -- presumably the number of random starting samples;
        TODO confirm against the MOE optimizer documentation.
    max_num_threads : thread count forwarded to the multistart optimisation
        (default 8, the value that used to be hard-coded).

    Returns
    -------
    tuple
        ``(points_to_sample, ei)``: the points returned by the optimisation and the
        expected improvement computed at those points.
    """
    # Both iteration counts are cast to int: the defaults are written in float
    # notation, and previously only lhc_iter was converted.
    qEI = ExpectedImprovement(gaussian_process=gp, num_mc_iterations=int(num_mc))
    optimizer = cGDOpt(search_domain, qEI, sgd_params, int(lhc_iter))

    points_to_sample = meio(optimizer, None, num_samples, use_gpu=False, which_gpu=0,
                            max_num_threads=max_num_threads)

    # Evaluate the expected improvement at the suggested batch
    qEI.set_current_point(points_to_sample)
    return points_to_sample, qEI.compute_expected_improvement()

In [125]:
# Request a batch of 8 candidate points via SGD maximisation of q-EI, using the
# first model held by gp_ll and 2**10 Monte-Carlo iterations
points_to_sample, ei = gen_sample_from_qei(
    gp=gp_ll.models[0],
    search_domain=cpp_search_domain,
    sgd_params=sgd_params,
    num_samples=8,
    num_mc=2**10,
)
print(ei)

2.87497362126


In [131]:
# KEY: evaluate the objective at every suggested point -- this step should be
# parallelised so that as many points as possible are evaluated at a time
sampled_points = []
for candidate in points_to_sample:
    sampled_points.append(objective_func.evaluate_true(candidate).sum())

# Evidence tuples of the form (point, observed value, 0)
evidence_tup = []
for coords, value in zip(points_to_sample, sampled_points):
    evidence_tup.append((coords, value, 0))

In [133]:
# Update the model: hand the newly evaluated (point, value, 0) tuples to the
# GP log-likelihood object, then call train() again on the enlarged data set
gp_ll.add_sampled_points(evidence_tup)
gp_ll.train()