In [1]:
import os
import sys

# The notebook file name has no directory component, so ``dirname`` returns ""
# and ``abspath`` resolves it to the current working directory: the notebook
# has to be run from the folder it lives in.
current_dir = os.path.abspath(
    os.path.dirname("Tests_Bayesian_Optimization.ipynb")
)
# Add the parent folder to the import path (the ``ppe`` import follows).
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
sys.path.append(parent_dir)

from ppe.bayesian_optimization import Bayesian_Optimization
import numpy as np
import pymc as pm

## Example 1: Simple Gaussian model

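We assume $Y \mid \theta \sim \mathcal{N}(\theta, \sigma)$, with $\theta \sim \mathcal{N}(\mu_1, \sigma_1)$, where the second argument of $\mathcal{N}$ denotes a standard deviation. The hyperparameter vector is then $\pmb{\lambda} = [\mu_1, \sigma, \sigma_1]$. Moreover, for an interval $A = (a,b]$, the prior predictive probability of $Y \in A$ is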

$$\mathbb{P}_{A|\pmb{\lambda}} = \Phi \Big((b - \mu_1)/\sqrt{\sigma^2 + \sigma_1^2} \Big) - \Phi \Big((a - \mu_1)/\sqrt{\sigma^2 + \sigma_1^2} \Big)$$

In [2]:
## Defining the pymc model

def gaussian_model(lam, n_samples):
    """Draw prior predictive samples from the simple Gaussian model.

    The model is ``theta ~ Normal(lam["mu_1"], lam["sigma_1"])`` and
    ``Y_obs ~ Normal(theta, lam["sigma"])``.

    Parameters
    ----------
    lam : mapping
        Hyperparameter values; the keys ``"mu_1"``, ``"sigma_1"`` and
        ``"sigma"`` are read.
    n_samples : int
        Forwarded as ``samples`` to ``pm.sample_prior_predictive``.

    Returns
    -------
    The object returned by ``pm.sample_prior_predictive``.
    """
    with pm.Model():
        prior_mean = pm.Normal("theta", mu=lam["mu_1"], sigma=lam["sigma_1"])

        # The observed value 1. is a placeholder: prior predictive sampling
        # draws Y_obs (see the "Sampling: [Y_obs, theta]" log line).
        pm.Normal("Y_obs", mu=prior_mean, sigma=lam["sigma"], observed=1.)

        prior_draws = pm.sample_prior_predictive(samples=n_samples)

    return prior_draws


## The partitions and expert probabilities for the target Y
## (one list entry per covariate set; this example has a single one)

partitions = [
    np.array([
        [-1000, -5],   # expert probability 0.1
        [-5, -1],      # expert probability 0.2
        [-1, 3],       # expert probability 0.6
        [3, 1000],     # expert probability 0.1
    ])
]

expert_probs = [np.array([0.1, 0.2, 0.6, 0.1])]



## Optimizer for the Gaussian example. alpha is passed as None here; it is
## listed among the tuned hyperparameters in param_names instead.
BO = Bayesian_Optimization(
    pymc_sampling_func=gaussian_model,
    J=1,
    alpha=None,
    target_type="continuous",
    target_samples=1000,
)




## hyperparameter names. Alpha is optimized as well
param_names = ["mu_1", "sigma_1", "sigma", "alpha"]

## every hyperparameter is searched over a continuous range
param_types = ["range"] * len(param_names)

## bounds for each hyperparameter, in the order of param_names
param_bounds = [
    [-10., 10.],     # mu_1
    [0.00001, 4.],   # sigma_1
    [0.00001, 4.],   # sigma
    [0.1, 20.],      # alpha
]

## expected values for the hyperparameters (None: no expected value for alpha)
param_expected_vals = [-1, 3, 1, None]

## level of confidence for each expected value (mu_1, sigma_1, sigma)
param_weights = [1, 0.5, 0.5]



## Run the hyperparameter search (75 trials) and keep the selected values
best_params = BO.optimize_hyperparams(
    param_names=param_names,
    param_types=param_types,
    param_bounds=param_bounds,
    param_expected_vals=param_expected_vals,
    param_weights=param_weights,
    partitions=partitions,
    expert_probs=expert_probs,
    n_trials=75,
)


[INFO 07-15 18:03:39] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter mu_1. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:03:39] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter sigma_1. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:03:39] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter sigma. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:03:39] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter alpha. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter d

In [3]:
# Display the hyperparameter dictionary returned by BO.optimize_hyperparams
best_params

{'mu_1': -0.522516247185786,
 'sigma_1': 0.48562313258287004,
 'sigma': 3.1090032269874164,
 'alpha': 15.752870270298981}

In [4]:
# Evaluate BO.eval_function at the selected hyperparameters; best_params also
# carries the tuned "alpha" entry.
# NOTE(review): presumably this is the objective used during the search - confirm in ppe.bayesian_optimization
BO.eval_function(best_params, partitions, expert_probs)

Sampling: [Y_obs, theta]
INFO:pymc.sampling.forward:Sampling: [Y_obs, theta]


Array(17.100998, dtype=float32)

In [5]:
# Probabilities obtained from the model at best_params, one array per entry of
# `partitions`, to be compared with expert_probs = [0.1, 0.2, 0.6, 0.1].
# NOTE(review): meaning inferred from the method name and the displayed output - confirm
BO.get_model_probs(best_params, partitions)

Sampling: [Y_obs, theta]
INFO:pymc.sampling.forward:Sampling: [Y_obs, theta]


[array([0.064, 0.382, 0.447, 0.107])]

## Second example: binary classification in the presence of covariates (taken from the paper)

We consider a generative model for binary data in the presence of a vector of covariates. The observable variable conditioned on the parameters is distributed according to a Bernoulli model and we take a multivariate Gaussian distribution as the prior distribution for the vector of parameters in the predictor function. This can be formalized as:

$$ y \mid \pmb{\theta} \sim \mathcal{B}\big(\Phi (\pmb{x}^{\text{T}}\pmb{\theta})\big), $$

where

$$\pmb{\theta} \sim \mathcal{N}_D(\pmb{\mu}, \Sigma).$$

This gives us

$$ y \sim \mathcal{B}(p(\pmb{x}, \pmb{\lambda})),$$

where 

$$ p(\pmb{x}, \pmb{\lambda}) = \Phi \Big(\frac{\pmb{x}^{\text{T}}\pmb{\mu}}{\sqrt{1 + \pmb{x}^{\text{T}}\Sigma\pmb{x}}}\Big).$$

Our hyperparameter set is defined as $\pmb{\lambda} = [\pmb{\mu}, \Sigma]$. Since this is binary classification, the target takes values in $\{0,1\}$ and there are only two partitions, $A_1 = \{0\}$ and $A_2 = \{1\}$. Their probabilities are:

$$\mathbb{P}_{A_1 | \pmb{\lambda}} = 1 -  p(\pmb{x}, \pmb{\lambda}) \text{ and } \mathbb{P}_{A_2 | \pmb{\lambda}} = p(\pmb{x}, \pmb{\lambda}).$$

For this example we assume $D=5$ and that the components of $\pmb{\theta}$ are independent of one another, meaning that $\Sigma$ is a diagonal matrix.

In [6]:
def classification_model(lam, n_samples):
    """Draw prior predictive samples from the probit classification model.

    Five independent coefficients ``theta_d ~ Normal(lam["mu_d"], lam["sigma_d"])``
    (d = 1..5) are combined with three fixed covariate vectors, and
    ``Y_obs ~ Bernoulli(Phi(x^T theta))`` is defined for each of them.

    Parameters
    ----------
    lam : mapping
        Hyperparameter values; keys ``"mu_1"``..``"mu_5"`` and
        ``"sigma_1"``..``"sigma_5"`` are read.
    n_samples : int
        Forwarded as ``samples`` to ``pm.sample_prior_predictive``.

    Returns
    -------
    The object returned by ``pm.sample_prior_predictive`` (sampled with the
    fixed seed 8927).
    """
    RANDOM_SEED = 8927

    # One row per covariate set (J = 3), one column per coefficient (D = 5).
    covs = np.array([
        [1.3, 0.7, 0.5, -0.7, -0.5],
        [1, 0.5, 0.4, -0.8, 0],
        [0.3, 0.7, 2, -2, 0.2],
    ])

    with pm.Model():
        coefficients = [
            pm.Normal(f"theta_{d}", mu=lam[f"mu_{d}"], sigma=lam[f"sigma_{d}"])
            for d in range(1, 6)
        ]
        theta = pm.math.stack(coefficients)

        linear_predictor = pm.math.dot(covs, theta)

        # Standard normal CDF written through erf: Phi(z) = 0.5 + 0.5 * erf(z / sqrt(2))
        success_prob = 0.5 + 0.5 * pm.math.erf(linear_predictor / pm.math.sqrt(2))

        # The observed ones are placeholders with one entry per covariate set;
        # prior predictive sampling draws Y_obs.
        pm.Bernoulli("Y_obs", p=success_prob, observed=np.ones(covs.shape[0]))

        prior_draws = pm.sample_prior_predictive(random_seed=RANDOM_SEED, samples=n_samples)

    return prior_draws



## Optimizer for the classification example: J = 3 covariate sets, discrete target.
## alpha is passed as None here and listed among the tuned hyperparameters instead.
BO = Bayesian_Optimization(
    pymc_sampling_func=classification_model,
    J=3,
    alpha=None,
    target_type="discrete",
    target_samples=1000,
)


## Hyperparameter names: a (mu_d, sigma_d) pair per coefficient, followed by alpha
param_names = [
    f"{stem}_{d}" for d in range(1, 6) for stem in ("mu", "sigma")
] + ["alpha"]

param_types = ["range"] * len(param_names)


partitions = np.array([0, 1]) ## binary partition

## Expert probabilities [P(y=0), P(y=1)], one array per covariate set
expert_probs = [
    np.array(pair)
    for pair in ([0.65, 0.35], [0.3, 0.7], [0.2, 0.8])
]


## Bounds in the order of param_names: each mu_d, each sigma_d, then alpha
param_bounds = [
    bounds for _ in range(5) for bounds in ([-6., 6.], [0.00001, 2.])
] + [[0.1, 200.]]

## Expected values in the order of param_names (None: no expected value for alpha)
param_expected_vals = [
    1, 1,       # mu_1, sigma_1
    -0.2, 1,    # mu_2, sigma_2
    -1, 1,      # mu_3, sigma_3
    1, 1,       # mu_4, sigma_4
    -0.5, 1,    # mu_5, sigma_5
    None,       # alpha
]

## Confidence weights: 1 for every mu_d, 0.5 for every sigma_d
param_weights = [1, 0.5] * 5


## Run the hyperparameter search (100 trials) and keep the selected values
best_params = BO.optimize_hyperparams(
    param_names=param_names,
    param_types=param_types,
    param_bounds=param_bounds,
    param_expected_vals=param_expected_vals,
    param_weights=param_weights,
    partitions=partitions,
    expert_probs=expert_probs,
    n_trials=100,
)



[INFO 07-15 18:04:25] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter mu_1. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:04:25] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter sigma_1. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:04:25] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter mu_2. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:04:25] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter sigma_2. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter 

In [7]:
# Display the hyperparameter dictionary returned by BO.optimize_hyperparams
best_params

{'mu_1': -0.3847012931323004,
 'sigma_1': 1.1132331287115156,
 'mu_2': -0.20954295563542402,
 'sigma_2': 0.7591995439168872,
 'mu_3': 0.37755903054663786,
 'sigma_3': 1.325778222069404,
 'mu_4': -1.015487063836261,
 'sigma_4': 0.43747042398082286,
 'mu_5': 1.7483695465164661,
 'sigma_5': 1.285837784746775,
 'alpha': 16.932610813055206}

In [8]:
# Evaluate BO.eval_function at the selected hyperparameters.
# NOTE(review): presumably this is the objective used during the search - confirm in ppe.bayesian_optimization
BO.eval_function(best_params, partitions, expert_probs)

Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5]
INFO:pymc.sampling.forward:Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5]


Array(75.95865, dtype=float32)

In [9]:
# Probabilities obtained from the model at best_params; the displayed output has
# one [P(y=0), P(y=1)]-style array per covariate set, to be compared with expert_probs.
# NOTE(review): meaning inferred from the method name and the displayed output - confirm
BO.get_model_probs(best_params, partitions)

Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5]
INFO:pymc.sampling.forward:Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5]


[array([0.629, 0.371]), array([0.385, 0.615]), array([0.173, 0.827])]

In [10]:
## We have a classification task, so the target takes the values 0 and 1
partitions = np.array([0, 1])


## input for the expert probabilities (J=6 covariate sets in total),
## one pair per covariate set
expert_probs = [
    np.array(pair)
    for pair in (
        [0.8854, 0.1146],
        [0.7857, 0.2143],
        [0.2237, 0.7763],
        [0.9023, 0.0977],
        [0.8167, 0.1833],
        [0.7857, 0.2143],
    )
]


## the logistic regression model

def trauma_center_model(lam, n_samples):
    """Draw prior predictive samples from the logistic regression model.

    Six independent coefficients ``theta_k ~ Normal(lam["mu_k"], lam["sigma_k"])``
    (k = 1..6) are combined with six fixed covariate sets, and
    ``Y_obs ~ Bernoulli(logistic(x^T theta))`` is defined for each of them.

    Parameters
    ----------
    lam : mapping
        Hyperparameter values; keys ``"mu_1"``..``"mu_6"`` and
        ``"sigma_1"``..``"sigma_6"`` are read.
    n_samples : int
        Forwarded as ``samples`` to ``pm.sample_prior_predictive``.

    Returns
    -------
    The object returned by ``pm.sample_prior_predictive``.
    """
    ## values for the covariates. Each row of the literal is one covariate (the
    ## first one is the intercept); after transposing, each row is one covariate set.
    covs = np.array([[1, 1, 1, 1, 1, 1],
                     [25, 25, 41, 41, 33, 33],
                     [7.84, 3.34, 3.34, 7.84, 5.74, 5.74],
                     [60, 10, 60, 10, 35, 35],
                     [0, 0, 1, 1, 0, 1],
                     [0, 0, 60, 10, 0, 35]]).T

    with pm.Model():
        coefficients = [
            pm.Normal(f"theta_{k}", mu=lam[f"mu_{k}"], sigma=lam[f"sigma_{k}"])
            for k in range(1, covs.shape[1] + 1)
        ]
        theta = pm.math.stack(coefficients)

        xTtheta = pm.math.dot(covs, theta)

        # Numerically stable logistic function. The hand-written form
        # exp(x) / (1 + exp(x)) evaluates to inf / inf = NaN once exp(x)
        # overflows, which is reachable here: covariates go up to 60 and the
        # coefficient means are searched in [-4, 4].
        p = pm.math.invlogit(xTtheta)

        # The observed ones are placeholders with one entry per covariate set;
        # prior predictive sampling draws Y_obs.
        pm.Bernoulli("Y_obs", p=p, observed=np.ones(covs.shape[0]))

        idata = pm.sample_prior_predictive(samples=n_samples)

    return idata



## Optimizer for the logistic regression example: J = 6 covariate sets, discrete target.
## alpha is passed as None here and listed among the tuned hyperparameters instead.
BO = Bayesian_Optimization(
    pymc_sampling_func=trauma_center_model,
    J=6,
    alpha=None,
    target_type="discrete",
    target_samples=1500,
)





## Hyperparameter names: a (mu_k, sigma_k) pair per coefficient, followed by alpha
param_names = [
    f"{stem}_{k}" for k in range(1, 7) for stem in ("mu", "sigma")
] + ["alpha"]


## Every hyperparameter is searched over a continuous range
param_types = ["range"] * len(param_names)

## Alternative setup kept for reference (not executed): every sigma_k is a
## "choice" parameter instead of a "range" parameter.
# param_types = ["range", "choice"] * 6 + ["range"]


## the bounds for each hyperparameter, in the order of param_names

param_bounds = [
    bounds for _ in range(6) for bounds in ([-4., 4.], [0.00001, 3.])
] + [[0.1, 100.]]

## Bounds matching the alternative "choice" setup (not executed): every sigma_k
## picks one of the powers of ten 10**-5 ... 10**0.
# param_bounds = [
#     bounds
#     for _ in range(6)
#     for bounds in ([-4., 4.], [float(10**i) for i in range(-5, 1)])
# ] + [[0.1, 100.]]

## expected values (taken from the thesis, these correspond to the posterior means and standard deviations from the original study)

param_expected_vals = [
    -1.8, 1.1,      # mu_1, sigma_1
    0.07, 0.02,     # mu_2, sigma_2
    -0.6, 0.14,     # mu_3, sigma_3
    0.05, 0.01,     # mu_4, sigma_4
    -1.1, 1.06,     # mu_5, sigma_5
    -0.02, 0.03,    # mu_6, sigma_6
    None,           # alpha
]

## Expected values matching the alternative "choice" setup (not executed):
## only the means carry an expected value.
# param_expected_vals = [-1.8, None,
#                        0.07, None,
#                        -0.6, None,
#                        0.05, None,
#                        -1.1, None,
#                        -0.02, None,
#                        None]

## Full confidence for each expected hyperparameter value (all weights=1)
## NOTE(review): this yields 13 weights (one per entry of param_bounds, alpha
## included), whereas the other two examples pass one weight per non-None
## expected value - confirm which length optimize_hyperparams expects.

param_weights = np.ones(len(param_bounds))



## Run the hyperparameter search (100 trials) and keep the selected values
best_params = BO.optimize_hyperparams(
    param_names=param_names,
    param_types=param_types,
    param_bounds=param_bounds,
    param_expected_vals=param_expected_vals,
    param_weights=param_weights,
    partitions=partitions,
    expert_probs=expert_probs,
    n_trials=100,
)



[INFO 07-15 18:05:49] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter mu_1. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:05:49] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter sigma_1. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:05:49] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter mu_2. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 07-15 18:05:49] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter sigma_2. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter 

In [11]:
# Display the hyperparameter dictionary returned by BO.optimize_hyperparams
best_params

{'mu_1': -0.3744375752154969,
 'sigma_1': 0.17430732478906982,
 'mu_2': -1.9871814590513117,
 'sigma_2': 2.597273680177337,
 'mu_3': 1.289513699642841,
 'sigma_3': 0.029998576410422972,
 'mu_4': -0.6503347252700871,
 'sigma_4': 2.338757210311327,
 'mu_5': -2.203323706696862,
 'sigma_5': 1.3945311381405014,
 'mu_6': 1.1421140663650808,
 'sigma_6': 0.5220988249271138,
 'alpha': 5.775264249422241}

In [12]:
# Probabilities obtained from the model at best_params; the displayed output has
# one two-element array per covariate set, to be compared with expert_probs.
# NOTE(review): meaning inferred from the method name and the displayed output - confirm
BO.get_model_probs(best_params, partitions)

Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5, theta_6]
INFO:pymc.sampling.forward:Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5, theta_6]


[array([0.70666667, 0.29333333]),
 array([0.77333333, 0.22666667]),
 array([0.61866667, 0.38133333]),
 array([0.724, 0.276]),
 array([0.75466667, 0.24533333]),
 array([0.64533333, 0.35466667])]

In [13]:
# Re-display the expert probabilities next to the model probabilities shown above.
# In the recorded run the third covariate set differs most: the expert gives
# [0.2237, 0.7763] while the model output reads [0.61866667, 0.38133333].
expert_probs

[array([0.8854, 0.1146]),
 array([0.7857, 0.2143]),
 array([0.2237, 0.7763]),
 array([0.9023, 0.0977]),
 array([0.8167, 0.1833]),
 array([0.7857, 0.2143])]

In [14]:
# Evaluate BO.eval_function at the selected hyperparameters.
# NOTE(review): presumably this is the objective used during the search - confirm in ppe.bayesian_optimization
BO.eval_function(best_params, partitions, expert_probs)

Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5, theta_6]
INFO:pymc.sampling.forward:Sampling: [Y_obs, theta_1, theta_2, theta_3, theta_4, theta_5, theta_6]


Array(4.3246517, dtype=float32)