In [1]:
from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.util import load_logs
import numpy as np
import xgboost as xgb

  from pandas import MultiIndex, Int64Index


# Black Box Function Setup

In [2]:
# Load the XGBoost GBDT model that black_box_function queries for predictions.
# The model is read from a JSON file; the path is relative to the notebook's
# working directory, so the file must sit alongside the notebook.
bst = xgb.Booster()
bst.load_model(fname='bfm_XGBModel_5-5_Epo-300000.json')

In [3]:
# Names of all the attributes that were varied in the .yaml file
attribute_names = [
    "Al+++",
    "Ca++",
    "Fe++",
    "K+",
    "Na+",
    "Mg++",
    "SiO2(aq)",
    "Cl-",
]

In [4]:
# Normalisation factors: a single factor for y and a list of eight factors for the xs
y_norm = 96.2359084
x_norms = [
    0.0199996777771118,
    0.0199998101672167,
    0.019999719657439,
    0.019999945896381,
    0.0199997455006281,
    0.0199994907051103,
    0.0199990220873342,
    0.0199997402413945,
]

In [5]:
# Lower and upper bounds of the parameter space to be explored
# (the same pair is reused for every parameter when the bounds dict is built).
lower_x, upper_x = 0, 0.016

In [6]:
# Normalisation and de-normalisation ("AbNormalize") helpers: divide by, or multiply by, a scale factor
def NormalizeData(data, upper):
    """Return ``data`` divided by the scale factor ``upper``."""
    scaled = data / upper
    return scaled
def AbNormalizeData(data, upper):
    """Return ``data`` multiplied by the scale factor ``upper`` (inverse of NormalizeData)."""
    rescaled = data * upper
    return rescaled

In [7]:
def black_box_function(x1,x2,x3,x4,x5,x6,x7,x8):
    """
    Function with unknown internals we wish to maximize.

    Normalises the eight inputs, asks the loaded XGBoost model ``bst`` for a
    prediction on that single row, and returns the prediction scaled back up
    by ``y_norm``.

    Parameters
    ----------
    x1, x2, x3, x4, x5, x6, x7, x8 : float
        Raw (un-normalised) inputs. ``x1`` is divided by ``x_norms[0]``,
        ``x2`` by ``x_norms[1]``, and so on.

    Returns
    -------
    float
        The de-normalised prediction, as a NumPy float scalar.

    Raises
    ------
    ValueError
        If ``x_norms`` does not hold exactly one factor per input.
    """
    # Normalisation of xs: every input must be divided by its OWN factor.
    # Pairing is by position, so a looped "divide everything by each factor in
    # turn" would leave all inputs scaled by the last factor only.
    raw_xs = np.array([x1, x2, x3, x4, x5, x6, x7, x8], dtype=float)
    factors = np.asarray(x_norms, dtype=float)
    if factors.shape != raw_xs.shape:
        raise ValueError(
            f"x_norms must contain exactly {raw_xs.size} factors, got shape {factors.shape}"
        )
    # Matrixification of the xs: one row, eight columns, as the model expects a 2-D input
    matrix_norm = NormalizeData(raw_xs, factors).reshape(1, -1)
    # Prediction of a y value from xs:
    ypred_arr_norm = bst.predict(xgb.DMatrix(matrix_norm))
    # Abnormalisation of a normalised y back to a regular y value
    ypred_arr = AbNormalizeData(ypred_arr_norm, y_norm)
    # Only one row was submitted, so the first element is the prediction
    return (ypred_arr[0]).astype(float)

# Parameter Space Setup

In [8]:
# Bounded region of parameter space: each of x1..x8 gets the same (lower, upper) pair
pbounds = {
    name: (lower_x, upper_x)
    for name in ('x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8')
}

# Initial Bayesian Optimisation Round - 100 init runs

In [10]:
# First-round optimiser over the bounded parameter space.
# verbose = 1 prints only when a maximum is observed, verbose = 0 is silent.
# random_state is pinned so the run can be repeated.
optimizer = BayesianOptimization(f=black_box_function, pbounds=pbounds, random_state=1, verbose=2)

In [11]:
# Set up a JSON logger so each optimisation step of this round is written to a
# historical backup file, then subscribe it to the optimiser's step events
logger = JSONLogger(path="./BayesOptLogs_one.json")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [12]:
# Initial round: 100 random guesses across the parameter space and no
# further iterations (n_iter=0)
optimizer.maximize(init_points=100, n_iter=0)

In [13]:
# Print the best result so far: a dict holding the highest 'target' and the
# 'params' (x1..x8) that produced it
print(optimizer.max)

{'target': 2.482495069503784, 'params': {'x1': 0.0003443968843871677, 'x2': 0.015167523379482897, 'x3': 0.01323384753873172, 'x4': 0.00024030369187395806, 'x5': 0.002819140089200884, 'x6': 0.005313017189869392, 'x7': 0.0020959495169746703, 'x8': 0.012951851074025314}}


In [14]:
# Print the maximum target value obtained during the initial set of runs,
# rounded to 3 decimal places
max_val = optimizer.max["target"]
print(f"Total Carbonates in Fracture by Optimisation Initiation Fluid: {str(round(max_val, 3))} %")

Total Carbonates in Fracture by Optimisation Initiation Fluid: 2.482 %


# Further Optimisation 

In [18]:
# Secondary optimiser: same objective and bounds as the first, different seed
subsequent_optimizer = BayesianOptimization(f=black_box_function, pbounds=pbounds, random_state=7, verbose=2)
# Show how many points the new optimiser knows about before any logs are loaded
print(len(subsequent_optimizer.space))

0


In [19]:
# Load the points logged during the initial round into the secondary optimiser
load_logs(subsequent_optimizer, logs=["./BayesOptLogs_one.json"])
# Check how many points the optimiser has registered after loading
print("Subsequent optimizer is now aware of {} points.".format(len(subsequent_optimizer.space)))
# Print the highest target and the parameters that produced it
print(subsequent_optimizer.max)

Subsequent optimizer is now aware of 100 points.
{'target': 2.482495069503784, 'params': {'x1': 0.0003443968843871677, 'x2': 0.015167523379482897, 'x3': 0.01323384753873172, 'x4': 0.00024030369187395806, 'x5': 0.002819140089200884, 'x6': 0.005313017189869392, 'x7': 0.0020959495169746703, 'x8': 0.012951851074025314}}


In [20]:
# Check the current maximum known to the secondary optimiser, which at this
# point comes only from the loaded initial runs
max_val = subsequent_optimizer.max["target"]
print(f"Total Carbonates in Fracture by Optimisation Initiation Fluid: {str(round(max_val, 3))} %")

Total Carbonates in Fracture by Optimisation Initiation Fluid: 2.482 %


In [21]:
# Start a second JSON logger (separate file) for the iterative runs and
# subscribe it to the secondary optimiser's step events.
# Note: this rebinds the name `logger` used for the first round's logger.
logger = JSONLogger(path="./BayesOptLogs_two.json")
subsequent_optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [22]:
# Second round of optimisation: no new random points (init_points=0),
# 500 iterations building on the loaded runs
subsequent_optimizer.maximize(init_points=0, n_iter=500)

In [23]:
# Print the best result after the second round: the highest 'target' and the
# 'params' (x1..x8) that produced it
print(subsequent_optimizer.max)

{'target': 3.411043882369995, 'params': {'x1': 0.0, 'x2': 0.0, 'x3': 0.016, 'x4': 0.0, 'x5': 0.0, 'x6': 0.0, 'x7': 0.0, 'x8': 0.008032778113953}}


In [24]:
# Print the maximum target value after the second round, rounded to 3 decimal places
max_val = subsequent_optimizer.max["target"]
print(f"Total Carbonates in Fracture by Optimisation Continuation Fluid: {str(round(max_val, 3))} %")

Total Carbonates in Fracture by Optimisation Continuation Fluid: 3.411 %
