# Setting up a 3D Bayesian optimization problem with Gaussian process regression

Jackson S. Bentley, Sumner B. Harris
09/01/25

In [1]:
# Step 1: Clean uninstall of conflicting packages
# (-y answers the confirmation prompt so the cell runs unattended)
!pip uninstall -y jax jaxlib ml-dtypes gpax

# Step 2: Install compatible versions
# jax, jaxlib and ml_dtypes are pinned to exact versions here
!pip install jax==0.6.2 jaxlib==0.6.2 ml_dtypes==0.5.1

# Step 3: Install gpax version that supports jax>=0.6.2
!pip install gpax==0.1.9

# Step 4: Force CPU use and test
# NOTE(review): if jax was already imported in this runtime before the reinstall,
# a runtime restart is presumably required before the pinned version is picked up — confirm.
import jax
import gpax

jax.config.update("jax_platform_name", "cpu")  # CPU only
gpax.utils.enable_x64()  # 64-bit precision; called again in the GP cell further down

print("✅ GPAX is ready with JAX version:", jax.__version__)


Found existing installation: jax 0.7.2
Uninstalling jax-0.7.2:
  Successfully uninstalled jax-0.7.2
Found existing installation: jaxlib 0.7.2
Uninstalling jaxlib-0.7.2:
  Successfully uninstalled jaxlib-0.7.2
Found existing installation: ml_dtypes 0.5.3
Uninstalling ml_dtypes-0.5.3:
  Successfully uninstalled ml_dtypes-0.5.3
[0mCollecting jax==0.6.2
  Downloading jax-0.6.2-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib==0.6.2
  Downloading jaxlib-0.6.2-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting ml_dtypes==0.5.1
  Downloading ml_dtypes-0.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)
Downloading jax-0.6.2-py3-none-any.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jaxlib-0.6.2-cp312-cp312-manylinux2014_x86_64.whl (89.9 MB)
[2K   [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.1/89.9 MB[0m [31m184

KeyboardInterrupt: 

In [None]:
# Define a degree symbol for later plotting
# Note: plot_3d_projections declares its own `deg_sgn` parameter, so that function
# does not read this global unless it is passed in explicitly.
deg_sgn = '\N{DEGREE SIGN}'


# Define some functions for basic utilities

I define some normalization functions and a function to package up data to save/load and plot results.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams, font_manager
import urllib.request

# Download the Carlito font and register it with Matplotlib so figures can use it.
font_url = "https://github.com/google/fonts/raw/main/ofl/carlito/Carlito-Regular.ttf"
font_path = "/usr/share/fonts/truetype/Carlito-Regular.ttf"
urllib.request.urlretrieve(font_url, font_path)
font_manager.fontManager.addfont(font_path)

# Verify that Matplotlib can see Carlito (optional).
# Report once: print every matching font path, or a single 'not there' when no
# Carlito file is found (instead of one 'not there' line per unrelated font).
carlito_fonts = [
    path
    for path in font_manager.findSystemFonts(fontpaths=None, fontext='ttf')
    if "Carlito" in path
]
for f in carlito_fonts:
    print(f)
if not carlito_fonts:
    print('not there')

# Set global font to Carlito
plt.rcParams['font.family'] = 'Carlito'
plt.rcParams['mathtext.fontset'] = 'custom'  # ensures math uses the same font family
plt.rcParams['mathtext.rm'] = 'Carlito'
plt.rcParams['mathtext.it'] = 'Carlito:italic'
plt.rcParams['mathtext.bf'] = 'Carlito:bold'

# Weights of the four objective terms (read as globals by the objective cells and
# by the figure title in plot_3d_projections):
#   alpha * |c - lvo_c_lit| + beta * roughness + gamma * FWHM + delta * LaVO4 intensity
alpha = 6.0606*2   # minmax: 6.0606; zscore: 23.6427 -- after B35, c=3.945
beta = 0.0876    # minmax: 0.0876; zscore: 0.3817  -- after B35
gamma = 4.3879   # minmax: 4.3879; zscore: 16.3955 -- after B35
delta = 0.0338   # minmax: 0.0338; zscore: 0.17   -- after B35

# Min-max scaling helper (elementwise, so any array shape or a scalar works)
def normalize(data, min_val, max_val):
    """Linearly rescale ``data`` so that ``min_val`` maps to 0 and ``max_val`` to 1.

    Values outside ``[min_val, max_val]`` are not clipped; they simply land
    outside ``[0, 1]``.
    """
    span = max_val - min_val
    shifted = data - min_val
    return shifted / span

# Undo min-max scaling (the inverse of normalize for the same bounds)
def inverse_normalize(norm_data, min_val, max_val):
    """Map normalized values back to physical units.

    0 maps to ``min_val`` and 1 maps to ``max_val``; other values are
    interpolated or extrapolated linearly.
    """
    span = max_val - min_val
    return norm_data * span + min_val

def save_data(
    file_name,
    X_train, y_train,
    X_test,
    x1, x2, x3,           # now saving three parameter grids
    y_pred, y_sampled,
    acq, next_point,
    running_best,
    iteration
):
    """Write one optimization checkpoint to ``file_name`` with ``np.savez``.

    Each argument is stored under a key equal to its parameter name, which is
    the set of keys ``load_data`` reads back. Entries that are ``None`` are
    stored as object arrays, so the file must be loaded with
    ``allow_pickle=True``.
    """
    checkpoint = {
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'x1': x1,
        'x2': x2,
        'x3': x3,
        'y_pred': y_pred,
        'y_sampled': y_sampled,
        'acq': acq,
        'next_point': next_point,
        'running_best': running_best,
        'iteration': iteration,
    }
    np.savez(file_name, **checkpoint)

def load_data(file_name):
    """Load a checkpoint written by ``save_data`` and return its fields as a tuple.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, x1, x2, x3, y_pred, y_sampled, acq,
        next_point, running_best, iteration)``, each as a NumPy array. Fields
        that were saved as ``None`` come back as 0-d object arrays.

    Notes
    -----
    ``allow_pickle=True`` is needed because ``None`` placeholders are stored as
    pickled object arrays. Unpickling can execute arbitrary code, so only load
    checkpoint files you created yourself.
    """
    keys = (
        'X_train', 'y_train', 'X_test',
        'x1', 'x2', 'x3',
        'y_pred', 'y_sampled',
        'acq', 'next_point',
        'running_best', 'iteration',
    )
    # NpzFile reads arrays lazily from an open zip archive; read everything
    # inside the context manager so the file handle is closed afterwards.
    with np.load(file_name, allow_pickle=True) as ds:
        return tuple(ds[key] for key in keys)

def update_datapoints(X_new, y_new, X_train, y_train):
    """Append new measurements to the training set and return the enlarged arrays.

    X_new: (N_samples, N_dims) normalized
    y_new: (N_samples,)
    X_train: (N_train, N_dims) normalized points measured so far
    y_train: (N_train,) objective values measured so far

    Returns ``(X_train, y_train)`` with the new rows appended along axis 0.
    The inputs are not modified.

    NumPy is used (rather than jax.numpy) because ``np`` is what this cell
    imports and what checkpoints load as, so the training arrays keep the same
    array type on every iteration.
    """
    X_train = np.append(X_train, X_new, axis=0)
    y_train = np.append(y_train, y_new, axis=0)
    return X_train, y_train

import numpy as np
import matplotlib.pyplot as plt

def plot_3d_projections(
    field_flat,      # 1D array of length nP * nT * nF
    P_array,         # 1D array of length nP (physical values)
    T_array,         # 1D array of length nT (physical values)
    F_array,         # 1D array of length nF (physical values)
    title: str,
    X_train: np.ndarray = None,  # shape (n_samples,3) in normalized space
    y_train: np.ndarray = None,  # shape (n_samples,)
    deg_sgn: str = '°',
    vmin=None,
    vmax=None
):
    """
    Draw three side-by-side 2D maps of a field defined on the (P, T, F) grid.

    The flat field is reshaped to (nP, nT, nF) and averaged over the axis that
    is not shown in each panel (T vs P, F vs P, F vs T). When both X_train and
    y_train are given, the measured points are overlaid as markers colored by
    y_train, with one shared horizontal colorbar attached to the last panel.

    Reads these notebook globals: pressure_min/pressure_max, T_min/T_max and
    fluence_min/fluence_max (to de-normalize X_train), and alpha, beta, gamma,
    delta (shown in the figure title).

    Raises ValueError if field_flat does not hold nP * nT * nF values.
    """
    nP, nT, nF = len(P_array), len(T_array), len(F_array)
    values = np.asarray(field_flat)
    if values.size != nP * nT * nF:
        raise ValueError(f"field length {values.size} != {nP}*{nT}*{nF}")

    # Back to the 3D grid layout (P, T, F)
    volume = values.reshape(nP, nT, nF)

    font_title = 22
    font_label = 18
    font_tick = 14

    fig, axs = plt.subplots(1, 3, figsize=(18, 6), dpi=250, constrained_layout=True)

    pressure_label = 'log$_{10}$ O$_2$ Partial Pressure (log$_{10}$ Torr)'
    temperature_label = f'Deposition Temperature ({deg_sgn}C)'
    fluence_label = 'Fluence (J/cm$^2$)'

    # One entry per panel: x grid, y grid, projection (mean over the hidden axis),
    # x label, y label, panel title.
    panels = (
        (P_array, T_array, volume.mean(axis=2), pressure_label, temperature_label, f'{title} → T vs P'),
        (P_array, F_array, volume.mean(axis=1), pressure_label, fluence_label, f'{title} → F vs P'),
        (T_array, F_array, volume.mean(axis=0), temperature_label, fluence_label, f'{title} → F vs T'),
    )
    for ax, (x_grid, y_grid, projection, x_label, y_label, panel_title) in zip(axs, panels):
        ax.tick_params(axis='both', labelsize=12)
        # Transpose so rows follow the y grid and columns the x grid
        mesh = ax.pcolormesh(
            x_grid, y_grid, projection.T,
            shading='auto', cmap='nipy_spectral', vmin=vmin, vmax=vmax
        )
        ax.set_xlabel(x_label, fontsize=font_label)
        ax.set_ylabel(y_label, fontsize=font_label)
        ax.set_title(panel_title, fontsize=font_title)
        fig.colorbar(mesh, ax=ax, orientation='vertical')

    # Overlay the measured points, colored by their objective value
    if X_train is not None and y_train is not None:
        from mpl_toolkits.axes_grid1.inset_locator import inset_axes

        # De-normalize with the same bounds used for normalization (notebook globals),
        # not with the min/max of the plotted grids.
        P_train = inverse_normalize(X_train[:,0], pressure_min, pressure_max)
        T_train = inverse_normalize(X_train[:,1], T_min, T_max)
        F_train = inverse_normalize(X_train[:,2], fluence_min, fluence_max)
        y_vals = y_train.flatten()

        point_pairs = (
            (P_train, T_train),   # panel 0
            (P_train, F_train),   # panel 1
            (T_train, F_train),   # panel 2
        )
        markers = [
            ax.scatter(
                xs, ys,
                c=y_vals, cmap='Reds', edgecolor='k', s=200, alpha=0.8,
                vmin=0.3, vmax=3.1
            )
            for ax, (xs, ys) in zip(axs, point_pairs)
        ]

        # Small horizontal colorbar for the markers, anchored to the rightmost panel
        cbar_ax = inset_axes(
            axs[2],
            width="60%",
            height="5%",
            loc='upper left',
            bbox_to_anchor=(0.55, 0.3, 1, 1),  # (x, y, width, height)
            bbox_transform=axs[2].transAxes,
            borderpad=0,
        )
        cbar = fig.colorbar(markers[0], cax=cbar_ax, orientation='horizontal')
        cbar.set_label('Sample\nScores', fontsize=font_label)
        cbar.ax.tick_params(labelsize=font_tick)

    # Figure-level title: panel name, objective formula, and the current weights
    headline = (
        f"{title}: Min-Max Scaling\n"
        r"$y_{\mathrm{train}} = \alpha \cdot |c - 3.945| + \beta \cdot R_{RMS} + \gamma \cdot FWHM + \delta \cdot LaVO_{4}$" + "\n" +
        f"[α = {alpha}, β = {beta}, γ = {gamma}, δ = {delta}]"
    )
    fig.suptitle(headline, fontsize=16, fontweight='bold', y=1.17)
    plt.show()


# Set up the grid size

In [None]:
import numpy as np

# --- Pressure (same as before) ---
# Bounds are O2 pressures; the grid itself is stored as log10(pressure).
start, stop = 3e-8, 3e-4
num_points_per_decade = 10
# int() truncates, so this relies on log10(stop / start) not landing just below a whole number
total_decades = int(np.log10(stop / start))
total_points = total_decades * num_points_per_decade + 1

# log-space array (in decades) then take log to work in log-pressure
Pressure = np.log10(np.geomspace(start, stop, num=total_points))
pressure_min, pressure_max = Pressure.min(), Pressure.max()

# --- Temperature (same as before) ---
# np.arange excludes the stop value, so the last grid temperature is below T_max,
# while T_max itself is still the upper bound used for normalization.
T_min, T_max, T_stepsize = 500, 835, 20
Temperature = np.arange(T_min, T_max, T_stepsize, dtype=np.float32)

# --- Fluence  ---
# NOTE(review): with a non-integer step, whether np.arange includes a value at
# fluence_max depends on floating-point rounding — check the printed shape below.
fluence_min, fluence_max, fluence_stepsize = 0.8, 2.2, 0.1
Fluence = np.arange(fluence_min, fluence_max, fluence_stepsize, dtype=np.float32)

# --- Make normalized grids ---
# Each axis is scaled with its own (min, max) bounds; the same bounds are reused
# later to normalize measured points and to de-normalize suggestions.
P_norm = normalize(Pressure, pressure_min, pressure_max)
T_norm = normalize(Temperature, T_min, T_max)
F_norm = normalize(Fluence, fluence_min, fluence_max)

# 3D meshgrid
# indexing='ij' gives arrays of shape (nP, nT, nF), matching the reshape in plot_3d_projections
Pg, Tg, Fg = np.meshgrid(P_norm, T_norm, F_norm, indexing='ij')

# flatten each and stack into N×3 array
P_flat = Pg.reshape(-1, 1)
T_flat = Tg.reshape(-1, 1)
F_flat = Fg.reshape(-1, 1)

# Column order: [normalized log-pressure, normalized temperature, normalized fluence]
points_3d = np.hstack((P_flat, T_flat, F_flat))

print('3D parameter space has size:', points_3d.shape)
print('Pressure points:', Pressure.shape)
print('Temperature points:', Temperature.shape)
print('Fluence points:', Fluence.shape)
print('Back-transformed Pressure:', 10**(Pressure))


# Initialize the noise and kernel lengthscale prior distributions and visualize

In [None]:
import numpyro
from numpyro import distributions
from jax import random

# Define custom priors
lengthscale_prior_dist = distributions.LogNormal(-0.0, 1.0)
noise_prior_dist = distributions.HalfNormal(0.5)
#noise_prior_dist = distributions.LogNormal(0, 1)
#lengthscale_prior_dist = distributions.LogNormal(0, 1)
# NOTE(review): run_BO_step calls step_GP without passing these two priors, so
# step_GP receives None for both — confirm whether that is intended.

# Draw samples from each prior, for visual inspection only (see the histograms below).
# Each draw gets its own sub-key and its own site name, so the two sample sets are
# independent and correctly labeled.
rng_key = random.PRNGKey(0)
noise_key, length_key = random.split(rng_key)
noise_dist = numpyro.sample("noise", noise_prior_dist, rng_key=noise_key, sample_shape=(1,10000))
length_dist = numpyro.sample("k_length", lengthscale_prior_dist, rng_key=length_key, sample_shape=(1,10000))

#_ = plt.hist(length_dist,bins=500); plt.show()
#_ = plt.hist(noise_dist,bins=500); plt.show()

# Set up code for each GP step
Here, we use variational inference GP with a Matern kernel.

Currently, we're using the expected improvement (EI) acquisition function.

In [None]:
import jax.numpy as jnp
import jax
import gpax

# enable 64-bit precision
gpax.utils.enable_x64()

def step_GP(X_measured, y_measured, X_unmeasured,
            noise_prior_dist=None,
            lengthscale_prior_dist=None):
    """Run one GP pass: fit, predict over the grid, and score the acquisition.

    Parameters
    ----------
    X_measured : array of shape (n_measured, 3)
        Measured points. Callers in this notebook pass normalized
        (log-pressure, temperature, fluence) columns.
    y_measured : array of shape (n_measured,)
        Objective values at ``X_measured``.
    X_unmeasured : array of shape (n_grid, 3)
        Points at which to predict and evaluate the acquisition function.
    noise_prior_dist, lengthscale_prior_dist : optional
        Prior distributions forwarded unchanged to ``gpax.viGP``.

    Returns
    -------
    tuple
        ``(acquisition, y_pred, y_sampled, paras)``: the Expected Improvement
        values, the two outputs of ``predict_in_batches``, and the result of
        ``gp_model.get_samples()``.
    """
    # Two RNG keys: one for fitting, one shared by prediction and acquisition
    fit_key, predict_key = gpax.utils.get_keys()

    # 3-input GP with a Matern kernel; viGP is gpax's variational-inference GP
    model_options = dict(
        input_dim=3,
        kernel='Matern',
        noise_prior_dist=noise_prior_dist,
        lengthscale_prior_dist=lengthscale_prior_dist,
    )
    gp_model = gpax.viGP(**model_options)

    # Settings shared by prediction and acquisition
    posterior_options = dict(noiseless=False, jitter=1e-2)

    # Fit the model to the measured data
    print('Training Model.')
    gp_model.fit(fit_key, X_measured, y_measured, jitter=1e-2)

    # Predict at every candidate point
    print('Getting Model Predictions.')
    y_pred, y_sampled = gp_model.predict_in_batches(
        predict_key,
        X_unmeasured,
        **posterior_options
    )

    # Expected Improvement for a minimization problem (maximize=False);
    # penalty='delta' is applied with respect to the already-measured points.
    print('Calculating acquisition.')
    acquisition = gpax.acquisition.EI(
        predict_key,
        gp_model,
        X_unmeasured,
        maximize=False,
        recent_points=X_measured,
        penalty='delta',
        **posterior_options
    )

    # Fitted kernel/noise parameters (returned to the caller, not used here)
    print('Getting parameter samples.')
    paras = gp_model.get_samples()

    return acquisition, y_pred, y_sampled, paras


# Input initial seed points

Here, I load your initial data and save it to the "BO_initial_data.npz" file.

In [None]:
### --- Import all data here --- ###
import numpy as np

from google.colab import files

### --- Import all data here --- ###
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

# Load the Excel spreadsheet
file_path = '/content/LaVO3_PLD_Parameters_working_pressure_fixed_cleaned_up_no_10nm_no_LaVO4_to_B23.xlsx'  # Change this to your actual file path
#file_path = '/content/LaVO3_PLD_Parameters_cleaned_up_STO_no_10nm_SBH.xlsx'
sheet_name = 'Sheet1'         # Modify if your data is on a different sheet

# Read the Excel file into a DataFrame
df = pd.read_excel(file_path, sheet_name=sheet_name)

# Assume the unique sample identifiers are in a column called 'Sample ID' and so on
# Each variable below is one spreadsheet column (a pandas Series); the header text must
# match the spreadsheet exactly. Within this cell only o2_pressure, substrate_temp,
# fluence, c_lattice_parameter, roughness, fwhm_rocking_curve and integrated_intensity
# are used further.

sample_ids = df['Sample ID']
dates = df['Date (YYMMDD)']
substrates = df['Substrate']
fluence = df['Fluence (J/cm2)']
spot_size = df['Spot Size (mmxmm)']
o2_flow = df['O2 Flow (sccm)']
o2_pressure = df['O2 Pressure (Torr)']
working_pressure = df['Working pressure (Torr)']
base_pressure = df['Base Pressure (Torr)']
target = df['Target (1")']
number_shots = df['Number Shots']
substrate_temp = df['Substrate Temp. (C) (based on average of temperature measured near substrate)']
rep_rate = df['Rep Rate (Hz)']
xrd = df['XRD']
xrr = df['XRR']
thickness_la_vo3 = df['Thickness (nm) (LaVO3 .cif, XRR)']
thickness_la_vo4 = df['Thickness (nm) (LaVO4, XRR)']
afm = df['AFM']
roughness = df['Roughness (nm) (standard deviation of height: 3x3 um, 2 Hz, 512 sam/l, B25 sub only = 0.147 )']
thickness_fringes = df['Thickness - fitting fringes (nm)']
c_lattice_parameter = df['C Lattice Parameter from Fringes (Å) (or 2Tw  peak position*)']
number_unit_cells = df['Number Unit Cells from Fringes']
thickness_per_shot = df['Thickness /shot from Fringes (nm/shot)']
rocking_curve = df['Rocking Curve']
fwhm_rocking_curve = df['FWHM (deg) (Gaussian fitting of rocking curve: B25: sub only, 1 twin=0.0050 )']
narrow_scan = df['Narrow Scan (43-51, 005, 2)']
integrated_intensity = df['Integrated LaVO4 Intensity: 48-51, normalized, x10^6 [B25 sub only= 4.7 (250520)]']
rsm = df['RSM']

# Raw (un-normalized) seed points, one row per sample.
# Column order: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
seed_points_raw = np.column_stack((o2_pressure, substrate_temp, fluence))


### --- Setup objective function --- ###
# Scalar objective per sample: a weighted sum of four measured quantities, using the
# global weights alpha, beta, gamma, delta. The BO loop minimizes this value.

# Reference c lattice parameter that the films are compared against
lvo_c_lit = 3.945

lattice_parameter_mismatch = np.abs(c_lattice_parameter - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch   # lattice-parameter mismatch term
y_2 = beta * (roughness)                   # roughness term
y_3 = gamma * (fwhm_rocking_curve)         # rocking-curve FWHM term
y_4 = delta * (integrated_intensity)       # integrated LaVO4 intensity term
y_train = (y_1 + y_2 + y_3 + y_4)

# Normalize the seed points into [0,1] space
# (same bounds as the search grid; points outside the bounds fall outside [0,1])
seed_points = np.zeros_like(seed_points_raw, dtype=np.float32)
# pressure: we normalize log-pressure
seed_points[:, 0] = normalize(np.log10(seed_points_raw[:, 0]), pressure_min, pressure_max)
# temperature:
seed_points[:, 1] = normalize(seed_points_raw[:, 1], T_min, T_max)
# fluence: normalize using your fluence bounds
seed_points[:, 2] = normalize(seed_points_raw[:, 2], fluence_min, fluence_max)

X_train = seed_points

# X_test should now be your full 3D grid, e.g. `points_3d` from before
X_test = points_3d

# Save the initial data, now including x3=Fluence
# np.savez appends ".npz", so this writes "BO_initial_data.npz" in the working directory.
# Predictions, acquisition and next point do not exist yet and are stored as None.
# running_best starts at the best (lowest) seed objective, ignoring NaNs, so a later
# sample is only reported as a new best when it actually beats the seed data.
file_name = "BO_initial_data"
save_data(
    file_name,
    X_train, y_train,
    X_test,
    x1=Pressure,
    x2=Temperature,
    x3=Fluence,
    y_pred=None,
    y_sampled=None,
    acq=None,
    next_point=None,
    running_best=float(np.nanmin(y_train)),
    iteration=0
)

# Diagnostics: print each objective term, the array shapes, and sanity checks on y_train
print('y_1, peak position:', y_1)
print('y_2, roughness:', y_2)
print('y_3, fwhm_rocking_curve:', y_3)
print('y_4, integrated_intensity:', y_4)
print('y_train:', y_train)
print()
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("Raw seed points:\n", seed_points_raw)
# A NaN in any of the four measured columns makes y_train NaN for that sample
print("Any NaNs:", np.isnan(y_train).any())
print("Any infs:", np.isinf(y_train).any())
print("y_train range:", y_train.min(), "to", y_train.max())
nan_mask = np.isnan(y_train)

# Show indices of NaNs
nan_indices = np.where(nan_mask)[0]
print("Indices with NaN values in y_train:", nan_indices)


# GP-BO Loop with EI acquisition

In [None]:
def run_BO_step(previous_checkpoint_filename, X_new_unnormalized=None, y_new=None):
    """Run one Bayesian-optimization step and write a new checkpoint.

    Loads the previous checkpoint, optionally appends newly measured samples,
    refits the GP on all data, picks the grid point with the highest
    acquisition value, saves ``BO_checkpoint_<iteration>.npz``, prints the
    suggested/best conditions and plots the GP outputs.

    Parameters
    ----------
    previous_checkpoint_filename : str
        Path of the ``.npz`` file to resume from.
    X_new_unnormalized : array-like of shape (n_new, 3), optional
        New samples in physical units, columns
        [O2 pressure (not log), temperature, fluence].
    y_new : array-like of shape (n_new,), optional
        Objective values of the new samples. New data is only added (and the
        iteration counter only advanced) when both arguments are given.

    Returns
    -------
    str
        File name of the checkpoint written by this step.

    Notes
    -----
    Relies on the notebook globals ``pressure_min``/``pressure_max``,
    ``T_min``/``T_max`` and ``fluence_min``/``fluence_max`` for
    (de)normalization.
    """
    # 1) Load your 3D data at the current step
    (X_train, y_train, X_test, x1, x2, x3,
     y_pred, y_sampled, acq, next_point,
     running_best, iteration) = load_data(previous_checkpoint_filename)
    # Checkpoints store the counter as a 0-d array; use a plain int from here on
    iteration = int(iteration)

    ##########------Update dataset with new value----###########
    if (X_new_unnormalized is not None and y_new is not None):
        iteration += 1
        print('Updating dataset.')
        X_new_unnormalized = np.atleast_2d(np.asarray(X_new_unnormalized, dtype=float))
        y_new = np.atleast_1d(y_new)
        # normalize the new data points in each of the 3 dims
        X_new = np.zeros_like(X_new_unnormalized, dtype=np.float32)
        X_new[:, 0] = normalize(np.log10(X_new_unnormalized[:, 0]), pressure_min, pressure_max)
        X_new[:, 1] = normalize(X_new_unnormalized[:, 1], T_min, T_max)
        X_new[:, 2] = normalize(X_new_unnormalized[:, 2], fluence_min, fluence_max)
        print(X_train.shape,y_train.shape)
        print(X_new.shape, y_new.shape)
        X_train, y_train = update_datapoints(X_new, y_new, X_train, y_train)

        # update running best if improved (lower is better); taking the minimum
        # keeps this valid when several samples are added at once
        best_new = np.min(y_new)
        if best_new < running_best:
            print('Updating best data point.')
            running_best = best_new

    print(f"#######---Exploration step {iteration}---#######")

    # 2) Fit GP and compute acquisition on the full 3D grid
    # NOTE(review): no custom priors are passed here, so step_GP receives None for
    # both prior arguments — confirm this is intended.
    acq, y_pred, y_sampled, _params = step_GP(X_train, y_train, X_test)

    # 3) Select the next sampling point (highest acquisition value) and record it
    #    so the checkpoint stores this step's suggestion, not the previous one
    next_idx = acq.argmax()
    X_next = X_test[next_idx].reshape(1, -1)
    next_point = X_next

    # 4) Current predicted best (lowest predicted objective)
    best_idx = y_pred.argmin()
    X_pred_best = X_test[best_idx].reshape(1, -1)

    # 5) Optional stopping criterion
    if acq.max() < 0.01:
        print('%%%%%%%%%%%%%%% SUGGESTED STOPPING POINT %%%%%%%%%%%%%%%')

    # 6) Save checkpoint (now including x3)
    result_file_path = f'BO_checkpoint_{iteration}.npz'
    save_data(
        result_file_path,
        X_train, y_train, X_test,
        x1, x2, x3,
        y_pred, y_sampled, acq,
        next_point, running_best,
        iteration=iteration
    )

    # 7) Denormalize next & best points (pressure comes back as log10, hence 10**)
    next_P = inverse_normalize(X_next[0, 0], pressure_min, pressure_max)
    next_T = inverse_normalize(X_next[0, 1], T_min, T_max)
    next_F = inverse_normalize(X_next[0, 2], fluence_min, fluence_max)

    print(
        '%%%%%%%%% Next experiment %%%%%%%%%\n'
        f'Pressure = {10**(next_P):.2e}, '
        f'Temperature = {next_T:.0f}, '
        f'Fluence = {next_F:.2e}'
    )

    pred_best_P = inverse_normalize(X_pred_best[0, 0], pressure_min, pressure_max)
    pred_best_T = inverse_normalize(X_pred_best[0, 1], T_min, T_max)
    pred_best_F = inverse_normalize(X_pred_best[0, 2], fluence_min, fluence_max)

    print(
        '%%%%%%%%% Current Predicted Best %%%%%%%%%\n'
        f'Pressure = {10**(pred_best_P):.2e}, '
        f'Temperature = {pred_best_T:.0f}, '
        f'Fluence = {pred_best_F:.2e}'
    )

    # 8) Current best measured point
    best_meas_idx = np.argmin(y_train.flatten())
    meas_best_P = inverse_normalize(X_train[best_meas_idx, 0], pressure_min, pressure_max)
    meas_best_T = inverse_normalize(X_train[best_meas_idx, 1], T_min, T_max)
    meas_best_F = inverse_normalize(X_train[best_meas_idx, 2], fluence_min, fluence_max)
    meas_best_y = y_train.flatten()[best_meas_idx]

    print(
        '%%%%%%%%% Current Best Measured %%%%%%%%%\n'
        f'Pressure = {10**(meas_best_P):.2e}, '
        f'Temperature = {meas_best_T:.0f}, '
        f'Fluence = {meas_best_F:.2e}, '
        f'y = {meas_best_y:.4f}'
    )

    # 9) Plot the 3D results (color limits are fixed per plot)
    plot_3d_projections(
        y_pred, x1, x2, x3,
        title='GP Mean',
        X_train=X_train,
        y_train=y_train,
        vmin=0.5,
        vmax=2.5
    )

    plot_3d_projections(
        y_sampled, x1, x2, x3,
        title='GP Variance',
        X_train=X_train,
        y_train=y_train,
        vmin=0.2,
        vmax=1.2
    )

    plot_3d_projections(
        acq, x1, x2, x3,
        title='Acquisition',
        X_train=X_train,
        y_train=y_train,
        vmin=0,
        vmax=0.4
    )
    return result_file_path


# Below is the optimization loop to run

In [None]:
# Load the data at the current step
# for the very first training, you need to load the initial data (seed points)
# No new sample is passed here, so the iteration counter stays at 0 and the
# result is written to BO_checkpoint_0.npz.

previous_checkpoint_filename = '/content/BO_initial_data.npz'

results_file = run_BO_step(previous_checkpoint_filename) # give the GP the previous data
print('The results of this step are saved to a file named:', results_file)


# Now, go grow a sample with the suggested conditions, do XRD and AFM analyses, and enter it below

In [None]:
### B24
# Adds this growth to the data in BO_checkpoint_0.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[1.8E-5, 715, 1.0]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.945 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.395
y_3 = gamma * 0.0361
y_4 = delta * 10.2
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_0.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

# Go grow another sample

In [None]:
### B25
# Adds this growth to the data in BO_checkpoint_1.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[2.1E-5, 720, 1.0]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.941 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.973
y_3 = gamma * 0.0387
y_4 = delta * 9.3
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_1.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

# Go grow another sample

In [None]:
### B26
# Adds this growth to the data in BO_checkpoint_2.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[2.3E-7, 510, 0.8]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(4.041 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.303
y_3 = gamma * 0.0607
y_4 = delta * 8.6
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_2.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

# And so on...

In [None]:
### B27
# Adds this growth to the data in BO_checkpoint_3.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[4.4e-5, 815, 0.8]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.927 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 7.49
y_3 = gamma * 0.25
y_4 = delta * 33.7
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_3.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B28
# Adds this growth to the data in BO_checkpoint_4.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[2.2e-7, 535, 1.6]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(4.110 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.321
y_3 = gamma * 0.0432
y_4 = delta * 5.1
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_4.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B29
# Adds this growth to the data in BO_checkpoint_5.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[3.6e-6, 655, 1.5]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.992 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.197
y_3 = gamma * 0.0463
y_4 = delta * 5.5
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_5.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B30
# Adds this growth to the data in BO_checkpoint_6.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[4e-7, 795, 0.83]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.965 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.183
y_3 = gamma * 0.0387
y_4 = delta * 5.1
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_6.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B31
# Adds this growth to the data in BO_checkpoint_7.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[6e-6, 795, 0.83]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.935 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.237
y_3 = gamma * 0.0557
y_4 = delta * 8.5
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_7.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B32
# Adds this growth to the data in BO_checkpoint_8.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[1e-6, 800, 0.83]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.9475 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.217
y_3 = gamma * 0.04
y_4 = delta * 6.1
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_8.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### A83
# Adds this growth to the data in BO_checkpoint_9.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[8.7e-5, 535, 1.8]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.98 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.397
y_3 = gamma * 0.0532
y_4 = delta * 4.6
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_9.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B33
# Adds this growth to the data in BO_checkpoint_10.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[9.7e-5, 525, 0.83]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.943 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.182
y_3 = gamma * 0.0498
y_4 = delta * 4.6
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_10.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B34
# Adds this growth to the data in BO_checkpoint_11.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[2.1e-4, 510, 0.8]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.924 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 2.31
y_3 = gamma * 0.1766
y_4 = delta * 4.1
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_11.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B35
# Adds this growth to the data in BO_checkpoint_12.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[1.4e-4, 635, 1.3]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.943 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 4.79
y_3 = gamma * 0.2548
y_4 = delta * 15.8
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_12.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B36
# Adds this growth to the data in BO_checkpoint_13.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[3.1e-5, 510, 0.8]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.9565 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.177
y_3 = gamma * 0.0518
y_4 = delta * 7.0
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_13.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B37
# Adds this growth to the data in BO_checkpoint_14.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[1.4e-6, 805, 0.8]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.9475 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.234
y_3 = gamma * 0.047
y_4 = delta * 6.2
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_14.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)

In [None]:
### B38
# Adds this growth to the data in BO_checkpoint_15.npz; run_BO_step then writes the next checkpoint.

# One row per sample: [O2 pressure (Torr), substrate temperature (C), fluence (J/cm2)]
X_new = np.array([[1.1e-6, 815, 0.8]]) # These are the conditions that you actually used for the growth

# Objective terms, weighted like the seed-point y_train:
# y_1: |c lattice parameter - lvo_c_lit|, y_2: roughness,
# y_3: rocking-curve FWHM, y_4: integrated LaVO4 intensity
lattice_parameter_mismatch = np.abs(3.947 - lvo_c_lit)
y_1 = alpha * lattice_parameter_mismatch
y_2 = beta * 0.225
y_3 = gamma * 0.0417
y_4 = delta * 6.2
y_new = (y_1 + y_2 + y_3 + y_4)


y_new = np.array([y_new])
#Shape needs to be (1,) not (1,1)
print('initial shape',y_new.shape)

previous_checkpoint_filename = '/content/BO_checkpoint_15.npz'

results_file = run_BO_step(previous_checkpoint_filename, X_new, y_new)# Give the GP the previous data, and the new data
print('The results of this step are saved to a file named:', results_file)