# Batch BO Runs for Multiple Fingerprint Methods

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utils import SmilesToDescriptors, plot_results

# ─── BayBE imports (one-time) ─────────────────────────────────────────────
from baybe.parameters import CustomDiscreteParameter
from baybe.searchspace import SearchSpace
from baybe.targets import NumericalTarget
from baybe.objectives import SingleTargetObjective
from baybe import Campaign
from baybe.simulation import simulate_scenarios
from baybe.recommenders import (
    BotorchRecommender, FPSRecommender,
    TwoPhaseMetaRecommender, RandomRecommender
)
# ──────────────────────────────────────────────────────────────────────────

# ─── Dataset pools ────────────────────────────────────────────────────────
# Table that supplies both the candidate components and the measured
# objective values used later as the simulation lookup.
df = pd.read_csv("dataset.csv")

# Distinct entries of each component column, in order of first appearance.
reactant_1_pool, catalyst_1_pool, catalyst_2_pool = (
    pd.unique(df[column]).tolist()
    for column in ('reactant_1', 'catalyst_1', 'catalyst_2')
)

# Name of the df column holding the value to optimise.
objective_col = "ee_R"
# ──────────────────────────────────────────────────────────────────────────

# ─── Global BO settings ───────────────────────────────────────────────────
# These three values are forwarded to simulate_scenarios as batch_size,
# n_doe_iterations and n_mc_iterations respectively.
N_MC_ITERATIONS  = 5
N_DOE_ITERATIONS = 100
BATCH_SIZE       = 1

# Baseline strategies used on their own for the whole campaign.
random_recommender = RandomRecommender()
fps_recommender    = FPSRecommender()

# Two-phase strategy: starts with its own FPSRecommender instance and hands
# over to BotorchRecommender; switch_after=10 sets the hand-over point
# (NOTE(review): counted in experiments per the BayBE docs — confirm for the
# installed version).
botorch_fps_recommender = TwoPhaseMetaRecommender(
    initial_recommender=FPSRecommender(),
    recommender=BotorchRecommender(),
    switch_after=10,
)
# ──────────────────────────────────────────────────────────────────────────

# ─── Helper function to run one BO for a given descriptor method ──────────
def run_bo_with_method(method_name):
    """Simulate the three benchmark campaigns for one descriptor method.

    The module-level component pools are encoded with ``SmilesToDescriptors``
    using ``method_name``, combined into a product search space, and one
    campaign per recommender ("Botorch FPS", "FPS", "Random") is replayed
    against the measured values in ``df`` via ``simulate_scenarios``.

    Args:
        method_name: Descriptor/fingerprint method identifier, forwarded
            unchanged as ``method=`` to ``SmilesToDescriptors``.

    Returns:
        A ``(results, lookup)`` tuple: the object returned by
        ``simulate_scenarios`` and the lookup DataFrame (the three component
        columns plus ``objective_col``) that the simulation was run against.
    """
    # Component name -> pool of candidates; the keys double as the BayBE
    # parameter names and as the lookup column names.
    pools = {
        "reactant_1": reactant_1_pool,
        "catalyst_1": catalyst_1_pool,
        "catalyst_2": catalyst_2_pool,
    }

    # Step 1: one descriptor table per pool for the requested method.
    descriptor_tables = {
        component: SmilesToDescriptors(candidates, method=method_name)
        for component, candidates in pools.items()
    }

    # Step 2: expose each table as a custom discrete parameter and take the
    # full product of the three parameters as the search space.
    parameters = [
        CustomDiscreteParameter(name=component, data=table, decorrelate=0.7)
        for component, table in descriptor_tables.items()
    ]
    searchspace = SearchSpace.from_product(parameters)

    # Step 3: a single maximisation objective on the measured column.
    objective = SingleTargetObjective(
        target=NumericalTarget(name=objective_col, mode="MAX")
    )

    # Step 4: one campaign per strategy; all share the same search space
    # and objective so only the recommender differs.
    strategies = {
        "Botorch FPS": botorch_fps_recommender,
        "FPS": fps_recommender,
        "Random": random_recommender,
    }
    campaigns = {
        label: Campaign(searchspace, objective, recommender)
        for label, recommender in strategies.items()
    }

    # Step 5: measured outcomes that the simulation reads instead of
    # running real experiments.
    lookup = df[[*pools, objective_col]]

    # Step 6: replay every campaign against the lookup table.
    results = simulate_scenarios(
        campaigns,
        lookup,
        batch_size=BATCH_SIZE,
        n_doe_iterations=N_DOE_ITERATIONS,
        n_mc_iterations=N_MC_ITERATIONS,
    )
    return results, lookup
# ──────────────────────────────────────────────────────────────────────────


In [None]:
# Descriptor methods to benchmark; each name is handed to run_bo_with_method.
METHODS = [
    'Morgan',
    'MACCS',
    'RDK',
    'EState',
    'OneHot',
]

for method in METHODS:
    print(f"\n=== Running with {method} fingerprint ===")

    # Simulate all campaigns for this descriptor method.
    results, lookup = run_bo_with_method(method)

    # Hand the simulation output to the project plotting helper; the file
    # name is passed through as figure_name (presumably where the figure is
    # saved — confirm in utils.plot_results).
    plot_results(
        results,
        lookup,
        figure_name=f"{method}_bo.png",
        nbr_controls=2,
    )

    # The title is added to the current pyplot figure after plotting.
    plt.suptitle(
        f"Bayesian Optimisation – {method}",
        fontsize=16,
        y=1.02,
    )
    plt.show()
