In [None]:
from __future__ import annotations

import sys
from pathlib import Path
# Put "../src" on the import path. The entry is relative, so it resolves
# against the kernel's current working directory, and it has to be appended
# before the two project imports that follow.
sys.path.append(str(Path("..") / "src"))

# Project-local modules; presumably located in ../src — confirm.
from search_space_init import SearchSpaceConfig, init_search_space
from sdl import run_sdl_bo

# NOTE(review): time, pickle, datetime and pandas are not referenced by the
# code cells visible in this notebook; kept in case other cells rely on them.
import time
import pickle
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path  # repeats the import made earlier in this cell
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Describe the search space: which tables to read, which CSV targets to use
# for the results, and which mixture sizes to include.
cfg = SearchSpaceConfig(
    # -- source tables --
    # required
    compounds_csv="../data/compounds.csv",
    # optional hot-start
    processed_seed_csv="../data/train_data/train_data.csv",
    # optional (set a path if the features were already computed)
    cmp_features_csv=None,
    # -- out_* targets --
    out_df_csv="../data/processed.csv",
    out_X_csv="../data/featurized_processed.csv",
    out_y_csv="../data/labels.csv",
    # -- mixture composition --
    max_components=3,
    include_binary=True,
    include_ternary=True,
)

# init_search_space returns three objects, unpacked here as features, labels
# and the full table (going by the names used in this cell).
X, y, df = init_search_space(cfg)

  df_all = pd.concat([seed, space], ignore_index=True)
100%|██████████| 26/26 [00:00<00:00, 184.30it/s]


### Bayesian optimization

In [2]:
# -----------------------
# Constants / inputs
# -----------------------
# Compound names handed to run_sdl_bo as its second argument (26 entries).
# NOTE(review): presumably these must match the names used in
# ../data/compounds.csv exactly (spelling, capitalisation) — confirm.
CHEMICALS = [
    "Dimethyl Malonate",
    "Cyclohexyl Methacrylate",
    "Cyclohexyl Acetate",
    "Octyl Octanoate",
    "Propylene Glycol Propyl Ether",
    "1,4-Dichlorobutane",
    "1-Butanol",
    "Diethyl Malonate",
    "Ethyl Laurate",
    "Ethyl 4-Methylbenzoate",
    "2,6-Dimethyl-4-Heptanone",
    "Ethyl Acetoacetate",
    "Ethyl Levulinate",
    "Isoamyl Isovalerate",
    "Cuminaldehyde",
    "Cyclohexyl Butyrate",
    "2-Butanol",
    "2-Pentanol",
    "3-Pentanol",
    "N,N Diethyl Hydroxylamine",
    "3-Methyl 2-Butanol",
    "Y-Nonanoic Lactone",
    "2-Nonanone",
    "Diethylene Glycol Monoethyl Ether Acetate",
    "Acetophenone",
    "Phenyl Acetate",
]

# NOTE(review): the two constants below are not referenced by any code cell
# visible in this notebook — confirm they are still needed.
# File name of the inventory workbook (relative path, no directory part).
INVENTORY_XLSX = "BO-Inventory.xlsx"
NL_THRESHOLD = 0.02  # linearity flag threshold


# -----------------------
# Args as dataclass
# -----------------------
@dataclass
class BOArgs:
    """Settings bundle passed to ``run_sdl_bo``.

    The five ``*_path`` fields are required; every other field has a default.
    Plain strings supplied for the path fields are converted to
    ``pathlib.Path`` right after construction, so the declared field types
    hold whether the caller passes ``str`` or ``Path``.

    NOTE(review): the meaning of each option is decided by ``run_sdl_bo``,
    which is not visible here. Field remarks below that go beyond the name
    and default value are assumptions to confirm against that function.
    """

    # Required file locations.
    df_path: Path
    compounds_path: Path
    cmp_features_path: Path
    df_featurized_path: Path
    labels_path: Path

    label: str = "value"  # presumably the name of the target column — confirm
    nb_iterations: int = 200
    which_acquisition: str = "EI"  # default (overridden per-iteration in wrapper)
    store_explore_exploit_terms: bool = True

    # Feature-selection options (going by the FS_/features naming — confirm).
    FABO: bool = True
    FS_method: str = "mRMR"
    min_features: int = 5
    max_features: int = 10

    kernel_function: str = "default"
    teta: float = 0.3
    n_batch: int = 1
    syn_effect_AF: bool = True
    AF_batch_size: int = 10000

    search_space_init_stat: bool = False
    n_data_gathering: int = 50

    def __post_init__(self) -> None:
        """Convert path fields given as ``str`` into ``Path`` objects."""
        for field_name in (
            "df_path",
            "compounds_path",
            "cmp_features_path",
            "df_featurized_path",
            "labels_path",
        ):
            value = getattr(self, field_name)
            # Only plain strings are converted. Path instances, None and any
            # other value are left exactly as passed, so callers that worked
            # before this hook existed see no change.
            if isinstance(value, str):
                setattr(self, field_name, Path(value))


# Run configuration, spelled out in full. Every non-path value below equals
# the corresponding BOArgs default; they are listed so they can be tuned in
# one place. Group headings follow the field names only.
args = BOArgs(
    # -- data files --
    df_path=Path("../data/processed.csv"),
    df_featurized_path=Path("../data/featurized_processed.csv"),
    labels_path=Path("../data/labels.csv"),
    compounds_path=Path("../data/compounds.csv"),
    cmp_features_path=Path("../data/cmp_features.csv"),
    # -- run length / start-up --
    nb_iterations=200,
    n_data_gathering=50,
    search_space_init_stat=False,
    # -- feature selection --
    FABO=True,
    FS_method="mRMR",
    min_features=5,
    max_features=10,
    # -- kernel / AF settings --
    kernel_function="default",
    teta=0.3,
    n_batch=1,
    syn_effect_AF=True,
    AF_batch_size=10_000,
)

# Launch the optimisation loop over the compound list defined in this cell.
run_sdl_bo(args, CHEMICALS)



++++++++++++++++++++++++
Iteration 1/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 39.62it/s]
100%|██████████| 6/6 [00:00<00:00, 25.84it/s]
100%|██████████| 7/7 [00:00<00:00, 33.76it/s]
100%|██████████| 8/8 [00:00<00:00, 30.53it/s]
100%|██████████| 9/9 [00:00<00:00, 25.38it/s]
100%|██████████| 10/10 [00:00<00:00, 34.97it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/1_1306.pkl

++++++++++++++++++++++++
Iteration 2/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 23.92it/s]
100%|██████████| 6/6 [00:00<00:00, 22.71it/s]
100%|██████████| 7/7 [00:00<00:00, 19.77it/s]
100%|██████████| 8/8 [00:00<00:00, 20.37it/s]
100%|██████████| 9/9 [00:00<00:00, 20.81it/s]
100%|██████████| 10/10 [00:00<00:00, 21.57it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/2_1307.pkl

++++++++++++++++++++++++
Iteration 3/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 22.19it/s]
100%|██████████| 6/6 [00:00<00:00, 19.99it/s]
100%|██████████| 7/7 [00:00<00:00, 23.00it/s]
100%|██████████| 8/8 [00:00<00:00, 22.56it/s]
100%|██████████| 9/9 [00:00<00:00, 22.63it/s]
100%|██████████| 10/10 [00:00<00:00, 20.29it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/3_1307.pkl

++++++++++++++++++++++++
Iteration 4/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 24.94it/s]
100%|██████████| 6/6 [00:00<00:00, 21.82it/s]
100%|██████████| 7/7 [00:00<00:00, 21.07it/s]
100%|██████████| 8/8 [00:00<00:00, 19.52it/s]
100%|██████████| 9/9 [00:00<00:00, 17.19it/s]
100%|██████████| 10/10 [00:00<00:00, 22.63it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/4_1307.pkl

++++++++++++++++++++++++
Iteration 5/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 21.45it/s]
100%|██████████| 6/6 [00:00<00:00, 21.51it/s]
100%|██████████| 7/7 [00:00<00:00, 22.04it/s]
100%|██████████| 8/8 [00:00<00:00, 20.92it/s]
100%|██████████| 9/9 [00:00<00:00, 21.16it/s]
100%|██████████| 10/10 [00:00<00:00, 18.78it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/5_1307.pkl

++++++++++++++++++++++++
Iteration 6/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 20.37it/s]
100%|██████████| 6/6 [00:00<00:00, 20.60it/s]
100%|██████████| 7/7 [00:00<00:00, 19.28it/s]
100%|██████████| 8/8 [00:00<00:00, 19.63it/s]
100%|██████████| 9/9 [00:00<00:00, 19.49it/s]
100%|██████████| 10/10 [00:00<00:00, 19.14it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/6_1307.pkl

++++++++++++++++++++++++
Iteration 7/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 23.40it/s]
100%|██████████| 6/6 [00:00<00:00, 19.33it/s]
100%|██████████| 7/7 [00:00<00:00, 22.48it/s]
100%|██████████| 8/8 [00:00<00:00, 19.34it/s]
100%|██████████| 9/9 [00:00<00:00, 19.59it/s]
100%|██████████| 10/10 [00:00<00:00, 19.68it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/7_1308.pkl

++++++++++++++++++++++++
Iteration 8/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 22.35it/s]
100%|██████████| 6/6 [00:00<00:00, 21.51it/s]
100%|██████████| 7/7 [00:00<00:00, 18.31it/s]
100%|██████████| 8/8 [00:00<00:00, 19.61it/s]
100%|██████████| 9/9 [00:00<00:00, 20.56it/s]
100%|██████████| 10/10 [00:00<00:00, 19.18it/s]


Saved top_features to: FAMBO/results/features/2026-02-24/8_1308.pkl

++++++++++++++++++++++++
Iteration 9/200
Mode: data gathering (max sigma)


100%|██████████| 5/5 [00:00<00:00, 23.35it/s]
100%|██████████| 6/6 [00:00<00:00, 21.60it/s]
100%|██████████| 7/7 [00:00<00:00, 18.67it/s]


KeyboardInterrupt: 