In [1]:
import json, math
import random
from pathlib import Path

import numpy as np
import pandas as pd
import torch

from baybe import Campaign
from baybe.exceptions import NotEnoughPointsLeftError
from baybe.objectives import SingleTargetObjective
from baybe.parameters import SubstanceParameter
from baybe.searchspace import SearchSpace
from baybe.targets import NumericalTarget

In [2]:
# Number of leading dataset rows that are each used, one at a time, as the single
# initial measurement of an independent optimisation run. Every dataset must have
# at least this many rows, otherwise the positional lookup below raises IndexError.
N_START_POINTS = 60
# Seed applied to the `random`, NumPy and PyTorch RNGs at the start of every run.
SEED = 42

for target_reaction in ['Reaction_CO_1.5h', 'Reaction_CO_biphenyl', 'Reaction_CO_ortho', 'Reaction_CO_Cl', 'Reaction_CS', 'Reaction_CN', 'Reaction_2+2',
                        'Reaction_CF3', 'Reaction_CH2CF3', 'Reaction_CH2F', 'Reaction_Cy', 'Reaction_SCF3',
                        'Reaction_OCH2F', 'Reaction_P', 'Reaction_Si']:

    print(f'Running BOsearch for {target_reaction}...')

    file_path = f"../../data/data_BO/data_60_{target_reaction}.csv"
    df = pd.read_csv(file_path)

    # The lookups below need exactly one row per ID. Previously a duplicated ID only
    # surfaced later as an opaque error inside evaluate()/convert(); fail fast instead.
    if df["ID"].duplicated().any():
        raise ValueError(
            f"Duplicate IDs found in {file_path}; each ID must map to exactly one row."
        )

    id_smiles_dict = (df.drop_duplicates("ID", keep="last")
        .set_index("ID")["smiles"].to_dict())

    # Build both lookups once per dataset instead of re-indexing / re-scanning the
    # frame on every call.
    yield_by_id = df.set_index("ID")["yield"]
    # .tolist() yields plain Python scalars, so the indices written to the result
    # CSV keep the same representation as the former `.item()` call produced.
    index_by_id = dict(zip(df["ID"].tolist(), df.index.tolist()))

    def evaluate(ID):
        """Return the value of the ``yield`` column for ``ID`` (``None`` if the ID is unknown)."""
        return yield_by_id.get(ID)

    def convert(ID):
        """Return the row label of ``ID`` in ``df``; raises ``KeyError`` for an unknown ID."""
        return index_by_id[ID]

    target = NumericalTarget(name="Yield")
    objective = SingleTargetObjective(target=target)

    parameters = [
        SubstanceParameter(
            name="ID",
            data=id_smiles_dict,
            encoding="MORDRED",
        ),
    ]

    searchspace = SearchSpace.from_product(parameters)

    # Column position of "ID"; loop-invariant, so computed once per dataset.
    ci = df.columns.get_loc("ID")

    rows = []
    for row_idx in range(N_START_POINTS):
        # Re-seed every RNG so each run starts from the same random state and
        # differs only in its initial measurement.
        random.seed(SEED)
        np.random.seed(SEED)
        torch.manual_seed(SEED)

        idx_list, yield_list = [], []
        campaign = Campaign(searchspace, objective)

        # Seed the campaign with the measurement of the row_idx-th compound.
        first_id = df.iat[row_idx, ci]
        y = evaluate(first_id)
        idx_list.append(convert(first_id))
        # NOTE(review): the logged reward is truncated to int while the campaign
        # receives the float value -- confirm the truncation is intended.
        yield_list.append(int(y))
        df_ini = pd.DataFrame({
            "ID": [first_id],
            "Yield": [float(y)]
        })
        campaign.add_measurements(df_ini)

        # Request one recommendation at a time until the campaign reports that it
        # has no candidates left.
        while True:
            try:
                df_rec = campaign.recommend(batch_size=1)
            except NotEnoughPointsLeftError:
                break

            # Select the ID by column name rather than by position so the lookup
            # does not depend on the column order of the recommendation frame.
            rec_id = df_rec["ID"].iat[0]
            y = evaluate(rec_id)

            df_rec["Yield"] = [float(y)]
            idx_list.append(convert(rec_id))
            yield_list.append(int(y))
            campaign.add_measurements(df_rec)
        rows.append({"selected_indices_per_step": idx_list, "actual_rewards_per_step": yield_list})

    df_out = pd.DataFrame.from_records(rows)
    out_path = Path(f"../results/BOsearch_mordred/BOsearch_{target_reaction}_result.csv")
    # Create the results directory up front so a long run is not lost to a missing folder.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df_out.to_csv(out_path, index=False)

Running BOsearch for Reaction_CO_1.5h...
Running BOsearch for Reaction_CO_biphenyl...
Running BOsearch for Reaction_CO_ortho...
Running BOsearch for Reaction_CO_Cl...
Running BOsearch for Reaction_CS...
Running BOsearch for Reaction_CN...
Running BOsearch for Reaction_2+2...




Running BOsearch for Reaction_CF3...
Running BOsearch for Reaction_CH2CF3...
Running BOsearch for Reaction_CH2F...




Running BOsearch for Reaction_Cy...




Running BOsearch for Reaction_SCF3...
Running BOsearch for Reaction_OCH2F...
Running BOsearch for Reaction_P...
Running BOsearch for Reaction_Si...


