# In [1]:
import os
import random
import warnings

import pandas as pd
from edbo.bro import BO_express
from edbo.utils import Data
warnings.simplefilter(action='ignore', category=FutureWarning)

# In [2]:
# Accumulates one single-row result frame per starting point.
output_list = []

# Seed Python's RNG once up front.
random.seed(42)

# Load the reaction table from disk.
file_path = "../../../../data/data_61/data_BO_61_CO_biphenyl/data_Reaction_CO_biphenyl.csv"
df = pd.read_csv(file_path)

# Everything except the bookkeeping columns and the 'yield' column is
# handed to EDBO as the descriptor matrix; 'yield' is kept separately.
non_descriptor_columns = ['Name', 'ID', 'yield']
features = df.drop(non_descriptor_columns, axis=1)
rewards = df['yield']

# Wrap the descriptors in EDBO's Data container and build the two
# dictionaries that BO_express is later constructed with.
OPS_desc = Data(features)
desc = dict(OPS=OPS_desc.data)
components = dict(OPS='<defined in desc>')
# --- Benchmark loop -------------------------------------------------------
# For every starting row, seed a BO run with that row plus one randomly
# drawn second row, then let the optimiser propose one experiment at a time
# until it proposes row STOP_INDEX (or MAX_BO_STEPS proposals were made).
# The visited indices and their yields are stored as one row in output_list.

N_STARTS = 60       # first_index runs over rows 0 .. N_STARTS - 1
MAX_BO_STEPS = 59   # upper bound on BO proposals per starting point
STOP_INDEX = 60     # the search ends as soon as this row is proposed
                    # (presumably the target/optimum row - TODO confirm)
DROPPED_COLUMNS = ["HOMO", "LUMO", "E_S1", "f_S1", "E_T1", "dEST", "dDM"]

# All intermediate CSVs are exchanged with EDBO through this directory;
# create it so the first to_csv() does not fail on a fresh checkout.
os.makedirs('temp_file', exist_ok=True)

for first_index in range(N_STARTS):
    # Draw the second initial sample reproducibly: the RNG is re-seeded per
    # start, and the candidates are an explicitly ordered list so the draw
    # does not depend on set iteration order.
    random.seed(first_index)
    candidates = [idx for idx in range(len(OPS_desc.data)) if idx != first_index]
    second_index = random.choice(candidates)
    start_indices = [first_index, second_index]
    # NOTE(review): second_index may equal STOP_INDEX; in that case the stop
    # row is already part of the initial results and the break below may
    # never trigger - confirm whether such starts should be excluded.

    # Initial results table: descriptors + yield for the two start rows,
    # with the original row numbers in a leading 'Index' column.
    initial_samples = pd.concat(
        [
            desc['OPS'].iloc[start_indices].reset_index(drop=True),
            rewards.iloc[start_indices].reset_index(drop=True),
        ],
        axis=1,
    )
    initial_samples.insert(0, 'Index', start_indices)
    initial_samples = initial_samples.drop(columns=DROPPED_COLUMNS, errors='ignore')
    initial_samples.to_csv('temp_file/temp_initial.csv', index=False)

    bo = BO_express(reaction_components=components,
                    descriptor_matrices=desc,
                    acquisition_function='EI',
                    init_method='rand',
                    batch_size=1,
                    target='yield')
    bo.add_results('temp_file/temp_initial.csv')

    proposed_indices = []
    for i in range(MAX_BO_STEPS):
        bo.run()
        proposal_path = f'temp_file/selected_OPS{i}.csv'
        bo.export_proposed(proposal_path)
        selected_OPS = pd.read_csv(proposal_path)

        # Map the proposed experiment back to its row in the source table.
        smiles_index = selected_OPS.loc[0, 'smiles_index']
        bo_index = int(df.query("smiles == @smiles_index").index[0])

        # The proposal is recorded before the stop check, so the stop row
        # itself is the last entry of the trajectory.
        proposed_indices.append(bo_index)
        if bo_index == STOP_INDEX:
            break

        # Feed the observed yield for the proposal back into the optimiser.
        selected_OPS.at[0, 'yield'] = rewards.iloc[bo_index]
        selected_OPS.to_csv(f'temp_file/temp{i}.csv', index=False)
        bo.add_results(f'temp_file/temp{i}.csv')

    # Full trajectory: the two start rows followed by every BO proposal.
    # The yields are read straight from `rewards`, and .tolist() yields
    # plain Python scalars so the lists serialise cleanly into the CSV.
    index_list = start_indices + proposed_indices
    yield_list = rewards.iloc[index_list].tolist()
    output = pd.DataFrame({"selected_indices_per_step": [index_list],
                           "actual_rewards_per_step": [yield_list]})
    output_list.append(output)

# Stack the per-start result rows into one table (fresh 0..n-1 index) and
# write it to the results directory without the index column.
results_path = '../../../results_OoS/BOsearch_EI/BOsearch_Reaction_CO_biphenyl_result.csv'
final_output = pd.concat(output_list, ignore_index=True)
final_output.to_csv(results_path, index=False)