In [1]:
from edbo.plus.optimizer_botorch import EDBOplus
import pandas as pd

In [2]:
# Discrete search space for the optimization: each key becomes a column of the
# reaction scope and each list holds the levels that factor may take.
reaction_components = {
    # Categorical reagent choices.
    "Azodicarboxylates": [
        "DIAD", "DBAD", "DBnAD", "DCAD", "ADDP", "ADDM",
    ],
    "Phosphines": [
        "PPh3", "Ph2PPy", "DAP", "Tris-FPP", "TPTP", "PCy3", "TFP",
    ],
    # Numeric levels (units are not stated here; presumably degrees C -- TODO confirm).
    "Temperatures": [5, 20, 35],
    # Stoichiometry levels (presumably equivalents -- TODO confirm for "N-Boc Pyrrolidine").
    "Phosphine Equiv": [1.05, 1.2, 1.4],
    # NOTE(review): this key is spelled "Azadicarboxylate" while the reagent key above
    # is "Azodicarboxylates"; it is kept as-is because it is used as a column name.
    "Azadicarboxylate Equiv.": [1.05, 1.2, 1.4],
    "N-Boc Pyrrolidine": [0.95, 1.05, 1.2],
    # Categorical solvent choices.
    "Solvents": ["2-MeTHF", "MeCN", "PhMe", "EtOAc"],
}


# reaction_components = {
#     'solvent': ['THF', 'Toluene', 'DMSO'],
#     'T': [-10, 0, 10, 25],
#     'concentration': [0.1, 0.2, 1.0]
# }

# reaction_components = {
#     'Ligand' : ['L1', 'L2', 'L3', 'L4', 'L5', 'L6'],
#     'Base' : ['Na2CO3', 'K3PO4', 'Cs2CO3', 'NaOtBu'],
#     'solvent': ['THF', 'Toluene', 'dioxane', 'DME'],
#     'T': [60, 80, 100],
# }

In [3]:
# Echo the search-space dictionary so the factor names and levels can be checked by eye.
reaction_components

{'Azodicarboxylates': ['DIAD', 'DBAD', 'DBnAD', 'DCAD', 'ADDP', 'ADDM'],
 'Phosphines': ['PPh3', 'Ph2PPy', 'DAP', 'Tris-FPP', 'TPTP', 'PCy3', 'TFP'],
 'Temperatures': [5, 20, 35],
 'Phosphine Equiv': [1.05, 1.2, 1.4],
 'Azadicarboxylate Equiv.': [1.05, 1.2, 1.4],
 'N-Boc Pyrrolidine': [0.95, 1.05, 1.2],
 'Solvents': ['2-MeTHF', 'MeCN', 'PhMe', 'EtOAc']}

In [4]:
# Build the reaction scope table from the component levels defined in
# `reaction_components`.
# NOTE(review): `filename` presumably names the CSV the scope is saved to, and
# `check_overwrite=False` presumably skips the overwrite prompt -- confirm
# against the EDBO+ version in use.
scope_df = EDBOplus().generate_reaction_scope(
    filename="my_optimization.csv",
    check_overwrite=False,
    components=reaction_components,
)

# Display the generated scope (pandas truncates long frames automatically).
scope_df

Generating reaction scope...


Unnamed: 0,Azodicarboxylates,Phosphines,Temperatures,Phosphine Equiv,Azadicarboxylate Equiv.,N-Boc Pyrrolidine,Solvents
0,DIAD,PPh3,5,1.05,1.05,0.95,2-MeTHF
1,DIAD,PPh3,5,1.05,1.05,0.95,MeCN
2,DIAD,PPh3,5,1.05,1.05,0.95,PhMe
3,DIAD,PPh3,5,1.05,1.05,0.95,EtOAc
4,DIAD,PPh3,5,1.05,1.05,1.05,2-MeTHF
...,...,...,...,...,...,...,...
13603,ADDM,TFP,35,1.40,1.40,1.05,EtOAc
13604,ADDM,TFP,35,1.40,1.40,1.20,2-MeTHF
13605,ADDM,TFP,35,1.40,1.40,1.20,MeCN
13606,ADDM,TFP,35,1.40,1.40,1.20,PhMe


In [5]:
# Save a standalone copy of the scope; the header row is written from the
# component names and the DataFrame index is left out.
scope_df.to_csv(
    "scope.csv",
    mode="w",
    header=list(reaction_components),
    index=False,
)

In [6]:
# Preview the first five rows of the scope.
scope_df.head(n=5)

Unnamed: 0,Azodicarboxylates,Phosphines,Temperatures,Phosphine Equiv,Azadicarboxylate Equiv.,N-Boc Pyrrolidine,Solvents
0,DIAD,PPh3,5,1.05,1.05,0.95,2-MeTHF
1,DIAD,PPh3,5,1.05,1.05,0.95,MeCN
2,DIAD,PPh3,5,1.05,1.05,0.95,PhMe
3,DIAD,PPh3,5,1.05,1.05,0.95,EtOAc
4,DIAD,PPh3,5,1.05,1.05,1.05,2-MeTHF


In [7]:
# Round 1: request the first batch of experiments from the scope.
# Both objectives ('yield' and 'purity') are maximized.
# (The variable name's spelling is kept because later cells reference it.)
recomendations = EDBOplus().run(
    data_frame=scope_df,                 # scope built in the previous step
    filename="my_optimization.csv",      # previously generated scope file
    objectives=["yield", "purity"],      # objectives to be optimized
    objective_mode=["max", "max"],       # maximize yield and maximize purity
    columns_features="all",              # features to be included in the model
    init_sampling_method="cvtsampling",  # initialization method
    batch=96,                            # experiments to run in parallel this round
)

# recomendations = EDBOplus().run(
#     data_frame = scope_df,
#     filename='my_optimization.csv',  # Previously generated scope.
#     objectives=['yield'],  # Objectives to be optimized.
#     objective_mode=['max'],  # Maximize yield (single-objective variant).
#     batch=4,  # Number of experiments in parallel that we want to perform in this round.
#     columns_features='all', # features to be included in the model.
#     init_sampling_method='cvtsampling'  # initialization method.
# )

The following columns are categorical and will be encoded using One-Hot-Encoding: ['Azodicarboxylates', 'Phosphines', 'Solvents']
Sampling type:  selection 

ideas
<idaes.surrogate.pysmo.sampling.CVTSampling object at 0x00000220F87BA550>

Number of unique samples returned by sampling algorithm: 88
Creating a priority list using random sampling: cvtsampling




In [8]:
# recomendations.reset_index(drop=True, inplace=True)

In [None]:
# Write the first-round recommendations to disk without the index column.
recomendations.to_csv("re1.csv", index=False)

In [None]:
# Preview the first five recommended experiments.
recomendations.head(n=5)

In [None]:
# Reload the saved first-round recommendations so results can be filled in.
re1 = pd.read_csv(filepath_or_buffer="./re1.csv")
re1.head(n=5)

In [None]:
# Enter the measured objectives for the experiments that have been run.
# Each entry is (row label in `re1`, yield, purity); every row is listed once.
# NOTE(review): these look like placeholder values -- replace with real
# measurements before running the next round.
round0_results = [
    (0, 20.5, 40),
    (1, 40.5, 50),
    (2, 60, 70),
]

for row_label, yield_value, purity_value in round0_results:
    # Scalar .loc assignments, one objective at a time, in the listed order.
    re1.loc[row_label, 'yield'] = yield_value
    re1.loc[row_label, 'purity'] = purity_value

In [None]:
# Check that the entered results appear in the first five rows.
re1.head(n=5)

In [None]:
# Save the table with the entered results as the round-0 data set (no index column).
re1.to_csv(path_or_buf="round0.csv", index=False)

In [None]:
# Load the round-0 data set back from disk and preview it.
round0 = pd.read_csv(filepath_or_buffer="./round0.csv")
round0.head(n=5)

In [None]:
# Round 2: request the next batch using the round-0 table.
# The objectives must match the result columns filled in for round 0
# ('yield' and 'purity') and the objectives used in the first run; asking for
# other objectives here would not use the entered measurements.
recomendations2 = EDBOplus().run(
    data_frame=round0,
    filename='my_optimization.csv',  # Previously generated scope.
    objectives=['yield', 'purity'],  # Objectives to be optimized (same as round 1).
    objective_mode=['max', 'max'],  # Maximize both yield and purity.
    batch=3,  # Number of experiments in parallel that we want to perform in this round.
    columns_features='all',  # Features to be included in the model.
    init_sampling_method='cvtsampling'  # Initialization method.
)

In [None]:
# Write the second-round recommendations to disk without the index column.
recomendations2.to_csv(path_or_buf="re2.csv", index=False)