In [1]:
from edbo.plus.optimizer_botorch import EDBOplus
import pandas as pd

In [2]:
reaction_components = {
    'solvent': ['THF', 'Toluene', 'DMSO'],
    'T': [-10, 0, 10, 25],
    'concentration': [0.1, 0.2, 1.0]
}

In [3]:
EDBOplus().generate_reaction_scope(
    components=reaction_components, 
    filename='my_optimization.csv',
    check_overwrite=False
)

Generating reaction scope...


Unnamed: 0,solvent,T,concentration
0,THF,-10,0.1
1,THF,-10,0.2
2,THF,-10,1.0
3,THF,0,0.1
4,THF,0,0.2
5,THF,0,1.0
6,THF,10,0.1
7,THF,10,0.2
8,THF,10,1.0
9,THF,25,0.1


In [4]:
import pandas as pd
df_scope = pd.read_csv('my_optimization.csv')  # Load csv file.

In [5]:
df_scope

Unnamed: 0,solvent,T,concentration
0,THF,-10,0.1
1,THF,-10,0.2
2,THF,-10,1.0
3,THF,0,0.1
4,THF,0,0.2
5,THF,0,1.0
6,THF,10,0.1
7,THF,10,0.2
8,THF,10,1.0
9,THF,25,0.1


In [6]:
n_combinations = len(df_scope)
print(f"Your reaction scope has {n_combinations} combinations.")

Your reaction scope has 36 combinations.


In [7]:
EDBOplus().run(
    filename='my_optimization.csv',  # Previously generated scope.
    objectives=['yield', 'ee', 'side_product'],  # Objectives to be optimized.
    objective_mode=['max', 'max', 'min'],  # Maximize yield and ee but minimize side_product.
    batch=3,  # Number of experiments in parallel that we want to perform in this round.
    columns_features='all', # features to be included in the model.
    init_sampling_method='cvtsampling'  # initialization method.
)

The following columns are categorical and will be encoded using One-Hot-Encoding: ['solvent']
Sampling type:  selection 


Number of unique samples returned by sampling algorithm: 3
Creating a priority list using random sampling: cvtsampling


Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority
32,DMSO,10,1.0,PENDING,PENDING,PENDING,1
8,THF,10,1.0,PENDING,PENDING,PENDING,1
19,Toluene,10,0.2,PENDING,PENDING,PENDING,1
0,THF,-10,0.1,PENDING,PENDING,PENDING,0
26,DMSO,-10,1.0,PENDING,PENDING,PENDING,0
21,Toluene,25,0.1,PENDING,PENDING,PENDING,0
22,Toluene,25,0.2,PENDING,PENDING,PENDING,0
23,Toluene,25,1.0,PENDING,PENDING,PENDING,0
24,DMSO,-10,0.1,PENDING,PENDING,PENDING,0
25,DMSO,-10,0.2,PENDING,PENDING,PENDING,0


In [8]:
df_edbo = pd.read_csv('my_optimization.csv')
df_edbo.head(5)

Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority
0,DMSO,10,1.0,PENDING,PENDING,PENDING,1
1,THF,10,1.0,PENDING,PENDING,PENDING,1
2,Toluene,10,0.2,PENDING,PENDING,PENDING,1
3,THF,-10,0.1,PENDING,PENDING,PENDING,0
4,DMSO,-10,1.0,PENDING,PENDING,PENDING,0


In [9]:
df_edbo = pd.read_csv('my_optimization.csv')
df_edbo.head(5)

Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority
0,DMSO,10,1.0,PENDING,PENDING,PENDING,1
1,THF,10,1.0,PENDING,PENDING,PENDING,1
2,Toluene,10,0.2,PENDING,PENDING,PENDING,1
3,THF,-10,0.1,PENDING,PENDING,PENDING,0
4,DMSO,-10,1.0,PENDING,PENDING,PENDING,0


In [10]:
df_edbo.loc[0, 'yield'] = 20.5
df_edbo.loc[0, 'ee'] = 40
df_edbo.loc[0, 'side_product'] = 0.1

In [11]:
df_edbo.head(5)

Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority
0,DMSO,10,1.0,20.5,40,0.1,1
1,THF,10,1.0,PENDING,PENDING,PENDING,1
2,Toluene,10,0.2,PENDING,PENDING,PENDING,1
3,THF,-10,0.1,PENDING,PENDING,PENDING,0
4,DMSO,-10,1.0,PENDING,PENDING,PENDING,0


In [12]:
df_edbo.loc[1, 'yield'] = 50.3
df_edbo.loc[1, 'ee'] = 10
df_edbo.loc[1, 'side_product'] = 0.2

In [13]:
df_edbo.head(5)

Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority
0,DMSO,10,1.0,20.5,40,0.1,1
1,THF,10,1.0,50.3,10,0.2,1
2,Toluene,10,0.2,PENDING,PENDING,PENDING,1
3,THF,-10,0.1,PENDING,PENDING,PENDING,0
4,DMSO,-10,1.0,PENDING,PENDING,PENDING,0


In [14]:
df_edbo.to_csv('my_optimization_round0.csv', index=False)

In [9]:
df_edbo_round0 = pd.read_csv("../../../data/re1.csv")
df_edbo_round0.head(5)

Unnamed: 0.1,Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority
0,32,DMSO,10,1.0,20.5,40,0.1,1
1,8,THF,10,1.0,50.3,10,0.2,1
2,19,Toluene,10,0.2,PENDING,PENDING,PENDING,1
3,0,THF,-10,0.1,PENDING,PENDING,PENDING,0
4,26,DMSO,-10,1.0,PENDING,PENDING,PENDING,0


In [11]:
d = EDBOplus().run(
    data_frame= df_edbo_round0,
    filename='my_optimization_round0.csv',  # Previous scope (including observations).
    objectives=['yield', 'ee', 'side_product'],  # Objectives to be optimized.
    objective_mode=['max', 'max', 'min'],  # Maximize yield and ee but minimize side_product.
    batch=3,  # Number of experiments in parallel that we want to perform in this round.
    columns_features='all', # features to be included in the model.
    init_sampling_method='cvtsampling'  # initialization method.
)

The following columns are categorical and will be encoded using One-Hot-Encoding: ['solvent']
Using EHVI acquisition function.
Using hyperparameters optimized for continuous variables.
Using hyperparameters optimized for continuous variables.
Using hyperparameters optimized for continuous variables.
Number of QMC samples using SobolQMCNormalSampler sampler: 512
Acquisition function optimized.
Predictions obtained and expected improvement obtained.


In [12]:
d

Unnamed: 0.1,Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority
35,35,DMSO,25,1.0,PENDING,PENDING,PENDING,1.0
10,28,DMSO,0,0.2,PENDING,PENDING,PENDING,1.0
19,1,THF,-10,0.2,PENDING,PENDING,PENDING,1.0
16,34,DMSO,25,0.2,PENDING,PENDING,PENDING,0.0
15,33,DMSO,25,0.1,PENDING,PENDING,PENDING,0.0
14,31,DMSO,10,0.2,PENDING,PENDING,PENDING,0.0
13,30,DMSO,10,0.1,PENDING,PENDING,PENDING,0.0
12,29,DMSO,0,1.0,PENDING,PENDING,PENDING,0.0
11,27,DMSO,0,0.1,PENDING,PENDING,PENDING,0.0
4,26,DMSO,-10,1.0,PENDING,PENDING,PENDING,0.0


In [17]:
df_predictions_round0 = pd.read_csv('pred_my_optimization_round0.csv')
df_predictions_round0.style.background_gradient(subset=['priority'], cmap='plasma')

Unnamed: 0,solvent,T,concentration,yield,ee,side_product,priority,yield_predicted_mean,yield_predicted_variance,yield_expected_improvement,ee_predicted_mean,ee_predicted_variance,ee_expected_improvement,side_product_predicted_mean,side_product_predicted_variance,side_product_expected_improvement
0,THF,-10,0.2,PENDING,PENDING,PENDING,1.0,35.401515,106.348013,77.612274,24.998475,107.061758,78.131722,0.150005,0.356873,0.427037
1,DMSO,25,1.0,PENDING,PENDING,PENDING,1.0,35.388955,106.281195,77.553162,25.011119,106.994491,78.084151,0.149963,0.356648,0.426842
2,DMSO,0,0.2,PENDING,PENDING,PENDING,1.0,35.327674,104.666452,76.239101,25.072812,105.368911,76.819453,0.149757,0.35123,0.422648
3,Toluene,25,1.0,PENDING,PENDING,PENDING,0.0,35.389097,106.282733,77.554453,25.010976,106.99604,78.085316,0.149963,0.356653,0.426846
4,Toluene,25,0.2,PENDING,PENDING,PENDING,0.0,35.389187,106.283698,77.555264,25.010886,106.997011,78.086046,0.149964,0.356657,0.426849
5,Toluene,25,0.1,PENDING,PENDING,PENDING,0.0,35.389211,106.283951,77.555477,25.010862,106.997267,78.086238,0.149964,0.356658,0.426849
6,Toluene,10,1.0,PENDING,PENDING,PENDING,0.0,34.34167,23.938856,12.172579,26.065433,24.099519,13.05938,0.146449,0.080332,0.250805
7,Toluene,10,0.2,PENDING,PENDING,PENDING,0.0,34.383269,27.152537,14.630186,26.023555,27.334769,15.530646,0.146588,0.091116,0.25399
8,Toluene,10,0.1,PENDING,PENDING,PENDING,0.0,34.393614,27.974466,15.263226,26.01314,28.162214,16.166053,0.146623,0.093874,0.254927
9,Toluene,0,1.0,PENDING,PENDING,PENDING,0.0,35.328145,104.684494,76.253682,25.072337,105.387074,76.833685,0.149759,0.35129,0.422695
