In [1]:
'''
Steps to get EDBOplus working:
1)Create and activate a conda environment
2)Download and navigate to the edbopaper directory in a terminal window (or Anaconda prompt window) and pip install -r requirements.txt
3)Put this .ipynb file in ANY directory (doesn't matter where it is).
4)Set the path to edbopaper in sys.path.append below
5)...
6)Profit.

'''

import sys
import numpy as np

# Placeholder path: replace it with the location of your local edbopaper checkout.
# This has to run before the `from plus.optimizer import EDBOplus` cell below;
# presumably the `plus` package is resolved from this directory -- TODO confirm.
sys.path.append('/path/to/edbopaper/')

In [2]:
from plus.optimizer import EDBOplus

In [3]:
# Set up the reaction components (one discrete grid of candidate values each).
# np.arange(start, stop, step) yields values from start up to, but not
# including, stop; .tolist() turns each array into a plain Python list.
components = {
    # Discrete temperature grid: 30, 33, ..., 138
    'temperature': np.arange(30, 140, 3).tolist(),
    # Discrete time grid: 1, 3, ..., 43
    'time': np.arange(1, 45, 2).tolist(),
    # Discrete stoichiometry grid: 0.33, 0.355, ..., 0.655.
    # np.arange with a fractional step can leave floating-point noise in the
    # values, so each one is rounded to 3 decimals to keep the grid clean.
    'stoichiometry': [round(v, 3) for v in np.arange(0.33, 0.66, 0.025).tolist()],
}

In [4]:
#Need to generate the reaction scope first; it is built from the `components` grids defined above.
scope = EDBOplus.generate_reaction_scope(components=components)

Generating reaction scope...


In [5]:
#Optional preview (not needed for the workflow): draw the initial batch of 3 experiments (batch=3) with seeded sampling (seed=42).
#NOTE(review): `_init_sampling` is underscore-prefixed, i.e. private by convention, so it may change between EDBOplus versions -- confirm before relying on it.
sampling = EDBOplus._init_sampling(scope,batch=3,sampling_method='seed',seed=42)

Using seeded random sampling (seed=42).
Creating a priority list using random sampling: seed


In [6]:
#Show the rows flagged priority == 1, i.e. the initial batch. Run these experiments if desired.
#Per the original note, EDBOplus.run proposes the same 3 experiments (batch=3), since it uses the same initial sampling method (unless changed from random seed sampling).
print(sampling[sampling["priority"]==1])

        temperature  time  stoichiometry  priority
4900             33    17           0.49         1
48963            63    32           0.57         1
106173          103     6           0.45         1


In [7]:
#Initialize the EDBOplus object; per the original note it will store predicted means, variances, etc.
bo = EDBOplus()

# Keyword arguments that are unpacked into bo.run(**args).
# NOTE(review): the saved outputs in this notebook show a single `response`
# column, so these objective names may be out of date -- confirm against reaction.csv.
args = dict(
    objectives=['production rate (g/hr)', 'yield'],
    objective_mode=['max', 'max'],
    batch=3,
    seed=42,
)


In [31]:
#Run the optimizer with the keyword arguments collected in `args`.
#Per the original note this operates on the scope data in reaction.csv; the filename is not
#passed here, so it is presumably the library default -- TODO confirm.
bo.run(**args)

Using hyperparameters optimized for continuous variables.
Using hyperparameters optimized for continuous variables.
Using hyperparameters optimized for continuous variables.


Unnamed: 0,temperature,time,stoichiometry,priority,response
65994,113,44,0.33,1.0,PENDING
77768,121,44,0.33,1.0,PENDING
54293,138,44,0.65,1.0,PENDING
7,103,16,0.33,0.0,PENDING
20,103,15,0.62,0.0,PENDING
...,...,...,...,...,...
0,139,44,0.65,-1.0,2.6
159714,63,32,0.57,-1.0,0.6
1,138,44,0.33,-1.0,2.5
2,30,44,0.33,-1.0,0.3


In [16]:
#Update reaction.csv with response values for highest priority samples (or update other priority reactions if you're a rebel and like to do what you want)

#Then re-run the bo.run(**args) cell above to retrain the model and suggest the next batch of experiments

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


In [13]:
# Load the reaction table and the prediction table that the following cells inspect.
# NOTE(review): both files are presumably written by bo.run() into the working directory -- confirm.
df = pd.read_csv('reaction.csv')
df2 = pd.read_csv('pred_reaction.csv')

# TODO: plot the Pareto front (nothing in this cell plots it yet)

In [4]:
# Preview both ends of the reaction table: the first ten rows, then the last ten.
print(df.head(10), df.tail(10), sep='\n')

   temperature  time  stoichiometry  priority response
0          113    44           0.33       1.0      2.8
1          121    44           0.33       1.0      2.9
2          138    44           0.65       1.0      2.7
3          103    16           0.33       0.0  PENDING
4          103    15           0.62       0.0  PENDING
5          103    15           0.60       0.0  PENDING
6          103    15           0.59       0.0  PENDING
7          103    15           0.53       0.0  PENDING
8          103    15           0.61       0.0  PENDING
9           85     1           0.33       0.0  PENDING
        temperature  time  stoichiometry  priority response
159710           66    30           0.39       0.0  PENDING
159711          139     1           0.65      -1.0     -1.4
159712          139     1           0.33      -1.0     -1.4
159713           33    17           0.49      -1.0      0.1
159714          139    44           0.33      -1.0      2.4
159715          139    44          

In [5]:
# First five rows of the prediction table (5 is the pandas default for head()).
print(df2.head(n=5))


   temperature  time  stoichiometry  priority response  \
0          113    44           0.33       1.0  PENDING   
1          138    44           0.65       1.0  PENDING   
2          121    44           0.33       1.0  PENDING   
3           48    15           0.53       0.0  PENDING   
4           30     1           0.51       0.0  PENDING   

   response_predicted_mean  response_predicted_variance  \
0                 1.844631                     0.894921   
1                 2.052948                     0.987047   
2                 1.904908                     0.900682   
3                -0.077786                     0.937294   
4                -0.816877                     1.160737   

   response_expected_improvement  
0                       0.099471  
1                       0.179225  
2                       0.113766  
3                       0.000588  
4                       0.000540  


In [6]:
# Last five rows of the prediction table (5 is the pandas default for tail()).
print(df2.tail(n=5))

        temperature  time  stoichiometry  priority response  \
159715          139    44           0.65      -1.0      2.6   
159716           63    32           0.57      -1.0      0.6   
159717          138    44           0.33      -1.0      2.5   
159718           30    44           0.33      -1.0      0.3   
159719          103     6           0.45      -1.0      0.9   

        response_predicted_mean  response_predicted_variance  \
159715                 2.059554                     0.988818   
159716                 0.903379                     0.894312   
159717                 2.026337                     0.924308   
159718                 1.159854                     1.020273   
159719                -0.375322                     0.890206   

        response_expected_improvement  
159715                       0.181755  
159716                       0.009960  
159717                       0.150739  
159718                       0.036472  
159719                       0.00009

In [37]:
# Keep only the experiments that already have a measured response and plot them.
# Rows whose response is the string 'PENDING' have not been run yet, so they are
# filtered out before the column is converted to numbers. The conversion is strict:
# any other non-numeric entry raises instead of being silently dropped.
is_pending = df['response'] == 'PENDING'
df = (
    df.loc[~is_pending]
      .dropna(axis=0)
      .assign(response=lambda frame: pd.to_numeric(frame['response']))
)
max_value = df['response'].max()

# x axis: a running index 0..n-1 over the remaining rows. It is computed here so
# the cell works on a fresh kernel instead of relying on `results_length`, which
# is only defined in a later cell.
experiment_index = list(range(len(df)))

fig = px.scatter(
    df,
    x=experiment_index,
    y='response',
    # NOTE(review): the +2 offset keeps marker sizes positive only while every
    # response is above -2; plotly presumably rejects negative sizes -- confirm.
    size=df['response'] + 2,
    color='response',
    hover_data=['temperature', 'time', 'stoichiometry'],
)
fig.update_layout(xaxis_title='completed experiment index')
fig.show()
print('Highest response value is {}'.format(max_value))

Highest response value is 2.9


In [35]:
# Running index 0..len(df)-1: one entry per row currently in `df`.
# The previous `len(df['priority']==-1)` measured the length of the boolean mask,
# which always equals len(df), so the comparison never filtered anything. The
# value is unchanged here; only the misleading comparison and manual loop are gone.
# NOTE(review): if the intent was to count only priority == -1 rows, that would be
# `(df['priority'] == -1).sum()` -- confirm before changing the length.
results_length = list(range(len(df)))

print(results_length)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
