In [1]:
'''
Steps to get EDBOplus working:
1)Create and activate a conda environment
2)Download and navigate to the edbopaper directory in a terminal window (or Anaconda prompt window) and pip install -r requirements.txt
3)Put this .ipynb file in ANY directory (doesn't matter where it is).
4)Set the path to edbopaper in sys.path.append below
5)...
6)Profit.

'''

import sys
import numpy as np

sys.path.append('/path/to/edbopaper/')

In [2]:
from plus.optimizer import EDBOplus

In [3]:
#setting up reaction components
#np.arange creates a list of numbers from (start,stop,stepsize)
components = {
              'temperature':np.arange(30,140,1).tolist(),   # Discrete grid of concentrations
              'time': np.arange(1,45,1).tolist(),
              'stoichiometry': np.arange(0.33,0.66,0.01).tolist()}

In [4]:
#need to generage the data scope first
scope = EDBOplus.generate_reaction_scope(components=components)

Generating reaction scope...


In [5]:
#this line is not needed, but wanted to just randomly sample 5 experiments
sampling = EDBOplus._init_sampling(scope,batch=3,sampling_method='seed',seed=42)

Using seeded random sampling (seed=42).
Creating a priority list using random sampling: seed


In [6]:
#Run these experiments from initial sampling if desired, but this will be the same 5 experiments when EDBOplus.run is called since it uses the same initial sampling method (unless changed from random seed sampling)
print(sampling[sampling["priority"]==1])

        temperature  time  stoichiometry  priority
4900             33    17           0.49         1
48963            63    32           0.57         1
106173          103     6           0.45         1


In [7]:
#Initialize EDBOplus class that will store predicted means, variances, etc.
bo = EDBOplus()

args = {'objectives': ['response'], 'objective_mode': ['max'], 'batch': 3, 'seed': 42}


In [31]:
#Run this cell on scope data (csv file called reaction.csv)
bo.run(**args)

Using hyperparameters optimized for continuous variables.
Using hyperparameters optimized for continuous variables.
Using hyperparameters optimized for continuous variables.


Unnamed: 0,temperature,time,stoichiometry,priority,response
65994,113,44,0.33,1.0,PENDING
77768,121,44,0.33,1.0,PENDING
54293,138,44,0.65,1.0,PENDING
7,103,16,0.33,0.0,PENDING
20,103,15,0.62,0.0,PENDING
...,...,...,...,...,...
0,139,44,0.65,-1.0,2.6
159714,63,32,0.57,-1.0,0.6
1,138,44,0.33,-1.0,2.5
2,30,44,0.33,-1.0,0.3


In [9]:
#Update reaction.csv with response values for highest priority samples (or update other priority reactions if you're a rebel and like to do what you want)

#Re-run the above cell to train the model and suggest next experiments 

In [12]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


In [13]:
df = pd.read_csv('reaction.csv')
df2 = pd.read_csv('pred_reaction.csv')


In [23]:
print(df.head(10))
print(df.tail(10))

   temperature  time  stoichiometry  priority response
0          139    44           0.65       1.0  PENDING
1          138    44           0.33       1.0  PENDING
2           30    44           0.33       1.0  PENDING
3          103    15           0.53       0.0  PENDING
4          103    15           0.61       0.0  PENDING
5           85     1           0.33       0.0  PENDING
6          103    15           0.56       0.0  PENDING
7          103    16           0.33       0.0  PENDING
8          103    15           0.65       0.0  PENDING
9          103    15           0.58       0.0  PENDING
        temperature  time  stoichiometry  priority response
159710           66    30           0.39       0.0  PENDING
159711           66    30           0.42       0.0  PENDING
159712           66    30           0.33       0.0  PENDING
159713           66    30           0.43       0.0  PENDING
159714           63    32           0.57      -1.0      0.6
159715           33    17          

In [29]:
print(df2.head())


   temperature  time  stoichiometry  priority response  \
0          139    44           0.65       1.0  PENDING   
1           30    44           0.33       1.0  PENDING   
2          138    44           0.33       1.0  PENDING   
3           30     1           0.37       0.0  PENDING   
4           30     1           0.46       0.0  PENDING   

   response_predicted_mean  response_predicted_variance  \
0                 1.196786                     0.876853   
1                 1.435548                     0.835949   
2                 1.452742                     0.735898   
3                -0.346499                     0.690460   
4                -0.406981                     0.642466   

   response_expected_improvement  
0                   3.417285e-02  
1                   5.152763e-02  
2                   3.442527e-02  
3                   5.442872e-06  
4                   8.398001e-07  


In [30]:
print(df2.tail())

        temperature  time  stoichiometry  priority response  \
159715           33    17           0.49      -1.0      0.1   
159716          139    44           0.33      -1.0      2.4   
159717          139     1           0.33      -1.0     -1.4   
159718          139     1           0.65      -1.0     -1.4   
159719          103     6           0.45      -1.0      0.9   

        response_predicted_mean  response_predicted_variance  \
159715                 0.226712                     0.498180   
159716                 1.452418                     0.737831   
159717                -0.408814                     0.561973   
159718                -0.618544                     0.583633   
159719                -0.261562                     0.422420   

        response_expected_improvement  
159715                   6.720973e-07  
159716                   3.473042e-02  
159717                   3.034647e-08  
159718                   1.223619e-08  
159719                   9.482680e-1

In [33]:
fig = px.scatter(df, y='response', color='response',size=df['response']+2,hover_data=['response'])

TypeError: can only concatenate str (not "int") to str