In [12]:
'''
Steps to get EDBOplus working:
1) Create and activate a conda environment.
2) Download the edbopaper repository, navigate to its directory in a terminal window
   (or Anaconda prompt window) and run: pip install -r requirements.txt
3) Put this .ipynb file in ANY directory (doesn't matter where it is).
4) Tell this notebook where edbopaper lives: either set the EDBOPAPER_DIR environment
   variable before starting Jupyter, or edit the default path below.
5) Run the cells from top to bottom, fill in measured results in reaction.csv, and
   re-run the optimizer cell to get the next suggestions.
6) Profit.

'''

import os
import sys
import numpy as np

# Directory of the edbopaper checkout. The EDBOPAPER_DIR environment variable takes
# precedence so the notebook runs on other machines without editing; the original
# hard-coded location is kept as the fallback.
EDBO_PATH = os.environ.get('EDBOPAPER_DIR', '/home/sanjay/AFRL/git_edbo/edbopaper')

# Re-running this cell must not keep appending the same entry to sys.path.
if EDBO_PATH not in sys.path:
    sys.path.append(EDBO_PATH)

In [13]:
from plus.optimizer_botorch import EDBOplus

In [14]:
#Setting up the reaction components (the discrete grid of candidate values per variable).
#np.arange(start, stop, step) yields evenly spaced values from start up to, but NOT including, stop.
#With a non-integer step the generated values carry floating-point error, so the
#stoichiometry grid is rounded to 3 decimals to keep clean values (0.355, not 0.35500000000000004).
components = {
              'temperature': np.arange(30, 140, 3).tolist(),   # 30, 33, ..., 138
              'time': np.arange(1, 45, 2).tolist(),            # 1, 3, ..., 43
              'stoichiometry': np.round(np.arange(0.33, 0.66, 0.025), 3).tolist()}   # 0.33, 0.355, ..., 0.655

In [15]:
#The reaction scope has to be generated first, from the component grid defined above.
#The call is made on the EDBOplus class itself (no instance is needed at this point).
scope = EDBOplus.generate_reaction_scope(components=components)

Generating reaction scope...


In [16]:
#Create the optimizer object; per the original note, it stores the predicted means, variances, etc.
bo = EDBOplus()

#Keyword arguments handed to bo.run(): the two objective columns, the direction for
#each (both maximised), the batch size and a fixed seed.
args = dict(
    objectives=['production_rate_(g/hr)', 'yield'],
    objective_mode=['max', 'max'],
    batch=3,
    seed=42,
)


In [27]:
#Run the optimizer with the settings collected in args.
#Per the original note, this works on the scope data stored in the csv file reaction.csv.
#The table it displays has a priority column; in the output shown, rows with measured
#objective values carry priority -1.0 and unmeasured rows show PENDING.
bo.run(**args)

Using EHVI acquisition function.
Using hyperparameters optimized for continuous variables.
Using hyperparameters optimized for continuous variables.
Number of QMC samples: 128
Acquisition function optimized.
Predictions obtained and expected improvement obtained.


Unnamed: 0,temperature,time,stoichiometry,production_rate_(g/hr),yield,priority
30,138,41,0.330,PENDING,PENDING,1.0
670,132,37,0.405,PENDING,PENDING,1.0
2473,114,43,0.380,PENDING,PENDING,1.0
3,138,43,0.655,PENDING,PENDING,0.0
4,138,43,0.630,PENDING,PENDING,0.0
...,...,...,...,...,...,...
2,114,43,0.455,1.9,87,-1.0
11392,102,23,0.405,1.2,78,-1.0
11393,54,41,0.580,0.6,41,-1.0
11394,45,29,0.605,0.4,35,-1.0


In [18]:
#Update reaction.csv with the measured response values for the highest-priority samples (or fill in results for other-priority reactions if you're a rebel and ran those instead)

#Then re-run the bo.run(**args) cell above to retrain the model and suggest the next experiments

In [28]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


In [29]:
# Load the two CSV tables from the current working directory:
#   df  - reaction.csv: the scope table with the objective columns and a priority column
#   df2 - pred_reaction.csv: the same columns plus *_predicted_mean / *_predicted_variance columns
# NOTE(review): both files are presumably written by bo.run() - confirm.
df = pd.read_csv('reaction.csv')
df2 = pd.read_csv('pred_reaction.csv')



In [30]:
# Preview the first and last ten rows of the reaction.csv table.
print(df.head(10))
print(df.tail(10))

   temperature  time  stoichiometry production_rate_(g/hr)    yield  priority
0          138    41          0.330                PENDING  PENDING       1.0
1          132    37          0.405                PENDING  PENDING       1.0
2          114    43          0.380                PENDING  PENDING       1.0
3          138    43          0.655                PENDING  PENDING       0.0
4          138    43          0.630                PENDING  PENDING       0.0
5          138    43          0.605                PENDING  PENDING       0.0
6          138    43          0.580                PENDING  PENDING       0.0
7          138    43          0.555                PENDING  PENDING       0.0
8          138    43          0.530                PENDING  PENDING       0.0
9          138    43          0.505                PENDING  PENDING       0.0
       temperature  time  stoichiometry production_rate_(g/hr)    yield  \
11386           30     1          0.330                PENDING  PEN

In [31]:
# Preview the first five rows of the prediction table loaded from pred_reaction.csv.
print(df2.head())


   temperature  time  stoichiometry production_rate_(g/hr)    yield  priority  \
0          138    41          0.330                PENDING  PENDING       1.0   
1          132    37          0.405                PENDING  PENDING       1.0   
2          114    43          0.380                PENDING  PENDING       1.0   
3          138    43          0.655                PENDING  PENDING       0.0   
4          138    43          0.630                PENDING  PENDING       0.0   

   production_rate_(g/hr)_predicted_mean  \
0                               1.899254   
1                               1.777100   
2                               1.853312   
3                               1.836890   
4                               1.850768   

   production_rate_(g/hr)_predicted_variance  \
0                                   0.069743   
1                                   0.022145   
2                                   0.027731   
3                                   0.090820   
4       

In [32]:
# Preview the last five rows of the prediction table loaded from pred_reaction.csv.
print(df2.tail())

       temperature  time  stoichiometry production_rate_(g/hr) yield  \
11391          114    43          0.455                    1.9    87   
11392          102    23          0.405                    1.2    78   
11393           54    41          0.580                    0.6    41   
11394           45    29          0.605                    0.4    35   
11395           45     7          0.655                    0.2    21   

       priority  production_rate_(g/hr)_predicted_mean  \
11391      -1.0                               1.828068   
11392      -1.0                               1.171900   
11393      -1.0                               0.648476   
11394      -1.0                               0.392452   
11395      -1.0                               0.194253   

       production_rate_(g/hr)_predicted_variance  \
11391                                   0.008292   
11392                                   0.014044   
11393                                   0.014262   
11394     

In [44]:
# Summarise the experiments that already have measured results.
objective_columns = ['production_rate_(g/hr)', 'yield']

# Unmeasured rows hold the string 'PENDING' in the objective columns. Filter them out
# with a boolean mask instead of substituting pd.NaT, which is the missing-value
# marker for datetimes, not for numeric data.
is_measured = df[objective_columns].ne('PENDING').all(axis=1)
# dropna keeps the original clean-up rule: any row with a missing value is discarded.
df = df.loc[is_measured].dropna(axis=0).copy()

# Convert each objective column in one call; pd.to_numeric raises if a value
# other than a number is still present, so bad entries are not silently hidden.
for column in objective_columns:
    df[column] = pd.to_numeric(df[column])

# Running index 0..n-1 over the rows flagged with priority -1 (the value the measured
# rows carry in the optimizer output). The mask's True entries must be counted:
# len() of the boolean mask is simply the number of rows, whatever the priorities are.
completed_count = int((df['priority'] == -1).sum())
results_length = list(range(completed_count))
print(results_length)

# Best values observed so far; reused by the report lines below.
max_value_yield = df['yield'].max()
max_value_production_rate = df['production_rate_(g/hr)'].max()

# Interactive scatter of the measured objectives; marker size and colour both encode
# yield, and hovering shows the reaction conditions.
fig = px.scatter(df,
                 x='yield',
                 y='production_rate_(g/hr)',
                 size='yield',
                 color='yield',
                 hover_data=['temperature', 'time', 'stoichiometry'],
                 title='Measured experiments: production rate vs. yield')
fig.show()
print('Highest yield is {}%'.format(max_value_yield))
print('Highest production rate is {}g/hr'.format(max_value_production_rate))

[0, 1, 2, 3, 4, 5, 6, 7, 8]


Highest yield is 88%
Highest production rate is 1.9g/hr
