In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#os.chdir('../')

In [3]:
# Global figure style: Computer Modern math text, serif body font with
# Times New Roman as the first choice, 13 pt text, 300 dpi figures.
# "Times New Roman" is filtered out of the existing fallback list before being
# prepended, so re-running this cell does not keep growing the list.
plt.rcParams.update({
    'mathtext.fontset': 'cm',
    "font.family": "serif",
    "font.serif": ["Times New Roman"] + [
        name for name in plt.rcParams["font.serif"] if name != "Times New Roman"
    ],
    'font.size': 13,
    'figure.dpi': 300,
})

# The Data

In [4]:
# Read the spreadsheet and discard the serial-number column in one chain.
data = (
    pd.read_excel('../data/data.xlsx')
    .drop(columns=['S/N'])
)
data.head()

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM
0,150,3.5,100,14.05,0,23,12,0,61
1,150,3.3,80,14.05,0,23,7,0,66
2,150,3.2,50,14.05,0,34,5,3,52
3,150,3.1,10,14.05,1,42,5,2,42
4,150,3.0,5,14.05,4,48,5,10,19


In [5]:
# Split the column names by position: the first four are model inputs,
# everything after them is a measured output.
features_col = data.columns[:4].tolist()
target_col = data.columns[4:].tolist()
#target_col = [target_col[0], target_col[2]]
print('Features: ', features_col)
print('Target: ', target_col)

Features:  ['cDen', 'Pot', 'Sn %', 'pH']
Target:  ['C2H4', 'CO', 'H2', 'EtoH', 'FORM']


In [6]:
# Rescale every target column by a factor of 1/100.
# NOTE: this overwrites `data` in place, so running the cell a second time
# divides the targets by 100 again.
data[target_col] = data[target_col].div(100)
data.head(2)

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM
0,150,3.5,100,14.05,0.0,0.23,0.12,0.0,0.61
1,150,3.3,80,14.05,0.0,0.23,0.07,0.0,0.66


In [7]:
# Summary statistics, one row per column.
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cDen,35.0,269.171429,119.205824,141.0,150.0,250.0,350.0,450.0
Pot,35.0,3.86,0.500118,2.8,3.55,4.0,4.15,4.7
Sn %,35.0,35.428571,38.820314,0.0,3.0,10.0,80.0,100.0
pH,35.0,12.844,2.447214,8.02,14.05,14.05,14.05,14.05
C2H4,35.0,0.079714,0.103483,0.0,0.0,0.04,0.095,0.37
CO,35.0,0.219429,0.12312,0.05,0.125,0.19,0.275,0.5
H2,35.0,0.137429,0.093442,0.05,0.07,0.1,0.155,0.37
EtoH,35.0,0.106,0.132114,0.0,0.0,0.06,0.14,0.48
FORM,35.0,0.353714,0.217067,0.07,0.14,0.38,0.54,0.7


In [8]:
# Min-max scale the feature columns to the range [0, 1].
# minX / maxX hold the per-column extrema used for the scaling.
minX, maxX = data[features_col].min(), data[features_col].max()

data[features_col] = data[features_col].sub(minX).div(maxX - minX)
data.head(2)

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM
0,0.029126,0.368421,1.0,1.0,0.0,0.23,0.12,0.0,0.61
1,0.029126,0.263158,0.8,1.0,0.0,0.23,0.07,0.0,0.66


## ML model

In [9]:
import torch

from botorch.models import SingleTaskGP
from gpytorch.kernels import LinearKernel, MaternKernel, RBFKernel, ScaleKernel

from botorch.models.transforms.outcome import Standardize

from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch import fit_gpytorch_model
from sklearn.model_selection import train_test_split
from scipy.stats import norm
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error

In [10]:
def extract_train_data(data, idx, seed):
    """Build a seeded 75/25 train/test split for one target column.

    The feature matrix is taken from the module-level ``features_col`` columns
    of ``data`` and the target from ``target_col[idx]``.

    Parameters
    ----------
    data : DataFrame holding both the feature and the target columns.
    idx : position of the wanted target inside ``target_col``.
    seed : ``random_state`` forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(X, X_train, y_train, X_test, y_test)`` where ``X`` is the complete
        feature matrix as a NumPy array and the remaining items are torch
        tensors; the ``y`` tensors are column vectors of shape (n, 1).
    """
    features = data[features_col].to_numpy()
    target = data[target_col[idx]].to_numpy()

    # Split row positions rather than the arrays themselves so that the same
    # partition is applied to features and target.
    train_rows, test_rows = train_test_split(
        range(features.shape[0]), test_size=0.25, random_state=seed
    )
    partitions = (train_rows, test_rows)

    X_train, X_test = (torch.from_numpy(features[rows, :]) for rows in partitions)
    y_train, y_test = (
        torch.from_numpy(target[rows].reshape(-1, 1)) for rows in partitions
    )
    return features, X_train, y_train, X_test, y_test

In [11]:
def retreive_model(idx, seed):
    """Rebuild the GP for ``target_col[idx]`` and load its saved weights.

    A ``SingleTaskGP`` is constructed on the training split produced by
    ``extract_train_data`` for the module-level ``data`` (Matern kernel when
    ``idx`` is 0, RBF kernel otherwise) and its parameters are then read from
    ``../models/gp_<target name>.pt``.

    Parameters
    ----------
    idx : position of the target inside ``target_col``.
    seed : random seed forwarded to ``extract_train_data``.

    Returns
    -------
    tuple
        ``(gp, X)`` -- the model and the full feature matrix (all rows, not
        only the training subset).
    """
    X, X_train, y_train, *_ = extract_train_data(data=data, idx=idx, seed=seed)

    kernel = MaternKernel() if idx == 0 else RBFKernel()

    gp = SingleTaskGP(
        X_train,
        y_train,
        outcome_transform=Standardize(m=1),
        covar_module=kernel,
    )
    state = torch.load(f'../models/gp_{target_col[idx]}.pt')
    gp.load_state_dict(state)

    return gp, X


In [12]:
# Load the saved surrogates, looking each target up by name instead of by a
# hard-coded position so a change in column order cannot load the wrong model.
# NOTE: the second value returned by retreive_model is the full feature
# matrix, not a training subset, despite the name it is bound to here.
gp_H2, X_train = retreive_model(idx=target_col.index('H2'), seed=0)
# Surrogate for the C2H4 target (the variable name is kept because later
# cells refer to `gp_ethane`).
gp_ethane, _ = retreive_model(idx=target_col.index('C2H4'), seed=0)

In [25]:
# Show the feature column names (their order is the input order of the models).
features_col

['cDen', 'Pot', 'Sn %', 'pH']

In [44]:
import pickle
def polymer_properties(x1, x2, x3, x4):
    """Predict the C2H4 and H2 targets for one design point.

    The four inputs are stacked column-wise, in the order given, into a single
    feature row that is evaluated with the module-level ``gp_ethane`` and
    ``gp_H2`` models. No rescaling is applied inside this function (the
    pickle-based min/max rescaling that used to live here is disabled), so
    the caller must pass values in the same scaling the models were built on.

    Parameters
    ----------
    x1, x2, x3, x4 : int or float
        Feature values of a single design point. Presumably these correspond
        to cDen, Pot, Sn % and pH in that order -- TODO confirm.

    Returns
    -------
    dict
        ``{"C2H4": float, "H2": float}`` holding the posterior means.

    Raises
    ------
    Exception
        The final ``float(...)`` conversion only works for a single design
        point; array inputs that produce more than one row will fail there.
    """
    # Build the (1, 4) feature row. The explicit float64 cast keeps integer
    # inputs (e.g. 1 instead of 1.0) from reaching the GP as an int64 tensor;
    # float64 is the dtype torch.from_numpy yields for the float feature
    # columns used when the models are constructed.
    features = np.column_stack((x1, x2, x3, x4)).astype(np.float64)
    X_ = torch.from_numpy(features)

    # Put both models into evaluation mode before querying the posterior.
    gp_H2.eval()
    gp_ethane.eval()

    # Pure inference: no gradients are needed.
    with torch.no_grad():
        y_H2 = gp_H2.posterior(X_).mean
        y_ethane = gp_ethane.posterior(X_).mean

    return {"C2H4": float(y_ethane), "H2": float(y_H2)}

In [45]:
# Evaluate the surrogates at a single point with every input set to 1.
# (Each input could instead be swept with np.linspace(0, 1, 100).)
x1 = x2 = x3 = x4 = 1

polymer_properties(x1, x2, x3, x4)

{'C2H4': 0.013543507305906338, 'H2': 0.3265349355740676}

In [46]:
from ax.service.ax_client import AxClient, ObjectiveProperties

# Names under which the two objectives are registered with Ax.
obj1_name, obj2_name = "C2H4", "H2"

# Historical inputs: the feature columns of the dataset.
X_train = data.loc[:, features_col]

# Historical outputs: the measured values of the first and third target
# columns, read directly from the dataset.
y_train = data.loc[:, [target_col[0], target_col[2]]]

In [47]:
# Print every historical input row next to its output row.
for i, ((_, x_row), (_, y_row)) in enumerate(zip(X_train.iterrows(), y_train.iterrows())):
    print(f"i: {i}, X_train: {x_row}, y_train: {y_row}")

i: 0, X_train: cDen    0.029126
Pot     0.368421
Sn %    1.000000
pH      1.000000
Name: 0, dtype: float64, y_train: C2H4    0.00
H2      0.12
Name: 0, dtype: float64
i: 1, X_train: cDen    0.029126
Pot     0.263158
Sn %    0.800000
pH      1.000000
Name: 1, dtype: float64, y_train: C2H4    0.00
H2      0.07
Name: 1, dtype: float64
i: 2, X_train: cDen    0.029126
Pot     0.210526
Sn %    0.500000
pH      1.000000
Name: 2, dtype: float64, y_train: C2H4    0.00
H2      0.05
Name: 2, dtype: float64
i: 3, X_train: cDen    0.029126
Pot     0.157895
Sn %    0.100000
pH      1.000000
Name: 3, dtype: float64, y_train: C2H4    0.01
H2      0.05
Name: 3, dtype: float64
i: 4, X_train: cDen    0.029126
Pot     0.105263
Sn %    0.050000
pH      1.000000
Name: 4, dtype: float64, y_train: C2H4    0.04
H2      0.05
Name: 4, dtype: float64
i: 5, X_train: cDen    0.029126
Pot     0.105263
Sn %    0.030000
pH      1.000000
Name: 5, dtype: float64, y_train: C2H4    0.07
H2      0.05
Name: 5, dtype: float6

In [48]:
# Preview the first two rows of the historical inputs.
X_train.head(2)

Unnamed: 0,cDen,Pot,Sn %,pH
0,0.029126,0.368421,1.0,1.0
1,0.029126,0.263158,0.8,1.0


In [49]:
# Relabel the four input columns as x1..x4, the parameter names used by
# polymer_properties and by the Ax experiment.
X_train.columns = [f"x{position}" for position in range(1, 5)]

In [50]:
# Replace y_train with surrogate predictions: one {"C2H4", "H2"} dict per
# historical input row (row values are passed as the x1..x4 keyword arguments).
y_train = [polymer_properties(**inputs) for _, inputs in X_train.iterrows()]

In [51]:
# Display y_train (a list of per-row prediction dicts if the preceding cell was run).
y_train

[{'C2H4': 0.003819656545782188, 'H2': 0.1154817319105637},
 {'C2H4': 0.004521887106442371, 'H2': 0.07866365074761596},
 {'C2H4': 0.022577450710898866, 'H2': 0.047680326185577715},
 {'C2H4': 0.08206277222040526, 'H2': 0.05086580531316014},
 {'C2H4': 0.09433071036870783, 'H2': 0.06584101721204484},
 {'C2H4': 0.09787966617620139, 'H2': 0.06835975349974906},
 {'C2H4': 0.10923458588036923, 'H2': 0.09421997695772155},
 {'C2H4': -0.0033668195942405388, 'H2': 0.109791399289147},
 {'C2H4': -0.002305250194943209, 'H2': 0.09999265282661446},
 {'C2H4': 0.0260581821997862, 'H2': 0.07694884760077395},
 {'C2H4': 0.11150067323763747, 'H2': 0.051462187101096915},
 {'C2H4': 0.1221852161031444, 'H2': 0.05332185728690532},
 {'C2H4': 0.13449798001204671, 'H2': 0.0704178307079611},
 {'C2H4': 0.1607944741236078, 'H2': 0.12577272832353878},
 {'C2H4': 0.00026932103069986224, 'H2': 0.13442989747984685},
 {'C2H4': -3.403938923077221e-05, 'H2': 0.13141245267067347},
 {'C2H4': 0.024913640860748557, 'H2': 0.1190622

In [60]:
# Define the number of training examples
n_train = len(X_train)

ax_client = AxClient(random_seed=42) # CHANGE: add random seed for reproducibility

ax_client.create_experiment(
    parameters=[
        {"name": "x1", "type": "range", "bounds": [0.0, 1.0]}, # CHANGE: update parameter
        {"name": "x2", "type": "range", "bounds": [0.0, 1.0]}, # CHANGE: update parameter
        {"name": "x3", "type": "range", "bounds": [0.0, 1.0]}, # CHANGE: add new parameter
        {"name": "x4", "type": "range", "bounds": [0.0, 1.0]},
    ],
    objectives={
        obj1_name: ObjectiveProperties(minimize=False, threshold=0.0), # CHANGE: set minimize to False and change threshold
        obj2_name: ObjectiveProperties(minimize=True, threshold=0.0), # CHANGE: set minimize to False and change threshold
    }
)

# Add existing data to the AxClient
for i in range(n_train):
    parameterization = X_train.iloc[i].to_dict()

    ax_client.attach_trial(parameterization)
    ax_client.complete_trial(trial_index=i, raw_data=y_train[i])

[INFO 06-07 23:23:07] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 6 decimal points.
[INFO 06-07 23:23:07] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter x1. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 06-07 23:23:07] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter x2. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 06-07 23:23:07] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter x3. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 06-07 23:23:07] ax.service.uti

In [61]:
# Run 100 optimisation rounds: ask Ax for a candidate, score it with the
# surrogates, and report the result back.
for _ in range(100):
    parameterization, trial_index = ax_client.get_next_trial()

    # Unpack the four suggested inputs in their fixed order.
    x1, x2, x3, x4 = (parameterization[key] for key in ("x1", "x2", "x3", "x4"))

    results = polymer_properties(x1, x2, x3, x4)
    ax_client.complete_trial(trial_index=trial_index, raw_data=results)

# Collect the Pareto-optimal trials once all rounds have finished.
pareto_results = ax_client.get_pareto_optimal_parameters()

[INFO 06-07 23:23:17] ax.service.ax_client: Generated new trial 35 with parameters {'x1': 0.997513, 'x2': 0.104366, 'x3': 0.822979, 'x4': 0.419432}.
[INFO 06-07 23:23:17] ax.service.ax_client: Completed trial 35 with data: {'C2H4': (0.087805, None), 'H2': (0.155608, None)}.
[INFO 06-07 23:23:17] ax.service.ax_client: Generated new trial 36 with parameters {'x1': 0.452701, 'x2': 0.838067, 'x3': 0.07327, 'x4': 0.61617}.
[INFO 06-07 23:23:17] ax.service.ax_client: Completed trial 36 with data: {'C2H4': (0.110432, None), 'H2': (0.080426, None)}.
[INFO 06-07 23:23:17] ax.service.ax_client: Generated new trial 37 with parameters {'x1': 0.218184, 'x2': 0.350223, 'x3': 0.697927, 'x4': 0.173966}.
[INFO 06-07 23:23:17] ax.service.ax_client: Completed trial 37 with data: {'C2H4': (0.049572, None), 'H2': (0.131283, None)}.
[INFO 06-07 23:23:17] ax.service.ax_client: Generated new trial 38 with parameters {'x1': 0.731992, 'x2': 0.584297, 'x3': 0.448306, 'x4': 0.853206}.
[INFO 06-07 23:23:17] ax.ser

In [62]:
# Display the Pareto results currently bound to `pareto_results`.
pareto_results

{}

In [59]:
p_op = ax_client.get_pareto_optimal_parameters()

# Walk the result once, collecting for each Pareto trial its index, its
# parameter dict (first item) and the first element of its second item,
# which is indexed by objective name below.
p_op_index, p_op_params, p_op_values = [], [], []
for trial_id, entry in p_op.items():
    p_op_index.append(trial_id)
    p_op_params.append(entry[0])
    p_op_values.append(entry[1][0])

# Assemble a table with one row per Pareto trial: inputs first, then objectives.
pareto_results = pd.DataFrame(p_op_params, columns=["x1", "x2", "x3", "x4"]).assign(
    C2H4=[values["C2H4"] for values in p_op_values],
    H2=[values["H2"] for values in p_op_values],
)
pareto_results.index = p_op_index
display(pareto_results.round(2))

[INFO 06-07 23:22:51] ax.modelbridge.torch: The observations are identical to the last set of observations used to fit the model. Skipping model fitting.


Unnamed: 0,x1,x2,x3,x4,C2H4,H2
