In [52]:
# Import Modules
import math
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
import matplotlib.pyplot as plt
from warnings import catch_warnings
from warnings import simplefilter
import scipy.stats as stats
import itertools
import pandas as pd
from sklearn.model_selection import train_test_split, KFold

In [53]:
# Data Treatment
# data_path = "C:/Users/USER/Documents/workspace/BMED/DB_bench_BMED_for_LA.xlsx"
data_path = "C:/Users/bsjun/Documents/workspace/BMED/DB_bench_BMED_for_LA.xlsx"
raw_Data= pd.read_excel(data_path,sheet_name="data_for_ML")

MVs = raw_Data[['T_operation','V_operation','E_operation','C_F_LA','C_A_LA']]
CVs = raw_Data[['SEC','J_LA']]

X = MVs.values
Y = CVs.values

In [54]:
# evaluation function
def eval(y):
	wt1 = 0.5
	wt2 = 0.5
	return wt1*(1-y[0]/maxSEC) + wt2*y[1]/maxJLA

In [55]:
# surrogate or approximation for the objective function
def surrogate(model, X):
    # catch any warning generated when making a prediction
    with catch_warnings():
        # ignore generated warnings
        simplefilter('ignore')
        return model.predict(X, return_std=True)

In [56]:
# Expected improvement acquisition function
def acquisition(X, Xsamples, model):
    # calculate the best surrgate score found so far
    yhat, _ = surrogate(model, X)
    score1 = np.zeros(len(yhat))
    for i in range(len(yhat)):
        score1[i] = eval(yhat[i])
    best = max(score1)

    # calculate mean and stdev via surrogate function
    mu, std = surrogate(model, Xsamples)
    score2 = np.zeros(len(mu))
    for i in range(len(mu)):
        score2[i] = eval(mu[i])
    # Calculate the expected improvement (EI)
    # Clip std to avoid division by zero
    std = np.clip(std, 1e-9, None)  # Replace None with a suitable upper bound if needed
    std2 = [i[0] for i in std]
    z = score2 - best / std2
    ei = (score2 - best) * stats.norm.cdf(z) + std2 * stats.norm.pdf(z)
    return ei

In [57]:
# optimize the acquisition function
def opt_acquisition(X, y, model):
    # grid search, generate samples
    Tsample = np.linspace(25,35,31)
    Vsample = np.linspace(10,35,31)
    Esample = np.linspace(0.25,1,31)
    Fsample = np.linspace(-0.1,5.2,31)
    Asample = np.linspace(-0.1,2.2,31)
    Xsamples = np.asarray(list(itertools.product(Tsample,Vsample,Esample,Fsample,Asample)))
    
    # calculate the acquisition function for each sample
    scores = acquisition(X, Xsamples, model)
    # locate the index of the largest scores
    ix = np.argmax(scores)
    return Xsamples[ix]

In [58]:
# plot real observation vs surrogate function
def plot(X, y, model):
    # Split inputs
    Xe1 = [X[i][0] for i in range(len(X))]
    Xe2 = [X[i][1] for i in range(len(X))]
    Xe3 = [X[i][2] for i in range(len(X))]
    ye1 = [y[i][0] for i in range(len(y))]
    ye2 = [y[i][1] for i in range(len(y))]

    # scatter plot of imputs
    fig, axes = plt.subplots(2,3)
    axes[0,0].scatter(Xe1, ye1)
    axes[0,1].scatter(Xe2, ye1)
    axes[0,2].scatter(Xe3, ye1)
    axes[1,0].scatter(Xe1, ye2)
    axes[1,1].scatter(Xe2, ye2)
    axes[1,2].scatter(Xe3, ye2)

    # line plot of surragte function acorss domain
    Xp1 = np.append(Xe1,10 + 10*np.random.random(20))
    Xp2 = np.append(Xe2,2*np.random.random(20))
    Xp3 = np.append(Xe3,-10 + 20*np.random.random(20))
    Xpred = np.asarray(list(itertools.product(Xp1,Xp2,Xp3)))

    ypred, _ = surrogate(model, Xpred)
    yp1 = [i[0] for i in ypred]
    yp2 = [i[1] for i in ypred]

    Xpl1 = [i[0] for i in Xpred]
    Xpl2 = [i[1] for i in Xpred]
    Xpl3 = [i[2] for i in Xpred]

    axes[0,0].scatter(Xpl1, yp1, s=1)
    axes[0,1].scatter(Xpl2, yp1, s=1)
    axes[0,2].scatter(Xpl3, yp1, s=1)
    axes[1,0].scatter(Xpl1, yp2, s=1)
    axes[1,1].scatter(Xpl2, yp2, s=1)
    axes[1,2].scatter(Xpl3, yp2, s=1)
    # show the plot
    plt.show()

In [97]:
# Train Set Normalization
# split the data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state=42)

T = [i[0] for i in X_train]
V = [i[1] for i in X_train]
E = [i[2] for i in X_train]
F = [i[3] for i in X_train]
A = [i[4] for i in X_train]

SEC = [i[0] for i in Y_train]
JLA = [i[1] for i in Y_train]

rX_train = list(range(len(X_train)))
rY_train = list(range(len(X_train)))

# Z-score normalization
for i in range(len(X_train)):
    iX = [(T[i]-np.average(T))/np.std(T),(V[i]-np.average(V))/np.std(V),(E[i]-np.average(E))/np.std(E),(F[i]-np.average(F))/np.std(F),(A[i]-np.average(A))/np.std(A)]
    iY = [(SEC[i]-np.average(SEC))/np.std(SEC),(JLA[i]-np.average(JLA))/np.std(JLA)]
    rX_train[i] = iX
    rY_train[i] = iY
rX_train = np.asarray(rX_train)
rY_train = np.asarray(rY_train)
print(rY_train)

[[-1.71155012e-01 -7.37456403e-01]
 [-5.83803445e-01  1.20104324e-01]
 [ 9.92370266e-01 -7.29469080e-01]
 [-5.44548208e-01 -6.79787782e-01]
 [-8.77810380e-01 -6.44548771e-01]
 [ 4.00212200e-02 -7.11008702e-01]
 [-2.33855778e-01 -3.15877546e-01]
 [-5.25479308e-01 -6.61737747e-01]
 [ 7.75879987e-01 -6.06986658e-01]
 [-4.97817891e-01 -9.98846325e-02]
 [-3.19850306e-01  7.10068736e-01]
 [ 5.12852235e+00 -7.38062272e-01]
 [-7.11143512e-02 -7.27203254e-01]
 [-2.21498556e-01 -6.93964579e-01]
 [-6.95614183e-01  1.66510430e+00]
 [-4.89898140e-01  1.12249094e+00]
 [-8.70481218e-01 -6.85305893e-01]
 [ 1.30837184e+00 -7.18051362e-01]
 [-6.11344437e-01 -1.90338118e-01]
 [ 1.59731227e+00 -7.22558410e-01]
 [-4.17727632e-01  1.39322921e+00]
 [ 1.90878204e+00 -7.51510533e-01]
 [-5.60133080e-01  3.33031831e+00]
 [-3.19099190e-01  1.54972645e-01]
 [-4.30020459e-01  1.80154527e+00]
 [ 4.27623134e-02 -7.53587332e-01]
 [ 1.83669745e-04 -3.12974473e-01]
 [ 2.28377045e+00 -7.34366448e-01]
 [-5.07817089e-01 -4

In [60]:





# 5-Fold Cross Validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)


for train_index, val_index in kfold.split(X_train):
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    Y_train_fold, Y_val_fold = Y_train[train_index], Y_train[val_index]

    model = GaussianProcessRegressor()
    model.fit(X_train_fold,Y_train_fold)



# # define the model
# model = GaussianProcessRegressor()
# model.fit(X,y)

# x = opt_acquisition(X, y, model)
# actual = [67.06,49.637]
# est, _ = surrogate(model, [X[2]])
# print(f'>x = {x}, f()={est}, actual={actual}')
# print(np.exp(x[0]),np.exp(x[0])*5)


In [61]:
# best result
score3=np.zeros(len(y))
for i in range(len(y)):
        score3[i] = eval(y[i])
ix = np.argmin(score3)
print(X[ix], y[ix])

plt.scatter(score3)

,[16.0505649,1.19114408,-0.68813221]
,[67.19,49.637]



NameError: name 'y' is not defined