In [48]:
# Import Modules
import math
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
import matplotlib.pyplot as plt
from warnings import catch_warnings
from warnings import simplefilter
import scipy.stats as stats
import itertools
import pandas as pd
from sklearn.model_selection import train_test_split, KFold

In [49]:
# Data treatment: read the spreadsheet and split it into model inputs and outputs
# data_path = "C:/Users/USER/Documents/workspace/BMED/DB_bench_BMED_for_LA.xlsx"
data_path = "C:/Users/bsjun/Documents/workspace/BMED/DB_bench_BMED_for_LA.xlsx"
raw_Data = pd.read_excel(data_path, sheet_name="data_for_ML")

# Column names of the five input variables and the two output variables
mv_columns = ['T_operation', 'V_operation', 'E_operation', 'C_F_LA', 'C_A_LA']
cv_columns = ['SEC', 'J_LA']

# Keep the selected columns as DataFrames as well as plain arrays
MVs = raw_Data.loc[:, mv_columns]
CVs = raw_Data.loc[:, cv_columns]

# Array views used by the rest of the notebook
X, Y = MVs.values, CVs.values

In [50]:
# Surrogate (approximation) of the objective function
def surrogate(model, X):
    """Query the surrogate model at ``X`` with all warnings silenced.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(X, return_std=True)``.
    X : array-like
        Samples passed straight through to ``model.predict``.

    Returns
    -------
    The value returned by ``model.predict(X, return_std=True)``, unchanged.
    """
    with catch_warnings():
        # Suppress every warning raised while predicting
        simplefilter('ignore')
        prediction = model.predict(X, return_std=True)
    return prediction

In [51]:
# Expected improvement acquisition function
def acquisition(X, Xsamples, model):
    """Expected improvement (EI) of candidate samples for the second model output.

    The incumbent is the largest surrogate prediction of output column 1 over
    the already-evaluated points ``X``. For every candidate the closed-form EI
    for maximisation is returned:

        EI = (mu - best) * Phi(z) + sigma * phi(z),   z = (mu - best) / sigma

    NOTE(review): only output column 1 (treated as "to be maximised") enters
    the score; output column 0 is not part of the acquisition. Confirm that
    a single-objective EI on this column is the intended criterion.

    Parameters
    ----------
    X : array-like, shape (n_observed, n_features)
        Points already evaluated; used only to find the incumbent.
    Xsamples : array-like, shape (n_candidates, n_features)
        Candidate points to score.
    model : object
        Fitted two-output model accepted by ``surrogate``.

    Returns
    -------
    numpy.ndarray, shape (n_candidates,)
        Expected improvement of each candidate (larger is better).
    """
    # Incumbent: best surrogate score found so far for the second output
    yhat, _ = surrogate(model, X)
    best2 = np.max(np.asarray(yhat)[:, 1])

    # Predictive mean and standard deviation at the candidates
    mu, std = surrogate(model, Xsamples)
    mu2 = np.asarray(mu)[:, 1]
    std = np.asarray(std)
    # Accept both a per-output (2-D) and a shared (1-D) standard deviation
    std2 = std[:, 1] if std.ndim == 2 else std
    # Clip to avoid division by zero where the model is certain
    std2 = np.clip(std2, 1e-9, None)

    # Parenthesised so the whole improvement is divided by sigma
    improvement = mu2 - best2
    z = improvement / std2
    ei = improvement * stats.norm.cdf(z) + std2 * stats.norm.pdf(z)
    return ei

In [52]:
# optimize the acquisition function
def opt_acquisition(X, y, model, n_points=31):
    """Grid-search the candidate with the largest acquisition score.

    A full factorial grid with ``n_points`` levels per input is built over the
    fixed ranges below, every grid point is scored with ``acquisition``, and
    the best-scoring point is returned.

    The grid holds ``n_points ** 5`` rows (about 28.6 million at the default
    of 31), so pass a smaller ``n_points`` for quick runs. The grid is built
    directly as a NumPy array, in the same row order as
    ``itertools.product`` (last input varies fastest).

    NOTE(review): the ranges are in raw units. If ``model`` was fitted on
    normalised inputs the candidates must be scaled the same way — confirm.

    Parameters
    ----------
    X : array-like
        Points already evaluated, forwarded to ``acquisition``.
    y : array-like
        Not used by this function; kept so the signature stays unchanged.
    model : object
        Fitted surrogate model, forwarded to ``acquisition``.
    n_points : int, default 31
        Number of grid levels per input.

    Returns
    -------
    numpy.ndarray, shape (5,)
        The grid point with the highest acquisition score.
    """
    # Grid levels for each of the five inputs
    levels = (
        np.linspace(25, 35, n_points),
        np.linspace(10, 35, n_points),
        np.linspace(0.25, 1, n_points),
        np.linspace(-0.1, 5.2, n_points),
        np.linspace(-0.1, 2.2, n_points),
    )
    # 'ij' indexing + C-order reshape reproduces itertools.product ordering
    mesh = np.meshgrid(*levels, indexing='ij')
    Xsamples = np.stack(mesh, axis=-1).reshape(-1, len(levels))

    # calculate the acquisition function for each sample
    scores = acquisition(X, Xsamples, model)
    # locate the index of the largest score
    ix = np.argmax(scores)
    return Xsamples[ix]

In [53]:
# plot real observations vs surrogate function
def plot(X, y, model):
    """Scatter observed data and surrogate predictions on a 2x3 grid of axes.

    Rows correspond to the two output columns of ``y`` and columns to the
    first three input columns of ``X``. Observations are drawn with the
    default marker size and surrogate predictions with ``s=1``.

    The surrogate is queried on the Cartesian product of three value sets,
    each being one observed input column extended by 20 random values.

    NOTE(review): only three input columns are used and the surrogate is
    queried with three-column samples — confirm this matches the number of
    features ``model`` was fitted on.

    Parameters
    ----------
    X : sequence of rows with at least three entries each
    y : sequence of rows with at least two entries each
    model : fitted model accepted by ``surrogate``
    """
    # Split inputs and outputs column by column
    x_cols = [[row[k] for row in X] for k in range(3)]
    y_cols = [[row[k] for row in y] for k in range(2)]

    # scatter plot of inputs: one row per output, one column per input
    fig, axes = plt.subplots(2, 3)
    for r, y_col in enumerate(y_cols):
        for c, x_col in enumerate(x_cols):
            axes[r, c].scatter(x_col, y_col)

    # surrogate function across the domain: observed values plus random extras
    grid1 = np.append(x_cols[0], 10 + 10*np.random.random(20))
    grid2 = np.append(x_cols[1], 2*np.random.random(20))
    grid3 = np.append(x_cols[2], -10 + 20*np.random.random(20))
    Xpred = np.asarray(list(itertools.product(grid1, grid2, grid3)))

    ypred, _ = surrogate(model, Xpred)
    pred_cols = [[p[k] for p in ypred] for k in range(2)]
    grid_cols = [[p[k] for p in Xpred] for k in range(3)]

    # overlay the predictions with small markers
    for r, pred_col in enumerate(pred_cols):
        for c, grid_col in enumerate(grid_cols):
            axes[r, c].scatter(grid_col, pred_col, s=1)

    # show the plot
    plt.show()

In [73]:
# Train Set Normalization
# split the data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Per-variable columns of the training split (names kept from the original cell)
T, V, E, F, A = X_train.T
SEC, JLA = Y_train.T

# Z-score statistics are computed once, from the training split only
X_train_f = np.asarray(X_train, dtype=float)
Y_train_f = np.asarray(Y_train, dtype=float)
X_mean, X_std = X_train_f.mean(axis=0), X_train_f.std(axis=0)
Y_mean, Y_std = Y_train_f.mean(axis=0), Y_train_f.std(axis=0)

# Z-score normalization (vectorised over all rows and columns)
rX_train = (X_train_f - X_mean) / X_std
rY_train = (Y_train_f - Y_mean) / Y_std

model = GaussianProcessRegressor()
model.fit(rX_train, rY_train)

# Best surrogate values on the training set: minimum of output 0, maximum of output 1
yhat, _ = surrogate(model, rX_train)
best1, best2 = yhat[:, 0].min(), yhat[:, 1].max()

# Coarse full-factorial candidate grid in raw units (3 levels per input)
Ts = np.linspace(25,35,3)
Vs = np.linspace(10,35,3)
Es = np.linspace(0.25,1,3)
Fs = np.linspace(-0.1,5.2,3)
As = np.linspace(-0.1,2.2,3)
Xs = np.asarray(list(itertools.product(Ts,Vs,Es,Fs,As)))

# Per-variable columns of the candidate grid (names kept from the original cell)
rTs, rVs, rEs, rFs, rAs = Xs.T

# Candidates are scaled with the TRAINING mean/std so they live in the same
# coordinate system the model was fitted in; scaling the grid with its own
# statistics would map it onto different points.
rX_sample = (Xs - X_mean) / X_std

mu, std = surrogate(model, rX_sample)

# Split predictions and uncertainties per output
mu1, mu2 = mu[:, 0], mu[:, 1]
std1, std2 = std[:, 0], std[:, 1]


# std = np.clip(std, 1e-9, None)  # Replace None with a suitable upper bound if needed
# std2 = [i[0] for i in std]
# z = score2 - best / std2
# ei = (score2 - best) * stats.norm.cdf(z) + std2 * stats.norm.pdf(z)


[3.7768268501263265, 0.16068809742046142, -3.235962301116615, 0.28887491698287704, 0.7233762573323474, -8.944991716267623, 1.195716589672827, -0.7661732567313493, -0.5624614579235185, 5.474865152190432, 0.23293795783411042, -4.690803480859358, 0.4187536828630982, 1.0486246888240203, -12.96651653271374, 1.7333060848109767, -1.110617045995923, -0.8152697024615918, 1.7708355195584318, 0.0753535945900694, -1.5171804236711068, 0.13544910230245932, 0.3392182792078984, -4.193830264958706, 0.5606388463640712, -0.35918491925338003, -0.26356382469013795, -1.6802684653250708, 0.40944392657456774, 1.0138025573442064, 0.42964945774178886, -4.012005408573032, 4.642035495677646, 1.6436096554561743, -4.629428825927562, -16.460055010484623, -2.435700916924816, 0.5935932293918995, 1.4699293196529197, 0.6228459913681945, -5.8155025799756, 6.730117623719934, 2.3825994873277807, -6.710516199189101, -23.859542784894643, -0.7878126946507109, 0.1921195842007819, 0.4760573792219702, 0.20150658957783207, -1.880

In [55]:
# Predictive mean and standard deviation of the surrogate at Xsamples.
# NOTE(review): in the cells shown here `Xsamples` is only assigned inside
# opt_acquisition(), so this line appears to rely on kernel state from
# elsewhere — confirm which sample set it should be evaluated on.
mu, std = surrogate(model, Xsamples)


In [56]:
# Display the standard deviations returned by the surrogate.
# NOTE(review): this prints the whole array; a slice such as std[:5] would keep the output short.
std

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.

In [57]:





# 5-Fold Cross Validation
# NOTE(review): the folds are taken from the un-normalised X_train / Y_train,
# whereas the earlier fit used z-scored data, and `model` is rebound on every
# fold — confirm both are intended.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Validation score (value of model.score, i.e. R^2) of each fold
cv_scores = []

for train_index, val_index in kfold.split(X_train):
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    Y_train_fold, Y_val_fold = Y_train[train_index], Y_train[val_index]

    model = GaussianProcessRegressor()
    model.fit(X_train_fold,Y_train_fold)

    # Evaluate on the held-out fold so the cross-validation yields a result
    cv_scores.append(model.score(X_val_fold, Y_val_fold))

cv_scores = np.asarray(cv_scores)
print(f'5-fold validation R^2: mean={cv_scores.mean():.3f}, std={cv_scores.std():.3f}')



# # define the model
# model = GaussianProcessRegressor()
# model.fit(X,y)

# x = opt_acquisition(X, y, model)
# actual = [67.06,49.637]
# est, _ = surrogate(model, [X[2]])
# print(f'>x = {x}, f()={est}, actual={actual}')
# print(np.exp(x[0]),np.exp(x[0])*5)


In [58]:
# best result
# NOTE(review): `y` is not assigned at notebook scope in the cells shown here
# (only `Y` is), and the recorded output of this cell is
# "NameError: name 'y' is not defined" — confirm which variable is meant.
score3=np.zeros(len(y))
for i in range(len(y)):
        # NOTE(review): eval() executes its argument as Python code; presumably
        # y[i] was once a string expression. Avoid eval on data that is not
        # fully trusted — confirm whether a plain numeric conversion suffices.
        score3[i] = eval(y[i])
# Index of the smallest score, then the matching input row and raw entry
ix = np.argmin(score3)
print(X[ix], y[ix])

# NOTE(review): plt.scatter() takes both x and y data; with a single argument
# this call is expected to raise a TypeError — confirm the intended axes.
plt.scatter(score3)

,[16.0505649,1.19114408,-0.68813221]
,[67.19,49.637]



NameError: name 'y' is not defined