In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, ConstantKernel as C, WhiteKernel
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from scipy.stats import norm
from scipy.optimize import minimize

# ------------------------------------------------------------
# Load initial data
# ------------------------------------------------------------
# Initial design: inputs of shape (n0, d) and matching outputs of shape (n0,).
X = np.load("initial_inputs.npy")
y = np.load("initial_outputs.npy")

# ------------------------------------------------------------
# Append new information
# ------------------------------------------------------------
# One (inputs, output) pair per weekly query, oldest first.
weekly_observations = [
    # week 1
    ([0.000000, 0.246841, 0.408148, 0.217147, 0.377534, 0.746590], 2.302777955923556),
    # week 2
    ([0.000000, 0.181575, 0.435826, 0.062960, 0.361632, 0.858542], 1.3276855486043753),
]
for week_inputs, week_output in weekly_observations:
    X = np.append(X, [week_inputs], axis=0)
    y = np.append(y, week_output)

# Persist the augmented data set so later runs can start from it.
np.save("updated_inputs_PW2.npy", X)
np.save("updated_outputs_PW2.npy", y)

# ------------------------------------------------------------
# Normalise inputs and outputs
# ------------------------------------------------------------
x_scaler = StandardScaler()
Xn = x_scaler.fit_transform(X)

# Standardise the target by hand (the GP is fitted with normalize_y=False),
# keeping the mean/std so predictions can be mapped back to the original scale.
y_mean = y.mean()
y_std = y.std()
if not y_std > 0:
    # Constant target: fall back to unit scale to avoid dividing by zero.
    y_std = 1.0
yn = (y - y_mean) / y_std


def visualize_iteration(Xn, seed_points, suggested_point):
    """
    Simple 2D scatter plot of the current optimisation iteration.

    Parameters
    ----------
    Xn : array of shape (n, 2)
        Already-evaluated input points.
    seed_points : array of shape (m, 2)
        Candidate points used as starting points for local optimisation.
    suggested_point : array of shape (2,) or (1, 2)
        Final chosen query point.

    All three arguments are drawn on the same axes, so they must be expressed
    in the same coordinate system; the axis labels describe it as normalised.

    Returns
    -------
    None
        If ``Xn`` does not have exactly two columns, a message is printed and
        nothing is plotted.
    """
    if Xn.shape[1] != 2:
        print("This visualization works only for 2D input data.")
        return

    # Accept both a flat (2,) vector and a (1, 2) row: indexing a (1, 2) array
    # with [1] would raise IndexError.
    star = np.ravel(suggested_point)

    plt.figure(figsize=(8, 6))
    plt.scatter(Xn[:, 0], Xn[:, 1], c='blue', label='Existing points', s=40, alpha=0.7)
    plt.scatter(seed_points[:, 0], seed_points[:, 1], c='orange', label='Seed points', s=50, alpha=0.7, edgecolors='k')
    plt.scatter(star[0], star[1], c='red', label='Suggested point', s=120, marker='*', edgecolors='k')

    plt.title("Bayesian Optimization Progress (2D)")
    plt.xlabel("Feature 1 (normalized)")
    plt.ylabel("Feature 2 (normalized)")
    # Every series carries a label, so show them.
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def visualize_svm(Xn, yn, candidates_raw, good_candidates_raw,
                  suggested_point, svm_clf, x_scaler):
    """
    Plot the SVM decision boundary together with:
      • all random candidates (gray)
      • candidates kept by SVM (orange)
      • existing data (blue / cyan, split at the median of ``yn``)
      • final suggestion (red star)

    Parameters
    ----------
    Xn : array of shape (n, 2)
        Existing inputs in the space ``svm_clf`` is evaluated in; the
        decision-function grid is built from the range of these points.
    yn : array of shape (n,)
        Targets for ``Xn``; only their order relative to the median is used.
    candidates_raw, good_candidates_raw : arrays of shape (k, 2)
        Candidate points in raw input space.
    suggested_point : array of shape (2,) or (1, 2)
        Chosen query point in raw input space.
    svm_clf : fitted classifier exposing ``decision_function``.
    x_scaler : fitted transformer exposing ``transform``, or None
        Used to map the raw points into the space of ``Xn`` so that everything
        shares one coordinate system. If None, the raw points are plotted
        unchanged.

    Returns
    -------
    None
        If ``Xn`` does not have exactly two columns, a message is printed and
        nothing is plotted.
    """
    if Xn.shape[1] != 2:
        print("SVM visualisation works only for 2-D inputs.")
        return

    def _to_plot_space(points):
        # Map raw points into the coordinates of Xn / the decision surface.
        pts = np.atleast_2d(points)
        if x_scaler is None or pts.shape[0] == 0:
            return pts
        return x_scaler.transform(pts)

    candidates_n = _to_plot_space(candidates_raw)
    good_candidates_n = _to_plot_space(good_candidates_raw)
    suggested_n = np.ravel(_to_plot_space(suggested_point))

    # Create a dense grid for the contour
    h = 0.01
    x_min, x_max = Xn[:, 0].min() - 0.2, Xn[:, 0].max() + 0.2
    y_min, y_max = Xn[:, 1].min() - 0.2, Xn[:, 1].max() + 0.2
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    grid = np.c_[xx.ravel(), yy.ravel()]
    Z = svm_clf.decision_function(grid)
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(9, 7))
    # Decision regions (negative / positive side) and the zero-level boundary
    plt.contourf(xx, yy, Z, levels=[-np.inf, 0, np.inf],
                 colors=['#FFDDDD', '#DDFFDD'], alpha=0.3)
    plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='k')

    # All random candidates
    plt.scatter(candidates_n[:, 0], candidates_n[:, 1],
                c='lightgray', s=15, alpha=0.6, label='All random candidates')

    # Candidates kept by SVM
    plt.scatter(good_candidates_n[:, 0], good_candidates_n[:, 1],
                c='orange', s=40, edgecolor='k', label='SVM-filtered candidates')

    # Existing points (colour-coded by class for reference)
    median_y = np.median(yn)
    colors = np.where(yn >= median_y, 'blue', 'cyan')
    plt.scatter(Xn[:, 0], Xn[:, 1], c=colors, s=80,
                edgecolor='k', label='Existing points (≥/ < median)')

    # Suggested point
    plt.scatter(suggested_n[0], suggested_n[1], c='red',
                s=150, marker='*', edgecolor='k', label='Suggested point')

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.title("SVM Decision Boundary + Candidate Filtering")
    plt.xlabel("Feature 1 (norm.)")
    plt.ylabel("Feature 2 (norm.)")
    plt.legend(loc='upper left')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


# ------------------------------------------------------------
# Fit a Gaussian Process surrogate
# ------------------------------------------------------------
d = X.shape[1]

# Anisotropic Matern-5/2 kernel (one length scale per input dimension) with a
# learnable amplitude, plus a white-noise term for observation noise.
kernel = C(1.0, (1e-3, 1e3)) * Matern(length_scale=np.ones(d),
                                      length_scale_bounds=(1e-5, 1e9),
                                      nu=2.5)
kernel += WhiteKernel(noise_level=1e-6,
                      noise_level_bounds=(1e-15, 1e1))

# The target is already standardised by hand, hence normalize_y=False.
gp = GaussianProcessRegressor(kernel=kernel,
                              normalize_y=False,
                              n_restarts_optimizer=10,
                              random_state=0)
gp.fit(Xn, yn)

# ------------------------------------------------------------
# Add SVM to classify promising regions
# ------------------------------------------------------------
# Label each observation 1 if its target is at or above the median, else 0.
median_y = np.median(yn)
labels = (yn >= median_y).astype(int)
# SVC.fit raises when only one class is present (e.g. a single observation or
# all targets equal), so train only when both classes occur.
if np.unique(labels).size > 1:
    svm_clf = SVC(kernel='rbf', C=1.0)
    svm_clf.fit(Xn, labels)
else:
    svm_clf = None

# ------------------------------------------------------------
# Define Expected Improvement acquisition (for maximization)
# ------------------------------------------------------------
f_best = yn.max()   # incumbent, in normalised target units
xi = 0.01     # exploration parameter

def predict_raw(x_raw):
    """
    Return the GP posterior mean and standard deviation for raw inputs.

    ``x_raw`` is promoted to 2-D, scaled with the module-level ``x_scaler`` and
    passed to the module-level ``gp``. Both returned arrays are flattened to
    1-D and are expressed in the normalised target space the GP was fitted in.
    """
    scaled = x_scaler.transform(np.atleast_2d(x_raw))
    mean, std = gp.predict(scaled, return_std=True)
    return mean.ravel(), std.ravel()

def expected_improvement_raw(x_raw, xi=xi):
    """
    Return the Expected Improvement (maximisation form) at a raw input.

    The improvement is measured against the module-level incumbent ``f_best``
    with exploration offset ``xi``, using the posterior from ``predict_raw``.
    Only the first element of the result is returned, as a scalar.
    """
    mean, std = predict_raw(x_raw)
    std = np.maximum(std, 1e-9)       # avoid division by zero
    gain = mean - f_best - xi
    z_score = gain / std
    ei = gain * norm.cdf(z_score) + std * norm.pdf(z_score)
    return ei.ravel()[0]

# ------------------------------------------------------------
# Search for the next query point
# ------------------------------------------------------------
# Domain is assumed to be [0,1]^d (based on inspection of initial data)
bounds = [(0.0, 1.0)] * d

# Global random search to find good seeds: uniform samples over the domain,
# drawn from a fixed-seed generator so the run is reproducible.
n_seeds = 500
rng = np.random.default_rng(1)
candidates_raw = rng.uniform(0.0, 1.0, size=(n_seeds, d))

print("Original candidates=", candidates_raw.shape)
good_candidates_raw = candidates_raw.copy()

# Keep only candidates the SVM classifies as "at or above median"; if it keeps
# none (or no SVM was trained) the full candidate set is used.
if svm_clf is not None:
    candidates_n = x_scaler.transform(candidates_raw)
    predicted_labels = svm_clf.predict(candidates_n)
    good_idx = np.where(predicted_labels == 1)[0]
    if len(good_idx) > 0:
        good_candidates_raw = candidates_raw[good_idx]
        print(f"Reduced candidates to {len(good_candidates_raw)} promising ones using SVM.")

print("Reduced candidates=", good_candidates_raw.shape)

# ---- EI evaluation on the (possibly reduced) candidate set ----
ei_vals = np.array([expected_improvement_raw(c) for c in good_candidates_raw])
best_idx = np.argmax(ei_vals)
print("first point ei_vals[", best_idx, "]=", ei_vals[best_idx])

# ---- Take top-10 seeds for local optimisation ----
seed_points = good_candidates_raw[np.argsort(-ei_vals)[:10]]
print("seed_points=\n", seed_points)

# Start from the best random candidate so the final suggestion can never be
# worse than the global random search, even if some (or all) local
# optimisations fail to converge.
best_x = good_candidates_raw[best_idx].copy()
best_val = ei_vals[best_idx]
for s in seed_points:
    # minimize() minimises, so optimise the negated EI.
    res = minimize(lambda x_try: -expected_improvement_raw(x_try),
                   x0=s,
                   bounds=bounds,
                   method="L-BFGS-B",
                   options={'maxiter': 300})
    if not res.success:
        continue
    val = -res.fun
    print("seed=\t", s, "\tEI=", val)
    if val > best_val:
        best_val = val
        best_x = res.x.copy()
        print("best_x=\t", best_x, "\tEI=", val)

# ------------------------------------------------------------
# Report results
# ------------------------------------------------------------
suggested_point = np.atleast_1d(best_x)
mu_s, sigma_s = predict_raw(suggested_point.reshape(1, -1))

# Convert mean/std back to original y-scale
mu_orig = mu_s * y_std + y_mean
sigma_orig = sigma_s * y_std
# Loop variable deliberately not called `xi`, which is the EI exploration parameter.
next_query = "-".join(f"{coord:.6f}" for coord in suggested_point)

print("Suggested next query point (raw input space):", next_query)
print("Expected Improvement at this point (normalized):", best_val)
print("GP predicted mean at this point (original y scale):", mu_orig)
print("GP predicted stddev at this point (original y scale):", sigma_orig)

# Visualise 1. Standard BO progress plot (all three arguments in raw input space)
visualize_iteration(X, seed_points, best_x)

# 2. SVM decision-boundary plot (only when SVM was trained).
# The SVM was fitted on the scaled inputs, so pass Xn / yn rather than the raw
# X / y; the candidates and the suggestion stay in raw space as the signature
# expects.
if svm_clf is not None:
    visualize_svm(Xn, yn,
                  candidates_raw,
                  good_candidates_raw,
                  best_x,
                  svm_clf,
                  x_scaler)

Original candidates= (500, 6)
Reduced candidates to 222 promising ones using SVM.
Reduced candidates= (222, 6)
first point ei_vals[ 61 ]= 0.0038755914121805825
seed_points=
 [[0.04179161 0.10815954 0.51877912 0.25248938 0.28553513 0.74146953]
 [0.15138617 0.02850641 0.8662415  0.32973822 0.52113121 0.78725794]
 [0.15134593 0.04806967 0.43582599 0.21565299 0.47825274 0.82019648]
 [0.08054686 0.37115249 0.40375614 0.51302692 0.35142922 0.73466979]
 [0.37502874 0.27413178 0.68273606 0.29779484 0.36765987 0.72391271]
 [0.11717227 0.26873358 0.33643323 0.88592358 0.25074066 0.68187264]
 [0.24372164 0.29458141 0.95708403 0.36201606 0.28897611 0.72001112]
 [0.48050616 0.28141192 0.71281495 0.3820169  0.42517713 0.81624986]
 [0.20140189 0.45372918 0.25293035 0.20711901 0.36779487 0.80220494]
 [0.62814318 0.34820984 0.81076953 0.13730046 0.46254633 0.84077602]]
seed=	 [0.04179161 0.10815954 0.51877912 0.25248938 0.28553513 0.74146953] 	EI= 0.06440010085616427
best_x=	 [0.         0.16642896 0.5