# modules

In [79]:
import numpy as np
import pandas as pd

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.decomposition import PCA

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold

import matplotlib.pyplot as plt

# data

In [4]:
# Print the kernel's working directory; the dataset is loaded below through a
# relative path, so it only resolves when the kernel is started from this folder.
!pwd

/home/brouthen/sid/CNN-HPO-Carbon/src/surrogate_modeling/rbf/playground


In [80]:
# Surrogate-model dataset, located four directory levels above the working directory.
DATASET_CSV = '../../../../dataset/surrogate.csv'
df = pd.read_csv(DATASET_CSV)

# process: one-hot encode the categorical columns

In [81]:
# Names of the object-dtype columns; these are the ones that get one-hot encoded.
cat_cols = df.select_dtypes(include='object').columns

In [82]:
# Replace each column listed in cat_cols with its one-hot indicator columns.
# NOTE(review): this overwrites df, so the cell is not idempotent — re-running it
# without reloading the CSV will presumably fail because the cat_cols columns no
# longer exist in df. Confirm, or reload df before re-running.
df = pd.get_dummies(df, columns=cat_cols)

# split and preprocess (variance filter, scaling, PCA)

In [83]:
# Feature matrix: every column except the two accuracy columns.
accuracy_cols = ['train_accuracy', 'test_accuracy']
X = df.drop(columns=accuracy_cols)

# y (test accuracy) is the regression target; y_ (train accuracy) is kept alongside
# it for the summary statistics.
y, y_ = df['test_accuracy'], df['train_accuracy']

In [85]:
# Sanity check: (rows, columns) of the feature matrix and length of the target.
X.shape, y.shape

((63, 123), (63,))

In [9]:
# Mean and standard deviation of test accuracy (y) and train accuracy (y_).
# The spread of y is a handy yardstick when reading the RMSE values further down.
y.mean(), y.std(), y_.mean(), y_.std()

(np.float64(71.28746031746032),
 5.231961936410191,
 np.float64(72.69552380952379),
 5.741509621809161)

In [67]:
# Hold out 10% of the rows for validation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

In [200]:
# Optional override: train AND validate on the complete dataset.
#
# This cell's execution count (200) shows it was run out of order. Left
# unconditional, a "Restart & Run All" would silently replace the hold-out split
# created by train_test_split, and every validation metric further down would be
# computed on rows the model was fitted on. It is therefore disabled by default.
# Set the flag to True only to fit a final surrogate on all rows, and read the
# resulting "validation" numbers as in-sample figures.
FIT_ON_FULL_DATA = False

if FIT_ON_FULL_DATA:
    X_train, y_train = X, y
    X_val, y_val = X, y

In [68]:
# Drop near-constant features (variance threshold 0.01). The filter is fitted on
# the training rows only and then applied unchanged to the validation rows.
selector = VarianceThreshold(threshold=0.01).fit(X_train)
X_train, X_val = selector.transform(X_train), selector.transform(X_val)

In [69]:
# Standardise the features using statistics estimated from the training rows;
# the validation rows reuse those same statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

In [70]:
# Project onto principal components. A float n_components asks scikit-learn to keep
# as many components as are needed to explain that fraction (here 90%) of the
# variance. The projection is fitted on the training rows and reused for validation.
pca = PCA(n_components=0.9)
X_train = pca.fit_transform(X_train)
X_val = pca.transform(X_val)

In [32]:
# Shapes of the preprocessed train and validation matrices: (rows, retained features).
# NOTE(review): the stored output of this cell carries execution count 32, older than
# the preprocessing cells (67-70), so it may not reflect the current pipeline — re-run.
X_train.shape, X_val.shape

((50, 106), (13, 106))

# model: Gaussian process regressor (constant × RBF kernel)

In [71]:
# Kernel: a constant (signal-variance) factor times an RBF with a single shared
# length scale. Both hyperparameters are optimised within the bounds [1e-3, 1e3].
kernel = C(1, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))

# alpha adds 1e-3 to the diagonal of the training kernel matrix; normalize_y lets
# the regressor normalise the target internally. The hyperparameter optimiser is
# restarted 200 times from randomly drawn starting points, so random_state is pinned:
# without it the fitted kernel, and every metric derived from it, can change between
# runs of the notebook.
gpr = GaussianProcessRegressor(
    kernel=kernel,
    alpha=1e-3,
    normalize_y=True,
    n_restarts_optimizer=200,
    random_state=42,
)

gpr.fit(X_train, y_train)
print("Fitted kernel:", gpr.kernel_)

Fitted kernel: 1**2 * RBF(length_scale=4.08)


In [72]:
# Posterior mean and standard deviation for every validation row, followed by
# the root-mean-squared error of the means against the true targets.
y_pred_val, y_std_val = gpr.predict(X_val, return_std=True)
mse = mean_squared_error(y_val, y_pred_val)
rmse_val = np.sqrt(mse)
print(f"\nValidation RMSE: {rmse_val:.4f}\n")


Validation RMSE: 4.3904



In [39]:
# Display the predicted means for the validation rows.
y_pred_val

array([71.55060835, 69.0423331 , 73.06658503, 70.53660801, 70.96998976,
       71.88627471, 70.9139938 , 70.50995032, 71.98804694, 70.94118538,
       72.8919247 , 70.82261152, 73.08423302])

In [78]:
# Full list of feature names after one-hot encoding (long output).
X.columns.to_list()

['initial_conv_hp_channels',
 'initial_conv_hp_kernel_size',
 'initial_conv_hp_stride',
 'block_0_expanded_channels',
 'block_0_use_se',
 'block_0_se_squeeze_factor',
 'block_0_channels',
 'block_0_kernel_size',
 'block_0_stride',
 'block_1_expanded_channels',
 'block_1_use_se',
 'block_1_se_squeeze_factor',
 'block_1_channels',
 'block_1_kernel_size',
 'block_1_stride',
 'block_2_expanded_channels',
 'block_2_use_se',
 'block_2_se_squeeze_factor',
 'block_2_channels',
 'block_2_kernel_size',
 'block_2_stride',
 'block_3_expanded_channels',
 'block_3_use_se',
 'block_3_se_squeeze_factor',
 'block_3_channels',
 'block_3_kernel_size',
 'block_3_stride',
 'block_4_expanded_channels',
 'block_4_use_se',
 'block_4_se_squeeze_factor',
 'block_4_channels',
 'block_4_kernel_size',
 'block_4_stride',
 'block_5_expanded_channels',
 'block_5_use_se',
 'block_5_se_squeeze_factor',
 'block_5_channels',
 'block_5_kernel_size',
 'block_5_stride',
 'block_6_expanded_channels',
 'block_6_use_se',
 'blo

In [43]:
# Two-sided standard-normal critical values. For a confidence level c the entry is
# z = Phi^-1((1 + c) / 2) rounded to three decimals, so that mean ± z * std covers
# a fraction c of a Gaussian.
confidence_z = {
    0.40: 0.524,   # 40%
    0.50: 0.674,   # 50%
    0.60: 0.842,   # 60%  (Phi^-1(0.8) = 0.8416 rounds to 0.842, not 0.841)
    0.70: 1.036,   # 70%
    0.80: 1.282,   # 80%
    0.85: 1.440,   # 85%
    0.90: 1.645,   # 90%
    0.95: 1.960,   # 95%
    0.98: 2.326,   # 98%
    0.99: 2.576,   # 99%
    0.999: 3.291,  # 99.9%
    0.9999: 3.891  # 99.99%
}

In [73]:
confidence = 0.9
z = confidence_z[confidence]  # two-sided critical value for the chosen level
print(f"Confidence level: {confidence*100:.1f}%")

# One batched predict() call replaces a separate call per reshaped row; it returns
# the same per-row means and standard deviations.
val_means, val_stds = gpr.predict(X_val, return_std=True)

for y_true, mean, std in zip(y_val, val_means, val_stds):
    ci = z * std  # half-width of the interval around the predicted mean
    print(f"Val Sample:  True = {y_true:.3f}, Predicted = {mean:.3f}, CI ≈ [{mean - ci:.3f}, {mean + ci:.3f}]")

Confidence level: 90.0%
Val Sample:  True = 67.440, Predicted = 71.937, CI ≈ [63.458, 80.415]
Val Sample:  True = 63.400, Predicted = 70.497, CI ≈ [61.966, 79.027]
Val Sample:  True = 69.890, Predicted = 72.132, CI ≈ [63.596, 80.667]
Val Sample:  True = 71.760, Predicted = 70.832, CI ≈ [62.299, 79.365]
Val Sample:  True = 65.660, Predicted = 70.921, CI ≈ [62.403, 79.439]
Val Sample:  True = 73.410, Predicted = 71.046, CI ≈ [62.623, 79.469]
Val Sample:  True = 64.920, Predicted = 69.939, CI ≈ [61.719, 78.158]


In [74]:
from scipy.stats import norm


def expected_improvement(mean, std, y_best, xi=0.01):
    """Expected improvement of a Gaussian prediction over the incumbent (maximisation).

    Args:
        mean: Predicted mean at the candidate point.
        std: Predicted standard deviation at the candidate point.
        y_best: Best target value observed so far.
        xi: Margin by which the prediction has to beat the incumbent.

    Returns:
        E[max(f - y_best - xi, 0)] for f ~ N(mean, std**2).
    """
    improvement = mean - y_best - xi
    if std == 0:
        # Degenerate prediction with no uncertainty: the expectation collapses to the
        # deterministic improvement, which is positive whenever the mean beats the
        # incumbent by more than xi (returning a flat 0.0 would hide that case).
        return max(improvement, 0.0)
    z_ei = improvement / std
    return improvement * norm.cdf(z_ei) + std * norm.pdf(z_ei)


confidence = 0.9
z = confidence_z[confidence]
print(f"Confidence level: {confidence*100:.1f}%")

kappa = z                 # UCB exploration weight, tied to the CI critical value
xi = 0.01                 # EI exploration margin
y_best = max(y_train)     # incumbent: best accuracy among the training rows

# One batched predict() call instead of one call per reshaped row.
val_means, val_stds = gpr.predict(X_val, return_std=True)

for y_true, mean, std in zip(y_val, val_means, val_stds):
    ci = z * std
    ucb = mean + kappa * std
    ei = expected_improvement(mean, std, y_best, xi)

    print(
        f"True: {y_true:.3f} | Pred: {mean:.3f} | CI ±{ci:.3f} → [{mean - ci:.3f}, {mean + ci:.3f}]"
        f" | UCB: {ucb:.3f} | EI: {ei:.6f}"
    )

Confidence level: 90.0%
True: 67.440 | Pred: 71.937 | CI ±8.479 → [63.458, 80.415] | UCB: 80.415 | EI: 0.116721
True: 63.400 | Pred: 70.497 | CI ±8.531 → [61.966, 79.027] | UCB: 79.027 | EI: 0.060557
True: 69.890 | Pred: 72.132 | CI ±8.535 → [63.596, 80.667] | UCB: 80.667 | EI: 0.131624
True: 71.760 | Pred: 70.832 | CI ±8.533 → [62.299, 79.365] | UCB: 79.365 | EI: 0.071532
True: 65.660 | Pred: 70.921 | CI ±8.518 → [62.403, 79.439] | UCB: 79.439 | EI: 0.073963
True: 73.410 | Pred: 71.046 | CI ±8.423 → [62.623, 79.469] | UCB: 79.469 | EI: 0.073919
True: 64.920 | Pred: 69.939 | CI ±8.219 → [61.719, 78.158] | UCB: 78.158 | EI: 0.035914


In [76]:
from sklearn.linear_model import Ridge, Lasso, BayesianRidge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Seed for the stochastic estimators (same value as the train/validation split).
# Without it the tree-based models give a different RMSE on every run, which makes
# the comparison table non-reproducible.
SEED = 42

# Baseline regressors with default hyperparameters.
# Optional: add more models here
regressors = {
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "BayesianRidge": BayesianRidge(),
    "RandomForest": RandomForestRegressor(random_state=SEED),
    "GradientBoosting": GradientBoostingRegressor(random_state=SEED),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
    "DecisionTree": DecisionTreeRegressor(random_state=SEED),
}

print("Benchmarking regressors...\n")

# (name, validation RMSE) pairs, in the order the models are evaluated.
results = []

for name, model in regressors.items():
    # Wrap with a pipeline that includes feature scaling.
    # NOTE(review): X_train / X_val have already been standardised and PCA-projected
    # by the preprocessing cells, so this scaler rescales every principal component
    # to unit variance — confirm that this second scaling is intended.
    pipe = make_pipeline(StandardScaler(), model)
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    results.append((name, rmse))
    print(f"{name:<18} RMSE: {rmse:.4f}")


Benchmarking regressors...

Ridge              RMSE: 4.5738
Lasso              RMSE: 4.5055
BayesianRidge      RMSE: 4.8840
RandomForest       RMSE: 4.1550
GradientBoosting   RMSE: 4.4979
SVR                RMSE: 5.8962
KNN                RMSE: 4.1741
DecisionTree       RMSE: 5.6034
