## Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import r2_score, mean_absolute_error
from pathlib import Path

In [2]:
from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events

In [3]:
from sklearn.svm import SVR

## Load data

In [4]:
# Matrix with symbolic feature transformation.
# The first CSV column is the row index that was saved together with the frame
# (it used to show up as an "Unnamed: 0" column). Read it as the index so it
# is not passed on to scaling and model fitting as if it were a feature.
X = pd.read_csv("..//GpTrans_CalHousing.csv", index_col=0)
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,11.175543,10.759103,8.68473,8.87721,10.751828,8.727825,12.896293,8.365159,8.406236,10.715167
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,9.207254,8.69845,7.438368,7.657262,8.689452,7.488576,10.304921,7.063999,7.112613,8.644046
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,11.897622,11.553657,9.063846,9.239798,11.546885,9.102674,13.920885,8.807435,8.847419,11.512753
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,11.553437,11.27075,8.700351,8.866149,11.263807,8.735819,13.616723,8.508264,8.55158,11.228815
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,11.157841,10.947246,8.276987,8.427746,10.940099,8.305442,13.268036,8.158162,8.206642,10.904067


In [5]:
# Regression target: the `target` array of the California housing dataset,
# obtained through scikit-learn's fetch helper.
housing = fetch_california_housing()
y = housing["target"]

In [6]:
# Hold out 30% of the rows as the test partition; the fixed random_state
# keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=4,
)

In [66]:
# Standardize the features. The scaler statistics are learned from the
# training partition only and then applied to both partitions.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Optimization function

In [67]:
def opt_bas(C, epsilon, gamma):
    """Objective function for the Bayesian search over SVR hyperparameters.

    Fits an RBF-kernel SVR on the module-level ``X_train`` / ``y_train`` and
    scores its predictions for ``X_test`` against ``y_test``.

    Args:
        C: value passed to ``SVR`` as ``C``.
        epsilon: value passed to ``SVR`` as ``epsilon``.
        gamma: value passed to ``SVR`` as ``gamma``.

    Returns:
        The negated mean absolute error on the test partition, so that a
        maximizing optimizer drives the error down.

    NOTE(review): the score is computed on the same test partition for every
    candidate, so the selected hyperparameters are tuned to that partition —
    consider a separate validation split or cross-validation.
    """
    svr = SVR(kernel="rbf", C=C, epsilon=epsilon, gamma=gamma)
    svr.fit(X_train, y_train)

    mae = mean_absolute_error(y_test, svr.predict(X_test))

    # Sign flip: lower error must translate into a higher objective value.
    return -mae

## Run optimization

In [68]:
class newJSONLogger(JSONLogger):
    """``JSONLogger`` variant that writes to a ``.jsonl`` file.

    The constructor deliberately skips ``JSONLogger.__init__``: it runs only
    the initializer of the class that follows ``JSONLogger`` in the MRO and
    then sets ``_path`` itself.

    NOTE(review): presumably this is done to avoid whatever the stock
    ``JSONLogger.__init__`` does with an already existing log file — confirm
    against the installed bayes_opt version.
    """

    def __init__(self, path):
        """Initialize the logger for the given log file.

        Args:
            path: target log file (``str`` or path-like). The ``.jsonl``
                suffix is appended unless the name already ends with it.
        """
        self._path = None
        # super(JSONLogger, self) starts the attribute lookup *after*
        # JSONLogger, so JSONLogger.__init__ itself is not executed.
        super(JSONLogger, self).__init__()
        path = str(path)
        # Compare the whole 6-character suffix. The previous check,
        # path[-5:] == ".jsonl", matched a 5-character slice against a
        # 6-character literal and was therefore never true, which produced
        # file names ending in ".jsonl.jsonl".
        self._path = path if path.endswith(".jsonl") else path + ".jsonl"

In [73]:
# Bounded region of parameter space: one (lower, upper) pair per argument
# of opt_bas.
pbounds = {'C': (200, 400), 'epsilon': (0.002, 0.3), 'gamma': (0.02, 0.15)}

# Bayes optimizer instantiation
optimizer = BayesianOptimization(
    f=opt_bas,
    pbounds=pbounds,
    random_state=1,
    verbose=2,
)

# Keep a record of every optimization step in a log file under ./Logs
# (resolved against the current working directory).
log_path = Path().resolve() / "Logs" / "California_mae_let.jsonl"
# Make sure the Logs folder exists: nothing else in this notebook creates it,
# so on a fresh checkout the first log write could fail without this.
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = newJSONLogger(path=str(log_path))
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [74]:
# Run the search with init_points=10 and n_iter=500. The objective opt_bas
# fits an SVR on every evaluation, so expect this cell to take a long time.
optimizer.maximize(
    init_points=10,
    n_iter=500,
)