In [9]:
import numpy as np
from sklearn.metrics import matthews_corrcoef
#! pip install scikit-optimize
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import PredefinedSplit
from sklearn.svm import SVC
from sklearn.metrics import matthews_corrcoef

In [8]:
# Two-step model: standardise the features, then classify with an SVM.
# The step names ("scaler", "svm") are the prefixes used for hyper-parameter
# keys such as "svm__C" when tuning this pipeline.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svm", SVC()),
    ]
)

In [None]:
# One sub-space per kernel family (RBF and polynomial). Each entry is a
# (space, n_iter) pair: the dict describes the dimensions to explore and the
# integer is the number of search iterations spent on that sub-space.
# C and gamma are sampled on a log scale because they span many decades.
search_spaces = [
    (
        dict(
            svm__kernel=Categorical(["rbf"]),
            svm__C=Real(low=1e-5, high=1e5, prior="log-uniform"),
            svm__gamma=Real(low=1e-6, high=1e4, prior="log-uniform"),
        ),
        100,
    ),
    (
        dict(
            svm__kernel=Categorical(["poly"]),
            svm__C=Real(low=1e-3, high=1e3, prior="log-uniform"),
            svm__gamma=Real(low=1e-4, high=1e1, prior="log-uniform"),
            svm__degree=Integer(low=2, high=5),
        ),
        100,
    ),
]

# The search accepts a single X/y pair, so stack the training and validation
# sets and describe the boundary between them with an explicit fold index.
x_all = np.concatenate((x_train, x_val))
y_all = np.concatenate((y_train, y_val))

# Per-row fold label, in the same order as x_all:
#   -1 -> row is never placed in a test fold (training rows)
#    0 -> row belongs to test fold 0 (validation rows)
test_fold = np.concatenate(
    (
        np.full(len(x_train), -1, dtype=int),
        np.zeros(len(x_val), dtype=int),
    )
)

# Predefined split built from the fold labels: train on the training rows,
# score on the validation rows.
cv = PredefinedSplit(test_fold)

# Bayesian hyper-parameter search over both kernel sub-spaces, scored with the
# Matthews correlation coefficient on the predefined validation fold.
# refit=False: the search only reports the best configuration and does not
# train a final estimator itself.
# NOTE(review): with a single predefined split and the default n_points, each
# iteration appears to evaluate just one candidate, so n_jobs=-1 may not
# actually parallelise anything -- confirm against the skopt documentation.
bayes = BayesSearchCV(
    pipeline,
    search_spaces,
    cv=cv,
    scoring="matthews_corrcoef",
    refit=False,
    n_jobs=-1,
    random_state=42,
    verbose=0,
)

# Run the search on the stacked data; `cv` decides which rows are used for
# fitting and which for scoring.
bayes.fit(x_all, y_all)

# Report the winning configuration and its validation score.
print("\n[Best parameters found:] ", bayes.best_params_, sep="\n")
print(f"[Best MCC @val] {bayes.best_score_:.4f}")

# Final refit: apply the best hyper-parameters to the shared pipeline object
# (this mutates `pipeline` in place) and train it on the training set only.
pipeline.set_params(**bayes.best_params_)
pipeline.fit(x_train, y_train)