In [1]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from skopt.utils import use_named_args
from skopt.space import Categorical
from skopt.space import Integer
from skopt import BayesSearchCV
from skopt import gp_minimize
from skopt.space import Real
from sklearn.svm import SVC
from numpy import mean
from numpy import std
import pandas as pd

In [2]:
# Ionosphere dataset, read straight from the raw CSV on GitHub.
# header = None: the first line is read as data, so columns get integer labels.
URL = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv'
df  = pd.read_csv(filepath_or_buffer = URL, header = None)

# Preview the first five rows.
df.head(n = 5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1.0,0.0376,...,-0.51171,0.41078,-0.46168,0.21266,-0.3409,0.42267,-0.54487,0.18641,-0.453,g
1,1,0,1.0,-0.18829,0.93035,-0.36156,-0.10868,-0.93597,1.0,-0.04549,...,-0.26569,-0.20468,-0.18401,-0.1904,-0.11593,-0.16626,-0.06288,-0.13738,-0.02447,b
2,1,0,1.0,-0.03365,1.0,0.00485,1.0,-0.12062,0.88965,0.01198,...,-0.4022,0.58984,-0.22145,0.431,-0.17365,0.60436,-0.2418,0.56045,-0.38238,g
3,1,0,1.0,-0.45161,1.0,1.0,0.71216,-1.0,0.0,0.0,...,0.90695,0.51613,1.0,1.0,-0.20099,0.25682,1.0,-0.32382,1.0,b
4,1,0,1.0,-0.02401,0.9414,0.06531,0.92106,-0.23255,0.77152,-0.16399,...,-0.65158,0.1329,-0.53206,0.02431,-0.62197,-0.05707,-0.59573,-0.04608,-0.65697,g


In [3]:
# Split the table into a feature matrix (all but the last column) and a
# label vector (the last column).
# The last column holds the string class label ('g' / 'b'), so the frame
# converts to an object-dtype array; cast the features to float explicitly
# instead of handing an object array to the estimator.
data = df.to_numpy()
x    = data[:, :-1].astype(float)
y    = data[:, -1]

x.shape, y.shape

((351, 34), (351,))

In [4]:
# Baseline: accuracy of an SVC with default hyperparameters under
# repeated stratified k-fold CV (10 splits x 3 repeats, fixed seed).
cv    = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 3, random_state = 99)
model = SVC()

m_scores = cross_val_score(
    model, x, y,
    cv          = cv,
    scoring     = 'accuracy',
    n_jobs      = -1,
    error_score = 'raise',   # raise on a failed fit instead of recording a placeholder score
)

f'Accuracy | mean : {mean(m_scores):.3f}, std : {std(m_scores):.3f}'

'Accuracy | mean : 0.939, std : 0.031'

In [5]:
# Dimensions explored by gp_minimize; each `name` matches an SVC keyword
# so the values can be forwarded to the estimator by name.
search_space = [
    Real(1e-6, 100.0, 'log-uniform', name = 'C'),
    Categorical(['linear', 'poly', 'rbf', 'sigmoid'], name = 'kernel'),
    Integer(1, 5, name = 'degree'),
    Real(1e-6, 100.0, 'log-uniform', name = 'gamma'),
]

In [6]:
@use_named_args(search_space)
def evaluate_model(**params):
    """Objective function: 1 - mean cross-validated accuracy of an SVC.

    `params` are the keyword arguments produced by `use_named_args` from the
    dimensions in `search_space`; they are applied to a fresh SVC. The model
    is scored on the notebook-level `x`, `y` with repeated stratified k-fold
    CV (10 splits x 3 repeats, random_state = 99).

    Returns 1.0 minus the mean accuracy, so that a minimiser maximises accuracy.
    """
    folds     = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 3, random_state = 99)
    estimator = SVC().set_params(**params)

    accuracies = cross_val_score(estimator, x, y, scoring = 'accuracy', cv = folds, n_jobs = -1)

    return 1.0 - mean(accuracies)

In [7]:
# Gaussian-process Bayesian optimisation over `search_space`, minimising
# `evaluate_model` (which returns 1 - mean CV accuracy).
# random_state is fixed so the optimiser's sampling is seeded and the
# reported optimum can be reproduced on a fresh kernel.
result = gp_minimize(evaluate_model, search_space, random_state = 99)
print(f'  Best Accuracy : {1.0 - result.fun:.3f}')
print(f'Best Parameters : {result.x}')

  Best Accuracy : 0.954
Best Parameters : [4.995963927977056, 'rbf', 1, 0.35501717588313997]


In [8]:
# Search space for BayesSearchCV, keyed by SVC hyperparameter name.
# Same ranges and choices as the Real / Integer / Categorical dimensions
# declared for gp_minimize, written in tuple / list shorthand.
params = {
    'C'      : (1e-6, 100.0, 'log-uniform'),
    'gamma'  : (1e-6, 100.0, 'log-uniform'),
    'degree' : (1, 5),
    'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'],
}

In [9]:
# Same tuning problem solved with BayesSearchCV, scored with the same
# repeated stratified k-fold scheme used elsewhere in the notebook.
cv     = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 3, random_state = 99)
search = BayesSearchCV(
    estimator     = SVC(),
    search_spaces = params,
    scoring       = 'accuracy',   # explicit, matching the metric used in the gp_minimize objective
    n_jobs        = -1,
    cv            = cv,
    random_state  = 99,           # seed the search so the result is reproducible on re-run
)
search.fit(x, y)

print(f'  Best Accuracy :  {search.best_score_:.3f}')
print(f'Best Parameters : {search.best_params_}')

  Best Accuracy :  0.954
Best Parameters : OrderedDict([('C', 15.462966286051001), ('degree', 2), ('gamma', 0.03961050594881091), ('kernel', 'rbf')])
