In [1]:

# manually tune svm model hyperparameters using skopt on the ionosphere dataset
from numpy import mean
from pandas import read_csv
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC
from skopt.space import Integer
from skopt.space import Real
from skopt.space import Categorical
from skopt.utils import use_named_args
from skopt import gp_minimize
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import loguniform
 
 # load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv'
dataframe = read_csv(url, header=None)
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]



In [12]:
# define the space of hyperparameters to search
search_space = list()
search_space.append(Real(1e-6, 100.0, 'log-uniform', name='C'))
search_space.append(Categorical(['linear', 'poly', 'rbf', 'sigmoid'], name='kernel'))
search_space.append(Integer(1, 5, name='degree'))
search_space.append(Real(1e-6, 100.0, 'log-uniform', name='gamma'))
 
# define the function used to evaluate a given configuration
@use_named_args(search_space)
def evaluate_model(**params):
	# configure the model with specific hyperparameters
	model = SVC()
	model.set_params(**params)
	# define test harness
	cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
	# calculate 5-fold cross validation
	result = cross_val_score(model, X, y, cv=cv, n_jobs=-1, scoring='accuracy')
	# calculate the mean of the scores
	estimate = mean(result)
	# convert from a maximizing score to a minimizing score
	return 1.0 - estimate
 


# perform optimization
result = gp_minimize(evaluate_model, search_space)
# summarizing finding:
print('Best Accuracy: %.3f' % (1.0 - result.fun))
print('Best Parameters: %s' % (result.x))

Best Accuracy: 0.943
Best Parameters: [73.39347383699788, 'rbf', 5, 0.011904656102972951]


In [4]:
params = dict()

params['C'] = loguniform(1e-6, 100.0)
params['gamma'] = loguniform(1e-6, 100.0)
params['degree'] = (1,5)
params['kernel'] = ['linear', 'poly', 'rbf', 'sigmoid']
# define evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define the search
model = SVC()
search = RandomizedSearchCV(model, params, n_iter=500, scoring='accuracy', n_jobs=-1, cv=cv, random_state=1)
# perform the search
search.fit(X, y)
# report the best result
print(search.best_score_)
print(search.best_params_)

0.9534391534391534
{'C': 18.80813079792687, 'degree': 1, 'gamma': 0.038891546143828476, 'kernel': 'rbf'}


In [7]:
params = dict()

params['C'] = [1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1,10,100]
params['gamma'] = [1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1,10,100]
params['degree'] = (1,5)
params['kernel'] = ['linear', 'poly', 'rbf', 'sigmoid'] # 9 * 9 * 2 * 4 = 648
# define evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define the search
model = SVC()
search = GridSearchCV(model, params, scoring='accuracy', n_jobs=-1, cv=cv)
# perform the search
search.fit(X, y)
# report the best result
print(search.best_score_)
print(search.best_params_)

0.942010582010582
{'C': 10, 'degree': 1, 'gamma': 0.1, 'kernel': 'rbf'}
