In [1]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the 'mnist_784' dataset from OpenML; as_frame=False
# asks for array output instead of a pandas DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist['data']
Y = mnist['target']

In [15]:
# Display the dimensions of the feature matrix and of the label vector.
X.shape, Y.shape

((70000, 784), (70000,))

In [124]:
# Rows 0..59999 form the training split; everything after is the test split.
X_train, Y_train = X[:60000], Y[:60000]
X_test, Y_test = X[60000:], Y[60000:]
# The first 1,000 training rows form the small subset that the
# hyperparameter searches in this notebook are fitted on.
X_mini, Y_mini = X[:1000], Y[:1000]


In [233]:
# enable_halving_search_cv is imported only for its side effect: it must be
# imported before HalvingGridSearchCV can be used.
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.preprocessing import StandardScaler
# SVC is used below (and by later cells) but was never imported, so the
# notebook failed with a NameError on a fresh kernel.
from sklearn.svm import SVC

# Standardise the 1,000-row subset once; the searches in later cells reuse
# X_mini_scaled.
scaler = StandardScaler()
X_mini_scaled = scaler.fit_transform(X_mini)

# Coarse search space: one grid for the polynomial kernel (degrees 1-5) and
# one for the RBF kernel, each over the same gamma and C values.
halving_params = [{
    'kernel' : ['poly'],
    'degree' : [1, 2, 3, 4, 5],
    'gamma' : [0.01, 0.1, 1, 10, 100],
    'C' : [0.01, 0.1, 1, 10, 100, 1000]
    },
    {
    'kernel' : ['rbf'],
    'gamma' : [0.01, 0.1, 1, 10, 100],
    'C' : [0.01, 0.1, 1, 10, 100, 1000]
}]

halving_model = SVC()

# random_state seeds the sample subsampling done in each halving iteration,
# so repeated runs evaluate candidates on the same subsets.
halving_cv = HalvingGridSearchCV(halving_model, halving_params,
                    random_state=42,
                    n_jobs=-1)
halving_cv.fit(X_mini_scaled, Y_mini)

HalvingGridSearchCV(estimator=SVC(), n_jobs=-1,
                    param_grid=[{'C': [0.01, 0.1, 1, 10, 100, 1000],
                                 'degree': [1, 2, 3, 4, 5],
                                 'gamma': [0.01, 0.1, 1, 10, 100],
                                 'kernel': ['poly']},
                                {'C': [0.01, 0.1, 1, 10, 100, 1000],
                                 'gamma': [0.01, 0.1, 1, 10, 100],
                                 'kernel': ['rbf']}],
                    refit=<function _refit_callable at 0x7fc713f11d30>)

In [234]:
# Best parameter combination found by the halving search; the trailing
# comment records the value displayed by an earlier run.
halving_cv.best_params_ # {'C': 0.01, 'degree': 1, 'gamma': 1, 'kernel': 'poly'}

{'C': 0.01, 'degree': 1, 'gamma': 1, 'kernel': 'poly'}

In [236]:
# Display the full per-candidate results of every halving iteration.
halving_cv.cv_results_
# The best models are all linear (degree 1). Caveat: this may be an artifact
# of searching on a small amount of data with iterative halving, which could
# itself favor a linear model.

{'iter': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
 'n_resources': array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
        100, 100, 100, 100

In [241]:
from scipy.stats import loguniform
from sklearn.model_selection import RandomizedSearchCV

# Refine the linear (degree-1 polynomial) model found by the coarse search.
# Both C and gamma are sampled log-uniformly from 0.001 to 10. Per the
# original note, the range is meant to extend about one order of magnitude
# beyond the best coarse-search hyperparameters, ties included.
linear_params = dict(
    C=loguniform(0.001, 10),
    gamma=loguniform(0.001, 10),
)

linear_model = SVC(kernel='poly', degree=1)

# 10,000 seeded random draws, evaluated on all available cores.
linear_cv = RandomizedSearchCV(
    estimator=linear_model,
    param_distributions=linear_params,
    n_iter=10000,
    n_jobs=-1,
    random_state=42,
)
linear_cv.fit(X_mini_scaled, Y_mini)

RandomizedSearchCV(estimator=SVC(degree=1, kernel='poly'), n_iter=10000,
                   n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fc706113700>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fc700ade040>},
                   random_state=42)

In [242]:
# Best C/gamma pair from the linear randomized search; the trailing comment
# records the value displayed by an earlier run.
linear_cv.best_params_ # {'C': 0.0124801809651176, 'gamma': 0.16470698221724311}

{'C': 0.0124801809651176, 'gamma': 0.16470698221724311}

In [202]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Baseline linear model: standardisation followed by a degree-1 polynomial
# SVC with C and gamma left unset, fitted on the full training split.
default_linear_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', degree=1, random_state=42)),
])
default_linear_pipeline.fit(X_train, Y_train)

# Score the baseline on the held-out test split.
default_linear_predictions = default_linear_pipeline.predict(X_test)
default_linear_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=default_linear_predictions,
)
default_linear_accuracy # 0.9452

0.9452

In [256]:
# Tuned linear model: same pipeline shape as the baseline, with C and gamma
# set to the values reported by the linear randomized search.
best_linear_svc = SVC(
    kernel='poly',
    degree=1,
    C=0.0124801809651176,
    gamma=0.16470698221724311,
    random_state=42,
)
best_linear_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', best_linear_svc),
])
best_linear_pipeline.fit(X_train, Y_train)

# Score the tuned model on the held-out test split.
best_linear_predictions = best_linear_pipeline.predict(X_test)
best_linear_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=best_linear_predictions,
)
best_linear_accuracy # 0.9462

0.9462

In [155]:
from sklearn.model_selection import GridSearchCV

# Coarse RBF grid: gamma and C each take the powers of ten from 1e-3 to 1e3.
wide_params = {
    'gamma' : [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'C' : [0.001, 0.01, 0.1, 1, 10, 100, 1000],
}
wide_model = SVC(kernel='rbf')

# Exhaustive 3-fold search over all 49 combinations, using every core.
wide_cv = GridSearchCV(
    estimator=wide_model,
    param_grid=wide_params,
    cv=3,
    n_jobs=-1,
)
wide_cv.fit(X_mini_scaled, Y_mini)

GridSearchCV(cv=3, estimator=SVC(), n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                         'gamma': [0.001, 0.01, 0.1, 1, 10, 100, 1000]})

In [156]:
# Best grid point from the coarse RBF search; the trailing comment records
# the value displayed by an earlier run.
wide_cv.best_params_ # {'C': 10, 'gamma': 0.001}

{'C': 10, 'gamma': 0.001}

In [157]:
# Display the full results table of the coarse RBF grid search.
wide_cv.cv_results_ # gamma of 0.001 and C from 10 to 1000 perform well

{'mean_fit_time': array([0.43465932, 0.48566341, 0.46227829, 0.48460507, 0.4829936 ,
        0.41995541, 0.46915571, 0.47598561, 0.37939469, 0.46089085,
        0.40395236, 0.38387076, 0.43284965, 0.33432929, 0.43362188,
        0.45684234, 0.40517004, 0.45827198, 0.4253455 , 0.420427  ,
        0.4503394 , 0.31417632, 0.46178937, 0.49937288, 0.45593747,
        0.46614695, 0.47317568, 0.37577232, 0.26682242, 0.38175861,
        0.40501928, 0.4188691 , 0.37918154, 0.42034372, 0.40403994,
        0.24015037, 0.44637219, 0.45060158, 0.40969094, 0.43044543,
        0.42467427, 0.37659319, 0.24575631, 0.40368096, 0.42033704,
        0.42370693, 0.36540357, 0.37902204, 0.32799006]),
 'std_fit_time': array([0.02805747, 0.00786345, 0.03067166, 0.02218246, 0.00511053,
        0.04354851, 0.00347268, 0.00324904, 0.06287127, 0.00266559,
        0.0759752 , 0.05611864, 0.00664532, 0.06801612, 0.00803534,
        0.00235072, 0.01498422, 0.00483374, 0.02551191, 0.03238777,
        0.00207819, 0.004

In [181]:
# Refine the RBF search around the coarse-grid optimum. The ranges extend one
# order of magnitude above and below the well-performing region seen in the
# wide search (gamma = 0.001, C from 10 to 1000), ties included.
narrow_params = {
    'gamma' : loguniform(.0001, 0.01),
    'C' : loguniform(1, 10000)
}
narrow_model = SVC(kernel='rbf')

# random_state makes the 10,000 sampled candidates repeatable, in line with
# the other randomized searches in this notebook. Without it each run draws
# different candidates and can report different best parameters.
narrow_cv = RandomizedSearchCV(narrow_model, narrow_params,
                    random_state=42,
                    n_jobs=-1,
                    cv=3,
                    n_iter=10000)
narrow_cv.fit(X_mini_scaled, Y_mini)

RandomizedSearchCV(cv=3, estimator=SVC(), n_iter=10000, n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fc71864b910>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fc715ba0a30>})

In [182]:
# Best C/gamma pair from the narrow RBF search; the trailing comment records
# the value displayed by an earlier run.
narrow_cv.best_params_ # {'C': 9.929441284906707, 'gamma': 0.0005208949079636633}

{'C': 9.929441284906707, 'gamma': 0.0005208949079636633}

In [230]:
# Baseline RBF model: standardisation followed by an RBF-kernel SVC with C and
# gamma left unset, fitted on the full training split.
rbf_default_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', random_state=42)),
])
rbf_default_pipeline.fit(X_train, Y_train)

# Score the baseline on the held-out test split.
rbf_default_predictions = rbf_default_pipeline.predict(X_test)
rbf_default_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=rbf_default_predictions,
)
rbf_default_accuracy # 0.966

0.966

In [185]:
# Tuned RBF model: C and gamma are set to the values reported by the narrow
# randomized search.
rbf_best_svc = SVC(
    kernel='rbf',
    C=9.929441284906707,
    gamma=0.0005208949079636633,
    random_state=42,
)
rbf_best_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', rbf_best_svc),
])
rbf_best_pipeline.fit(X_train, Y_train)

# Score the tuned model on the held-out test split.
rbf_best_predictions = rbf_best_pipeline.predict(X_test)
rbf_best_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=rbf_best_predictions,
)
rbf_best_accuracy # 0.9713

0.9713

In [227]:
# Search space for non-linear polynomial kernels: degrees 2 through 5, with
# gamma and C each sampled log-uniformly from 0.001 to 1000.
random_params = dict(
    degree=[2, 3, 4, 5],
    gamma=loguniform(0.001, 1000),
    C=loguniform(0.001, 1000),
)
random_model = SVC(kernel='poly')

# 10,000 seeded random draws with 3-fold cross-validation on every core.
random_cv = RandomizedSearchCV(
    estimator=random_model,
    param_distributions=random_params,
    n_iter=10000,
    cv=3,
    n_jobs=-1,
    random_state=42,
)
random_cv.fit(X_mini_scaled, Y_mini)

RandomizedSearchCV(cv=3, estimator=SVC(kernel='poly'), n_iter=10000, n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fc6f7f1dd90>,
                                        'degree': [2, 3, 4, 5],
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fc700a36eb0>},
                   random_state=42)

In [228]:
# Best polynomial-kernel parameters from the randomized search; the trailing
# comment records the value displayed by an earlier run.
random_cv.best_params_ # {'C': 0.17670169402947947, 'degree': 2, 'gamma': 0.012606912518374066}

{'C': 0.17670169402947947, 'degree': 2, 'gamma': 0.012606912518374066}

In [216]:
# Baseline for the quadratic kernel: degree is fixed at 2 (the degree picked
# by the randomized search), C is set to 1 and gamma is left unset.
default_poly_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', degree=2, C=1, random_state=42)),
])
default_poly_pipeline.fit(X_train, Y_train)

# Score the baseline on the held-out test split.
default_poly_predictions = default_poly_pipeline.predict(X_test)
default_poly_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=default_poly_predictions,
)
default_poly_accuracy # 0.9714

0.9714

In [226]:
# Tuned quadratic model: degree, C and gamma are set to the values reported by
# the polynomial randomized search.
poly_best_svc = SVC(
    kernel='poly',
    degree=2,
    C=0.17670169402947947,
    gamma=0.012606912518374066,
    random_state=42,
)
poly_best_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', poly_best_svc),
])
poly_best_pipeline.fit(X_train, Y_train)

# Score the tuned model on the held-out test split.
poly_best_predictions = poly_best_pipeline.predict(X_test)
poly_best_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=poly_best_predictions,
)
poly_best_accuracy # 0.9765

0.9765