In [1]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the 'mnist_784' dataset from OpenML (as_frame=False)
# and keep the feature matrix and the labels under separate names.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist['data']
Y = mnist['target']

In [2]:
# Sanity check: dimensions of the feature matrix and of the label vector.
(X.shape, Y.shape)

((70000, 784), (70000,))

In [3]:
# The first 60,000 rows form the training set and the remainder the test set.
# The "mini" set is the first 1,000 training rows; the hyperparameter
# searches further down run on it.
X_train, Y_train = X[:60000], Y[:60000]
X_mini, Y_mini = X[:1000], Y[:1000]
X_test, Y_test = X[60000:], Y[60000:]


In [4]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardise the 1,000-sample subset once; the searches in later cells
# reuse X_mini_scaled.
scaler = StandardScaler()
X_mini_scaled = scaler.fit_transform(X_mini)

# Two sub-grids: polynomial kernels of degree 1-5 and RBF kernels, both
# swept over the same gamma and C values.
halving_params = [
    {
        'kernel': ['poly'],
        'degree': [1, 2, 3, 4, 5],
        'gamma': [0.01, 0.1, 1, 10, 100],
        'C': [0.01, 0.1, 1, 10, 100, 1000],
    },
    {
        'kernel': ['rbf'],
        'gamma': [0.01, 0.1, 1, 10, 100],
        'C': [0.01, 0.1, 1, 10, 100, 1000],
    },
]

halving_model = SVC()

halving_cv = HalvingGridSearchCV(
    estimator=halving_model,
    param_grid=halving_params,
    n_jobs=-1,
    random_state=42,
)
halving_cv.fit(X_mini_scaled, Y_mini)
# Recorded result: {'C': 0.01, 'degree': 1, 'gamma': 1, 'kernel': 'poly'}
halving_cv.best_params_
# Caveat: the best models are all linear, but the small amount of data
# combined with the iterative halving search may favour a linear model.

{'C': 0.01, 'degree': 1, 'gamma': 1, 'kernel': 'poly'}

In [5]:
# `loguniform` was used below without ever being imported, which raises a
# NameError on a fresh kernel; import it here at its first use.
from scipy.stats import loguniform
from sklearn.model_selection import RandomizedSearchCV

# Refine the degree-1 polynomial ("linear") SVC found by the halving search:
# sample C and gamma log-uniformly from ranges chosen around the optimum of
# the wide search, including ties found in its results.
linear_params = {
    'C' : loguniform(0.001, 10),
    'gamma' : loguniform(0.001, 10)
}

linear_model = SVC(kernel='poly', degree=1)

# 10,000 random draws, evaluated in parallel on the scaled 1,000-sample subset.
linear_cv = RandomizedSearchCV(linear_model, linear_params,
                    random_state=42,
                    n_iter=10000,
                    n_jobs=-1)
linear_cv.fit(X_mini_scaled, Y_mini)
linear_cv.best_params_
# {'C': 0.0124801809651176, 'gamma': 0.16470698221724311}

{'C': 0.0124801809651176, 'gamma': 0.16470698221724311}

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Baseline: scaling followed by a degree-1 polynomial SVC with C and gamma
# left unset, trained on the full training split.
default_linear_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', degree=1, random_state=42)),
])

default_linear_predictions = (
    default_linear_pipeline.fit(X_train, Y_train).predict(X_test)
)
default_linear_accuracy = accuracy_score(
    y_true=Y_test, y_pred=default_linear_predictions
)
default_linear_accuracy  # 0.9452

0.9452

In [7]:
# Same degree-1 pipeline, now with the C and gamma values recorded from the
# randomized search on the mini set.
best_linear_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(
        kernel='poly',
        degree=1,
        C=0.0124801809651176,
        gamma=0.16470698221724311,
        random_state=42,
    )),
])

best_linear_predictions = (
    best_linear_pipeline.fit(X_train, Y_train).predict(X_test)
)
best_linear_accuracy = accuracy_score(
    y_true=Y_test, y_pred=best_linear_predictions
)
best_linear_accuracy  # 0.9462

0.9462

In [8]:
from sklearn.model_selection import GridSearchCV

# Coarse RBF search: powers of ten from 10**-3 to 10**3 for both gamma and C.
wide_params = {
    'gamma': [10 ** n for n in range(-3, 4)],
    'C': [10 ** n for n in range(-3, 4)],
}
wide_model = SVC(kernel='rbf')

wide_cv = GridSearchCV(
    estimator=wide_model,
    param_grid=wide_params,
    cv=3,
    n_jobs=-1,
)
wide_cv.fit(X_mini_scaled, Y_mini)
# Recorded result: {'C': 10, 'gamma': 0.001}
wide_cv.best_params_

{'C': 10, 'gamma': 0.001}

In [9]:
# Fine RBF search: log-uniform ranges reaching one order of magnitude above
# and below the optimal hyperparameters of the wide search, including ties
# found in its results.
narrow_params = {
    'gamma': loguniform(1e-4, 1e-2),
    'C': loguniform(1, 10_000),
}
narrow_model = SVC(kernel='rbf')

narrow_cv = RandomizedSearchCV(
    estimator=narrow_model,
    param_distributions=narrow_params,
    n_iter=10000,
    cv=3,
    n_jobs=-1,
    random_state=42,
)
narrow_cv.fit(X_mini_scaled, Y_mini)
# Recorded result: {'C': 10.412771143213734, 'gamma': 0.0005293052314471038}
narrow_cv.best_params_

{'C': 10.412771143213734, 'gamma': 0.0005293052314471038}

In [10]:
# Baseline RBF pipeline (C and gamma left unset), trained on the full
# training split and scored on the test split.
rbf_default_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', random_state=42)),
])

rbf_default_predictions = (
    rbf_default_pipeline.fit(X_train, Y_train).predict(X_test)
)
rbf_default_accuracy = accuracy_score(
    y_true=Y_test, y_pred=rbf_default_predictions
)
rbf_default_accuracy  # 0.966

0.966

In [11]:
# RBF pipeline using the C and gamma values recorded from the narrow
# randomized search.
rbf_best_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(
        kernel='rbf',
        C=10.412771143213734,
        gamma=0.0005293052314471038,
        random_state=42,
    )),
])

rbf_best_predictions = (
    rbf_best_pipeline.fit(X_train, Y_train).predict(X_test)
)
rbf_best_accuracy = accuracy_score(
    y_true=Y_test, y_pred=rbf_best_predictions
)
rbf_best_accuracy  # 0.9716

0.9716

In [12]:
# Randomized search over non-linear polynomial kernels (degree 2-5), with
# gamma and C drawn log-uniformly between 0.001 and 1000.
random_params = {
    'degree': [2, 3, 4, 5],
    'gamma': loguniform(0.001, 1000),
    'C': loguniform(0.001, 1000),
}
random_model = SVC(kernel='poly')

random_cv = RandomizedSearchCV(
    estimator=random_model,
    param_distributions=random_params,
    n_iter=10000,
    cv=3,
    n_jobs=-1,
    random_state=42,
)
random_cv.fit(X_mini_scaled, Y_mini)
# Recorded result:
# {'C': 0.17670169402947947, 'degree': 2, 'gamma': 0.012606912518374066}
random_cv.best_params_

{'C': 0.17670169402947947, 'degree': 2, 'gamma': 0.012606912518374066}

In [13]:
# Degree-2 polynomial pipeline with C=1 and gamma left unset, used as the
# reference point for the tuned polynomial model.
default_poly_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', degree=2, C=1, random_state=42)),
])

default_poly_predictions = (
    default_poly_pipeline.fit(X_train, Y_train).predict(X_test)
)
default_poly_accuracy = accuracy_score(
    y_true=Y_test, y_pred=default_poly_predictions
)
default_poly_accuracy  # 0.9714

0.9714

In [14]:
# Degree-2 polynomial pipeline using the C and gamma values recorded from
# the randomized polynomial search.
poly_best_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(
        kernel='poly',
        degree=2,
        C=0.17670169402947947,
        gamma=0.012606912518374066,
        random_state=42,
    )),
])

poly_best_predictions = (
    poly_best_pipeline.fit(X_train, Y_train).predict(X_test)
)
poly_best_accuracy = accuracy_score(
    y_true=Y_test, y_pred=poly_best_predictions
)
poly_best_accuracy  # 0.9765

0.9765

In [88]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
from scipy.ndimage.interpolation import shift

# Before use, expand the data so that every image appears once per offset:
#   area = (2 * shift + 1) ** 2
#   X_expanded = np.repeat(X, area, axis=0)
#   Y_expanded = np.repeat(Y, area)
# The transformer then translates each copy by a different offset.
class BorderTranslator(BaseEstimator, TransformerMixin):
    """Translate flattened images over a square neighbourhood of offsets.

    Row ``i`` of the input is shifted by offset number ``i % area``, where
    ``area = (2 * shift + 1) ** 2`` and the offsets enumerate every
    (vertical, horizontal) pair in ``[-shift, shift]``, row by row.
    Pixels shifted in from outside the image are filled with 0.

    Parameters
    ----------
    width, height : int
        Image dimensions; every input row must hold ``width * height`` values.
    shift : int, default=1
        Maximum translation, in pixels, along each axis.
    """
    def __init__(self, width, height, shift=1):
        self.width = width
        self.height = height
        self.shift = shift
    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self
    def transform(self, X):
        """Return a float array of the same length as ``X`` with each row translated."""
        X = np.asarray(X)
        side = (2 * self.shift) + 1
        area = side ** 2
        transformed_images = np.zeros((len(X), self.width * self.height))
        for index in range(len(X)):
            # Position of this row inside the side x side grid of offsets.
            row, col = divmod(index % area, side)
            # Centre the grid on "no shift" so offsets span [-shift, shift].
            vertical = row - self.shift
            horizontal = col - self.shift
            # Read the source pixels from X (not from the zeroed output buffer).
            image = X[index].reshape(self.height, self.width)
            shifted_image = shift(image, [vertical, horizontal], cval=0,
                                  order=0, prefilter=False)
            transformed_images[index] = shifted_image.reshape(self.height *
                                                              self.width)
        return transformed_images

In [92]:
def add_shifts(X, Y, width, height, distance = 1):
    """Augment flattened images with every translation up to ``distance`` pixels.

    Each image is repeated ``(2 * distance + 1) ** 2`` times and each copy is
    translated by a different (vertical, horizontal) offset taken from
    ``[-distance, distance]`` on both axes, including the unshifted (0, 0)
    copy. Pixels shifted in from outside the image are filled with 0.

    Parameters
    ----------
    X : array of shape (n_samples, width * height)
        Flattened images.
    Y : array of shape (n_samples,)
        Labels, repeated so they stay aligned with the expanded images.
    width, height : int
        Image dimensions used to reshape each row before shifting.
    distance : int, default=1
        Maximum translation, in pixels, along each axis.

    Returns
    -------
    (expanded_X, expanded_Y)
        Arrays with ``n_samples * (2 * distance + 1) ** 2`` entries.
    """
    side = (2 * distance) + 1
    area = side ** 2
    expanded_X = np.repeat(X, area, axis=0)
    expanded_Y = np.repeat(Y, area)
    for index in range(len(expanded_X)):
        # Position of this copy inside the side x side grid of offsets.
        row, col = divmod(index % area, side)
        # Centre the grid on "no shift"; subtracting a constant 1 was only
        # correct for distance == 1.
        vertical = row - distance
        horizontal = col - distance
        image = expanded_X[index].reshape(height, width)
        shifted_image = shift(image, [vertical, horizontal], cval=0, order=0,
                              prefilter=False)
        expanded_X[index] = shifted_image.reshape(height * width)
    return expanded_X, expanded_Y

In [97]:
# Expand the training split with the shifted copies produced by add_shifts
# (default distance), then train a degree-2 polynomial SVC on the result.
X_train_augmented, Y_train_augmented = add_shifts(
    X_train, Y_train, width=28, height=28
)

augmented_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', degree=2, random_state=42)),
])

augmented_predictions = (
    augmented_pipeline
    .fit(X_train_augmented, Y_train_augmented)
    .predict(X_test)
)
augmented_accuracy = accuracy_score(
    y_true=Y_test, y_pred=augmented_predictions
)
augmented_accuracy  # 0.9845

0.9845

In [107]:
# HalvingRandomSearchCV was used below without being imported, which raises
# a NameError on a fresh kernel. The experimental enabling import must be
# executed before the estimator can be imported.
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingRandomSearchCV

# Degree-2 polynomial pipeline whose C and gamma are tuned on the augmented
# training data.
poly_translate_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', degree=2, random_state=42))
])

# The 'svc__' prefix routes each parameter to the 'svc' pipeline step.
poly_translate_params = {
    'svc__gamma' : loguniform(10 ** -5, 10 ** 5),
    'svc__C' : loguniform(10 ** -5, 10 ** 5),
}

poly_translate_cv = HalvingRandomSearchCV(poly_translate_pipeline, poly_translate_params,
                    random_state=42,
                    n_jobs=-1,
                    cv=2)
poly_translate_cv.fit(X_train_augmented, Y_train_augmented)
poly_translate_cv.best_params_ # {'svc__C': 35371.44537353213, 'svc__gamma': 239.31458473007902}

{'svc__C': 35371.44537353213, 'svc__gamma': 239.31458473007902}

In [119]:
# Score the best estimator from the halving random search on the test split.
best_augmented_predictions = poly_translate_cv.best_estimator_.predict(X_test)
best_augmented_accuracy = accuracy_score(
    y_true=Y_test, y_pred=best_augmented_predictions
)
best_augmented_accuracy  # 0.9845 same as default

0.9845