In [1]:
import warnings

warnings.filterwarnings('ignore')

In [2]:
from sklearn.datasets import fetch_openml

X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

In [3]:
X[:5]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [4]:
y[:5]

array(['5', '0', '4', '1', '9'], dtype=object)

In [5]:
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [7]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

linear_svc = LinearSVC(loss='hinge', random_state=42)
linear_svc.fit(scaled_X_train, y_train)

y_pred = linear_svc.predict(scaled_X_train)
accuracy_score(y_train, y_pred)


0.9298333333333333

In [8]:
linear_svc = LinearSVC(random_state=42)
linear_svc.fit(scaled_X_train, y_train)

y_pred = linear_svc.predict(scaled_X_train)
accuracy_score(y_train, y_pred)

0.9214

In [9]:
from sklearn.svm import SVC

svc = SVC(kernel='linear', random_state=42)
svc.fit(scaled_X_train, y_train)

y_pred = svc.predict(scaled_X_train)
accuracy_score(y_train, y_pred)

0.9821

In [10]:
svc = SVC(kernel='rbf', random_state=42)
svc.fit(scaled_X_train, y_train)

y_pred = svc.predict(scaled_X_train)
accuracy_score(y_train, y_pred)

0.9866333333333334

In [11]:
from sklearn.linear_model import SGDClassifier

sgd_clf = SGDClassifier(loss='hinge', random_state=42)
sgd_clf.fit(scaled_X_train, y_train)

y_pred = sgd_clf.predict(scaled_X_train)
accuracy_score(y_train, y_pred)

0.9014166666666666

In [12]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

params = {
    'kernel': ['linear', 'rbf', 'poly'],
    'degree': uniform(1, 10),
    'gamma': reciprocal(0.001, 0.1),
    'C': uniform(1, 10)
}

svc = SVC()
rcv = RandomizedSearchCV(svc, params, n_iter=20, n_jobs=-1, cv=3, random_state=42)
rcv.fit(scaled_X_train, y_train)

RandomizedSearchCV(cv=3, estimator=SVC(), n_iter=20, n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000025D673AFE08>,
                                        'degree': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000025D1A3F27C8>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000025D6745CFC8>,
                                        'kernel': ['linear', 'rbf', 'poly']},
                   random_state=42)

In [13]:
print(f'best score: {rcv.best_score_}')
print(f'best estimator: {rcv.best_estimator_}')

best score: 0.9643666666666667
best estimator: SVC(C=5.234014807063696, degree=4.948815181755697, gamma=0.0038634593707206704,
    kernel='poly')


In [14]:
y_pred = rcv.best_estimator_.predict(scaled_X_train)
accuracy_score(y_train, y_pred)

0.9999833333333333

In [16]:
y_pred = rcv.best_estimator_.predict(scaled_X_test)
accuracy_score(y_test, y_pred)

0.9673