# Getting the data

In [None]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

x = mnist['data']
y = mnist['target']

In [2]:
x_train = x[:60000]
x_test = x[60000:]
y_train = y[:60000]
y_test = y[60000:]

# Trying LinearSVC

In [3]:
from sklearn.svm import LinearSVC

lin_clf = LinearSVC()
lin_clf.fit(x_train, y_train)



In [4]:
from sklearn.metrics import accuracy_score

y_hat = lin_clf.predict(x_train)
accuracy_score(y_train, y_hat)

0.8854

In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

x_train_scaled = scaler.fit_transform(x_train)
lin_clf.fit(x_train_scaled, y_train)



In [6]:
y_hat = lin_clf.predict(x_train_scaled)
accuracy_score(y_train, y_hat)

0.9196666666666666

# Trying SVC

In [7]:
from sklearn.svm import SVC
# from sklearn.multiclass import OneVsRestClassifier
svm_clf = SVC(gamma="scale")
svm_clf.fit(x_train_scaled[:10000], y_train[:10000])

In [8]:
y_hat = svm_clf.predict(x_train_scaled)
accuracy_score(y_train, y_hat)

0.9455333333333333

# Implementing ReandomizedSearch for SVC

In [9]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

param_distributions = {"gamma": reciprocal(0.001, 0.1), 
                       "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10,
                                   verbose=2, cv=3)
rnd_search_cv.fit(x_train_scaled[:10000], y_train[:10000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.098916628044561, gamma=0.09543226339113445; total time=  31.2s
[CV] END .....C=4.098916628044561, gamma=0.09543226339113445; total time=  30.9s
[CV] END .....C=4.098916628044561, gamma=0.09543226339113445; total time=  30.3s
[CV] END .....C=4.420375550686728, gamma=0.04574637025517726; total time=  30.5s
[CV] END .....C=4.420375550686728, gamma=0.04574637025517726; total time=  30.2s
[CV] END .....C=4.420375550686728, gamma=0.04574637025517726; total time=  30.0s
[CV] END ..C=1.6937654463190546, gamma=0.0017327348294908403; total time=  10.3s
[CV] END ..C=1.6937654463190546, gamma=0.0017327348294908403; total time=  10.7s
[CV] END ..C=1.6937654463190546, gamma=0.0017327348294908403; total time=   9.9s
[CV] END ...C=1.3813286307758623, gamma=0.004346118312662776; total time=  17.2s
[CV] END ...C=1.3813286307758623, gamma=0.004346118312662776; total time=  17.0s
[CV] END ...C=1.3813286307758623, gamma=0.004346

In [10]:
rnd_search_cv.best_estimator_

In [12]:
rnd_search_cv.best_score_

0.9399999088091172

In [13]:
best_classifier = rnd_search_cv.best_estimator_

best_classifier.fit(x_train_scaled, y_train)

In [14]:
y_hat = best_classifier.predict(x_train_scaled)
accuracy_score(y_train, y_hat)

0.9978333333333333

In [15]:
x_test_scaled = scaler.fit_transform(x_test)

In [16]:
y_hat = best_classifier.predict(x_test_scaled)
accuracy_score(y_test, y_hat)

0.9721