In [1]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

In [6]:
X = mnist["data"]
y = mnist["target"]

#dataset is already shuffled so not shuffling again
X_train = X[:60000]
y_train = y[:60000]
X_test = X[60000:]
y_test = y[60000:]

In [7]:
#lets scale the data

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score

lin_svc = LinearSVC(random_state=42)
lin_svc.fit(X_train,y_train)

y_pred = lin_svc.predict(X_train)
accuracy_score(y_train,y_pred)



0.9214

In [9]:
X_train_clipped = X_train[:10000]
y_train_clipped = y_train[:10000]

svm = SVC(random_state=42)
svm.fit(X_train_clipped,y_train_clipped)

y_pred_clipped = svm.predict(X_train_clipped)
accuracy_score(y_train_clipped,y_pred_clipped)

0.9832

Results indicate that we might be better off with non linear classifier

Now let's hypertune the model for gamma and c

In [15]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(0,10)}
rnd_search_cv = RandomizedSearchCV(svm, param_distributions, n_iter=50, verbose=2, cv = 3)
rnd_search_cv.fit(X_train_clipped, y_train_clipped)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] END .....C=3.031790298888458, gamma=0.04096607936594001; total time=  48.7s
[CV] END .....C=3.031790298888458, gamma=0.04096607936594001; total time=  49.4s
[CV] END .....C=3.031790298888458, gamma=0.04096607936594001; total time=  52.9s
[CV] END ....C=5.707981742776754, gamma=0.018387207960123133; total time=  46.9s
[CV] END ....C=5.707981742776754, gamma=0.018387207960123133; total time=  48.3s
[CV] END ....C=5.707981742776754, gamma=0.018387207960123133; total time=  48.3s
[CV] END ....C=0.7894816122764758, gamma=0.09183627566515716; total time=  50.4s
[CV] END ....C=0.7894816122764758, gamma=0.09183627566515716; total time=  49.3s
[CV] END ....C=0.7894816122764758, gamma=0.09183627566515716; total time=  47.9s
[CV] END ...C=0.5520021970802247, gamma=0.002623673901065145; total time=  20.2s
[CV] END ...C=0.5520021970802247, gamma=0.002623673901065145; total time=  20.3s
[CV] END ...C=0.5520021970802247, gamma=0.00262

RandomizedSearchCV(cv=3, estimator=SVC(random_state=42), n_iter=50,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7feb2ddc4f10>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7feb2ddc4fd0>},
                   verbose=2)

In [19]:
rnd_search_cv.best_estimator_

SVC(C=4.756402521709702, gamma=0.001221787193314281, random_state=42)

In [17]:
rnd_search_cv.best_score_

0.9408997588421109

In [20]:
rnd_search_cv.best_estimator_.fit(X_train, y_train)

SVC(C=4.756402521709702, gamma=0.001221787193314281, random_state=42)

In [21]:
y_pred = rnd_search_cv.best_estimator_.predict(X_train)
accuracy_score(y_train,y_pred)

0.9967333333333334

Lets test the model now

In [22]:
y_pred_test = rnd_search_cv.best_estimator_.predict(X_test)
accuracy_score(y_test,y_pred_test)

0.9728