## Loading the dataset

In [1]:
import pandas as pd
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split

# Read the training table from the working directory; later cells expect it
# to contain a 'label' column alongside the feature columns.
digits = pd.read_csv(filepath_or_buffer="train.csv")

## Splitting the data and training the model

In [2]:
# Separate the target column from the feature columns.
y = digits['label']
x = digits.drop(columns=['label'])

# Row count and a 2-D array copy of the features (one row per sample).
# NOTE(review): `data` is not referenced by any later cell visible here --
# the split below uses the DataFrame `x` -- confirm whether it is still needed.
n_samples = len(digits)
data = x.values.reshape((n_samples, -1))

# With shuffle=False the rows keep their file order: the first half becomes
# the training set and the second half the test set.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, shuffle=False, test_size=0.5)

In [3]:
# Train a support vector classifier with scikit-learn's default
# hyperparameters; fit() returns the estimator itself, so it can be chained.
clf = svm.SVC().fit(X_train, y_train)
clf

SVC()

In [4]:
# Predict a label for every row of the held-out split; the evaluation cells
# below compare these predictions against y_test.
predicted = clf.predict(X=X_test)

## Checking the model's accuracy

In [5]:
# Per-class precision, recall, F1 and support on the held-out split.
report_text = metrics.classification_report(y_test, predicted)
print(f"Classification report for classifier {clf}:\n"
      f"{report_text}\n")

Classification report for classifier SVC():
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      2088
           1       0.98      0.99      0.99      2312
           2       0.97      0.97      0.97      2069
           3       0.97      0.96      0.96      2174
           4       0.96      0.97      0.97      1997
           5       0.97      0.97      0.97      1897
           6       0.98      0.98      0.98      2062
           7       0.97      0.98      0.97      2234
           8       0.97      0.96      0.97      2058
           9       0.96      0.95      0.95      2109

    accuracy                           0.97     21000
   macro avg       0.97      0.97      0.97     21000
weighted avg       0.97      0.97      0.97     21000




In [6]:
# Confusion matrix on the held-out split: rows are the true labels and
# columns are the predicted labels (scikit-learn's convention).
confusion = metrics.confusion_matrix(y_test, predicted)
print(f"Confusion matrix for classifier {clf}:\n"
      f"{confusion}\n")

Confusion matrix for classifier SVC():
[[2061    0    2    1    3    4    9    0    6    2]
 [   0 2284   11    4    3    1    0    4    5    0]
 [   6    2 2006   12    9    3    6   15    9    1]
 [   4    4   22 2080    1   19    2   11   20   11]
 [   6    5    5    0 1934    0    9    1    3   34]
 [   7    2    5   22    5 1833   15    1    4    3]
 [  11    3    3    0    4   12 2026    0    3    0]
 [   3    6    8    0    9    0    0 2179    2   27]
 [   2   13    9    9   10   16    7    4 1981    7]
 [   6    6    3   24   28    5    0   28   13 1996]]



In [7]:
# Overall accuracy on the held-out split. clf.score(X_test, y_test) would run
# SVC inference over the whole test set a second time; reusing `predicted`
# from the prediction cell yields the same mean accuracy without that cost.
print(metrics.accuracy_score(y_test, predicted))

0.9704761904761905


## Saving the model

In [8]:
import pickle

# Serialize the fitted classifier to 'model.sav'. The context manager closes
# (and therefore flushes) the file even if pickling raises, instead of
# leaving the handle open for the garbage collector to clean up.
# NOTE: pickle files can execute arbitrary code when loaded -- only unpickle
# 'model.sav' from a trusted source.
with open('model.sav', 'wb') as model_file:
    pickle.dump(clf, model_file)