# scikit-learn classification using SVC - recognizing hand-written digits

This lesson uses the [MNIST Digits dataset](course_datasets.md#mnist-digits), loaded with scikit-learn's `datasets.load_digits()`, and classifies the digits with a support vector classifier (`svm.SVC`).

It is based on the [Recognizing hand-written digits](https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html#sphx-glr-auto-examples-classification-plot-digits-classification-py) example in the scikit-learn documentation.


In [1]:
import matplotlib.pyplot as plt

from sklearn import datasets, svm, metrics  # import datasets, classifiers and performance metrics
from sklearn.model_selection import train_test_split    # import train_test_split function

In [None]:
digits = datasets.load_digits()  # load the digits dataset

# Preview the first digit images side by side, each titled with its label.
# zip() stops at the shortest input, so only as many images as there are
# axes (four) are drawn.
_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for ax, image, label in zip(axes, digits.images, digits.target):
    # Axis ticks are left visible here; call ax.set_axis_off() to hide them.
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    ax.set_title(f'Training: {label}')

In [None]:
# Small zip() demo: pair each number with the letter at the same position.
numbers = [1, 2, 3]
letters = ['a', 'b', 'c']
# Other ways to look at the same pairs:
#   list(zip(numbers, letters))            -> [(1, 'a'), (2, 'b'), (3, 'c')]
#   looping over zip(numbers, letters) and printing each pair -> 1 a, 2 b, 3 c
# Here every pair becomes its own one-entry dict.
[{number: letter} for number, letter in zip(numbers, letters)]
    

In [None]:
# Build a small 3x3 grid of numbers and render it as an image.
my_nymber_array = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
# figsize is (width, height): 5 wide by 1 tall.
_, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 1))

# With a 1x1 grid, subplots() returns a single Axes rather than an array, so
# draw on it directly instead of relying on pyplot's implicit current axes.
axes.imshow(my_nymber_array, cmap=plt.cm.gray_r, interpolation='nearest')

In [8]:
# Flatten every image into one row of pixel values: (n_samples, n_features).
n_samples = len(digits.images)
data = digits.images.reshape(n_samples, -1)

# Keep the original sample order (no shuffling) and hold out half for testing.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    digits.target,
    test_size=0.5,
    shuffle=False,
)

# Support vector classifier with gamma=0.001; it is trained in a later cell.
clf = svm.SVC(gamma=0.001)

In [None]:
# Display the shape of each of the four split arrays, in split order.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
# Train the classifier on the training split.
clf.fit(X=X_train, y=y_train)

In [None]:
# Predict a label for every test sample.
predicted = clf.predict(X_test)

# Pair each prediction with its true label: (predicted, actual).
my_list = list(zip(predicted, y_test))

# Show only the pairs where the prediction disagrees with the true label.
[pair for pair in my_list if pair[0] != pair[1]]


In [None]:
# Draw the first four test samples, each titled with the predicted digit.
_, axes = plt.subplots(1, 4, figsize=(10, 3))
for ax, image, prediction in zip(axes, X_test, predicted):
    # Test rows are flat pixel vectors; restore the 8x8 grid before drawing.
    image = image.reshape(8, 8)
    ax.set_axis_off()
    ax.imshow(image, interpolation='nearest', cmap=plt.cm.gray_r)
    ax.set_title(f'Prediction: {prediction}')

In [None]:
# Print the classifier's settings followed by scikit-learn's text
# classification report for the test-set predictions.
print(
    f"Classification report for classifier {clf}:\n"
    f"{metrics.classification_report(y_test, predicted)}\n"
)

In [None]:
# Build the confusion-matrix plot from true vs. predicted test labels.
disp = metrics.ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=predicted,
)
disp.figure_.suptitle("Confusion Matrix")

# Also print the raw matrix as text, then render the figure.
print(disp.confusion_matrix)
plt.show()