# Develop a program to perform image classification on scikit learn’s digits dataset. Classify the images from the data set using the three classifiers mentioned above and compare the classification results.

## Create samples

In [1]:
from sklearn import datasets, neighbors, linear_model, svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
X_digits, y_digits = datasets.load_digits(return_X_y=True)
X_digits = X_digits / X_digits.max()
n_samples = len(X_digits)

## Split the data, here we use 25%

In [2]:
X_train, X_test, y_train, y_test = train_test_split(X_digits, y_digits, test_size=0.25)

## Do KNN classification (KNN has the highest accuracy.)

In [3]:
knn = neighbors.KNeighborsClassifier(n_neighbors=3)  # accuracy highest when n_neighbors = 3
logistic = linear_model.LogisticRegression(max_iter=1000)
knn_model = knn.fit(X_train, y_train)

## Do prediction based on KNN

In [4]:
knn_y_predict = knn_model.predict(X_test)
knn_score = 100 * accuracy_score(y_test, knn_y_predict)
knn_avg_recall = 100 * recall_score(y_test, knn_y_predict, average='macro')

## Do SVM classification

In [5]:
svm_model = svm.SVC(decision_function_shape='ovo')
svm_model.fit(X_train, y_train)

SVC(decision_function_shape='ovo')

## Do predictions based on SVM

In [6]:
svm_y_predict = svm_model.predict(X_test)
svm_score = 100 * accuracy_score(y_test, svm_y_predict)
svm_avg_recall = 100 * recall_score(y_test, svm_y_predict, average='macro')

## Do Random Forest Classification

In [7]:
rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_train, y_train)

RandomForestClassifier()

## Do predictions based on Random Forest

In [8]:
rf_y_predict = rf_model.predict(X_test)
rf_score = 100 * accuracy_score(y_test, rf_y_predict)
rf_avg_recall = 100 * recall_score(y_test, rf_y_predict, average='macro')

## Print Scores

In [9]:
print('KNN score: %.4f %%' % knn_score)
print('KNN Recall score: %.4f %%' % knn_avg_recall)
print('SVM score: %.4f %%' % svm_score)
print('SVM Recall score: %.4f %%' % svm_avg_recall)
print('Random Forest score: %.4f %%' % rf_score)
print('Random Forest Recall score: %.4f %%' % rf_avg_recall)

KNN score: 99.3333 %
KNN Recall score: 99.3138 %
SVM score: 98.6667 %
SVM Recall score: 98.7009 %
Random Forest score: 98.0000 %
Random Forest Recall score: 97.9355 %


## Print the confusion matrix of KNN

In [10]:
knn_confusion_matrix = confusion_matrix(y_test, knn_y_predict)
print(knn_confusion_matrix)

[[49  0  0  0  0  0  0  0  0  0]
 [ 0 40  0  0  0  0  0  0  0  0]
 [ 0  0 46  0  0  0  0  0  0  0]
 [ 0  0  0 47  0  0  0  0  0  0]
 [ 0  0  0  0 36  0  0  0  0  0]
 [ 0  0  0  0  0 51  0  0  0  1]
 [ 0  0  0  0  0  0 48  0  0  0]
 [ 0  0  0  0  0  0  0 51  0  0]
 [ 0  0  0  0  0  1  0  0 39  0]
 [ 0  0  0  1  0  0  0  0  0 40]]
