
# Classification of hand-written digits using scikit-learn


This example shows how to classify hand-written digits using scikit-learn.
It is adapted from the official scikit-learn example:

https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html



Import the required Python packages

In [None]:
from sklearn import datasets, svm, metrics
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression 
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier 
from sklearn.svm import SVC 
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


Load the dataset

In [None]:
# Load the hand-written digits dataset that ships with scikit-learn.
# The returned object exposes the inputs as `.data` and the labels as `.target`.
digits = datasets.load_digits()

Print the dataset. The dataset consists of two main parts:
1. digits.data is the input (flattened pixel values for each handwritten image)

2. digits.target is the output (ground-truth labels)


In [None]:
# Dump the whole dataset object to inspect its contents.
print(digits)

We can obtain the number of data points (and the number of features per data point) from the `.shape` attribute

In [None]:
# Dimensions of the input array; as the last expression in the cell, the
# notebook displays its value.
digits.data.shape

Plot a few samples from digits.data together with their associated labels

In [None]:
# Show the first eight samples in a 2x4 grid, each titled with its label.
fig = plt.figure()
# Iterate over the samples themselves instead of re-using the 1-based subplot
# position as a data index, which silently skipped sample 0.
for position, (pixels, label) in enumerate(
        zip(digits.data[:8], digits.target[:8]), start=1):
    plt.subplot(2, 4, position)  # subplot positions are 1-based
    plt.axis('off')
    # Each sample is a flat vector; reshape it back to an 8x8 image for display.
    plt.imshow(pixels.reshape(8, 8), cmap=plt.cm.gray_r,
               interpolation='nearest')
    plt.title('Training: %i' % label)

Pixel values will be the input (X) and labels will be the output (y)

In [None]:
# Inputs (flattened pixel values) and outputs (ground-truth labels).
X, y = digits.data, digits.target

Split data into train and test subsets

In [None]:
# Hold out 30% of the samples for testing.
# The seed is fixed so the same train/test partition is produced on every run;
# without it the partition (and therefore the reported accuracy) changes
# between executions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, random_state=1)

Define helper functions that build and train different classifiers

In [None]:
# Multinomial Naive Bayes Classifier
def naive_bayes_classifier(train_x, train_y):
    """Train a multinomial naive Bayes model with alpha=0.01.

    Args:
        train_x: Training inputs.
        train_y: Training labels.

    Returns:
        The fitted MultinomialNB estimator.
    """
    nb = MultinomialNB(alpha=0.01)
    # fit() returns the estimator itself, so it can be returned directly.
    return nb.fit(train_x, train_y)
  
  
# KNN Classifier
def knn_classifier(train_x, train_y):
    """Train a k-nearest-neighbours model that votes among 3 neighbours.

    Args:
        train_x: Training inputs.
        train_y: Training labels.

    Returns:
        The fitted KNeighborsClassifier estimator.
    """
    settings = {'n_neighbors': 3}
    knn = KNeighborsClassifier(**settings)
    knn.fit(train_x, train_y)
    return knn
  
  
# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y, max_iter=10000):
    """Train an L2-penalised logistic regression model.

    Args:
        train_x: Training inputs.
        train_y: Training labels.
        max_iter: Upper bound on solver iterations. scikit-learn's default of
            100 is typically too low for unscaled pixel features and ends in a
            ConvergenceWarning, so a much higher cap is used here. The solver
            still stops as soon as it converges.

    Returns:
        The fitted LogisticRegression estimator.
    """
    model = LogisticRegression(penalty='l2', max_iter=max_iter)
    model.fit(train_x, train_y)
    return model
  
  
# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Train a random forest made of 8 trees.

    Args:
        train_x: Training inputs.
        train_y: Training labels.

    Returns:
        The fitted RandomForestClassifier estimator.
    """
    forest = RandomForestClassifier(n_estimators=8)
    # fit() returns the estimator itself, so it can be returned directly.
    return forest.fit(train_x, train_y)
  
  

  
# GBDT (Gradient Boosting Decision Tree) Classifier
def gradient_boosting_classifier(train_x, train_y):
    """Train a gradient-boosted tree ensemble with 200 boosting stages.

    Args:
        train_x: Training inputs.
        train_y: Training labels.

    Returns:
        The fitted GradientBoostingClassifier estimator.
    """
    settings = {'n_estimators': 200}
    booster = GradientBoostingClassifier(**settings)
    booster.fit(train_x, train_y)
    return booster
  
  
# SVM Classifier
def svm_classifier(train_x, train_y):
    """Train a support vector classifier with an RBF kernel.

    The model is created with probability=True, so probability estimates
    are enabled on the fitted estimator.

    Args:
        train_x: Training inputs.
        train_y: Training labels.

    Returns:
        The fitted SVC estimator.
    """
    svc = SVC(kernel='rbf', probability=True)
    # fit() returns the estimator itself, so it can be returned directly.
    return svc.fit(train_x, train_y)
  
# MLP Classifier
def mlp_classifier(train_x, train_y):
    """Train a multi-layer perceptron with one hidden layer of 100 units.

    Training uses the SGD solver with an initial learning rate of 0.1, is
    capped at 15 iterations, prints progress (verbose=10) and is seeded
    with random_state=1.

    Args:
        train_x: Training inputs.
        train_y: Training labels.

    Returns:
        The fitted MLPClassifier estimator.
    """
    settings = dict(
        hidden_layer_sizes=(100,),
        max_iter=15,
        alpha=1e-4,
        solver='sgd',
        verbose=10,
        tol=1e-4,
        random_state=1,
        learning_rate_init=.1,
    )
    mlp = MLPClassifier(**settings)
    mlp.fit(train_x, train_y)
    return mlp

Choose a classifier:

In [None]:
# Train the chosen classifier on the training subset. Swap in any of the
# other *_classifier helper functions to try a different model.
model = random_forest_classifier(X_train, y_train)

Now predict the digit for each sample in the test dataset

In [None]:
# Predicted label for every sample in the held-out test subset.
predicted = model.predict(X_test)

Compute the prediction accuracy

In [None]:
# Compare the predictions with the ground-truth test labels.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=predicted)
# Report the score as a percentage with two decimals.
print('classification accuracy : %.2f%%' % (accuracy * 100))

Plot test data with predicted labels

In [None]:
# Show the first twelve test samples in a 3x4 grid, each titled with the
# label the model predicted for it.
fig = plt.figure()
# Iterate over the samples themselves instead of re-using the 1-based subplot
# position as a data index, which silently skipped test sample 0.
for position, (pixels, label) in enumerate(
        zip(X_test[:12], predicted[:12]), start=1):
    plt.subplot(3, 4, position)  # subplot positions are 1-based
    plt.axis('off')
    # Each sample is a flat vector; reshape it back to an 8x8 image for display.
    plt.imshow(pixels.reshape(8, 8), cmap=plt.cm.gray_r,
               interpolation='nearest')
    plt.title('Prediction: %i' % label)