**Handwritten Digit Prediction - Classification Analysis**

The digits dataset consists of 8x8 pixel images of handwritten digits. The `images` attribute of the dataset stores an 8x8 array of grayscale values for each image. We will use these arrays to visualize the first 4 images. The `target` attribute of the dataset stores the digit each image represents.

IMPORT LIBRARIES

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

IMPORT DATA

In [None]:
from sklearn.datasets import load_digits
# NOTE(review): despite its name, `df` does not look like a pandas DataFrame;
# the cells below only read its `.images` and `.target` attributes.
df = load_digits()

In [None]:
# Show the first four digits side by side, each titled with its label.
_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for idx, panel in enumerate(axes):
    panel.set_axis_off()
    panel.imshow(df.images[idx], cmap=plt.cm.gray_r, interpolation='nearest')
    panel.set_title('Training: %i' % df.target[idx])


DATA PREPROCESSING

In [None]:
# (number of images, rows, columns)
np.shape(df.images)

(1797, 8, 8)

In [None]:
# First image as a 2-D grid of grayscale values.
df.images[0]

array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])

In [None]:
# Dimensions of a single image.
np.shape(df.images[0])

(8, 8)

In [None]:
# Number of images in the dataset (length of the first axis).
df.images.shape[0]

1797

In [None]:
# Flatten every image into a single feature row: one row per sample,
# with the remaining axes collapsed into one.
n_samples = df.images.shape[0]
data = np.reshape(df.images, (n_samples, -1))

In [None]:
# First sample after flattening: a single 1-D row of pixel values.
data[0]

array([ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
       15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
       12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
        0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
       10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.])

In [None]:
# Length of one flattened sample.
np.shape(data[0])

(64,)

In [None]:
# (samples, features) of the flattened matrix.
np.shape(data)

(1797, 64)

SCALING IMAGE DATA

In [None]:
# Smallest pixel intensity before scaling.
np.min(data)

0.0

In [None]:
# Largest pixel intensity before scaling.
np.max(data)

16.0

In [None]:
# Rebuild the scaled matrix from the raw images instead of dividing `data`
# by itself: `data = data / 16` shrinks the values again every time this
# cell is re-run. Pixel intensities span 0..16, so dividing by 16 maps
# them to [0, 1].
data = df.images.reshape((len(df.images), -1)) / 16

In [None]:
# Smallest value after scaling.
np.min(data)

0.0

In [None]:
# Largest value after scaling.
np.max(data)

1.0

In [None]:
# First sample again, now after dividing by 16.
data[0]

array([0.    , 0.    , 0.3125, 0.8125, 0.5625, 0.0625, 0.    , 0.    ,
       0.    , 0.    , 0.8125, 0.9375, 0.625 , 0.9375, 0.3125, 0.    ,
       0.    , 0.1875, 0.9375, 0.125 , 0.    , 0.6875, 0.5   , 0.    ,
       0.    , 0.25  , 0.75  , 0.    , 0.    , 0.5   , 0.5   , 0.    ,
       0.    , 0.3125, 0.5   , 0.    , 0.    , 0.5625, 0.5   , 0.    ,
       0.    , 0.25  , 0.6875, 0.    , 0.0625, 0.75  , 0.4375, 0.    ,
       0.    , 0.125 , 0.875 , 0.3125, 0.625 , 0.75  , 0.    , 0.    ,
       0.    , 0.    , 0.375 , 0.8125, 0.625 , 0.    , 0.    , 0.    ])

TRAIN TEST SPLIT DATA

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing.
# random_state pins the shuffle so a fresh "Restart & Run All" reproduces the
# same split (and therefore the same metrics); stratify keeps each digit's
# share the same in the training and test sets.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    df.target,
    test_size=0.3,
    random_state=42,
    stratify=df.target,
)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((1257, 64), (540, 64), (1257,), (540,))

RANDOM FOREST MODEL

In [None]:
from sklearn.ensemble import RandomForestClassifier

# A random forest is a stochastic learner; without a fixed random_state each
# run trains a different model and the reported metrics cannot be reproduced.
rf = RandomForestClassifier(random_state=42)
rf.fit(x_train, y_train)

PREDICT TEST DATA

In [None]:
# Predict a digit for every held-out sample.
y_pred = rf.predict(x_test)
# Preview only the first predictions instead of dumping the whole array;
# `y_pred` itself still holds one prediction per test sample.
y_pred[:20]

array([3, 6, 7, 9, 4, 5, 7, 4, 5, 2, 0, 9, 8, 9, 3, 1, 4, 1, 6, 6, 2, 9,
       7, 0, 3, 9, 5, 7, 7, 1, 8, 4, 2, 7, 9, 7, 4, 5, 4, 0, 8, 6, 8, 0,
       4, 8, 1, 4, 8, 2, 6, 1, 3, 8, 2, 1, 0, 6, 0, 3, 5, 4, 6, 3, 6, 0,
       7, 1, 8, 9, 9, 9, 3, 7, 1, 2, 1, 8, 4, 0, 3, 0, 6, 0, 4, 4, 5, 0,
       1, 4, 7, 4, 6, 8, 4, 4, 2, 1, 0, 6, 4, 9, 3, 2, 3, 5, 1, 9, 9, 5,
       7, 3, 4, 3, 7, 0, 6, 7, 0, 1, 0, 4, 3, 3, 1, 9, 3, 3, 7, 7, 3, 4,
       3, 8, 8, 4, 2, 1, 4, 7, 4, 6, 8, 6, 3, 2, 0, 3, 6, 1, 6, 8, 1, 6,
       9, 9, 9, 9, 9, 9, 5, 0, 0, 5, 3, 2, 2, 6, 4, 9, 1, 5, 1, 7, 4, 9,
       2, 4, 5, 7, 7, 6, 6, 5, 9, 1, 4, 3, 2, 2, 3, 1, 9, 1, 9, 9, 4, 8,
       6, 5, 8, 4, 9, 3, 2, 8, 4, 6, 5, 1, 3, 7, 8, 6, 5, 4, 6, 0, 5, 0,
       2, 1, 1, 8, 6, 6, 4, 2, 4, 3, 3, 1, 7, 1, 4, 3, 9, 1, 8, 1, 5, 7,
       9, 5, 5, 7, 9, 2, 4, 0, 7, 8, 6, 4, 9, 4, 1, 4, 8, 9, 1, 7, 4, 0,
       4, 2, 0, 3, 8, 7, 8, 0, 0, 6, 5, 3, 3, 7, 2, 2, 0, 9, 7, 1, 4, 8,
       2, 0, 0, 1, 1, 5, 9, 5, 0, 4, 6, 7, 5, 9, 3,

MODEL ACCURACY

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Rows are the true digits, columns the predicted ones.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[50,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0, 70,  0,  0,  0,  1,  0,  0,  0,  0],
       [ 1,  0, 49,  1,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 49,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 59,  0,  0,  1,  0,  0],
       [ 0,  0,  0,  0,  0, 43,  0,  0,  0,  1],
       [ 1,  0,  0,  0,  0,  0, 55,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 52,  1,  0],
       [ 0,  0,  1,  0,  1,  0,  0,  0, 43,  0],
       [ 0,  0,  0,  2,  0,  1,  0,  0,  0, 57]])

In [None]:
# Per-digit precision, recall and F1 on the test set.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.96      0.98      0.97        51
           1       1.00      0.99      0.99        71
           2       0.98      0.96      0.97        51
           3       0.94      1.00      0.97        49
           4       0.97      0.98      0.98        60
           5       0.96      0.98      0.97        44
           6       1.00      0.98      0.99        56
           7       0.98      0.98      0.98        53
           8       0.98      0.96      0.97        45
           9       0.98      0.95      0.97        60

    accuracy                           0.98       540
   macro avg       0.97      0.98      0.98       540
weighted avg       0.98      0.98      0.98       540

