# Classification of MNIST dataset digits
## Labels = 10
## Training Samples =  60,000
## Testing Samples =   10,000

In [1]:
# Modules
import dhash
import matplotlib.pyplot as plt
import numpy as np
import scipy
import sklearn.preprocessing as preproc
from keras.datasets import mnist
from PIL import Image
from sklearn import metrics
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

%matplotlib inline
# Seed NumPy's global RNG once, up front, so that anything drawing from
# np.random later in the notebook is repeatable under Restart & Run All.
np.random.seed(123)  # for reproducibility


Using Theano backend.


## 2. Load MNIST data from keras

In [2]:
# mnist.load_data() returns two (images, labels) pairs: train first, then test.
# Per the notebook header these are 60,000 training and 10,000 testing samples.
# NOTE(review): images are presumably 28x28 uint8 grayscale arrays - confirm
# against the Keras dataset documentation.
train_split, test_split = mnist.load_data()
X_train, Y_train = train_split
X_test, Y_test = test_split

In [3]:
# Label encoder for future use: fitted on the sixteen hexadecimal digit
# symbols, the integers 0-9 followed by the characters 'a'-'f'.
# It is not referenced again by any cell visible in this notebook.
# NOTE(review): the list mixes ints and strs - confirm how LabelEncoder
# coerces them before relying on `le.transform`.
labels = list(range(10)) + list('abcdef')
le = preproc.LabelEncoder()
le.fit(labels)

LabelEncoder()

## 3. Image Hashing on rows and columns for training set

In [4]:
# Hash parameters shared with the test-set hashing cell.
# `size` is passed to dhash; each of the two hashes it returns is rendered as
# a zero-padded binary string of size**2 digits ('064b' when size is 8).
size = 8
format_ = '0{}b'.format(size ** 2)
X = X_train

# One row per image, one column per hash bit: the row-hash bits come first,
# followed by the column-hash bits (2 * size**2 columns in total).
X_train_hashed = np.zeros((len(X), 2 * size ** 2))

for sample_idx, pixels in enumerate(X):
    row_hash, col_hash = dhash.dhash_row_col(Image.fromarray(pixels), size=size)
    bit_string = format(row_hash, format_) + format(col_hash, format_)
    # bit_string holds only '0'/'1' characters; write them all as one row.
    X_train_hashed[sample_idx, :] = [int(bit) for bit in bit_string]

## 4. Image Hashing on rows and columns for testing set

In [5]:
# Hash the test images into the same bit-feature layout as the training set:
# row-hash bits first, then column-hash bits, one bit per column.
# Relies on `size` and `format_` being set by the training-set hashing cell.
X = X_test
X_test_hashed = np.zeros((X.shape[0], size**2 * 2))

for idx, Img in enumerate(X):
    row, col = dhash.dhash_row_col(Image.fromarray(Img), size=size)
    hash_ = format(row, format_) + format(col, format_)

    # hash_ is a string of '0'/'1' characters; store each one as a number.
    # The loop index gets its own name (`colidx`) so it does not overwrite
    # the column hash held in `col`.
    for colidx, num in enumerate(hash_):
        X_test_hashed[idx, colidx] = int(num)

## 5. Classification Tree

In [6]:
# Fit a single decision tree on the hashed features (row-hash and column-hash
# bits) and report per-class precision/recall/F1 on the test set.
# `tree` and `metrics` come from the notebook's import cell at the top.
# random_state is pinned on the estimator so that re-running this cell alone
# (or out of order) rebuilds the same tree instead of depending on how much of
# the global NumPy RNG stream earlier cells have consumed. The saved output
# was produced without it, so the printed numbers may shift slightly on re-run.
clasifier = tree.DecisionTreeClassifier(random_state=123)
clasifier = clasifier.fit(X_train_hashed, Y_train)
y_pred = clasifier.predict(X_test_hashed)
print("Classification Report:\n%s" % metrics.classification_report(Y_test, y_pred))

Classification Report:
             precision    recall  f1-score   support

          0       0.92      0.91      0.92       980
          1       0.95      0.96      0.95      1135
          2       0.84      0.81      0.83      1032
          3       0.79      0.80      0.80      1010
          4       0.83      0.84      0.83       982
          5       0.79      0.79      0.79       892
          6       0.89      0.87      0.88       958
          7       0.85      0.85      0.85      1028
          8       0.76      0.75      0.76       974
          9       0.80      0.82      0.81      1009

avg / total       0.84      0.84      0.84     10000



In [7]:
# Cohen's kappa between the true test labels and the current `y_pred`
# (the predictions from whichever model was fitted most recently).
kappa = metrics.cohen_kappa_score(Y_test, y_pred)
print("Cohen kappa Score:\n%s" % kappa)

Cohen kappa Score:
0.825149360018


In [8]:
# Hamming loss between the true test labels and the current `y_pred`
# (the predictions from whichever model was fitted most recently).
hamming = metrics.hamming_loss(Y_test, y_pred)
print("Hamming Loss Score:\n%s" % hamming)

Hamming Loss Score:
0.1573


## 6. Random Forest

In [9]:
# Fit a 10-tree random forest on the same hashed features and predict the
# test set. `y_pred` is overwritten here, so the metric cells that follow
# score the forest rather than the single decision tree.
# RandomForestClassifier comes from the notebook's import cell at the top.
# random_state is pinned on the estimator so that re-running this cell alone
# (or out of order) rebuilds the same forest instead of depending on the
# current state of the global NumPy RNG. The saved outputs were produced
# without it, so the printed numbers may shift slightly on re-run.
RFC = RandomForestClassifier(n_estimators=10, random_state=123)
RFC = RFC.fit(X_train_hashed, Y_train)
y_pred = RFC.predict(X_test_hashed)

In [10]:
# Per-class precision / recall / F1 / support for the current `y_pred`
# against the true test labels.
report = metrics.classification_report(Y_test, y_pred)
print("Classification Report:\n%s" % report)

Classification Report:
             precision    recall  f1-score   support

          0       0.92      0.98      0.95       980
          1       0.97      0.98      0.98      1135
          2       0.91      0.93      0.92      1032
          3       0.88      0.91      0.89      1010
          4       0.92      0.91      0.91       982
          5       0.91      0.88      0.89       892
          6       0.95      0.95      0.95       958
          7       0.94      0.92      0.93      1028
          8       0.90      0.86      0.88       974
          9       0.92      0.88      0.90      1009

avg / total       0.92      0.92      0.92     10000



In [11]:
# Cohen's kappa between the true test labels and the current `y_pred`
# (the predictions from whichever model was fitted most recently).
kappa = metrics.cohen_kappa_score(Y_test, y_pred)
print("Cohen kappa Score:\n%s" % kappa)

Cohen kappa Score:
0.911849354304


In [12]:
# Hamming loss between the true test labels and the current `y_pred`
# (the predictions from whichever model was fitted most recently).
hamming = metrics.hamming_loss(Y_test, y_pred)
print("Hamming Loss Score:\n%s" % hamming)

Hamming Loss Score:
0.0793
