In [0]:
# Marek Ochocki (marcopolo97@vp.pl) i Łukasz Gosek (lukaszjgosek@gmail.com)

In [0]:
%tensorflow_version 1.x
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [0]:
def showOpencvImage(image, isGray=False):
    fig = plt.figure(figsize=(6, 6))
    plt.imshow(image, cmap = 'gray')
    plt.show()

def openCVHOG(im):
    winSize = (20,20)
    blockSize = (10,10)
    blockStride = (5,5)
    cellSize = (10,10)
    nbins = 9
    derivAperture = 1
    winSigma = -1.
    histogramNormType = 0
    L2HysThreshold = 0.2
    gammaCorrection = 1
    nlevels = 64
    signedGradients = True

    hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,nbins,derivAperture,winSigma,histogramNormType,L2HysThreshold,gammaCorrection,nlevels, signedGradients)
    descriptor = np.ravel(hog.compute(im))
    
    return descriptor

In [0]:
from keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [0]:
im_list = [train_images[i] for i in range(0,train_images.shape[0])] + [test_images[i] for i in range(0,test_images.shape[0])]
hogdata = [openCVHOG(im) for im in im_list]
imData = np.float32(hogdata).reshape(-1,81)

trainingSetsCount = 600
testingSetsCount = 400
lastTestingSetIndex = trainingSetsCount + testingSetsCount

# **SVM model without deskew preprocessing**

In [20]:
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(svm.SVC(), {'C':[1+i*0.1 for i in range(20)], 'gamma':[i*0.1 for i in range(1, 10)]})
clf.fit(imData[0:trainingSetsCount,:],train_labels[0:trainingSetsCount])
model = clf.best_estimator_

pred_labels = model.predict(imData[trainingSetsCount:lastTestingSetIndex,:])
mask = pred_labels==train_labels[trainingSetsCount:lastTestingSetIndex]
correct = np.count_nonzero(mask)
cm = confusion_matrix(train_labels[trainingSetsCount:lastTestingSetIndex], pred_labels)

print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(train_labels[trainingSetsCount:lastTestingSetIndex], pred_labels))

91.25
[[39  0  0  0  0  0  0  0  0  0]
 [ 0 37  0  0  0  0  0  0  0  0]
 [ 0  0 33  0  0  0  0  1  0  1]
 [ 0  0  1 31  0  1  0  0  0  1]
 [ 1  0  0  0 40  0  3  0  1  1]
 [ 0  0  2  1  0 36  0  0  1  1]
 [ 1  1  0  0  1  0 36  0  1  0]
 [ 0  0  5  2  1  0  0 47  0  0]
 [ 0  1  0  0  0  0  0  0 36  1]
 [ 3  0  0  0  0  0  1  1  0 30]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        39
           1       0.95      1.00      0.97        37
           2       0.80      0.94      0.87        35
           3       0.91      0.91      0.91        34
           4       0.95      0.87      0.91        46
           5       0.97      0.88      0.92        41
           6       0.90      0.90      0.90        40
           7       0.96      0.85      0.90        55
           8       0.92      0.95      0.94        38
           9       0.86      0.86      0.86        35

    accuracy                           0.91       400
   macro avg 

# **Random Tree Classifier without deskew preprocessing**

In [21]:
from sklearn.ensemble import RandomForestClassifier

clf = GridSearchCV(RandomForestClassifier(), {'max_depth':[i for i in range(5, 20)], 'n_estimators':[i*5 for i in range(1, 20)]})
clf.fit(imData[0:trainingSetsCount,:],train_labels[0:trainingSetsCount])
rfc = clf.best_estimator_

pred_labels = rfc.predict(imData[trainingSetsCount:lastTestingSetIndex,:])

mask = pred_labels==train_labels[trainingSetsCount:lastTestingSetIndex]
correct = np.count_nonzero(mask)
cm = confusion_matrix(train_labels[trainingSetsCount:lastTestingSetIndex], pred_labels)

print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(train_labels[trainingSetsCount:lastTestingSetIndex], pred_labels))

89.25
[[36  2  1  0  0  0  0  0  0  0]
 [ 0 37  0  0  0  0  0  0  0  0]
 [ 0  0 33  0  1  0  0  0  0  1]
 [ 0  0  1 31  0  2  0  0  0  0]
 [ 0  0  0  0 42  1  1  0  0  2]
 [ 0  0  1  1  0 35  1  0  2  1]
 [ 2  1  0  0  1  1 35  0  0  0]
 [ 0  0  4  3  1  0  0 47  0  0]
 [ 0  1  0  0  0  3  0  0 32  2]
 [ 3  0  0  0  0  1  1  1  0 29]]
              precision    recall  f1-score   support

           0       0.88      0.92      0.90        39
           1       0.90      1.00      0.95        37
           2       0.82      0.94      0.88        35
           3       0.89      0.91      0.90        34
           4       0.93      0.91      0.92        46
           5       0.81      0.85      0.83        41
           6       0.92      0.88      0.90        40
           7       0.98      0.85      0.91        55
           8       0.94      0.84      0.89        38
           9       0.83      0.83      0.83        35

    accuracy                           0.89       400
   macro avg 

# **Neural Network without deskew preprocessing**

In [22]:
from keras import models
from keras import layers

network = models.Sequential()
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
network.add(layers.Dense(10, activation='softmax'))
network.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

train_images_for_network = train_images.reshape((60000, 28 * 28))
train_images_for_network = train_images_for_network.astype('float32') / 255

test_images_for_network = test_images.reshape((10000, 28 * 28))
test_images_for_network = test_images_for_network.astype('float32') / 255

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [0]:
from keras.utils import to_categorical

encoded_train_labels = to_categorical(train_labels)
encoded_test_labels = to_categorical(test_labels)

In [24]:
network.fit(train_images_for_network, encoded_train_labels, epochs=5, batch_size=128)
test_loss, test_acc = network.evaluate(test_images_for_network, encoded_test_labels)
print('test_acc:', test_acc)

pred_probabilities = network.predict(test_images_for_network)

pred_labels = np.argmax(pred_probabilities,-1)
from sklearn.metrics import classification_report, confusion_matrix

cm = confusion_matrix(test_labels, pred_labels)
print(cm)
print(classification_report(test_labels, pred_labels))



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
test_acc: 0.9797999858856201
[[ 970    0    0    2    1    0    3    1    2    1]
 [   0 1128    1    1    0    0    2    0    3    0]
 [   5    1 1001    6    4    0    2    6    7    0]
 [   0    0    1  992    0    2    0    3    5    7]
 [   1    0    1    1  973    0    2    0    1    3]
 [   2    0    0   10    1  865    4    0    7    3]
 [   2    2    0    1    9    5  937    0    2    0]
 [   1    6    8    4    4    0    0  992    3   10]
 [   0    0    2    3    4    3    0    2  955    5]
 [   1    4    0    4   10    1    0    3    1  985]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.99      0.97      0.98      1032
           3       0.97      0.98      0.98      1010
           4       0.97      0.99      0.98       982
           5       0.99      0.97      0.98       892
          

Dla każdego z klasyfikatorów ich wyniki są porównywalne do tych z deskew preprocessingiem: \
AVC: 91% -> 91% \
RTC: 82.75% -> 87.5% \
ANN: 97.8% -> 98.2%

# **AVC with raw data**

In [25]:
train_images_raw = train_images.reshape((60000, 28 * 28))
test_images_raw = test_images.reshape((10000, 28 * 28))

clf = GridSearchCV(svm.SVC(), {'C':[1+i*0.1 for i in range(20)], 'gamma':[i*0.1 for i in range(1, 10)]})
clf.fit(train_images_raw[0:trainingSetsCount,:], train_labels[0:trainingSetsCount])
model = clf.best_estimator_

pred_labels = model.predict(test_images_raw[0:testingSetsCount,:])
mask = pred_labels==test_labels[0:testingSetsCount]
correct = np.count_nonzero(mask)
cm = confusion_matrix(test_labels[0:testingSetsCount], pred_labels)

print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(test_labels[0:testingSetsCount], pred_labels))

14.25
[[ 0 33  0  0  0  0  0  0  0  0]
 [ 0 57  0  0  0  0  0  0  0  0]
 [ 0 44  0  0  0  0  0  0  0  0]
 [ 0 35  0  0  0  0  0  0  0  0]
 [ 0 46  0  0  0  0  0  0  0  0]
 [ 0 42  0  0  0  0  0  0  0  0]
 [ 0 34  0  0  0  0  0  0  0  0]
 [ 0 41  0  0  0  0  0  0  0  0]
 [ 0 27  0  0  0  0  0  0  0  0]
 [ 0 41  0  0  0  0  0  0  0  0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        33
           1       0.14      1.00      0.25        57
           2       0.00      0.00      0.00        44
           3       0.00      0.00      0.00        35
           4       0.00      0.00      0.00        46
           5       0.00      0.00      0.00        42
           6       0.00      0.00      0.00        34
           7       0.00      0.00      0.00        41
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        41

    accuracy                           0.14       400
   macro avg 

  _warn_prf(average, modifier, msg_start, len(result))


# **RFC with raw data**

In [26]:

clf = GridSearchCV(RandomForestClassifier(), {'max_depth':[i for i in range(5, 20)], 'n_estimators':[i*5 for i in range(1, 20)]})
clf.fit(train_images_raw[0:trainingSetsCount,:], train_labels[0:trainingSetsCount])
rfc = clf.best_estimator_

pred_labels = rfc.predict(test_images_raw[0:testingSetsCount,:])

mask = pred_labels==test_labels[0:testingSetsCount]
correct = np.count_nonzero(mask)
cm = confusion_matrix(test_labels[0:testingSetsCount], pred_labels)

print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(test_labels[0:testingSetsCount], pred_labels))

83.5
[[33  0  0  0  0  0  0  0  0  0]
 [ 0 57  0  0  0  0  0  0  0  0]
 [ 1  2 34  1  1  0  0  5  0  0]
 [ 1  0  1 27  0  3  0  2  0  1]
 [ 0  1  1  0 35  0  1  0  0  8]
 [ 1  1  0  7  2 28  0  1  1  1]
 [ 1  0  3  0  1  1 28  0  0  0]
 [ 0  1  1  1  2  0  0 35  0  1]
 [ 2  1  1  0  0  0  0  0 21  2]
 [ 0  1  0  2  0  0  0  1  1 36]]
              precision    recall  f1-score   support

           0       0.85      1.00      0.92        33
           1       0.89      1.00      0.94        57
           2       0.83      0.77      0.80        44
           3       0.71      0.77      0.74        35
           4       0.85      0.76      0.80        46
           5       0.88      0.67      0.76        42
           6       0.97      0.82      0.89        34
           7       0.80      0.85      0.82        41
           8       0.91      0.78      0.84        27
           9       0.73      0.88      0.80        41

    accuracy                           0.83       400
   macro avg  

AVC nie poradził sobie z surowymi danymi, natomiast wynik RFC jest podobny do tego z preprocessingiem.