In [0]:
# Marek Ochocki (marcopolo97@vp.pl) i Łukasz Gosek (lukaszjgosek@gmail.com)

In [3]:
%tensorflow_version 1.x
import cv2
import numpy as np
import matplotlib.pyplot as plt

TensorFlow 1.x selected.


In [0]:
def showOpencvImage(image, isGray=False):
    """Display ``image`` in a fresh 6x6 inch matplotlib figure.

    The image is always drawn with the 'gray' colormap. ``isGray`` is part of
    the signature but is not consulted anywhere in the body.
    """
    plt.figure(figsize=(6, 6))
    plt.imshow(image, cmap='gray')
    plt.show()

def openCVHOG(im):
    """Return the HOG descriptor of ``im`` as a flat 1-D array.

    A ``cv2.HOGDescriptor`` is configured with the fixed parameters below and
    its ``compute`` result is flattened with ``np.ravel``.

    NOTE(review): the window is 20x20 while the MNIST digits fed in by this
    notebook are 28x28; how OpenCV places the window on a larger image is not
    visible here -- confirm the whole digit is actually covered. The caller
    reshapes the result to 81 columns per image.
    """
    # Positional arguments of cv2.HOGDescriptor, in the order it receives them.
    hog_params = (
        (20, 20),   # winSize
        (10, 10),   # blockSize
        (5, 5),     # blockStride
        (10, 10),   # cellSize
        9,          # nbins
        1,          # derivAperture
        -1.,        # winSigma
        0,          # histogramNormType
        0.2,        # L2HysThreshold
        1,          # gammaCorrection
        64,         # nlevels
        True,       # signedGradients
    )
    extractor = cv2.HOGDescriptor(*hog_params)
    return np.ravel(extractor.compute(im))

In [5]:
from keras.datasets import mnist

# Load MNIST as two (images, labels) pairs: the training split and the test split.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [0]:
# All training images followed by all test images, so rows of `imData` are
# indexed train-first.
im_list = list(train_images) + list(test_images)

# One HOG descriptor per image, stacked into a float32 matrix with 81 columns.
hogdata = list(map(openCVHOG, im_list))
imData = np.float32(hogdata).reshape(-1,81)

# Sizes of the small train / evaluation subsets used by the classical models;
# the evaluation subset directly follows the training subset.
trainingSetsCount = 600
testingSetsCount = 400
lastTestingSetIndex = trainingSetsCount + testingSetsCount

# **SVM model without deskew preprocessing**

In [37]:
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix

def cross_validate_SVM(k, images, labels, C, gamma):
  """Estimate the accuracy of an ``svm.SVC`` with k-fold cross-validation.

  The data is split into ``k`` contiguous folds of ``len(images) // k``
  samples (no shuffling). Each fold is used once for validation while a fresh
  ``svm.SVC(C=C, gamma=gamma)`` is trained on everything else. When
  ``len(images)`` is not a multiple of ``k`` the trailing remainder is never
  validated on; it is only ever part of the training portion.

  Args:
    k: number of folds; must satisfy ``2 <= k <= len(images)``.
    images: feature matrix, one row per sample.
    labels: class labels aligned with ``images``.
    C: regularisation parameter forwarded to ``svm.SVC``.
    gamma: kernel coefficient forwarded to ``svm.SVC``.

  Returns:
    Mean validation accuracy over the ``k`` folds, in percent (0-100).

  Raises:
    ValueError: if ``k`` is outside ``2 .. len(images)``, which would leave
      either the training part or the validation part of a fold empty.
  """
  n_samples = len(images)
  if k < 2 or k > n_samples:
    raise ValueError(
        "k must be between 2 and len(images) (%d), got %r" % (n_samples, k))

  # Integer division: fold sizes are sample counts, not floats.
  num_val_samples = n_samples // k
  all_scores = []
  for i in range(k):
    start = i * num_val_samples
    stop = start + num_val_samples

    val_data = images[start:stop]
    val_targets = labels[start:stop]

    # Training portion = everything before the fold + everything after it.
    partial_train_data = np.concatenate((images[:start], images[stop:]), axis=0)
    partial_train_targets = np.concatenate((labels[:start], labels[stop:]), axis=0)

    model = svm.SVC(C=C, gamma=gamma)
    model.fit(partial_train_data, partial_train_targets)

    pred_labels = model.predict(val_data)
    correct = np.count_nonzero(pred_labels == val_targets)
    all_scores.append(correct * 100.0 / pred_labels.size)
  return sum(all_scores) / len(all_scores)

# Exhaustive grid search over (C, gamma) using 10-fold cross-validation on the
# first 1000 HOG vectors. Each entry is [mean accuracy, C, gamma].
c_grid = np.arange(1.8, 3.0, 0.1)
gamma_grid = np.arange(0.3, 1.1, 0.1)

cross_validation_results = []
for C in c_grid:
  for gamma in gamma_grid:
    score = cross_validate_SVM(10, imData[:1000], train_labels[:1000], C, gamma)
    cross_validation_results.append([score, C, gamma])

results = [entry[0] for entry in cross_validation_results]
# max(..., key=...) returns the first entry on ties, i.e. the same one that
# results.index(max(results)) would select.
best_result = max(cross_validation_results, key=lambda entry: entry[0])
print("Best C and gamma: " + str(best_result[1]) + ", " + str(best_result[2]))

Best C and gamma: 1.8, 0.3


In [38]:

# Train an SVC on the first `trainingSetsCount` HOG vectors and score it on the
# following `testingSetsCount` ones (both ranges lie in the MNIST training split).
# NOTE(review): C and gamma look like the values printed by the grid search,
# which cross-validated on imData[:1000] -- a range that contains these
# evaluation samples, so the score below may be optimistic. Confirm.
train_slice = slice(0, trainingSetsCount)
eval_slice = slice(trainingSetsCount, lastTestingSetIndex)
true_labels = train_labels[eval_slice]

model = svm.SVC(C=1.8,gamma=0.3)
model = model.fit(imData[train_slice, :], train_labels[train_slice])

pred_labels = model.predict(imData[eval_slice, :])
mask = pred_labels==true_labels
correct = np.count_nonzero(mask)
cm = confusion_matrix(true_labels, pred_labels)

# Accuracy in percent, then the confusion matrix and the per-class report.
print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(true_labels, pred_labels))

90.75
[[38  0  1  0  0  0  0  0  0  0]
 [ 0 37  0  0  0  0  0  0  0  0]
 [ 0  0 32  0  0  0  0  2  0  1]
 [ 0  0  1 31  0  1  0  0  0  1]
 [ 0  0  0  0 39  1  4  0  1  1]
 [ 0  0  2  1  0 36  0  0  1  1]
 [ 1  1  0  0  1  0 36  0  1  0]
 [ 0  0  5  1  1  0  0 48  0  0]
 [ 0  1  0  0  0  0  0  0 36  1]
 [ 3  0  0  0  0  0  1  1  0 30]]
              precision    recall  f1-score   support

           0       0.90      0.97      0.94        39
           1       0.95      1.00      0.97        37
           2       0.78      0.91      0.84        35
           3       0.94      0.91      0.93        34
           4       0.95      0.85      0.90        46
           5       0.95      0.88      0.91        41
           6       0.88      0.90      0.89        40
           7       0.94      0.87      0.91        55
           8       0.92      0.95      0.94        38
           9       0.86      0.86      0.86        35

    accuracy                           0.91       400
   macro avg 

# **Random Forest Classifier without deskew preprocessing**

In [39]:
from sklearn.ensemble import RandomForestClassifier
def cross_validate_RFC(k, images, labels, max_depth, n_estimators, max_features, random_state=None):
  """Estimate the accuracy of a ``RandomForestClassifier`` with k-fold CV.

  The data is split into ``k`` contiguous folds of ``len(images) // k``
  samples (no shuffling). Each fold is used once for validation while a fresh
  forest is trained on everything else. When ``len(images)`` is not a multiple
  of ``k`` the trailing remainder is never validated on; it is only ever part
  of the training portion.

  Args:
    k: number of folds; must satisfy ``2 <= k <= len(images)``.
    images: feature matrix, one row per sample.
    labels: class labels aligned with ``images``.
    max_depth: forwarded to ``RandomForestClassifier``.
    n_estimators: forwarded to ``RandomForestClassifier``.
    max_features: forwarded to ``RandomForestClassifier``.
    random_state: optional seed forwarded to ``RandomForestClassifier``. The
      default ``None`` keeps the previous unseeded behaviour; pass an int to
      make the returned score reproducible between runs.

  Returns:
    Mean validation accuracy over the ``k`` folds, in percent (0-100).

  Raises:
    ValueError: if ``k`` is outside ``2 .. len(images)``, which would leave
      either the training part or the validation part of a fold empty.
  """
  n_samples = len(images)
  if k < 2 or k > n_samples:
    raise ValueError(
        "k must be between 2 and len(images) (%d), got %r" % (n_samples, k))

  # Integer division: fold sizes are sample counts, not floats.
  num_val_samples = n_samples // k
  all_scores = []
  for i in range(k):
    start = i * num_val_samples
    stop = start + num_val_samples

    val_data = images[start:stop]
    val_targets = labels[start:stop]

    # Training portion = everything before the fold + everything after it.
    partial_train_data = np.concatenate((images[:start], images[stop:]), axis=0)
    partial_train_targets = np.concatenate((labels[:start], labels[stop:]), axis=0)

    model = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators,
                                   max_features=max_features, random_state=random_state)
    model.fit(partial_train_data, partial_train_targets)

    pred_labels = model.predict(val_data)
    correct = np.count_nonzero(pred_labels == val_targets)
    all_scores.append(correct * 100.0 / pred_labels.size)
  return sum(all_scores) / len(all_scores)


# Sweep the forest size (5, 10, ..., 95 trees) with max_depth=15 and
# max_features=60 held fixed, using 10-fold cross-validation on the first
# 1000 HOG vectors. Each entry is [mean accuracy, n_estimators].
cross_validation_results = []
for n_estimators in range(5, 100, 5):
  score = cross_validate_RFC(10, imData[:1000], train_labels[:1000], 15, n_estimators, 60)
  cross_validation_results.append([score, n_estimators])

results = [entry[0] for entry in cross_validation_results]
# max(..., key=...) returns the first entry on ties, i.e. the same one that
# results.index(max(results)) would select.
best_result = max(cross_validation_results, key=lambda entry: entry[0])
print("Best n_estimators: " + str(best_result[1]))

Best n_estimators: 85


In [40]:
# Train a random forest on the first `trainingSetsCount` HOG vectors and score
# it on the following `testingSetsCount` ones (both ranges lie in the MNIST
# training split).
eval_slice = slice(trainingSetsCount, lastTestingSetIndex)
true_labels = train_labels[eval_slice]

# random_state is pinned so that re-running the notebook rebuilds the same
# forest and therefore reports the same scores; an unseeded forest gives a
# different accuracy on every run.
rfc = RandomForestClassifier(max_depth=15, n_estimators=85, max_features=60, random_state=0)
rfc = rfc.fit(imData[0:trainingSetsCount,:],train_labels[0:trainingSetsCount])

pred_labels = rfc.predict(imData[eval_slice, :])

mask = pred_labels==true_labels
correct = np.count_nonzero(mask)
cm = confusion_matrix(true_labels, pred_labels)

# Accuracy in percent, then the confusion matrix and the per-class report.
print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(true_labels, pred_labels))

83.25
[[26  1  2  0  0  0 10  0  0  0]
 [ 0 36  0  0  0  1  0  0  0  0]
 [ 0  0 32  0  0  1  0  0  1  1]
 [ 0  0  2 29  0  3  0  0  0  0]
 [ 0  0  0  0 43  1  2  0  0  0]
 [ 0  0  0  2  1 30  1  1  2  4]
 [ 1  0  0  0  2  2 34  0  1  0]
 [ 0  0  4  2  1  0  0 48  0  0]
 [ 1  4  0  0  1  3  0  0 27  2]
 [ 1  1  2  0  0  1  0  2  0 28]]
              precision    recall  f1-score   support

           0       0.90      0.67      0.76        39
           1       0.86      0.97      0.91        37
           2       0.76      0.91      0.83        35
           3       0.88      0.85      0.87        34
           4       0.90      0.93      0.91        46
           5       0.71      0.73      0.72        41
           6       0.72      0.85      0.78        40
           7       0.94      0.87      0.91        55
           8       0.87      0.71      0.78        38
           9       0.80      0.80      0.80        35

    accuracy                           0.83       400
   macro avg 

# **Neural Network without deskew preprocessing**

In [0]:
from keras import models
from keras import layers

# Fully connected classifier on flattened 28x28 images: one hidden layer of
# 512 ReLU units followed by a 10-way softmax output.
network = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(28 * 28,)),
    layers.Dense(10, activation='softmax'),
])
network.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

# Flatten every image to a 784-element row and divide the pixel values by 255.
train_images_for_network = train_images.reshape((60000, 28 * 28)).astype('float32') / 255
test_images_for_network = test_images.reshape((10000, 28 * 28)).astype('float32') / 255








In [0]:
from keras.utils import to_categorical

# One-hot encode the labels of both splits for the categorical cross-entropy loss.
encoded_train_labels, encoded_test_labels = to_categorical(train_labels), to_categorical(test_labels)

In [0]:
# Train for 5 epochs in batches of 128, then measure loss/accuracy on the test split.
network.fit(train_images_for_network, encoded_train_labels, epochs=5, batch_size=128)
test_loss, test_acc = network.evaluate(test_images_for_network, encoded_test_labels)
print('test_acc:', test_acc)

# Convert the per-class output scores into hard label predictions.
pred_probabilities = network.predict(test_images_for_network)
pred_labels = pred_probabilities.argmax(axis=-1)

from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix and per-class report against the integer test labels.
cm = confusion_matrix(test_labels, pred_labels)
print(cm)
print(classification_report(test_labels, pred_labels))


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/5





Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
test_acc: 0.9801
[[ 970    1    1    0    1    0    4    1    2    0]
 [   0 1129    2    1    0    1    2    0    0    0]
 [   4    4 1009    1    1    0    3    6    4    0]
 [   0    0    4  982    0    7    0   10    3    4]
 [   2    0    3    0  966    0    3    2    0    6]
 [   2    0    0    2    1  880    6    0    0    1]
 [   4    3    2    1    2    3  943    0    0    0]
 [   1    2    8    1    0    0    0 1012    1    3]
 [   3    1    6    2    4    4    5    8  938    3]
 [   1    6    0    4   11    6    0    9    0  972]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.98      0.98      1032
           3       0.99      0.97      0.98      1010
           4       0.98      

Dla każdego z klasyfikatorów wyniki są porównywalne z tymi uzyskanymi z preprocessingiem deskew: \
SVC: 91% -> 91% \
RFC: 82.75% -> 83.5% \
ANN: 97.8% -> 98.2%

# **SVC with raw data**

In [0]:
# Flatten the images to 784-element rows without any rescaling or feature
# extraction.
train_images_raw = train_images.reshape((60000, 28 * 28))
test_images_raw = test_images.reshape((10000, 28 * 28))

# Train on the first `trainingSetsCount` training images and score on the
# first `testingSetsCount` images of the test split.
# NOTE(review): in the recorded run every sample was predicted as class 1.
# This is probably caused by applying gamma=0.7 to unscaled pixel values
# rather than by the classifier itself -- confirm (e.g. with scaled inputs or
# gamma='scale') before drawing conclusions.
eval_slice = slice(0, testingSetsCount)
true_labels = test_labels[eval_slice]

model = svm.SVC(C=15.5,gamma=0.7)
model = model.fit(train_images_raw[0:trainingSetsCount,:], train_labels[0:trainingSetsCount])

pred_labels = model.predict(test_images_raw[eval_slice, :])
mask = pred_labels==true_labels
correct = np.count_nonzero(mask)
cm = confusion_matrix(true_labels, pred_labels)

# Accuracy in percent, then the confusion matrix and the per-class report.
print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(true_labels, pred_labels))

14.25
[[ 0 33  0  0  0  0  0  0  0  0]
 [ 0 57  0  0  0  0  0  0  0  0]
 [ 0 44  0  0  0  0  0  0  0  0]
 [ 0 35  0  0  0  0  0  0  0  0]
 [ 0 46  0  0  0  0  0  0  0  0]
 [ 0 42  0  0  0  0  0  0  0  0]
 [ 0 34  0  0  0  0  0  0  0  0]
 [ 0 41  0  0  0  0  0  0  0  0]
 [ 0 27  0  0  0  0  0  0  0  0]
 [ 0 41  0  0  0  0  0  0  0  0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        33
           1       0.14      1.00      0.25        57
           2       0.00      0.00      0.00        44
           3       0.00      0.00      0.00        35
           4       0.00      0.00      0.00        46
           5       0.00      0.00      0.00        42
           6       0.00      0.00      0.00        34
           7       0.00      0.00      0.00        41
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        41

    accuracy                           0.14       400
   macro avg 

  _warn_prf(average, modifier, msg_start, len(result))


# **RFC with raw data**

In [0]:
# Train a random forest on the first `trainingSetsCount` raw (flattened) training
# images and score it on the first `testingSetsCount` images of the test split.
eval_slice = slice(0, testingSetsCount)
true_labels = test_labels[eval_slice]

# random_state is pinned so that re-running the notebook rebuilds the same
# forest and therefore reports the same scores; an unseeded forest gives a
# different accuracy on every run.
rfc = RandomForestClassifier(max_depth=15, n_estimators=100, max_features=60, random_state=0)
rfc = rfc.fit(train_images_raw[0:trainingSetsCount,:], train_labels[0:trainingSetsCount])

pred_labels = rfc.predict(test_images_raw[eval_slice, :])

mask = pred_labels==true_labels
correct = np.count_nonzero(mask)
cm = confusion_matrix(true_labels, pred_labels)

# Accuracy in percent, then the confusion matrix and the per-class report.
print(correct*100.0/pred_labels.size)
print(cm)
print(classification_report(true_labels, pred_labels))

85.0
[[33  0  0  0  0  0  0  0  0  0]
 [ 0 57  0  0  0  0  0  0  0  0]
 [ 1  2 36  0  0  0  0  4  1  0]
 [ 1  0  1 27  0  2  1  3  0  0]
 [ 1  1  1  0 37  0  2  0  0  4]
 [ 1  1  1  3  2 31  0  1  0  2]
 [ 1  0  2  0  2  1 27  1  0  0]
 [ 0  1  0  0  2  0  0 36  0  2]
 [ 1  0  1  1  1  0  0  0 21  2]
 [ 0  1  0  1  1  0  0  2  1 35]]
              precision    recall  f1-score   support

           0       0.85      1.00      0.92        33
           1       0.90      1.00      0.95        57
           2       0.86      0.82      0.84        44
           3       0.84      0.77      0.81        35
           4       0.82      0.80      0.81        46
           5       0.91      0.74      0.82        42
           6       0.90      0.79      0.84        34
           7       0.77      0.88      0.82        41
           8       0.91      0.78      0.84        27
           9       0.78      0.85      0.81        41

    accuracy                           0.85       400
   macro avg  

SVC nie poradził sobie z surowymi danymi, natomiast wynik RFC jest podobny do tego z preprocessingiem.