# Simple digit recognition

In [0]:
from tensorflow import keras
from keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, Dense, Flatten
from keras.models import Sequential, save_model, load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import mnist
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
import pickle
import numpy as np

In [18]:
# Mount google drive
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Load and prepare data

In [0]:
X_train, y_train = mnist.load_data()[0]
X_test, y_test = mnist.load_data()[1]

In [0]:
# Binarizing labels
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)

# Save binarizer to disk
with open('/content/gdrive/My Drive/Digit recognition/label_binarizer.pkl', 'wb') as f:
    pickle.dump(lb, f)

In [0]:
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

In [22]:
X_train[0].shape

(28, 28, 1)

## Define model structure

In [0]:
class DR:
    @staticmethod
    def build(height, width, classes):
        input_shape = (height, width, 1)
        model = Sequential()
        
        # (Conv => RELU)*2 => Pool
        model.add(Conv2D(32, (3, 3), padding='same',
                         input_shape=input_shape, activation='relu'))
        model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
        model.add(BatchNormalization(axis=-1))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        
        # (Conv => RELU)*2 => Pool
        model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
        model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
        model.add(BatchNormalization(axis=-1))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        
        # (Conv => RELU)*3 => Pool
        model.add(Conv2D(128, (5, 5), padding='same', activation='relu'))
        model.add(Conv2D(128, (5, 5), padding='same', activation='relu'))
        model.add(Conv2D(128, (5, 5), padding='same', activation='relu'))
        model.add(BatchNormalization(axis=-1))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        
        # FC
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.25))
        
        # Softmax classification
        model.add(Dense(10, activation='softmax'))
        
        return model

## Train model

In [0]:
# Initialize necessary constants
INIT_LR = 0.001
EPOCHS = 50
BS = 32


# Construct model
model = DR.build(28, 28, len(lb.classes_))
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=INIT_LR),
              metrics=['accuracy'])

In [0]:
# Create data augmentation
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.2,
                         zoom_range=0.2, horizontal_flip=True,
                         fill_mode='nearest')

In [0]:
# Fit model
H = model.fit_generator(aug.flow(X_train, y_train, batch_size=BS),
                        validation_data=(X_test, y_test),
                        steps_per_epoch=len(X_train) // BS,
                        callbacks=[EarlyStopping(patience=2)],
                        epochs=10)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


In [0]:
# Save trained model to disk
save_model(model, '/content/gdrive/My Drive/Digit recognition/dr.h5')

In [0]:
# Load model
model = load_model('/content/gdrive/My Drive/Digit recognition/dr.h5')

In [0]:
# Load Label Binarizer
with open('/content/gdrive/My Drive/Digit recognition/label_binarizer.pkl', 'rb') as f:
    lb = pickle.load(f)

## Prediction

In [26]:
# Evaluate model
score = model.evaluate(X_test, y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.04935315418606624
Test accuracy: 0.986


In [27]:
import cv2


# Load image
image = cv2.imread('/content/gdrive/My Drive/Digit recognition/test_images/7.jpg')
image = cv2.resize(image, (28, 28))
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.bitwise_not(image)
image = image.reshape(1, 28, 28, 1)

# Make prediction
pred = model.predict(image)
ind = pred.argmax(axis=1)[0]
label = lb.classes_[ind]

print('{}: {:.2f}%'.format(label, pred[0][ind]*100))

7: 98.25%
