In [1]:
# load data
# mnist.load_data() returns a (train, test) pair, each an (images, labels) tuple.
# The "_original" names hold the arrays exactly as returned; later cells derive
# reshaped / rescaled arrays from them under new names.
from keras.datasets import mnist
(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


In [2]:
# normalization
# Flatten every image into a row of 28 * 28 values and rescale by 1/255
# (presumably the maximum raw pixel value, giving inputs in [0, 1]).
# The number of samples is inferred with -1 instead of being hard-coded, so the
# cell keeps working if a subset of the data is loaded.
train_images = train_images_original.reshape((-1, 28 * 28))
train_images = train_images.astype('float32') / 255

test_images = test_images_original.reshape((-1, 28 * 28))
test_images = test_images.astype('float32') / 255

# Labels become column vectors of shape (m, 1) so they line up with the
# (m, 1) outputs produced by the model code.
train_labels = train_labels_original.reshape((-1, 1))
test_labels = test_labels_original.reshape((-1, 1))


In [3]:
# gradient of the cross-entropy loss (logistic regression) w.r.t. w and b
def gradient(w, b, x, y):
    """Compute the batch-averaged gradients for a sigmoid (logistic) unit.

    Parameters
    ----------
    w : weights, multiplied as ``np.dot(x, w)``.
    b : bias added to ``np.dot(x, w)``.
    x : batch of inputs; ``x.shape[0]`` is taken as the number of examples.
    y : targets, subtracted from the sigmoid activations.

    Returns
    -------
    (dw, db) : gradient with respect to ``w`` and with respect to ``b``,
        each averaged over the examples in the batch.
    """
    n_examples = x.shape[0]

    # Prediction error of the sigmoid output: a - y.
    residual = sigmoid(np.dot(x, w) + b) - y

    grad_w = np.dot(x.T, residual) / n_examples   # x^T (a - y) / m
    grad_b = np.sum(residual) / n_examples        # sum(a - y) / m
    return grad_w, grad_b

In [4]:
def trainer(w, b, x, y, lr):
    """Apply one gradient-descent update and return the new ``(w, b)``.

    The gradients come from ``gradient(w, b, x, y)``; each parameter is moved
    against its gradient, scaled by the learning rate ``lr``. The inputs are
    not modified in place; new values are returned.
    """
    grad_w, grad_b = gradient(w, b, x, y)
    updated_w = w - lr * grad_w
    updated_b = b - lr * grad_b
    return updated_w, updated_b

In [5]:
# sigmoid function
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The naive formula evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. Here only non-positive values
    are ever exponentiated:

        z >= 0:  exp(0) / (1 + exp(-z))  ==  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))

    Parameters
    ----------
    z : scalar or array-like of real numbers.

    Returns
    -------
    Element-wise sigmoid of ``z``, with the same shape as ``z``.
    """
    numerator = np.exp(np.minimum(z, 0))       # exp(min(z, 0)) <= 1
    denominator = 1.0 + np.exp(-np.abs(z))     # exp(-|z|) <= 1, so no overflow
    result = numerator / denominator

    return result

In [6]:
def predict(w, b, x):
    """Return hard 0/1 predictions for every row of ``x``.

    The sigmoid activation of ``np.dot(x, w) + b`` is thresholded at 0.5:
    activations >= 0.5 map to 1.0 and everything else maps to 0.0.
    In the label encoding used by this notebook, 1 means "image is NOT the
    digit" and 0 means "image IS the digit".

    Parameters
    ----------
    w : weights, multiplied as ``np.dot(x, w)``.
    b : bias.
    x : inputs; ``x.shape[0]`` is the number of samples ``m``.

    Returns
    -------
    float64 array of shape (m, 1) containing 0.0 / 1.0.
    """
    m = x.shape[0]
    a = sigmoid(np.dot(x, w) + b)
    # Vectorized threshold instead of a per-sample Python loop; the result is
    # reshaped to the (m, 1) column the loop-based version produced.
    y_pred = (a >= 0.5).astype(np.float64).reshape(m, 1)

    return y_pred

In [7]:
def accuracy(y, y_predict):
    """Print and return the percentage of predictions that match the labels.

    Parameters
    ----------
    y : true labels.
    y_predict : predicted labels.

    Returns
    -------
    float : accuracy in percent, rounded to 3 decimals (also printed).

    Notes
    -----
    When the two inputs hold the same number of elements but differ in shape
    (e.g. ``(m,)`` vs ``(m, 1)``), they are flattened before comparison.
    Without this, NumPy would broadcast them to an ``(m, m)`` comparison and
    report a meaningless score.
    """
    y = np.asarray(y)
    y_predict = np.asarray(y_predict)
    if y.shape != y_predict.shape and y.size == y_predict.size:
        y = y.ravel()
        y_predict = y_predict.ravel()

    acc = np.mean(y == y_predict) * 100.0
    acc = round(acc, 3)
    print("Accuracy {}%".format(acc))
    return float(acc)

In [8]:
import numpy as np


def one_vs_rest_labels(labels, num_classes=10):
    """Build one binary target array per class from a column of class ids.

    Parameters
    ----------
    labels : integer array of shape (m, 1) with class ids; ids are expected
        to lie in ``[0, num_classes)``.
    num_classes : number of classes (10 digits by default).

    Returns
    -------
    float32 array of shape (num_classes, m, 1). Entry ``[c, i, 0]`` is 0.0
    when sample ``i`` belongs to class ``c`` and 1.0 otherwise.
    """
    class_ids = np.arange(num_classes).reshape(num_classes, 1, 1)
    # Broadcasting (num_classes, 1, 1) against (1, m, 1) compares every class
    # id with every label at once, replacing a per-sample Python loop.
    return (class_ids != labels[np.newaxis, ...]).astype('float32')


# Images and labels must describe the same samples.
assert test_labels.shape[0] == test_images.shape[0]
assert train_labels.shape[0] == train_images.shape[0]

# test labels coded in 0, 1 format (0 = is the digit, 1 = is not the digit)
test_label_list = one_vs_rest_labels(test_labels)

# training labels coded in 0, 1 format
train_label_list = one_vs_rest_labels(train_labels)

In [9]:
# Training configuration (tune here).
num_classes = 10
batch_size = 32
epochs = 10
learning_rate = 0.5
seed = 0

# Seed NumPy's global generator so the per-epoch shuffles, and therefore the
# reported accuracies, are reproducible on Restart & Run All.
np.random.seed(seed)

# initialize weight and bias
# One independent binary classifier per digit:
# w.shape -- (10, 28 * 28, 1), b.shape -- (10, 1)
num_samples, num_features = train_images.shape
w = np.zeros((num_classes, num_features, 1))
b = np.zeros((num_classes, 1))

for d in range(num_classes):
    print("Start training ", d)
    # Targets for classifier d do not change between epochs.
    y_digit = train_label_list[d]
    for epoch in range(epochs):
        # Reshuffle every epoch so mini-batches differ from epoch to epoch.
        shuffled_indices = np.random.permutation(num_samples)
        x_shuffled = train_images[shuffled_indices]
        y_shuffled = y_digit[shuffled_indices]

        # Mini-batch gradient descent; the last batch may be shorter.
        for i in range(0, num_samples, batch_size):
            x = x_shuffled[i:i+batch_size]
            y = y_shuffled[i:i+batch_size]
            w[d], b[d] = trainer(w[d], b[d], x, y, learning_rate)
    print("Training {} finished".format(int(d)))
    # Evaluate classifier d on the held-out test set.
    y_pred = predict(w[d], b[d], test_images)
    accuracy(test_label_list[d], y_pred)

Start training  0


Training 0 finished
Accuracy 99.2%
Start training  1


Training 1 finished
Accuracy 99.35%
Start training  2


Training 2 finished
Accuracy 97.78%
Start training  3


Training 3 finished
Accuracy 97.38%
Start training  4


Training 4 finished
Accuracy 98.36%
Start training  5


Training 5 finished
Accuracy 97.4%
Start training  6


Training 6 finished
Accuracy 98.67%
Start training  7


Training 7 finished
Accuracy 98.47%
Start training  8


Training 8 finished
Accuracy 95.73%
Start training  9


Training 9 finished
Accuracy 96.38%
