In [1]:
from keras.datasets import mnist

(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [0]:
train_images = train_images_original.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255

test_images = test_images_original.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255

In [0]:
from keras.utils import to_categorical

train_labels = to_categorical(train_labels_original)
test_labels = to_categorical(test_labels_original)

In [4]:
from keras import models
from keras import layers

network = models.Sequential()
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
network.add(layers.Dense(10, activation='softmax'))
#network.summary()


Instructions for updating:
Colocations handled automatically by placer.


In [0]:
# cross-entropy loss function
def gradient(w, b, x, y):
    m = x.shape[0]              # number of examples
    z = np.dot(x, w.T) + b      # wx + b   
    a = softmax(z)              # activation function

    for i in range(m):   
        a[i][np.argmax(y[i])] -= 1

    a = a/m
    
    dw = np.dot(a.T, x)
    db = np.sum(a)
    
    return dw, db


In [0]:
# def trainer(w, b, x, y, lr, iteration):
#     for i in range(iteration):   
#         dw, db = gradient(w, b, x, y)
#         w = w - lr * dw
#         b = b - lr * db
#         #print("iteration = %f" % (i + 1))
# 
#     return w, b


def trainer(w, b, x, y, lr):
 
    dw, db = gradient(w, b, x, y)
    w = w - lr * dw
    b = b - lr * db
    #print("iteration = %f" % (i + 1))

    return w, b

In [0]:
# Activation funtion
def softmax(z):
    result = np.exp(z) / np.sum(np.exp(z))
    
    return result

In [0]:
def predict(w, b, x):
    m = x.shape[0]
    z = np.dot(x, w.T) + b 
    a = softmax(z)
    y_pred = np.zeros((m, 10), dtype='float32')
    
    for i in range(m):
        digit = np.argmax(a[i])
        y_pred[i][digit] = 1
   
    return y_pred


In [0]:
def accuracy(y, y_predict):
    acc = np.mean(y == y_predict) * 100.0
    acc = round(acc, 3)
    print("Accuracy {}%".format(acc))

In [10]:
import numpy as np
# initialize weight and bias
# w.shape -- (10, 28*28)
w = np.zeros((10, train_images.shape[1]))
b = np.zeros((1, 10))
batch_size = 32
epochs = 10

for epoch in range(epochs):
    w, b = trainer(w, b, train_images, train_labels, 0.05)
    
y_predict = predict(w, b, test_images)
accuracy(test_labels, y_predict)

Accuracy 93.608%


In [11]:
w = np.zeros((10, train_images.shape[1]))
b = np.zeros((1, 10))
batch_size = 32
epochs = 10

for epoch in range(epochs):
    shuffled_indices = np.random.permutation(train_images.shape[0])
    x_shuffled = train_images[shuffled_indices]
    y_shuffled = train_labels[shuffled_indices]
    #print("Epoch ", epoch + 1)
    for i in range(0, train_images.shape[0], batch_size):
        x = x_shuffled[i:i+batch_size]
        y = y_shuffled[i:i+batch_size]
        w, b = trainer(w, b, x, y, 0.05)
print("Training finished")
y_predict = predict(w, b, test_images)
accuracy(test_labels, y_predict)

  
  


Training finished
Accuracy 81.96%


In [0]:
# Having issue with mini-batch!!!
# With the mini_batch training(batch_size = 32, learning rate 0.05), I get 81.96% accuracy.
# Without mini_batch, just run 10 epochs, I get 93.608% accuracy.