In [1]:
import pickle
import gzip

from PIL import Image
import numpy as np
import tensorflow as tf

# 1. Data intake

In [2]:
# Load the gzip-compressed pickle and unpack its three splits
# (training, validation, test).
# encoding='latin1' is passed through the public pickle.load() API rather than
# by poking the private pickle._Unpickler class.
# NOTE(security): unpickling can execute arbitrary code; only load
# 'mnist.pkl.gz' from a source you trust.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    tr_s, va_s, te_s = pickle.load(f, encoding='latin1')

In [3]:
# Reshape every sample of each split to a 28x28x1 array (one list per split).
# Element 0 of a split holds the samples, element 1 the labels.
tr_i, va_i, te_i = (
    [np.reshape(flat, (28, 28, 1)) for flat in split[0]]
    for split in (tr_s, va_s, te_s)
)

# Training examples as (image, label) pairs, so that both can be shuffled
# together.
tr_d = list(zip(tr_i, tr_s[1]))

# 2. Building a Convolutional Network

In [4]:
# Graph inputs; the leading None leaves the batch dimension unspecified.
# inp:  float32 images of shape (batch, 28, 28, 1).
# outp: int32 class indices of shape (batch,).
inp = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
outp = tf.placeholder(dtype=tf.int32, shape=[None])

In [5]:
# `image` is the running activation tensor; every layer below rebinds it.
image = inp

# Convolution layer: 20 filters of size 5x5 over the single input channel.
weight1 = tf.Variable(
    tf.random_normal([5, 5, 1, 20], mean=0, stddev=1/5),
    name='weight1')
bias1 = tf.Variable(
    tf.random_normal([20], mean=0.1, stddev=1),
    name='bias1')

# Stride 1 with 'VALID' (no) padding, then add the per-filter bias and apply
# ReLU.
image = tf.nn.conv2d(image, weight1, strides=[1, 1, 1, 1], padding='VALID')
image = tf.nn.relu(image + bias1)

In [6]:
# Pooling layer: 2x2 max-pool with stride 2 and no padding, after which each
# example is flattened to a vector of 20*12*12 values for the dense layer.
image = tf.nn.max_pool(
    image,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding='VALID',
)
image = tf.reshape(image, shape=[-1, 20 * 12 * 12])

In [7]:
# Fully connected layer: 20*12*12 inputs -> 100 hidden units.
# Weights are drawn with stddev 1/sqrt(number of inputs).
weight2 = tf.Variable(
    tf.random_normal([20*12*12, 100], mean=0, stddev=1/np.sqrt(20*12*12)),
    name='weight2')
bias2 = tf.Variable(
    tf.random_normal([100], mean=0.1, stddev=1),
    name='bias2')

# Affine transform followed by ReLU.
image = tf.matmul(image, weight2) + bias2
image = tf.nn.relu(image)

In [8]:
# Output layer: 100 hidden units -> 10 class scores.
# No softmax is applied here; `image` ends up holding the raw logits, and the
# softmax itself is computed inside the loss op.
weight3 = tf.Variable(
    tf.random_normal(shape=[100, 10], mean=0, stddev=1/10),
    name='weight3')
bias3 = tf.Variable(
    tf.random_normal(shape=[10], mean=0.1, stddev=1),
    name='bias3')

image = tf.matmul(image, weight3) + bias3

# 3. Optimization Problem

In [9]:
# Cross-entropy loss, averaged over the batch. `image` holds the logits and
# `outp` the integer class labels.
# Both are passed by keyword: TensorFlow >= 1.0 rejects positional calls to
# this function, and keywords also make it impossible to swap the two.
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=outp, logits=image))

In [10]:
# Plain gradient descent with a fixed learning rate of 0.1.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# Running `train` applies one gradient step that reduces `loss`.
train = optimizer.minimize(loss)

# 4. Training the Convolutional Network

In [11]:
# Training hyperparameters: examples per gradient step, and the number of
# full passes over the training data.
batch_size, epoch_num = 100, 100

In [None]:
# Train for `epoch_num` epochs of mini-batch gradient descent and print the
# accuracy on the test split (te_i / te_s[1]) after every epoch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Labels of the test split, converted once for the vectorized comparison.
    test_labels = np.asarray(te_s[1])

    for i in range(epoch_num):
        # Reorder the (image, label) pairs in place so every epoch sees
        # different mini-batches.
        np.random.shuffle(tr_d)

        for j in range(0, len(tr_d), batch_size):
            # Slice once per step; the last batch may be shorter than
            # batch_size.
            batch = tr_d[j:j+batch_size]
            image_data = np.array([x for x, _ in batch])
            label_data = np.array([y for _, y in batch], dtype=np.int32)
            sess.run(train, feed_dict={inp: image_data, outp: label_data})

        # `image` evaluates to one row of logits per test example; the
        # predicted class is the index of the largest logit in each row.
        test_results = sess.run(image, feed_dict={inp: te_i})
        accuracy = np.mean(np.argmax(test_results, axis=1) == test_labels)
        print("Epoch", i, ": ", accuracy)

Epoch 0 :  0.8492
Epoch 1 :  0.9616
Epoch 2 :  0.9698
Epoch 3 :  0.9795
Epoch 4 :  0.9823
Epoch 5 :  0.9824
Epoch 6 :  0.9845
Epoch 7 :  0.9854
Epoch 8 :  0.9866
Epoch 9 :  0.9845
Epoch 10 :  0.9847
Epoch 11 :  0.9851
Epoch 12 :  0.9847
Epoch 13 :  0.986
Epoch 14 :  0.9865
Epoch 15 :  0.9863
Epoch 16 :  0.9871
Epoch 17 :  0.985
Epoch 18 :  0.9865
Epoch 19 :  0.9875
Epoch 20 :  0.9851
Epoch 21 :  0.9871
Epoch 22 :  0.9866
Epoch 23 :  0.9871
Epoch 24 :  0.9867
Epoch 25 :  0.9869
Epoch 26 :  0.9867
Epoch 27 :  0.9865
Epoch 28 :  0.9863
Epoch 29 :  0.9855
Epoch 30 :  0.9859
Epoch 31 :  0.987
Epoch 32 :  0.9862
Epoch 33 :  0.9867
Epoch 34 :  0.9872
Epoch 35 :  0.9867
Epoch 36 :  0.9867
Epoch 37 :  0.9867
Epoch 38 :  0.9873
Epoch 39 :  0.9875
Epoch 40 :  0.987
Epoch 41 :  0.9866
Epoch 42 :  0.9869
Epoch 43 :  0.987
Epoch 44 :  0.9873
Epoch 45 :  0.9871
Epoch 46 :  0.9874
Epoch 47 :  0.9874
Epoch 48 :  0.9873
Epoch 49 :  0.9869
Epoch 50 :  0.9872
Epoch 51 :  0.9873
Epoch 52 :  0.9875
Epoch 53

array([7, 2, 1, ..., 4, 5, 6], dtype=int64)