In [389]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from mnist import MNIST
import math
%matplotlib inline

In [140]:
# Read the raw MNIST test and training splits from the local 'MNIST' directory.
mndata = MNIST('MNIST')
X_test, Y_test = mndata.load_testing()
X_train, Y_train = mndata.load_training()

# Turn every image record into an array, stack the records, and transpose the
# result so that each column holds one example. The label arrays get the same
# transpose call.
X_test = np.array([np.array(image) for image in X_test]).T
Y_test = np.array(Y_test).T

X_train = np.array([np.array(image) for image in X_train]).T
Y_train = np.array(Y_train).T

In [141]:
# Problem dimensions shared by the cells below.
IMAGE_WIDTH = 28                   # row count used when an example is reshaped for plotting
INPUTS_NUMBER = IMAGE_WIDTH ** 2   # number of input features per example
CLASSES_NUMBER = 10                # number of rows in the one-hot label matrix

## Normalize data

In [142]:
def normalize_data(X_train, X_test):
    """Center and scale every feature row using statistics of both splits combined.

    Both inputs are 2-D arrays with one feature per row and one example per
    column. For each row, the mean and the L2 norm are computed over the
    concatenation of the training and test columns; the mean is subtracted
    and the result is divided by the norm.

    Args:
        X_train: array of shape (features, train_examples).
        X_test: array of shape (features, test_examples).

    Returns:
        A tuple (X_train_normalized, X_test_normalized) with the same shapes
        as the inputs. The inputs themselves are not modified.
    """
    # NOTE(review): the statistics include the test columns, so information
    # from the test split influences how the training data is scaled.
    total = np.concatenate((X_train, X_test), axis=1)
    avg = np.mean(total, axis=1, keepdims=True)
    norm = np.linalg.norm(total, axis=1, keepdims=True)
    # A row that is zero in every column has a zero norm; divide it by 1
    # instead. np.where keeps the (features, 1) shape and the float dtype
    # regardless of which rows are zero.
    norm = np.where(norm == 0, 1, norm)
    X_train_normalized = (X_train - avg) / norm
    X_test_normalized = (X_test - avg) / norm
    return X_train_normalized, X_test_normalized

# Normalize both splits with the shared per-feature statistics. The results go
# into new names; X_train and X_test are not rebound by this line.
X_train_normalized, X_test_normalized = normalize_data(X_train, X_test)

In [143]:
# From here on the working names refer to the normalized arrays.
# NOTE: once this cell has run, re-executing the normalization cell would
# normalize already-normalized data; restart from the data-loading cell instead.
X_train, X_test = X_train_normalized, X_test_normalized

## One-hot labels encoding

In [144]:
def one_hot(labels, num_classes=None):
    """Encode a sequence of class ids as a one-hot matrix with one column per label.

    Args:
        labels: sequence of class ids; each entry is converted to int.
        num_classes: number of rows of the result. Defaults to the notebook's
            CLASSES_NUMBER constant when omitted.

    Returns:
        A float array of shape (num_classes, len(labels)) in which entry
        [labels[i], i] is 1 and every other entry is 0.

    Raises:
        IndexError: if a label does not index a valid row.
    """
    if num_classes is None:
        num_classes = CLASSES_NUMBER
    class_ids = np.asarray(labels).astype(int)
    onehot = np.zeros((num_classes, len(class_ids)))
    # One fancy-indexed assignment sets the (label, column) cell of every
    # example at once instead of looping in Python.
    onehot[class_ids, np.arange(len(class_ids))] = 1
    return onehot

# Encode both label vectors, then print the two resulting shapes on separate lines.
Y_train_onehot = one_hot(Y_train)
Y_test_onehot = one_hot(Y_test)

print(Y_train_onehot.shape, Y_test_onehot.shape, sep="\n")

(10, 60000)
(10, 10000)


In [145]:
# From here on the label names refer to the one-hot matrices.
# NOTE: once this cell has run, re-executing the encoding cell would pass
# one-hot matrices to one_hot instead of the original label vectors.
Y_train, Y_test = Y_train_onehot, Y_test_onehot

In [157]:
# Sanity check: one shape per line, in the order train X, train Y, test X, test Y.
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape, sep="\n")

(784, 60000)
(10, 60000)
(784, 10000)
(10, 10000)


## Plot Image

In [8]:
def plot_image(record):
    """Display one example as an image.

    Args:
        record: array that is reshaped to IMAGE_WIDTH rows (the column count
            is inferred) before being handed to matplotlib's imshow.
    """
    pixels = record.reshape(IMAGE_WIDTH, -1)
    plt.imshow(pixels)

# NN Implementation

## Initialization

In [383]:
def initialize_parameters(layers_dims):
    """Create the weight and bias variables of a fully connected network.

    Args:
        layers_dims: sequence of layer widths, input layer first.

    Returns:
        A list with one dict per layer after the input layer. Each dict holds
        'W', a float32 variable of shape (layers_dims[i], layers_dims[i-1])
        initialised from a normal distribution, and 'b', a float32 variable of
        shape (layers_dims[i], 1) initialised to zeros.
    """
    seed = 3
    # Kept so callers still see NumPy's global RNG reseeded; it has no effect
    # on the TensorFlow random op used for the weights below.
    np.random.seed(seed)
    parameters = []
    for i in range(1, len(layers_dims)):
        # An op-level seed makes the initial weights repeatable; offsetting it
        # by the layer index keeps the layers from sharing one seed.
        initial_weights = tf.random_normal(
            [layers_dims[i], layers_dims[i-1]], dtype=tf.float32, seed=seed + i)
        layer_params = {
            'W': tf.Variable(initial_weights, name='W'+str(i), dtype=tf.float32),
            'b': tf.Variable(tf.zeros((layers_dims[i], 1), dtype=tf.float32), name='b'+str(i)),
        }
        parameters.append(layer_params)
    return parameters

In [384]:
# Smoke test: build the parameters of a 4-5-6 network and display the variables.
# Every run of this cell creates another set of variables.
initialize_parameters([4,5,6])

[{'W': <tf.Variable 'W1_46:0' shape=(5, 4) dtype=float32_ref>,
  'b': <tf.Variable 'b1_46:0' shape=(5, 1) dtype=float32_ref>},
 {'W': <tf.Variable 'W2_27:0' shape=(6, 5) dtype=float32_ref>,
  'b': <tf.Variable 'b2_27:0' shape=(6, 1) dtype=float32_ref>}]

## Forward Propagation

In [390]:
def linear_forward(A, W, b):
    """Return the affine transform W @ A + b of a layer.

    Args:
        A: input to the layer, multiplied on the left by W.
        W: weight matrix of the layer.
        b: bias added to the matrix product.
    """
    return tf.matmul(W, A) + b


def linear_activation_forward(A_prev, W, b):
    """Apply one hidden layer: the affine transform followed by ReLU.

    Args:
        A_prev: output of the previous layer (or the network input).
        W: weight matrix of the layer.
        b: bias of the layer.

    Returns:
        The ReLU of linear_forward(A_prev, W, b).
    """
    return tf.nn.relu(linear_forward(A_prev, W, b))


def model(dim, X):
    """Build the forward pass of a fully connected network with new parameters.

    Args:
        dim: layer widths passed to initialize_parameters, input layer first.
        X: input fed to the first layer.

    Returns:
        The output of the last layer, which is left linear (no activation).
    """
    layers = initialize_parameters(dim)

    activation = X
    # Every layer except the last one is followed by a ReLU.
    for layer in layers[:-1]:
        activation = linear_activation_forward(activation, layer['W'], layer['b'])

    # The final layer only applies the affine transform.
    output_layer = layers[-1]
    return linear_forward(activation, output_layer['W'], output_layer['b'])

In [77]:
# Smoke test: run a freshly initialised 2-3-4 network on a single input column.
# The placeholder is float32 so that it matches the float32 weights created by
# initialize_parameters; tf.matmul requires both operands to share a dtype.
X = tf.placeholder(tf.float32, [2, None], name='X')
mod = model([2,3,4], X)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(mod, feed_dict={X: [[1],[2]]}))

[[-2.20228713e-05]
 [-1.27018650e-04]
 [ 2.34575856e-04]
 [-1.07710890e-04]]


## Training

In [358]:
def batch(index, size, X, Y):
    """Return the `index`-th mini-batch of at most `size` columns from X and Y.

    Args:
        index: zero-based batch number.
        size: number of columns per batch.
        X: 2-D array whose columns are examples.
        Y: 2-D array sliced with the same column range as X.

    Returns:
        A tuple (X_batch, Y_batch) holding columns [index*size, index*size+size),
        truncated at the last column of X. The final batch may therefore be
        smaller than `size`.
    """
    begin = index * size
    # Slice ends are exclusive, so the upper bound is the column count itself;
    # clamping to one less would silently drop the last example.
    end = min(begin + size, X.shape[1])
    return X[:, begin:end], Y[:, begin:end]

In [408]:
def train(dim, X_train, Y_train, X_test, Y_test, epochs=20, batch_size=256, learning_rate=0.01):
    """Build a network with layer widths `dim`, train it, and print its accuracy.

    Args:
        dim: layer widths, input layer first and output layer last.
        X_train: training inputs, one example per column.
        Y_train: training labels, one column per example.
        X_test: test inputs, one example per column.
        Y_test: test labels, one column per example.
        epochs: number of passes over the training data.
        batch_size: number of columns per mini-batch.
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        The logits tensor of the network. The session used for training is
        closed before returning, so only the graph node is handed back.
    """
    # Placeholder sizes follow `dim`, so the architecture passed by the caller
    # is the one that is built and fed.
    X_ = tf.placeholder(tf.float32, [dim[0], None], name='X')
    Y_ = tf.placeholder(tf.float32, [dim[-1], None], name='Y')

    logits = model(dim, X_)
    cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_, dim=0))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost_function)

    # Softmax preserves the ordering within a column, so the arg-max of the
    # raw logits already identifies the predicted class.
    correct_prediction = tf.equal(tf.argmax(logits, 0), tf.argmax(Y_, 0))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_batches = int(math.ceil(X_train.shape[1]/batch_size))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            for i in range(num_batches):
                batch_x, batch_y = batch(i, batch_size, X_train, Y_train)
                _, cost = sess.run([train_step, cost_function], feed_dict={X_: batch_x, Y_: batch_y})
            # Reports the cost of the last mini-batch of the epoch only.
            print("cost: ", cost)
        print("Train accuracy: ", accuracy.eval(feed_dict={X_: X_train, Y_: Y_train}))
        print("Test accuracy: ", accuracy.eval(feed_dict={X_: X_test, Y_: Y_test}))
        return logits

In [None]:
# NOTE(review): these notebook-level placeholders are not passed to `train`,
# which creates its own placeholders internally; they are only referenced by
# the accuracy cells further down.
Y_ = tf.placeholder(tf.float32, [CLASSES_NUMBER, None])
X_ = tf.placeholder(tf.float32, [INPUTS_NUMBER, None])
# Train with the default epochs, batch size and learning rate; `trained`
# receives the logits tensor that `train` returns.
trained = train([INPUTS_NUMBER, 256, 256, CLASSES_NUMBER], X_train, Y_train, X_test, Y_test)

cost:  1.5864121
cost:  0.2537342
cost:  0.05990955
cost:  0.13145597
cost:  0.09205328
cost:  0.016188107
cost:  0.013966052
cost:  0.03234447
cost:  0.12525138
cost:  0.008042746
cost:  0.0011373047
cost:  0.004378619
cost:  0.006593442
cost:  3.4753199


In [387]:
# NOTE(review): `logits` is not assigned at notebook level in any visible cell
# (the value returned by `train` is stored in `trained`), so this cell
# presumably relies on state from an earlier kernel run. Confirm that it
# survives Restart & Run All.
pred = tf.nn.softmax(logits, dim=0)
# A column counts as correct when the arg-max along axis 0 of the prediction
# matches the arg-max of the label column.
correct_prediction = tf.equal(tf.argmax(pred, 0), tf.argmax(Y_, 0))
# Fraction of correct columns.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [388]:
# Leftover debugging checks (shape of the logits and of the per-example loss),
# kept commented out:
# print(logits.eval(feed_dict={X_: X_train, Y_: Y_train}).shape)
# print(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_, dim=0).eval(feed_dict={X_: X_train, Y_: Y_train}).shape)
# NOTE(review): no `with tf.Session()` block is visible around these `.eval`
# calls. Confirm that a default session exists when this cell runs.
# Accuracy on the training split, then on the test split.
print(accuracy.eval(feed_dict={X_: X_train, Y_: Y_train}))
print(accuracy.eval(feed_dict={X_: X_test, Y_: Y_test}))

0.7226167
0.7278
