In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from mnist import MNIST
%matplotlib inline

  return f(*args, **kwds)


In [140]:
mndata = MNIST('MNIST')
X_test, Y_test = mndata.load_testing()
X_train, Y_train = mndata.load_training()

X_test = np.array(list(map(lambda x: np.array(x), X_test))).T
Y_test = np.array(Y_test).T

X_train = np.array(list(map(lambda x: np.array(x), X_train))).T
Y_train = np.array(Y_train).T

In [141]:
IMAGE_WIDTH = 28
INPUTS_NUMBER = IMAGE_WIDTH*IMAGE_WIDTH
CLASSES_NUMBER = 10

## Normalize data

In [142]:
def normalize_data(X_train, X_test):
    total = np.concatenate((X_train, X_test), axis=1)
    avg = np.mean(total, axis=1, keepdims=True)
    norm = np.linalg.norm(total, axis=1, keepdims=True)
    norm = np.array([np.apply_along_axis(lambda x: x if x != 0 else 1, arr=norm, axis=1)]).T
    X_train_normalized = (X_train - avg)/norm
    X_test_normalized = (X_test - avg)/norm
    return X_train_normalized, X_test_normalized

X_train_normalized, X_test_normalized = normalize_data(X_train, X_test)

In [143]:
X_train = X_train_normalized
X_test = X_test_normalized

## One-hot labels encoding

In [144]:
def one_hot(labels):
    onehot = np.zeros((CLASSES_NUMBER, len(labels)))
    for i in range(len(labels)):
        onehot[int(labels[i]),i] = 1
    return onehot

Y_train_onehot = one_hot(Y_train)
Y_test_onehot = one_hot(Y_test)

print(Y_train_onehot.shape)
print(Y_test_onehot.shape)

(10, 60000)
(10, 10000)


In [145]:
Y_train = Y_train_onehot
Y_test = Y_test_onehot

In [157]:
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(784, 60000)
(10, 60000)
(784, 10000)
(10, 10000)


## Plot Image

In [8]:
def plot_image(record):
    plt.imshow(record.reshape(IMAGE_WIDTH,-1))

# NN Implementation

## Activation Functions

In [132]:
def sigmoid(z):
    res = 1./(1.+np.exp(-z))
    return (res, z)

def relu(z):
    return (np.maximum(0, z), z)
    
    
def sigmoid_backward(dA, cache):
    Z = cache
    A = sigmoid(Z)[0]
    dx = dA*(A*(1-A))
    return dx
    

def relu_backward(dA, cache):
    z = cache
    dz = np.array(dA, copy=True)
    dz[z <= 0] = 0
    return dz

## Initialization

In [383]:
def initialize_parameters(layers_dims):
    np.random.seed(3)
    parameters = []
    for i in range(1,len(layers_dims)):
        layer_params = {}
        layer_params['W'] = tf.Variable(tf.random_normal([layers_dims[i], layers_dims[i-1]]), name='W'+str(i), dtype=tf.float32)
        layer_params['b'] = tf.Variable(tf.zeros((layers_dims[i], 1), dtype=tf.float32), name='b'+str(i))
        parameters.append(layer_params)
    return parameters

In [384]:
initialize_parameters([4,5,6])

[{'W': <tf.Variable 'W1_46:0' shape=(5, 4) dtype=float32_ref>,
  'b': <tf.Variable 'b1_46:0' shape=(5, 1) dtype=float32_ref>},
 {'W': <tf.Variable 'W2_27:0' shape=(6, 5) dtype=float32_ref>,
  'b': <tf.Variable 'b2_27:0' shape=(6, 1) dtype=float32_ref>}]

## Forward Propagation

In [357]:
def linear_forward(A, W, b):
    Z = tf.matmul(W, A) + b
    return Z


def linear_activation_forward(A_prev, W, b):
    Z = linear_forward(A_prev, W, b)
    A = tf.nn.relu(Z)        
    return A


def model(dim, X):
    
    parameters = initialize_parameters(dim)
    
    L = len(parameters)
    A = X
    
    for i in range(L-1):
        W = parameters[i]['W']
        b = parameters[i]['b']
        A = linear_activation_forward(A, W, b)
    W = parameters[L-1]['W']
    b = parameters[L-1]['b']
    A = linear_forward(A, W, b)
    
    return A

In [77]:
X = tf.placeholder(tf.float64, [2, None], name='X')
mod = model([2,3,4], X)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(mod, feed_dict={X: [[1],[2]]}))

[[-2.20228713e-05]
 [-1.27018650e-04]
 [ 2.34575856e-04]
 [-1.07710890e-04]]


## Training

In [358]:
def batch(index, size, X, Y):
    begin = index*size
    end = index*size+size
    end = end if end < X.shape[1] else X.shape[1] - 1
    return X[:, begin: end], Y[:, begin: end]

In [385]:
Y_ = tf.placeholder(tf.float32, [CLASSES_NUMBER, None])
X_ = tf.placeholder(tf.float32, [INPUTS_NUMBER, None])
logits = model([INPUTS_NUMBER, 256, 256, CLASSES_NUMBER], X_)
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_, dim=0))#tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_, dim=0))#-tf.reduce_sum(Y_*tf.log(recognizer))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cost_function)

In [386]:
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
batch_idx = 0
for i in range(2000):
    batch_idx = batch_idx if batch_idx*512 < X_train.shape[1] else 0 
    batch_x, batch_y = batch(batch_idx, 512, X_train, Y_train)
    batch_idx += 1
    _, cost = sess.run([train_step, cost_function], feed_dict={X_: batch_x, Y_: batch_y})
    if i%100 == 0:
        print("cost: ", cost)

cost:  16.924866
cost:  2.9364462
cost:  3.080306
cost:  1.8927426
cost:  1.9163662
cost:  4.050782
cost:  1.5441906
cost:  2.5731888
cost:  1.7643876
cost:  2.2412143
cost:  1.5724758
cost:  1.5723088
cost:  1.1148348
cost:  1.9962246
cost:  1.0339694
cost:  1.1783919
cost:  0.88981354
cost:  1.0579739
cost:  1.7679377
cost:  0.95110357


In [387]:
pred = tf.nn.softmax(logits, dim=0)
correct_prediction = tf.equal(tf.argmax(pred, 0), tf.argmax(Y_, 0))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [388]:
# print(logits.eval(feed_dict={X_: X_train, Y_: Y_train}).shape)
# print(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_, dim=0).eval(feed_dict={X_: X_train, Y_: Y_train}).shape)
# lin
print(accuracy.eval(feed_dict={X_: X_train, Y_: Y_train}))
print(accuracy.eval(feed_dict={X_: X_test, Y_: Y_test}))

0.7226167
0.7278


In [347]:

# Import MNIST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1

# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
X = tf.placeholder("float", [n_input, None])
Y = tf.placeholder("float", [n_classes, None])

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
    'h2': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_classes, n_hidden_2]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1, 1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2, 1])),
    'out': tf.Variable(tf.random_normal([n_classes, 1]))
}


# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.add(tf.matmul(weights['h1'],x), biases['b1'])
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.add(tf.matmul(weights['h2'], layer_1), biases['b2'])
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(weights['out'], layer_2) + biases['out']
    return out_layer

# Construct model
logits = multilayer_perceptron(X)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y, dim=0))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(X_train.shape[1]/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = batch(i, batch_size, X_train, Y_train)
            
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    pred = tf.nn.softmax(logits, dim=0)  # Apply softmax to logits
    correct_prediction = tf.equal(tf.argmax(tf.transpose(pred), 1), tf.argmax(tf.transpose(Y), 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
    print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

Epoch: 0001 cost=39.985888077
Epoch: 0002 cost=24.718713039
Epoch: 0003 cost=17.404589753
Epoch: 0004 cost=13.036918770
Epoch: 0005 cost=10.550756191
Epoch: 0006 cost=8.954216694
Epoch: 0007 cost=7.898723516
Epoch: 0008 cost=7.098989445
Epoch: 0009 cost=6.457547848
Epoch: 0010 cost=5.942457181
Epoch: 0011 cost=5.531623243
Epoch: 0012 cost=5.206263472
Epoch: 0013 cost=4.936797193
Epoch: 0014 cost=4.699576872
Epoch: 0015 cost=4.496543126
Optimization Finished!
Train Accuracy: 0.73143333
Test Accuracy: 0.741


In [342]:
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}


# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

# Construct model
logits = multilayer_perceptron(X)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    pred = tf.nn.softmax(logits)  # Apply softmax to logits
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Test Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))
    print("Train Accuracy:", accuracy.eval({X: mnist.train.images, Y: mnist.train.labels}))

Epoch: 0001 cost=251.133857581
Epoch: 0002 cost=100.544263288
Epoch: 0003 cost=74.374830352
Epoch: 0004 cost=63.270150401
Epoch: 0005 cost=53.643589960
Epoch: 0006 cost=46.250007435
Epoch: 0007 cost=41.650627502
Epoch: 0008 cost=38.120960002
Epoch: 0009 cost=34.394468255
Epoch: 0010 cost=31.718446169
Epoch: 0011 cost=28.957364607
Epoch: 0012 cost=27.204996125
Epoch: 0013 cost=26.170167691
Epoch: 0014 cost=24.515232554
Epoch: 0015 cost=23.293204926
Optimization Finished!
Test Accuracy: 0.8745
Train Accuracy: 0.88567275
