In [0]:
import numpy as np


In [0]:
# sigmoid forward pass
def sigmoid(z):
  return ((1.0)/(1+np.exp(-1*z)))

# sigmoid backward pass
def sigmoid_back(dout, cache):
  a = cache
  dz = a*(1-a) * dout
  
  return dz

# weighted sum forward pass
def w_sum_forward(x, theta, theta0):
  print(x.shape)
  print(theta.shape)
  print(theta0.shape)
  cache = (x, theta, theta0)
  return (theta.dot(x) + theta0, cache)

# weighted sum backward pass
def w_sum_backward(dout, cache):
  x, theta, theta0 = cache
  dtheta = dout.dot(x.T)
  dtheta0 = 1 * np.sum(dout, axis=1)
  dx = theta.T.dot(dout)
  
  return dx, dtheta, dtheta0

# sigmoid cross entropy loss
def sigmoid_loss(h,y):
  loss = np.sum(-1*(y.T*np.log(1e-12+h) + (1-y.T)*np.log(1e-12+(1-h))))
  dout = -1*(y.T/1e-12+h)-(1-y.T)/(1e-12+(1-h))
  
  return loss, dout



In [0]:
import tensorflow as tf
import math
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)


x_train = mnist.train.images
y_train = mnist.train.labels
x_val = mnist.validation.images
y_val = mnist.validation.labels
x_test = mnist.test.images
y_test = mnist.test.labels

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py fr

In [0]:
x_val.shape

(5000, 784)

In [0]:
'''
Usage

'''

# placeholders for inputs and outputs
x = x_train
y = y_train

# 2 layers; 100 neurons in hidden layer and 10 neurons in output layer
layers = [100, 10]

# initialize layer weights and biases
# firs hidden layer
theta1 = np.random.randn(100, x_train.shape[1])
theta1_0 = np.zeros((100,1))

# output layer
theta2 = np.random.randn(y_train.shape[1],100)
theta2_0 = np.zeros((y_train.shape[1],1))


In [0]:

# Training: Perform forward prop and backprop
'''FORWARD PROP:'''
# first hidden layer
z1, cache1 = w_sum_forward(x.T, theta1, theta1_0)
a1 = sigmoid(z1)

# output layer
z2, cache2 = w_sum_forward(a1, theta2, theta2_0)
a2 = sigmoid(z2)

# loss
loss, dout = sigmoid_loss(a2, y)

'''BACKWARD PROP'''
# output layer

dz2 = sigmoid_back(dout, a2)
da1, dtheta2, dtheta2_0 = w_sum_backward(dz2, cache2)

dz1 = sigmoid_back(da1, a1)
dx1, dtheta1, dtheta1_0 = w_sum_backward(dz1, cache1)

# update the weights and bias
alpha = 0.0003 # learning rate

theta1 = theta1 - alpha * dtheta1
theta1_0 = theta1_0 - alpha * dtheta1_0.reshape(-1,1)
theta2 = theta2 - alpha * dtheta2
theta2_0 = theta2_0 - alpha * dtheta2_0.reshape(-1,1)

loss

(784, 55000)
(100, 784)
(100, 1)
(100, 55000)
(10, 100)
(10, 1)


1156393.5172071126

In [0]:
'''USING TENSORFLOW'''

X = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

theta1 = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1), name = 'theta1')
theta1_0 = tf.Variable(tf.zeros([10]))

init = tf.global_variables_initializer()

z = tf.matmul(tf.reshape(X, [-1, 784]), theta1) + theta1_0

h = tf.nn.softmax(z)

cross_entropy = -tf.reduce_sum(y * tf.log(h))

is_correct = tf.equal(tf.argmax(y,1), tf.argmax(h,1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

learning_rate = 0.003


optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(cross_entropy)


sess = tf.Session()
sess.run(init)

for i in range(1000):
    # load batch of images and correct answers
    batch_X, batch_Y = mnist.train.next_batch(100)
    train_data={X: batch_X, y: batch_Y}

    # train
    sess.run(train_step, feed_dict=train_data)
    
    a_t,c_t = sess.run([accuracy, cross_entropy], feed_dict=train_data)
    
    test_data={X: mnist.test.images, y: mnist.test.labels}
    a,c = sess.run([accuracy, cross_entropy], feed_dict=test_data)
    
    print('train acc: '+str(a_t) + ' train loss: '+str(c_t) + "||" + 'test acc: '+str(a_t) + ' test loss: '+str(c_t))
    print('\n')
    

train acc: 0.25 train loss: 204.423||test acc: 0.25 test loss: 204.423


train acc: 0.34 train loss: 189.22244||test acc: 0.34 test loss: 189.22244


train acc: 0.53 train loss: 164.36801||test acc: 0.53 test loss: 164.36801


train acc: 0.53 train loss: 154.80878||test acc: 0.53 test loss: 154.80878


train acc: 0.64 train loss: 133.79706||test acc: 0.64 test loss: 133.79706


train acc: 0.64 train loss: 124.5228||test acc: 0.64 test loss: 124.5228


train acc: 0.72 train loss: 110.57581||test acc: 0.72 test loss: 110.57581


train acc: 0.76 train loss: 94.94479||test acc: 0.76 test loss: 94.94479


train acc: 0.69 train loss: 107.64734||test acc: 0.69 test loss: 107.64734


train acc: 0.73 train loss: 104.1424||test acc: 0.73 test loss: 104.1424


train acc: 0.79 train loss: 97.77794||test acc: 0.79 test loss: 97.77794


train acc: 0.77 train loss: 89.679085||test acc: 0.77 test loss: 89.679085


train acc: 0.78 train loss: 95.32274||test acc: 0.78 test loss: 95.32274


train acc: 0.