In [None]:
!pip install tensorflow==1.15 scikit-learn

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [None]:
def mnist_features_labels(n_labels, data_dir='/datasets/ud730/mnist', n_samples=10000):
  """
  Draw one batch of MNIST training samples and keep those whose one-hot
  label falls within the first `n_labels` classes.

  Parameters:
    n_labels: number of leading one-hot classes to keep.
    data_dir: directory handed to input_data.read_data_sets.
    n_samples: size of the single batch requested from mnist.train.next_batch.

  Returns:
    (features, labels): two parallel Python lists. Every label is cut down
    to its first `n_labels` entries so that its width matches a model that
    emits `n_labels` logits.
  """
  mnist = input_data.read_data_sets(data_dir, one_hot=True)
  mnist_features, mnist_labels = [], []
  for mnist_feature, mnist_label in zip(*mnist.train.next_batch(n_samples)):
    # Keep the sample only if its one-hot bit lies within the first n_labels classes
    if mnist_label[:n_labels].any():
      mnist_features.append(mnist_feature)
      # Truncate the label; appending the full-width label would not line up
      # with n_labels-wide logits whenever n_labels is smaller than 10
      mnist_labels.append(mnist_label[:n_labels])
  return mnist_features, mnist_labels

In [None]:
def get_weights(n_features,n_labels):
  """
  Create the weight matrix of a linear layer.

  Returns a tf.Variable of shape (n_features, n_labels) whose initial
  value is sampled with tf.truncated_normal.
  """
  initial_values = tf.truncated_normal((n_features, n_labels))
  return tf.Variable(initial_values)

In [None]:
def get_biases(n_labels):
  """
  Create the bias vector of a linear layer.

  Returns a tf.Variable holding `n_labels` zeros.
  """
  return tf.Variable(tf.zeros(n_labels))

In [None]:
def linear(input,w,b):
  """
  Apply a linear layer: matrix-multiply `input` by `w`, then add `b`.

  Returns the resulting tensor (the logits of the layer).
  """
  return tf.add(tf.matmul(input, w), b)

In [None]:
n_features = 28*28
n_labels = 10

# Graph inputs. The leading None leaves the batch dimension open while the
# second dimension is checked against the model when data is fed.
features = tf.placeholder(tf.float32, [None, n_features])
labels = tf.placeholder(tf.float32, [None, n_labels])

w = get_weights(n_features,n_labels)
b = get_biases(n_labels)

logits = linear(features,w,b)

# Softmax
prediction = tf.nn.softmax(logits)

# Cross entropy
# This quantifies how far off the predictions were.
# Computed from the logits with the fused op: the hand-written form
# -sum(labels * log(softmax(logits))) evaluates 0 * log(0) = NaN as soon as
# one softmax probability underflows to zero.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)

# Training loss
loss = tf.reduce_mean(cross_entropy)

# Rate at which the weights are changed
learning_rate = 0.08

# Gradient Descent
# This is the method used to train the model
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

train_features, train_labels = mnist_features_labels(n_labels)

# The whole graph is defined before the session starts, so the initializer
# created here covers every variable the graph contains.
with tf.Session() as session:
  session.run(tf.global_variables_initializer())

  # Run one optimizer step over the whole sample and get the loss
  _, l = session.run([optimizer, loss],feed_dict={features: train_features, labels: train_labels})

print('loss:',l)

In [None]:
from sys import getsizeof

# Model dimensions: inputs per sample and number of classes
n_input = 784
n_labels = 10

mnist = input_data.read_data_sets('datasets/ud730/mnist', one_hot=True)

# Features are used as loaded; labels are cast to float32 for each split
train_features, train_labels = mnist.train.images, mnist.train.labels.astype(np.float32)
val_features, val_labels = mnist.validation.images, mnist.validation.labels.astype(np.float32)
test_features, test_labels = mnist.test.images, mnist.test.labels.astype(np.float32)

# Parameters of the single linear layer, initialised from a normal distribution
weights = tf.Variable(tf.random_normal([n_input, n_labels]))
bias = tf.Variable(tf.random_normal([n_labels]))

Extracting datasets/ud730/mnist/train-images-idx3-ubyte.gz
Extracting datasets/ud730/mnist/train-labels-idx1-ubyte.gz
Extracting datasets/ud730/mnist/t10k-images-idx3-ubyte.gz
Extracting datasets/ud730/mnist/t10k-labels-idx1-ubyte.gz


In [None]:
# Report the shape of the training feature array and its size in bytes.
print('shape train features:',train_features.shape)
print('bytes features',train_features.nbytes)

shape train features: (55000, 784)
bytes features 172480000


In [None]:
# Report the shape of the training label array and its size in bytes.
print('shape train labels:',train_labels.shape)
print('bytes labels',train_labels.nbytes)

shape train labels: (55000, 10)
bytes labels 2200000


In [None]:
import sys
m = tf.global_variables_initializer()
with tf.Session() as s:
  s.run(m)
  # s.run(weights) returns the evaluated weight matrix as an array; .nbytes is
  # the exact size of its data buffer, so no object-header constant has to be
  # subtracted (that overhead varies across Python/NumPy builds).
  print('bytes weights',s.run(weights).nbytes)
bias_bytes = n_labels*4 # 4 bytes for each float32 object

bytes weights 31360


In order to use mini-batching, you must first divide your data into batches.

Unfortunately, it's sometimes impossible to divide the data into batches of exactly equal size. For example, imagine you'd like to create batches of 128 samples each from a dataset of 1000 samples. Since 128 does not evenly divide into 1000, you'd wind up with 7 batches of 128 samples, and 1 batch of 104 samples. (7*128 + 1*104 = 1000)

In that case, the size of the batches would vary, so you need to take advantage of TensorFlow's tf.placeholder() function to receive the varying batch sizes.

Continuing the example, if each sample had n_input = 784 features and n_classes = 10 possible labels, the dimensions for features would be [None, n_input] and labels would be [None, n_classes].

What does None do here?
The None dimension is a placeholder for the batch size. At runtime, TensorFlow will accept any batch size greater than 0.

Going back to our earlier example, this setup allows you to feed features and labels into the model as either the batches of 128 samples or the single batch of 104 samples.

In [None]:
# Graph inputs; the leading None lets each fed batch have any number of rows
features = tf.placeholder(tf.float32, shape=[None, n_input])
labels = tf.placeholder(tf.float32, shape=[None, n_labels])

In [None]:
from math import ceil

batch_size = 128
# Fractional number of batches; rounded up below because a shorter final
# batch still counts as a batch
total_batches = len(train_features) / batch_size
# Number of samples in that shorter final batch
last_batch_size = len(train_features) % batch_size
print(ceil(total_batches), last_batch_size)


430 88


In [None]:
def batches(batch_size, features, labels):
  """
  Split `features` and `labels` into consecutive mini-batches.

  Parameters:
    batch_size: maximum number of samples per batch (must be >= 1).
    features: sliceable sequence of samples.
    labels: sliceable sequence of labels, parallel to `features`.

  Returns:
    A list of [feature_batch, label_batch] pairs. Every batch holds
    `batch_size` samples except possibly the last one, which holds the
    remainder. An empty `features` yields an empty list.

  Raises:
    ValueError: if `batch_size` is smaller than 1.
  """
  if batch_size < 1:
    raise ValueError('batch_size must be a positive integer')
  # The batch boundaries come from the data that was passed in, not from a
  # notebook-level counter, so the result is correct for any dataset and does
  # not change when an unrelated cell reassigns a global.
  lst_batches = []
  for start in range(0, len(features), batch_size):
    end = start + batch_size
    lst_batches.append([features[start:end], labels[start:end]])
  return lst_batches

In [None]:
learning_rate = 0.3
epochs = 300
result = batches(batch_size, train_features, train_labels)

# Logits => xW + b
logits = tf.add(tf.matmul(features, weights), bias)

# Loss: mean softmax cross entropy over the fed batch, minimised with
# plain gradient descent
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: share of rows whose arg-max logit matches the arg-max label
correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(labels, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [None]:
def print_epoch_stats(epoch_i, sess, last_features, last_labels):
    """
    Print the cost and the validation accuracy reached after an epoch.

    The cost is evaluated on the batch passed in (`last_features`,
    `last_labels`); the accuracy is evaluated on the notebook-level
    validation arrays `val_features` / `val_labels`.
    """
    batch_feed = {features: last_features, labels: last_labels}
    validation_feed = {features: val_features, labels: val_labels}

    current_cost = sess.run(cost, feed_dict=batch_feed)
    valid_accuracy = sess.run(accuracy, feed_dict=validation_feed)

    message = 'Epoch: {:<4} - Cost: {:<8.3} Valid Accuracy: {:<5.3}'
    print(message.format(epoch_i, current_cost, valid_accuracy))

In [None]:
init = tf.global_variables_initializer()

# The batches are the same in every epoch, so they are built once here
# rather than on every pass of the epoch loop.
lst_batches = batches(batch_size,train_features,train_labels)

with tf.Session() as sess:
    sess.run(init)
    for epoch_i in range(epochs):
        # One gradient-descent step per mini-batch
        for batch_features, batch_labels in lst_batches:
            sess.run(optimizer, feed_dict={features: batch_features, labels: batch_labels})
        # Report the cost on the last batch of the epoch plus validation accuracy
        print_epoch_stats(epoch_i,sess,batch_features,batch_labels)
    # Calculate accuracy for test dataset
    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: test_features, labels: test_labels})

print('Test Accuracy: {}'.format(test_accuracy)) # 0.92

# Deep Neural Network in TensorFlow
The focus here is on the architecture of multilayer neural networks, not parameter tuning, so here we'll just give you the learning parameters.
The variable hidden_layer_size determines the size of the hidden layer in the neural network. This is also known as the width of a layer.
Deep neural networks use multiple layers, with each layer requiring its own weights and biases. The 'hidden_layer' weights and biases are for the hidden layer. The 'out' weights and biases are for the output layer. If the neural network were deeper, there would be weights and biases for each additional layer.

**Input**
The MNIST data is made up of 28px by 28px images with a single channel. The tf.reshape() call in the graph-input cell below reshapes the 28px by 28px matrices in x into row vectors of 784px.

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
# Reload MNIST from the current directory with one-hot labels.
# reshape=False is presumably what keeps each image in the 28x28x1 layout
# that the [None, 28, 28, 1] placeholder defined further down expects.
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

# Dropout
Dropout is a regularization technique for reducing overfitting. The technique temporarily drops units (artificial neurons) from the network, along with all of those units' incoming and outgoing connections. Figure 1 illustrates how dropout works.

You should only drop units while training the model. During validation or testing, you should keep all of the units to maximize accuracy.

In [None]:
# Parameters
learning_rate = 0.001     # step size handed to the gradient descent optimiser
training_epochs = 30      # number of passes over the training set
hidden_layer_size = 256   # width of the single hidden layer
batch_size = 128          # samples per training step
display_step = 2          # NOTE(review): not referenced by the visible cells; the training loop reports every 6th epoch
n_input = 784  # MNIST data input (img shape: 28*28)
n_labels = 10  # MNIST total classes (0-9 digits)

In [None]:
# Trainable parameters, keyed by layer. Each variable carries an explicit
# name and is initialised from a normal distribution.
weights = {
    'hidden_layer': tf.Variable(
        tf.random_normal(shape=[n_input, hidden_layer_size]),
        name='weights_hidden'),
    'out': tf.Variable(
        tf.random_normal(shape=[hidden_layer_size, n_labels]),
        name='weights_out'),
}
biases = {
    'hidden_layer': tf.Variable(
        tf.random_normal(shape=[hidden_layer_size]),
        name='bias_hidden'),
    'out': tf.Variable(
        tf.random_normal(shape=[n_labels]),
        name='bias_out'),
}

In [None]:
# tf Graph input
x = tf.placeholder(tf.float32, [None, 28, 28, 1],name='features')
y = tf.placeholder(tf.float32, [None, n_labels],name='labels')

x_flat = tf.reshape(x, [-1, n_input])
keep_prob = tf.placeholder(tf.float32,name='keep_prob')

In [None]:
# Hidden layer with RELU activation
hidden_layer = tf.add(tf.matmul(x_flat,weights['hidden_layer']),biases['hidden_layer'])
hidden_layer = tf.nn.relu(hidden_layer)
hidden_layer = tf.nn.dropout(hidden_layer,keep_prob)
# Output layer with linear activation
logits = tf.add(tf.matmul(hidden_layer,weights['out']),biases['out'])

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [None]:
# Optimiser
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y))
optimiser = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



# Saving Variables
Training a model can take hours. But once you close your TensorFlow session, you lose all the trained weights and biases. If you were to reuse the model in the future, you would have to train it all over again!
Fortunately, TensorFlow lets you save your progress with tf.train.Saver, which writes any tf.Variable to a checkpoint file such as "model.ckpt".
If you're using TensorFlow 0.11.0RC1 or newer, a file called "model.ckpt.meta" will also be created. This file contains the TensorFlow graph.

In [None]:
save_file = './model.ckpt'
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Number of full batches per epoch. It does not change between epochs, so it
# is computed once, with integer division instead of int(float division).
total_batches = mnist.train.num_examples // batch_size
with tf.Session() as s:
  s.run(init)
  for epoch_i in range(training_epochs):
    for _ in range(total_batches):
      batch_x, batch_y = mnist.train.next_batch(batch_size) # automatically creating batches
      # Dropout is active while training: half of the hidden units are kept
      s.run(optimiser,feed_dict={x:batch_x,y:batch_y,keep_prob:0.5})
    # Report validation accuracy on every 6th epoch, with dropout disabled
    if epoch_i%6==0:
      validation_accuracy = s.run(accuracy,feed_dict={x:mnist.validation.images,y:mnist.validation.labels,keep_prob:1})
      print('Epoch {:<3} - Validation Accuracy: {}'.format(
                epoch_i,validation_accuracy))
  # Final evaluation on the test split, again with every unit kept
  test_accuracy = s.run(accuracy,feed_dict={x:mnist.test.images,y:mnist.test.labels,keep_prob:1})
  print('Test accuracy',test_accuracy)
  # Write the variables to the checkpoint before the session closes
  saver.save(s,save_file)

Epoch 0   - Validation Accuracy: 0.42480000853538513
Epoch 6   - Validation Accuracy: 0.7757999897003174
Epoch 12  - Validation Accuracy: 0.8284000158309937
Epoch 18  - Validation Accuracy: 0.8492000102996826
Epoch 24  - Validation Accuracy: 0.8593999743461609
Test accuracy 0.866


# Loading Variables
Now that the Tensor Variables are saved, let's load them back into a new model.

You'll notice you still need to create the weights and bias Tensors in Python. The tf.train.Saver.restore() function loads the saved data into weights and bias.

Since tf.train.Saver.restore() sets all the TensorFlow Variables, you don't need to call tf.global_variables_initializer().

In [None]:
# Start from an empty default graph, then recreate the tensors and variables
# under the same variable names so they are available for restoring.
tf.reset_default_graph()

# Graph inputs (images are fed already flattened to n_input values here)
x = tf.placeholder(tf.float32, shape=[None, n_input])
x_flat = tf.reshape(x, shape=[-1, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_labels])
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# Trainable parameters, keyed by layer
weights = {
    'hidden_layer': tf.Variable(
        tf.random_normal(shape=[n_input, hidden_layer_size]),
        name='weights_hidden'),
    'out': tf.Variable(
        tf.random_normal(shape=[hidden_layer_size, n_labels]),
        name='weights_out'),
}
biases = {
    'hidden_layer': tf.Variable(
        tf.random_normal(shape=[hidden_layer_size]),
        name='bias_hidden'),
    'out': tf.Variable(
        tf.random_normal(shape=[n_labels]),
        name='bias_out'),
}

# Hidden layer: affine transform -> ReLU -> dropout controlled by keep_prob
hidden_layer = tf.nn.dropout(
    tf.nn.relu(
        tf.add(tf.matmul(x_flat, weights['hidden_layer']), biases['hidden_layer'])
    ),
    keep_prob=keep_prob,
)
# Output layer with linear activation
logits = tf.add(tf.matmul(hidden_layer, weights['out']), biases['out'])

# Accuracy: share of rows whose arg-max logit matches the arg-max label
correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))


INFO:tensorflow:Restoring parameters from ./model.ckpt
Weights:	hidden_layer: (784, 256) output_layer: (256, 10)
Biases	hidden_layer: (256,) output_layer: (10,)
0.866


In [None]:
# Restore all the values in the params just created & run to find test accuracy
saver = tf.train.Saver()
with tf.Session() as s:
  saver.restore(s,save_file)
  weights = s.run(weights)
  biases = s.run(biases)
  print('Weights:\thidden_layer:',weights['hidden_layer'].shape,'output_layer:',weights['out'].shape)
  print('Biases\thidden_layer:',biases['hidden_layer'].shape,'output_layer:',biases['out'].shape)
  
  test_accuracy = s.run(accuracy,feed_dict={
      x:mnist.test.images.reshape(-1, n_input),
      y:mnist.test.labels,
      keep_prob:1.0
      })
  print(test_accuracy)