In [None]:
# Seed NumPy and TensorFlow (graph-level seed) with the same fixed value (42)
# before any other cell runs, so random draws start from a known state.
from numpy.random import seed
seed(42)
from tensorflow import set_random_seed
set_random_seed(42)

In [None]:
import os
import numpy as np
import tensorflow as tf

from time import strftime
from PIL import Image

In [None]:
# Problem-size constants shared by all cells below.
NR_CLASSES = 10           # width of the one-hot label vectors
VALIDATION_SIZE = 10000   # number of training samples held out for validation

# Image geometry; one flattened image has width * height * channels values.
IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS = 28, 28, 1
TOTAL_INPUTS = IMAGE_WIDTH * IMAGE_HEIGHT * CHANNELS

### 1) Download mnist dataset and create train and test sets.

In [None]:
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt

# Load MNIST as (images, labels) pairs for the training and the test split.
# The recorded cell output shows the mnist.npz archive being downloaded.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


### 2) Reshape and rescale the data: scale it to values between 0 and 1 (neural networks usually work better with this type of data), and one-hot encode the labels

In [None]:
print(X_train.shape)

# Guard the flattening step: every image must match the configured geometry,
# otherwise TOTAL_INPUTS would not describe one flattened sample.
assert X_train.shape[1:] == (IMAGE_HEIGHT, IMAGE_WIDTH), X_train.shape
assert X_test.shape[1:] == (IMAGE_HEIGHT, IMAGE_WIDTH), X_test.shape

# Flatten each image into one row of TOTAL_INPUTS values (-1 lets NumPy infer
# the number of samples) and re-scale the pixel values by 1/255.
x_train_all = X_train.reshape(-1, TOTAL_INPUTS) / 255.0
x_test = X_test.reshape(-1, TOTAL_INPUTS) / 255.0

# One-hot encode the integer labels by selecting rows of the identity matrix.
y_train_all = np.eye(NR_CLASSES)[Y_train]
y_test = np.eye(NR_CLASSES)[Y_test]

print(y_train_all.shape)
print(y_test.shape)

(60000, 28, 28)
(60000, 10)
(10000, 10)


### 3) Split the training dataset into a smaller training dataset and a validation dataset for the features and the labels. Create four arrays: x_val, y_val, x_train, and y_train from x_train_all and y_train_all. Use the validation size of 10,000.

In [None]:
# Hold out the first VALIDATION_SIZE samples for validation; everything after
# them remains in the (smaller) training set. Features and labels are sliced
# at the same position so they stay aligned.
x_val, x_train = x_train_all[:VALIDATION_SIZE], x_train_all[VALIDATION_SIZE:]
y_val, y_train = y_train_all[:VALIDATION_SIZE], y_train_all[VALIDATION_SIZE:]

print(x_train.shape)
print(x_val.shape)

(50000, 784)
(10000, 784)


### 4) Setup Tensorflow Graph

In [None]:
# Graph inputs: flattened images and one-hot labels. The leading None leaves
# the batch dimension open so any number of rows can be fed.
X = tf.placeholder(dtype=tf.float32, shape=(None, TOTAL_INPUTS), name='X')
Y = tf.placeholder(dtype=tf.float32, shape=(None, NR_CLASSES), name='labels')
print(X.shape)

(?, 784)


### 5) Create variables for number of epochs, learning rate and two hidden layers: 512 and 64 neurons

In [None]:
# nr_epochs = 50   (left disabled here; the training cell sets nr_epochs itself)
learning_rate = 0.001

# Sizes of the two hidden layers.
n_hidden1, n_hidden2 = 512, 64

# Setup of tensorboard on google colab
TensorBoard is a very good way to visualise your data. 

In [None]:
# Download and unpack the ngrok client binary into the working directory.
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

# Launch TensorBoard as a background shell process reading event files from
# LOG_DIR and listening on port 6006.
LOG_DIR = './log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

# Launch an ngrok HTTP tunnel to port 6006, also in the background.
# NOTE(review): this publishes the TensorBoard instance under a public URL and
# runs a binary downloaded without an integrity check — confirm this is acceptable.
get_ipython().system_raw('./ngrok http 6006 &')

# Query ngrok's local API (port 4040) and print the public URL of the first tunnel.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

--2019-11-05 08:36:44--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 52.20.12.96, 34.197.46.159, 50.17.165.171, ...
Connecting to bin.equinox.io (bin.equinox.io)|52.20.12.96|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13773305 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2019-11-05 08:36:45 (17.1 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13773305/13773305]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   
http://b536d46e.ngrok.io


### 6) Create a function that builds one layer of the neural network:
You have an input, the dimension of the weights (weight_dim), the dimension of the bias (bias_dim) and the name of your layer; return the layer's output.
Use a truncated normal distribution to generate the initial weights and zero constants for the biases.

In [None]:
def setup_layer(input, weight_dim, bias_dim, name):
    """Build one fully connected layer inside its own TensorFlow name scope.

    Args:
        input: tensor that is matrix-multiplied with the layer's weights.
        weight_dim: shape of the weight matrix, e.g. [n_inputs, n_units].
        bias_dim: shape of the bias vector, e.g. [n_units].
        name: name-scope label of the layer. The special value 'out' marks
            the output layer.

    Returns:
        For hidden layers, ``relu(input @ W + b)``.
        For the layer named 'out', the raw pre-activation ``input @ W + b``
        (logits). No softmax is applied here because this notebook passes the
        result as ``logits`` to ``tf.nn.softmax_cross_entropy_with_logits_v2``,
        which applies softmax internally; applying it in the layer as well
        would squash the values twice. ``tf.argmax`` over logits selects the
        same class as over softmax probabilities, so accuracy is unaffected.
    """
    with tf.name_scope(name):
        # Weights start from a truncated normal (stddev 0.1, fixed op seed).
        initial_w = tf.truncated_normal(shape=weight_dim, stddev=0.1, seed=42)
        w = tf.Variable(initial_value=initial_w, name='W')

        # Biases start at zero.
        initial_b = tf.constant(value=0.0, shape=bias_dim)
        b = tf.Variable(initial_value=initial_b, name='B')

        layer_in = tf.matmul(input, w) + b

        if name == 'out':
            # Output layer: hand back logits, the loss op does the softmax.
            layer_out = layer_in
        else:
            layer_out = tf.nn.relu(layer_in)

        # Record parameter distributions for TensorBoard.
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

        return layer_out

### 7) Create neural network with 2 hidden layers, using this function from previous item. Add also one dropout layer to avoid overfitting

In [None]:
# Hidden layer 1: TOTAL_INPUTS inputs -> n_hidden1 units.
layer_1 = setup_layer(X, weight_dim=[TOTAL_INPUTS, n_hidden1], 
                      bias_dim=[n_hidden1], name='layer_1')

# Dropout applied to the first hidden layer's activations.
# NOTE(review): the rate is a constant baked into the graph, so dropout also
# stays active whenever validation or test data is fed through `output`.
# Consider a feedable rate (e.g. tf.placeholder_with_default) set to 0 for evaluation.
layer_drop = tf.nn.dropout(layer_1, rate=0.2, name='dropout_layer')

# Hidden layer 2: n_hidden1 -> n_hidden2 units, fed from the dropout output.
layer_2 = setup_layer(layer_drop, weight_dim=[n_hidden1, n_hidden2], 
                      bias_dim=[n_hidden2], name='layer_2')

# Output layer: n_hidden2 -> NR_CLASSES units.
output = setup_layer(layer_2, weight_dim=[n_hidden2, NR_CLASSES], 
                      bias_dim=[NR_CLASSES], name='out')

# Text label for this configuration (hidden sizes, dropout marker, learning rate).
model_name = f'{n_hidden1}-DO-{n_hidden2} LR{learning_rate}'

### 8) For better visualization in TensorBoard we want to use tf.name_scope() to group the loss, optimizer, accuracy metric and performance summaries.

In [None]:
# Defining Loss Function
# Softmax cross-entropy between the labels Y and `output` (passed as the
# `logits` argument), averaged with reduce_mean into a single scalar.
with tf.name_scope('loss_calc'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))
# Defining Optimizer
# Adam with the configured learning_rate; running train_step performs one
# parameter update that minimizes `loss`.
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)
# Accuracy Metric
# A row counts as correct when the argmax of `output` equals the argmax of the
# one-hot label; accuracy is the mean of those 0/1 values.
with tf.name_scope('accuracy_calc'):
    correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Scalar summaries grouped under one scope for TensorBoard.
with tf.name_scope('performance'):
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('cost', loss)

#Check Input Images in Tensorboard

# Reshape the flattened inputs to [batch, 28, 28, 1] so an image summary with
# up to 4 of the fed input images can be logged.
with tf.name_scope('show_image'):
    x_image = tf.reshape(X, [-1, 28, 28, 1])
    tf.summary.image('image_input', x_image, max_outputs=4)

### 9) Create a session using tf.Session() and merge the summaries using tf.summary.merge_all(). Use tf.summary.FileWriter() to write your summaries.

In [None]:
# Run Session

sess = tf.Session()

# Single op that evaluates every summary registered on the graph so far.
merged_summary = tf.summary.merge_all()

# Writer for training summaries; the graph is attached once so TensorBoard
# can display it.
train_writer = tf.summary.FileWriter('./log/train')
train_writer.add_graph(sess.graph)

# Writer for validation summaries. Only validation_writer is bound here: a
# chained assignment (validation_writer = train_writer = ...) would rebind
# train_writer as well, sending training summaries to ./log/validation and
# leaving the ./log/train writer unused and never closed.
validation_writer = tf.summary.FileWriter('./log/validation')

### 10) Initialise all the variables, and run the session, look at the TensorBoard

In [None]:
# Build the op that assigns every graph variable its initial value, then run it.
init = tf.global_variables_initializer()
sess.run(fetches=init)

### 11) If your data is quite big, it is useful to split it into so-called batches: smaller pieces of the data. We have 50000 data points and we want batches of 1000 points. Create a next_batch function which returns the next part of the data.

In [None]:
size_of_batch = 1000
num_examples = len(y_train)
# Number of full batches that fit into one pass over the training set.
nr_iterations = num_examples // size_of_batch

# Read position inside the training set; advanced by next_batch().
index_in_epoch = 0

print("num_examples =", num_examples)
print("nr_iterations =", nr_iterations)

num_examples = 50000
nr_iterations = 50


In [None]:
def next_batch(batch_size, data, labels):
    """Return the next consecutive mini-batch of ``data`` and ``labels``.

    The read position is kept in the module-level ``index_in_epoch`` and is
    advanced by ``batch_size`` on every call. When the next batch would run
    past the end of ``data``, reading restarts at the beginning.

    Args:
        batch_size: number of rows to return.
        data: sliceable sequence/array of samples.
        labels: sliceable sequence/array aligned row-by-row with ``data``.

    Returns:
        Tuple ``(data[start:end], labels[start:end])``.
    """
    global index_in_epoch

    # Take the size from the data actually passed in rather than from an
    # unrelated global, so the wrap-around is right for any dataset.
    total = len(data)

    start = index_in_epoch
    index_in_epoch += batch_size

    if index_in_epoch > total:
        # Not enough rows left for a full batch: go back to the beginning.
        start = 0
        index_in_epoch = batch_size

    end = index_in_epoch

    return data[start:end], labels[start:end]

In [None]:
# Fetch one batch and check its shapes. This call advances index_in_epoch,
# so the training loop continues with the batch after this one.
batch_x, batch_y = next_batch(batch_size=size_of_batch, data=x_train, labels=y_train)
for batch_part in (batch_x, batch_y):
    print(batch_part.shape)

(1000, 784)
(1000, 10)


### 12) Run the algorithm: do several so-called epochs - runs through all the data. In each epoch use 50 batches with 1000 data points. Write information to TensorBoard to investigate later.

In [None]:
nr_epochs = 10

for epoch in range(nr_epochs):

    # ---- Training: nr_iterations mini-batch updates per epoch ----
    for i in range(nr_iterations):
        batch_x, batch_y = next_batch(size_of_batch, x_train, y_train)
        feed_dictionary = {X: batch_x, Y: batch_y}
        sess.run(train_step, feed_dict=feed_dictionary)

    # Summaries and accuracy are evaluated on the last mini-batch of the epoch.
    s, batch_accuracy = sess.run([merged_summary, accuracy], feed_dict=feed_dictionary)
    train_writer.add_summary(s, epoch)
    print(f'Epoch {epoch} \t| Training Accuracy = {batch_accuracy}')

    # ---- Validation: summaries on the whole validation set ----
    validation_feed = {X: x_val, Y: y_val}
    summary = sess.run(merged_summary, feed_dict=validation_feed)
    validation_writer.add_summary(summary, epoch)

print('Done training!')

Epoch 0 	| Training Accuracy = 0.9089999794960022
Epoch 1 	| Training Accuracy = 0.9399999976158142
Epoch 2 	| Training Accuracy = 0.9559999704360962
Epoch 3 	| Training Accuracy = 0.9580000042915344
Epoch 4 	| Training Accuracy = 0.9620000123977661
Epoch 5 	| Training Accuracy = 0.9670000076293945
Epoch 6 	| Training Accuracy = 0.9639999866485596
Epoch 7 	| Training Accuracy = 0.9710000157356262
Epoch 8 	| Training Accuracy = 0.9729999899864197
Epoch 9 	| Training Accuracy = 0.9760000109672546
Done training!


### 13) Calculate the accuracy over the test dataset (x_test and y_test). Use your knowledge of running a session to get the accuracy. Display the accuracy as a percentage rounded to two decimal numbers.

In [None]:
# Evaluate the accuracy op once on the complete test set.
test_feed = {X: x_test, Y: y_test}
test_accuracy = sess.run(accuracy, feed_dict=test_feed)
print(f'Accuracy on test set is {test_accuracy:0.2%}')

Accuracy on test set is 96.64%


### 14) Reset for the Next Run

In [None]:
# Reset for the Next Run
# Close the summary writers first, then the session, and finally
# discard the default graph so the next run starts from an empty one.
for writer in (train_writer, validation_writer):
    writer.close()

sess.close()
tf.reset_default_graph()

# Keras Implementation

Let's now dive into the implementation of our first neural network.
Our network is a simple neural network, **without convolution operations**.

We make use of the **sequential paradigm** of Tensorflow, made to build models by plugging together building blocks. This interface allows for easier code writing, while Tensorflow also offers alternative ways to write more complex deep learning algorithms through the use of its **define-by-run interface**.

The network's structure is the following :
 - A **flatten** layer, used to vectorize the whole input batch of data
 - A **dense** layer, transforming the 28x28=784 input data to a 512 vector, using a rectified linear unit activation function
 - A **dropout** layer, helping the network not to overfit the training data by giving each of its neurons a 50% chance of not being activated at each training step
 - A **dense** layer, outputting a 10 vector using a softmax function

The optimizer we use at first is named **RMSprop**, an adaptive-learning-rate optimizer that requires very little parameter tuning.

We use the sparse categorical crossentropy loss function because each sample of our data belongs to exactly one class (i.e. each handwritten digit represents only one specific digit).

We also use the **accuracy** metric, which is basically the percentage of correct predictions our network computes.

We will then train this neural network for **10 epochs** (i.e. on the whole dataset ten times), and then test it on the testing set.

In [None]:
# Let's implement the network first
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

# Then choose the optimizer, loss function, and metric, as compilation parameters
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Rescale the pixel values with the same 1/255 factor used earlier in the
# notebook. The Flatten layer expects the original 28x28 images, so the
# un-flattened arrays are rescaled here. The labels stay as integer class
# ids, which is what the sparse categorical cross-entropy loss expects.
x_train_keras = X_train / 255.0
x_test_keras = X_test / 255.0

# Let's now train the model we just built
model.fit(x_train_keras, Y_train, epochs=10)

# And evaluate its performances on the testing set
test_results = model.evaluate(x_test_keras, Y_test)

# Now finally print the value of the loss and metric functions specified above
print("\nloss :", test_results[0])
print("\naccuracy :", test_results[1])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

loss : 0.7714054804242142

accuracy : 0.9585


Some points and advice for students to check if your code is not working:
1. Use constants - it will make your code more readable.
2. It is important to do reshape AND rescale. To rescale you can divide by 255. or use a normalization library, but be careful with it.
3. Use one-hot encoding, it will improve your quality dramatically.
4. Explain why we need the train + validation + test split of the data.
5. Use proper placeholders with None.
6. The folder for your TensorBoard logs is './log'
7. For the initial weights use stddev = 0.1 and also use names
8. Use the softmax_v2 loss with reduce mean - the result should be a number, not a vector.
9. The Adam optimizer works like 10-20 times better than GradientDescent
10. Be careful with the next_batch function: when the data is finished you need to go back to the beginning.
11. You have 10 classes, make sure you have the right function for correct_pred