## Deep Learning with upgrades

Use the TensorFlow framework to create a deep nueral network - this time utilizing the most powerful features currently known for the model. Again we use the train the model on the MNIST (0-4 only)

In [1]:
import tensorflow as tf

In [5]:
# Load the data

mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()

#Filter 0-4 for auxilary training in later notebook
x_train, y_train = x_train[y_train <= 4], y_train[y_train <= 4]
x_test, y_test = x_test[y_test <= 4], y_test[y_test <= 4]

x_train, x_test = x_train / 255.0, x_test / 255.0
x_train, x_test = x_train.reshape(-1,28*28), x_test.reshape(-1,28*28)

In [9]:
# Create method for getting batches for training

class mini_batches:
    
    def __init__(self, x, y, size):
        self.x = x
        self.y = y
        self.size = size
        self.index = 0
    
    def next_batch(self):
        if self.index + self.size >= len(self.x):            
            batch_x = self.x[self.index:]
            batch_y = self.y[self.index:]
            self.index = 0
            return batch_x, batch_y
        
        batch_x = self.x[self.index:self.index + self.size]
        batch_y = self.y[self.index:self.index + self.size]
        self.index = self.index + self.size
        return batch_x, batch_y

In [14]:
# Build the computational graph

from tensorflow.contrib.layers import fully_connected 
from tensorflow.contrib.layers import batch_norm
from tensorflow.contrib.layers import dropout

tf.reset_default_graph()


is_training = tf.placeholder(tf.bool, shape=(), name='is_training')

# Inputs for training
X = tf.placeholder(tf.float32, shape=(None,28*28), name='X')
y = tf.placeholder(tf.int32, shape=(None), name='y')
X_drop = dropout(X,.5, is_training=is_training)

# Nueral Network layers
with tf.name_scope('network'):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    bn_params = {'is_training':is_training, 'decay':0.99, 'updates_collections':None}
    
    with tf.contrib.framework.arg_scope([fully_connected], weights_initializer=he_init, activation_fn=tf.nn.elu, 
                                        normalizer_fn=batch_norm, normalizer_params=bn_params):
        h1 = dropout(fully_connected(X_drop, 100, scope='h1'))
        h2 = dropout(fully_connected(h1, 100, scope='h2'))
        h3 = dropout(fully_connected(h2, 100, scope='h3'))
        h4 = dropout(fully_connected(h3, 100, scope='h4'))
        h5 = dropout(fully_connected(h4, 100, scope='h5'))
        output = fully_connected(h5, 5, scope='output', activation_fn=None)

# Loss from Network
with tf.name_scope('loss'):
    x_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
    loss = tf.reduce_mean(x_entropy, name='loss')

# SGD
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    
# Evaluation of performance
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(output, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()

In [15]:
# Train the model

# Mini batches
batches = mini_batches(x_train, y_train, 1000)
max_acc = 0
epochs = 0

# Save model
saver = tf.train.Saver()

# Log files
import os
from datetime import datetime
now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
log_dir = os.path.join(os.getcwd(), 'tensorflow/logs/11-deep-learning-{}/'.format(now))
acc_summary = tf.summary.scalar('accuracy',accuracy)
writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

with tf.Session() as sess:
    init.run()
    
    # SGD Updates
    for index, batch in enumerate(range(50000)):
        batch_x, batch_y = batches.next_batch()
        sess.run(train, feed_dict={X: batch_x, y:batch_y, is_training:True})
        
        # Early stopping and Checkpoint logging
        if index % 1000 == 0:
            saver.save(sess, os.path.join(os.getcwd(), 'tensorflow/models/11_deep_learning.ckpt'))
            log_str = acc_summary.eval(feed_dict={X: x_test, y:y_test, is_training:False})
            writer.add_summary(log_str, index)
            
            cur_acc = accuracy.eval(feed_dict={X: x_test, y:y_test, is_training:False})
            print(cur_acc)
            if cur_acc > max_acc:
                max_acc = cur_acc
                epochs = 0
            else:
                epochs = epochs + 1
                if epochs > 3:
                    saver.save(sess, os.path.join(os.getcwd(), 'tensorflow/models/11_deep_learning.ckpt'))
                    break

    # Save final model
    saver.save(sess, os.path.join(os.getcwd(), 'tensorflow/models/11_deep_learning.ckpt'))

0.21735746
0.9706169
0.9768437
0.9797626
0.97898424
0.9797626
0.98287606
0.9820977
0.98112476
0.98248684
0.9830706
0.97995716
0.9822923
0.97995716
0.98112476
