In [14]:
import tensorflow as tf
import pandas as pd
import numpy as np
import pandas_datareader.data as web
%pylab inline
import matplotlib.pyplot as plt
import os
import datetime
import functools

from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data sets from the MNIST_data directory; one_hot=True requests
# one-hot encoded label vectors. Later cells read mnist.train and mnist.test.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Populating the interactive namespace from numpy and matplotlib
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


We abandon the idea of inheritance in favor of a universal "Model" class that contains all functionality that we have used so far. The model is defined layer by layer in a hyperparameter dictionary.

We also begin to use TensorBoard to visualize our graph and any summary statistics that we choose to log. We write each experiment's logs to its own directory so that separate experiments can be compared side by side in TensorBoard.

## Functions and Utils

In [2]:
def define_scope(function):
    """Turn a graph-building method into a lazily built, cached property.

    On first access the wrapped method is called inside a
    ``tf.variable_scope`` named after the method, and its result is stored on
    the instance under ``_cache_<method name>``. Every later access returns
    that stored value without calling the method again.
    """
    scope_name = function.__name__
    cache_slot = '_cache_' + scope_name

    def cached_getter(self):
        # Already built for this instance: hand back the stored result.
        if hasattr(self, cache_slot):
            return getattr(self, cache_slot)
        with tf.variable_scope(scope_name):
            setattr(self, cache_slot, function(self))
        return getattr(self, cache_slot)

    # Same wrapping order as stacking @property on top of @functools.wraps.
    return property(functools.wraps(function)(cached_getter))

In [3]:
def fully_connected_relu(input, output_dim, relu=True):
    """Affine layer ``input @ weights + biases`` with an optional ReLU.

    Args:
        input: tensor whose dimension at index 1 is the number of input features.
        output_dim: number of output units.
        relu: when True (default) a ReLU is applied to the affine output.

    Returns:
        The layer output tensor.

    The "weights" and "biases" variables are obtained with ``tf.get_variable``
    and no explicit initializer, so the library default is used for both.
    """
    n_inputs = input.get_shape().as_list()[1]
    w = tf.get_variable("weights", shape=[n_inputs, output_dim])
    b = tf.get_variable("biases", shape=[output_dim])
    affine = tf.add(tf.matmul(input, w), b)
    return tf.nn.relu(affine) if relu else affine

In [4]:
def conv_relu(input, kernel_shape, bias_shape):
    """Stride-1, SAME-padded 2-D convolution, then bias add, then ReLU.

    Args:
        input: tensor passed as the input of ``tf.nn.conv2d``.
        kernel_shape: shape of the "weights" variable (the convolution filter),
            initialised with ``tf.random_normal_initializer()``.
        bias_shape: shape of the "biases" variable, initialised to 0.0.

    Returns:
        ``relu(conv2d(input, weights) + biases)``.
    """
    kernel = tf.get_variable(
        "weights", kernel_shape, initializer=tf.random_normal_initializer())
    offset = tf.get_variable(
        "biases", bias_shape, initializer=tf.constant_initializer(0.0))
    feature_map = tf.nn.conv2d(input, kernel, strides=[1, 1, 1, 1], padding='SAME')
    pre_activation = feature_map + offset
    return tf.nn.relu(pre_activation)

def max_pool_2x2(x):
    """Max-pool ``x`` with a [1, 2, 2, 1] window, [1, 2, 2, 1] strides and SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

## Model Object

In [60]:
class Model:
    """Classifier graph assembled layer by layer from ``hparams.PREDICT_BUILDER``.

    ``prediction``, ``loss``, ``train`` and ``evaluate`` are ``define_scope``
    properties: each one builds its part of the graph on first access, inside
    a variable scope named after the property, and returns the same object on
    every later access.
    """

    # Layer names accepted in a PREDICT_BUILDER spec.
    _LAYER_TYPES = ('reshape', 'fconn_relu', 'fconn', 'conv_maxp', 'dropout')

    def __init__(self, input_data, target_data, hparams, keep_prob):
        """Store the inputs and build the complete graph.

        Args:
            input_data: input tensor; its dimension at index 1 becomes ``in_dim``.
            target_data: target tensor; its dimension at index 1 becomes ``out_dim``.
            hparams: object exposing ``PREDICT_BUILDER`` (list of layer specs)
                and ``LEARNING_RATE``.
            keep_prob: keep probability handed to every ``dropout`` layer.
        """
        self.input_data = input_data
        self.target_data = target_data
        self.in_dim, self.out_dim = self.get_inout_dim()

        self.keep_prob = keep_prob
        self.predict_builder = hparams.PREDICT_BUILDER
        self.learning_rate = hparams.LEARNING_RATE

        # Touch every lazy property once so the whole graph, including the
        # scalar summaries registered by `loss` and `evaluate`, exists before
        # the summaries are merged below.
        self.prediction
        self.loss
        self.train
        self.evaluate

        self.merged = tf.summary.merge_all()

    def get_inout_dim(self):
        """Return ``(in_dim, out_dim)``: dimension 1 of the input and target tensors."""
        in_dim = self.input_data.get_shape().as_list()[1]
        out_dim = self.target_data.get_shape().as_list()[1]
        return in_dim, out_dim

    def _parse_dims(self, builder, dims_spec):
        """Convert the text after ``|`` in a layer spec into dims.

        ``'out'`` maps to ``self.out_dim``; otherwise the text is a
        comma-separated list of ints, collapsed to a bare int when it holds a
        single value (the form the fully connected layers expect).

        Raises:
            ValueError: if the dims text is not ``'out'`` or a list of ints.
        """
        if dims_spec == 'out':
            return self.out_dim
        try:
            dims = [int(token) for token in dims_spec.split(',')]
        except ValueError:
            raise ValueError(
                "Layer spec {!r}: dims must be 'out' or comma-separated "
                "integers, got {!r}".format(builder, dims_spec))
        return dims[0] if len(dims) == 1 else dims

    @define_scope
    def prediction(self):
        """Build the layer stack described by ``hparams.PREDICT_BUILDER``.

        Each entry is a string ``'<layer>|<dims>'``.

        Types of layers:
            * reshape|a,b,c,...     calls tf.reshape with the given dims
            * fconn_relu|a          calls fully_connected_relu with output dim a
            * fconn|a               calls fully_connected_relu with output dim a
                                    and relu=False
            * conv_maxp|a,b,c,d,e   calls conv_relu (stride-1, SAME-padded conv)
                        - the kernel is a*b pixels
                        - input pixels have c channels
                        - the conv outputs d channels
                        - the bias has shape [e]; it is added to the d-channel
                          conv output, so normally e == d
                + then calls max_pool_2x2 (2x2 window, stride 2, SAME padding)
            * dropout               calls tf.nn.dropout with self.keep_prob; any
                                    dims given (e.g. 'dropout|0') are ignored

        dims special cases:
            * dims == 'out': dims <- self.out_dim   (a scalar, usable with fconn)
            * a single value: dims <- that int      (so it can be passed to fconn[_relu])

        The n-th layer of a given type is built in the variable scope
        ``<layer><n>`` (fconn_relu1, fconn_relu2, ...).

        Raises:
            ValueError: for an unknown layer name or malformed dims. Unknown
                layer names used to be skipped silently, which hid typos.
        """
        x = self.input_data

        layer_counts = {}
        for builder in self.predict_builder:
            layer, _, dims_spec = builder.partition('|')
            if layer not in self._LAYER_TYPES:
                raise ValueError(
                    "Unknown layer type {!r} in spec {!r}; expected one of {}".format(
                        layer, builder, ', '.join(self._LAYER_TYPES)))

            # dropout takes no dims, so its dims text is never parsed.
            dims = None if layer == 'dropout' else self._parse_dims(builder, dims_spec)

            layer_counts[layer] = layer_counts.get(layer, 0) + 1
            with tf.variable_scope(layer + str(layer_counts[layer])):
                if layer == 'reshape':
                    x = tf.reshape(x, dims)
                elif layer == 'fconn_relu':
                    x = fully_connected_relu(x, dims)
                elif layer == 'fconn':
                    x = fully_connected_relu(x, dims, relu=False)
                elif layer == 'conv_maxp':
                    # All but the last value form the kernel shape; the last
                    # value is the bias shape.
                    x = conv_relu(x, dims[0:-1], [dims[-1]])
                    x = max_pool_2x2(x)
                else:  # 'dropout'
                    x = tf.nn.dropout(x, self.keep_prob)

        return x

    @define_scope
    def loss(self):
        """Mean softmax cross-entropy of ``prediction`` (as logits) against ``target_data``.

        Also registers the scalar summary 'loss'.
        """
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                labels=self.target_data, logits=self.prediction))
        tf.summary.scalar('loss', loss)  ##summary##
        return loss

    @define_scope
    def train(self):
        """Op that minimizes ``loss`` with plain gradient descent at ``learning_rate``."""
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        return optimizer.minimize(self.loss)

    @define_scope
    def evaluate(self):
        """Fraction of rows whose arg-max prediction equals the arg-max target.

        Also registers the scalar summary 'score'.
        """
        # Use the model's own target tensor; the previous code read the
        # notebook-global `Y`, which only worked when that global happened to
        # be the tensor passed in as `target_data`.
        target_class = tf.argmax(self.target_data, axis=1)
        predicted_class = tf.argmax(self.prediction, axis=1)
        correct = tf.equal(target_class, predicted_class)
        score = tf.reduce_mean(tf.cast(correct, tf.float32))
        tf.summary.scalar('score', score)  ##summary##
        return score
    

## Flags

In [31]:
# Notebook-wide settings. 'logdir' is the root directory under which each
# experiment creates its own summary-log subdirectory.
FLAGS = {'logdir': './logdir/'}

## HParam model defs

In [52]:
# simple NN: two 10-unit ReLU layers followed by a linear layer of size out_dim.
# Logs go to a per-run, timestamped directory under <logdir>/simpleNN/.
hparams = tf.contrib.training.HParams(
    LOGDIR=''.join([
        FLAGS['logdir'],
        'simpleNN/',
        datetime.datetime.now().strftime('%Y%m%d_%H%M%S'),
    ]),
    LEARNING_RATE=0.001,
    PREDICT_BUILDER=['fconn_relu|10', 'fconn_relu|10', 'fconn|out'],
    KEEP_PROB=0.75
)

In [71]:
# convolutional NN. This assignment replaces any `hparams` defined by an earlier cell.
# Logs go to a per-run, timestamped directory under <logdir>/convnet/.
hparams = tf.contrib.training.HParams(
    LOGDIR=''.join([
        FLAGS['logdir'],
        'convnet/',
        datetime.datetime.now().strftime('%Y%m%d_%H%M%S'),
    ]),
    LEARNING_RATE=0.001,
    PREDICT_BUILDER=[
        'reshape|-1,28,28,1',      # flat 784-vector -> 28x28 image, 1 channel
        'conv_maxp|5,5,1,32,32',   # 5x5 conv, 1 -> 32 channels, then 2x2 max-pool
        'conv_maxp|5,5,32,64,64',  # 5x5 conv, 32 -> 64 channels, then 2x2 max-pool
        'reshape|-1,3136',         # flatten: 7 * 7 * 64 = 3136
        'fconn_relu|1024',
        'dropout|0',               # the dims value is a placeholder; dropout uses keep_prob
        'fconn|out',
    ],
    KEEP_PROB=0.75
)

## Graph Creation and Initialization

In [72]:
# Build the model in a freshly reset default graph, then open a session,
# attach a summary writer to this run's log directory, and initialize variables.
tf.reset_default_graph()

# Placeholders fed on every sess.run: flattened 784-value images, 10-wide
# one-hot labels, and the dropout keep probability.
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float64, [None, 10])
keep_prob = tf.placeholder(tf.float32)

model = Model(input_data=X, target_data=Y, hparams=hparams, keep_prob=keep_prob)

sess = tf.Session()
writer = tf.summary.FileWriter(hparams.LOGDIR, graph=tf.get_default_graph())
sess.run(tf.global_variables_initializer())

## Training Iteration

In [73]:
# Training schedule: N_EVALS rounds, each made of one evaluation on the full
# test set followed by STEPS_PER_EVAL mini-batch updates of BATCH_SIZE examples.
N_EVALS = 10
STEPS_PER_EVAL = 10
BATCH_SIZE = 100

# The test feed never changes, so build it once. Dropout is disabled for evaluation.
test_feed = {X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1.0}

# Step index attached to every summary (previously named `iter`, which
# shadowed the builtin).
global_step = 0

# NOTE(review): test and training summaries go to the same writer under the
# same tags, and each test point shares its step with the first training step
# of the round. A separate writer for test summaries would keep the curves apart.
for eval_round in range(N_EVALS):
    test_loss, test_score, summary = sess.run(
        [model.loss, model.evaluate, model.merged], feed_dict=test_feed)
    writer.add_summary(summary, global_step)
    print('Test accuracy {:6.2f}% \t\tX-Ent Loss: {:6.2f}'.format(100 * test_score, test_loss))

    for batch_idx in range(STEPS_PER_EVAL):
        batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)
        train_feed = {X: batch_images, Y: batch_labels, keep_prob: hparams.KEEP_PROB}
        _, summary = sess.run([model.train, model.merged], feed_dict=train_feed)
        writer.add_summary(summary, global_step)

        global_step += 1

# Final summary point. As before, this scores the LAST TRAINING BATCH with
# dropout disabled.
# NOTE(review): if a final test-set point was intended, feed `test_feed` instead.
summary = sess.run(model.merged,
                   feed_dict={X: batch_images, Y: batch_labels, keep_prob: 1.0})
writer.add_summary(summary, global_step)
writer.flush()

Test accuracy   8.97% 		X-Ent Loss:  67.83
Test accuracy  58.41% 		X-Ent Loss:   1.34
Test accuracy  73.80% 		X-Ent Loss:   0.84
Test accuracy  77.49% 		X-Ent Loss:   0.71
Test accuracy  81.05% 		X-Ent Loss:   0.60
Test accuracy  80.31% 		X-Ent Loss:   0.61
Test accuracy  84.45% 		X-Ent Loss:   0.51
Test accuracy  85.95% 		X-Ent Loss:   0.46
Test accuracy  86.04% 		X-Ent Loss:   0.45
Test accuracy  86.71% 		X-Ent Loss:   0.45


# Next Steps

* LSTM (new dataset)
* feeddict as function
* optimizer hparams
* hparams as json to logs dirs
* read logs file and hparams.json into python
* epochs and iters as hparams
* other best practices:
    * vars init set to best level
    * step set intelligently
* random hparams
