# Neural Networks using TensorFlow

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import src.mnist
import src.const

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
%matplotlib inline

In [57]:
# Load the MNIST data sets through TensorFlow's tutorial helper, using
# /tmp/data as the data directory.  one_hot=True is requested so labels line
# up with the [None, n_classes] label placeholder used by the network cells.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data", one_hot=True)

# Short aliases for the training and test splits used by every experiment.
train_set, test_set = mnist.train, mnist.test

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [42]:
# Project the training and test images with the k-means model attached to the
# training set (the same fitted model is used for both splits).
# NOTE(review): `train_set` is rebound to `mnist.train` by the MNIST-loading
# cell; confirm that object really exposes a fitted `kmeans` attribute.  The
# execution counts (In [42] here vs. In [57] for the loader) suggest this cell
# was run against an earlier `train_set`, so it may fail on Restart & Run All.
# NOTE(review): k_reduced / k_test are not used by any later visible cell.
k_reduced = train_set.kmeans.transform(train_set.images)
k_test = train_set.kmeans.transform(test_set.images)

In [23]:
def next_batch(images, labels, batch_size, i):
    """Return the next mini-batch, reshuffling when the data set is exhausted.

    Parameters
    ----------
    images, labels : numpy arrays
        Parallel arrays indexed along axis 0 (one row per example).
    batch_size : int
        Number of examples per batch.
    i : int
        Cursor: index of the first example of the batch to return.

    Returns
    -------
    tuple
        ``(images, labels, i, batch_images, batch_labels)`` where ``images``
        and ``labels`` are the (possibly reshuffled) full arrays and ``i`` is
        the cursor to pass to the next call.

    Notes
    -----
    When fewer than ``batch_size`` examples remain, the remainder is skipped:
    both arrays are shuffled with the same permutation (so image/label pairs
    stay aligned) and the batch is taken from the start of the new ordering.
    The inputs are not modified in place; shuffled copies are returned.
    """
    start = i
    i += batch_size
    N = images.shape[0]

    if i > N:
        # Epoch finished: draw one permutation and apply it to both arrays.
        perm = np.random.permutation(N)

        images = images[perm]
        labels = labels[perm]

        start = 0
        i = batch_size

    end = i

    return images, labels, i, images[start:end], labels[start:end]


In [65]:
from itertools import tee

def pairwise(iterable):
    """Return an iterator of overlapping neighbours.

    s -> (s0, s1), (s1, s2), (s2, s3), ...
    An input with fewer than two items produces no pairs.
    """
    current, following = tee(iterable, 2)
    # Advance the second copy by one item so it always runs one step ahead.
    next(following, None)
    neighbours = zip(current, following)
    return neighbours

In [90]:
# Hyper-parameters.  `learning_rate`, `batch_size` and `display_step` are read
# as module-level globals inside build_network / Grapher rather than being
# passed as arguments, so this cell must run before those functions are called.
# `learning_rate` is handed to the optimizer constructor (or used as the base
# rate of the exponential decay schedule in Grapher).
# NOTE(review): `training_epochs` is not referenced by any visible cell.
# `batch_size` is the number of examples requested per training step;
# `display_step` makes Grapher print the average cost every N epochs.
learning_rate = 0.001
training_epochs = 30
batch_size = 100
display_step = 1

# NOTE(review): n_hidden_1..n_hidden_4 are not referenced by any visible cell;
# the experiments pass their hidden-layer widths explicitly as lists.
# n_input / n_classes size the input and label placeholders (presumably
# flattened 28x28 MNIST images and the ten digit classes - confirm).
n_hidden_1 = 1024
n_hidden_2 = 512
n_hidden_3 = 256
n_hidden_4 = 128
n_input = 784
n_classes = 10

def build_network(n_input, n_classes, hlayers, optimizer=None, has_exp_decay=False, activation=None):
    """Build the TensorFlow graph of a fully connected softmax classifier.

    Parameters
    ----------
    n_input : int
        Width of the input placeholder.
    n_classes : int
        Width of the label placeholder and of the output (logit) layer.
    hlayers : sequence of int
        Widths of the hidden layers, in order.  May be empty.
    optimizer : callable, optional
        Optimizer class/factory called with the learning rate; its result must
        provide ``minimize``.  Defaults to ``tf.train.GradientDescentOptimizer``.
    has_exp_decay : bool, optional
        When true, the learning rate follows a staircase exponential decay
        (factor 0.90 every 1000 steps) driven by a non-trainable global step.
    activation : callable, optional
        Activation applied after every hidden layer.  Defaults to ``tf.sigmoid``.

    Returns
    -------
    tf.Graph
        The graph containing placeholders, variables, loss and training op.
        Only the graph is returned; individual tensors/ops have to be looked
        up on it by the caller.

    Notes
    -----
    Reads the module-level ``learning_rate`` and uses the notebook's
    ``pairwise`` helper.
    """
    if optimizer is None:
        optimizer = tf.train.GradientDescentOptimizer

    if activation is None:
        activation = tf.sigmoid

    # Layer widths from input to last hidden layer; list() lets callers pass a tuple.
    layers = [n_input] + list(hlayers)

    graph = tf.Graph()
    with graph.as_default():

        x = tf.placeholder("float", [None, n_input])
        y = tf.placeholder("float", [None, n_classes])

        if has_exp_decay:
            # Step counter incremented by minimize(); must not be trained itself.
            global_step = tf.Variable(0, trainable=False)

        Var = tf.Variable
        Ran = tf.random_normal

        # One bias vector per hidden layer ('b0', 'b1', ...) plus the output bias.
        biases = {'b%s' % i: Var(Ran([l])) for i, l in enumerate(layers[1:])}
        biases['out'] = Var(Ran([n_classes]))

        # One weight matrix per consecutive pair of layer widths ('h0', 'h1', ...).
        weights = {'h%s' % i: Var(Ran([l_i, l_ii])) for i, (l_i, l_ii) in enumerate(pairwise(layers))}
        weights['out'] = Var(Ran([layers[-1], n_classes]))

        # Chain the hidden layers: affine transform followed by the activation.
        prev_layer = x
        for i in range(len(layers) - 1):
            w, b = weights['h%s' % i], biases['b%s' % i]
            prev_layer = activation(tf.add(tf.matmul(prev_layer, w), b))

        # Output layer produces raw logits; softmax is folded into the loss.
        pred = tf.matmul(prev_layer, weights['out']) + biases['out']

        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

        # Calling minimize() adds the training op to the graph.
        if has_exp_decay:
            exp_lr = tf.train.exponential_decay(learning_rate, global_step, 1000, 0.90, staircase=True)
            optimizer(exp_lr).minimize(cost, global_step=global_step)
        else:
            optimizer(learning_rate).minimize(cost)

    return graph

In [140]:
def Grapher(train_set, test_set, n_input, n_classes, hlayers, optimizer=None,
            has_exp_decay=False, activation=None, tol=0.1, max_epochs=None):
    """Build, train and evaluate a fully connected softmax classifier.

    The network is trained epoch by epoch until the average training cost
    changes by at most ``tol`` between two consecutive epochs (or until
    ``max_epochs`` is reached), then the accuracy on ``test_set`` is printed.

    Parameters
    ----------
    train_set : object
        Must provide ``num_examples`` and ``next_batch(batch_size)`` returning
        an ``(images, labels)`` pair.
    test_set : object
        Must provide ``images`` and ``labels``.
    n_input : int
        Width of the input placeholder.
    n_classes : int
        Width of the label placeholder and of the output (logit) layer.
    hlayers : sequence of int
        Widths of the hidden layers, in order.  May be empty.
    optimizer : callable, optional
        Optimizer class/factory called with the learning rate.  When omitted,
        ``tf.train.AdamOptimizer`` is used if ``has_exp_decay`` is set and
        ``tf.train.GradientDescentOptimizer`` otherwise (the historical
        behaviour).  An explicitly passed optimizer is always honoured.
    has_exp_decay : bool, optional
        Use a staircase exponential learning-rate decay (factor 0.90 every
        1000 steps) instead of a constant learning rate.
    activation : callable, optional
        Activation applied after every hidden layer.  Defaults to ``tf.sigmoid``.
    tol : float, optional
        Convergence threshold on the epoch-to-epoch change of the average cost.
    max_epochs : int or None, optional
        Upper bound on the number of epochs; ``None`` (default) means no bound.

    Returns
    -------
    None
        Progress and the final test accuracy are printed.

    Notes
    -----
    Reads the module-level ``learning_rate``, ``batch_size`` and
    ``display_step`` and uses the notebook's ``pairwise`` helper.
    """
    if optimizer is None:
        # Keep the previous defaults: Adam on the decay path, plain SGD otherwise.
        optimizer = tf.train.AdamOptimizer if has_exp_decay else tf.train.GradientDescentOptimizer
    if activation is None:
        activation = tf.sigmoid

    # Layer widths from input to last hidden layer; list() lets callers pass a tuple.
    layers = [n_input] + list(hlayers)

    graph = tf.Graph()
    with graph.as_default():

        x = tf.placeholder("float", [None, n_input])
        y = tf.placeholder("float", [None, n_classes])

        if has_exp_decay:
            # Step counter incremented by minimize(); must not be trained itself.
            global_step = tf.Variable(0, trainable=False)

        Var = tf.Variable
        Ran = tf.random_normal

        # One bias vector per hidden layer ('b0', 'b1', ...) plus the output bias.
        biases = {'b%s' % i: Var(Ran([l])) for i, l in enumerate(layers[1:])}
        biases['out'] = Var(Ran([n_classes]))

        # One weight matrix per consecutive pair of layer widths ('h0', 'h1', ...).
        weights = {'h%s' % i: Var(Ran([l_i, l_ii])) for i, (l_i, l_ii) in enumerate(pairwise(layers))}
        weights['out'] = Var(Ran([layers[-1], n_classes]))

        # Chain the hidden layers: affine transform followed by the activation.
        prev_layer = x
        for i in range(len(layers) - 1):
            w, b = weights['h%s' % i], biases['b%s' % i]
            prev_layer = activation(tf.add(tf.matmul(prev_layer, w), b))

        # Output layer produces raw logits; softmax is folded into the loss.
        pred = tf.matmul(prev_layer, weights['out']) + biases['out']

        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

        if has_exp_decay:
            exp_lr = tf.train.exponential_decay(learning_rate, global_step, 1000, 0.90, staircase=True)
            train_op = optimizer(exp_lr).minimize(cost, global_step=global_step)
        else:
            train_op = optimizer(learning_rate).minimize(cost)

        # Evaluation ops are built with the rest of the graph, not mid-session.
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        # Created last so it covers every variable, including optimizer slots.
        init = tf.global_variables_initializer()

    with tf.Session(graph=graph) as sess:
        sess.run(init)

        print("Initialized")

        total_batch = train_set.num_examples // batch_size
        prev_cost = None
        epoch = 0

        while max_epochs is None or epoch < max_epochs:
            avg_cost = 0.

            for _step in range(total_batch):
                batch_x, batch_y = train_set.next_batch(batch_size)
                _, c = sess.run([train_op, cost], feed_dict={x: batch_x, y: batch_y})
                avg_cost += c / total_batch

            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1), "cost =", "{:.9f}".format(avg_cost))

            epoch += 1

            # Converged once two consecutive epochs differ by at most `tol`.
            # Only real epoch costs are compared (never an initial sentinel).
            if prev_cost is not None and abs(avg_cost - prev_cost) <= tol:
                break
            prev_cost = avg_cost

        print("Optimization Finished!")

        print("Accuracy:", sess.run(accuracy, feed_dict={x: test_set.images, y: test_set.labels}))

In [134]:
# Experiment: one hidden layer of 256 units, gradient descent, sigmoid activation.
Grapher(train_set, test_set, n_input, n_classes, [256],
        tf.train.GradientDescentOptimizer, activation=tf.sigmoid)

Initialized
Epoch: 0001 cost = 12.568821004
Epoch: 0002 cost = 9.240629437
Epoch: 0003 cost = 7.747553156
Epoch: 0004 cost = 6.765919820
Epoch: 0005 cost = 6.136766780
Epoch: 0006 cost = 5.666180403
Epoch: 0007 cost = 5.288129089
Epoch: 0008 cost = 4.940382444
Epoch: 0009 cost = 4.629014284
Epoch: 0010 cost = 4.362127741
Epoch: 0011 cost = 4.107666473
Epoch: 0012 cost = 3.935222556
Epoch: 0013 cost = 3.751227068
Epoch: 0014 cost = 3.585853815
Epoch: 0015 cost = 3.407629982
Epoch: 0016 cost = 3.270236436
Epoch: 0017 cost = 3.175762827
Optimization Finished!
Accuracy: 0.4935


In [135]:
# Experiment: two hidden layers of 256 units, gradient descent, sigmoid activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.GradientDescentOptimizer, activation=tf.sigmoid)

Initialized
Epoch: 0001 cost = 9.275056529
Epoch: 0002 cost = 6.591123818
Epoch: 0003 cost = 5.749068124
Epoch: 0004 cost = 5.074636205
Epoch: 0005 cost = 4.566350620
Epoch: 0006 cost = 4.180550253
Epoch: 0007 cost = 3.850842323
Epoch: 0008 cost = 3.586441510
Epoch: 0009 cost = 3.371263473
Epoch: 0010 cost = 3.172172715
Epoch: 0011 cost = 3.012585413
Epoch: 0012 cost = 2.860472160
Epoch: 0013 cost = 2.729380232
Epoch: 0014 cost = 2.632905596
Optimization Finished!
Accuracy: 0.503


In [136]:
# Experiment: two hidden layers of 256 units, gradient descent, ReLU activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.GradientDescentOptimizer, activation=tf.nn.relu)

Initialized
Epoch: 0001 cost = 168.461362749
Epoch: 0002 cost = 62.744317270
Epoch: 0003 cost = 46.698486593
Epoch: 0004 cost = 38.821199485
Epoch: 0005 cost = 31.482605140
Epoch: 0006 cost = 27.599581663
Epoch: 0007 cost = 24.711671715
Epoch: 0008 cost = 22.122395800
Epoch: 0009 cost = 19.410969272
Epoch: 0010 cost = 18.034688600
Epoch: 0011 cost = 17.145818053
Epoch: 0012 cost = 15.500657835
Epoch: 0013 cost = 14.673952134
Epoch: 0014 cost = 13.642148634
Epoch: 0015 cost = 12.670881062
Epoch: 0016 cost = 12.061243624
Epoch: 0017 cost = 11.047795625
Epoch: 0018 cost = 10.755847830
Epoch: 0019 cost = 10.132051462
Epoch: 0020 cost = 9.620841554
Epoch: 0021 cost = 9.224581518
Epoch: 0022 cost = 8.680838946
Epoch: 0023 cost = 8.205091965
Epoch: 0024 cost = 7.929797125
Epoch: 0025 cost = 7.614313779
Epoch: 0026 cost = 7.127340345
Epoch: 0027 cost = 7.005310774
Epoch: 0028 cost = 6.469353232
Epoch: 0029 cost = 6.502118008
Optimization Finished!
Accuracy: 0.9219


In [141]:
# Experiment: two hidden layers of 256 units, gradient descent, tanh activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.GradientDescentOptimizer, activation=tf.tanh)

Initialized
Epoch: 0001 cost = 16.688322176
Epoch: 0002 cost = 13.074088488
Epoch: 0003 cost = 11.104799135
Epoch: 0004 cost = 9.601502661
Epoch: 0005 cost = 8.725808338
Epoch: 0006 cost = 7.837694082
Epoch: 0007 cost = 7.195812986
Epoch: 0008 cost = 6.740403003
Epoch: 0009 cost = 6.345725234
Epoch: 0010 cost = 5.871733464
Epoch: 0011 cost = 5.639490885
Epoch: 0012 cost = 5.356760943
Epoch: 0013 cost = 5.112513972
Epoch: 0014 cost = 4.945968019
Epoch: 0015 cost = 4.706601268
Epoch: 0016 cost = 4.586755428
Epoch: 0017 cost = 4.396630606
Epoch: 0018 cost = 4.282278256
Epoch: 0019 cost = 4.177132766
Epoch: 0020 cost = 3.975218591
Epoch: 0021 cost = 3.947260102
Optimization Finished!
Accuracy: 0.6354


In [142]:
# Experiment: two hidden layers of 256 units, gradient descent, ELU activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.GradientDescentOptimizer, activation=tf.nn.elu)

Initialized
Epoch: 0001 cost = 158.636479350
Epoch: 0002 cost = 63.175238659
Epoch: 0003 cost = 47.295233217
Epoch: 0004 cost = 37.196117105
Epoch: 0005 cost = 32.251805290
Epoch: 0006 cost = 27.030370163
Epoch: 0007 cost = 24.711034253
Epoch: 0008 cost = 21.700759888
Epoch: 0009 cost = 19.693960668
Epoch: 0010 cost = 18.666757154
Epoch: 0011 cost = 16.656596048
Epoch: 0012 cost = 15.403089993
Epoch: 0013 cost = 13.998912788
Epoch: 0014 cost = 13.604372936
Epoch: 0015 cost = 12.309657693
Epoch: 0016 cost = 11.897913117
Epoch: 0017 cost = 10.774219702
Epoch: 0018 cost = 10.526115965
Epoch: 0019 cost = 9.957187676
Epoch: 0020 cost = 9.478471657
Epoch: 0021 cost = 8.584910696
Epoch: 0022 cost = 8.495057244
Optimization Finished!
Accuracy: 0.9212


In [143]:
# Experiment: two hidden layers of 256 units, gradient descent, softplus activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.GradientDescentOptimizer, activation=tf.nn.softplus)

Initialized
Epoch: 0001 cost = 145.214975239
Epoch: 0002 cost = 55.051630793
Epoch: 0003 cost = 41.692173535
Epoch: 0004 cost = 33.820915558
Epoch: 0005 cost = 28.709974655
Epoch: 0006 cost = 25.175938875
Epoch: 0007 cost = 22.333532874
Epoch: 0008 cost = 20.577434903
Epoch: 0009 cost = 18.387683962
Epoch: 0010 cost = 16.573732892
Epoch: 0011 cost = 15.729543466
Epoch: 0012 cost = 14.421685715
Epoch: 0013 cost = 13.539032680
Epoch: 0014 cost = 12.734992062
Epoch: 0015 cost = 12.245311676
Epoch: 0016 cost = 11.123402960
Epoch: 0017 cost = 10.700283347
Epoch: 0018 cost = 10.152210467
Epoch: 0019 cost = 9.525157732
Epoch: 0020 cost = 9.266509566
Epoch: 0021 cost = 8.789047323
Epoch: 0022 cost = 8.076773992
Epoch: 0023 cost = 8.139067929
Optimization Finished!
Accuracy: 0.9112


In [144]:
# Experiment: two hidden layers of 256 units, gradient descent, softsign activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.GradientDescentOptimizer, activation=tf.nn.softsign)

Initialized
Epoch: 0001 cost = 17.914353048
Epoch: 0002 cost = 12.976675747
Epoch: 0003 cost = 10.377192303
Epoch: 0004 cost = 8.814106817
Epoch: 0005 cost = 7.676544284
Epoch: 0006 cost = 6.938102120
Epoch: 0007 cost = 6.215243773
Epoch: 0008 cost = 5.838264429
Epoch: 0009 cost = 5.451482548
Epoch: 0010 cost = 5.110269877
Epoch: 0011 cost = 4.793616508
Epoch: 0012 cost = 4.571912144
Epoch: 0013 cost = 4.393346232
Epoch: 0014 cost = 4.220301905
Epoch: 0015 cost = 3.994542176
Epoch: 0016 cost = 3.906197088
Optimization Finished!
Accuracy: 0.6216


In [145]:
# Experiment: two hidden layers of 256 units, gradient descent, ReLU6 activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.GradientDescentOptimizer, activation=tf.nn.relu6)

Initialized
Epoch: 0001 cost = 42.262833058
Epoch: 0002 cost = 22.901573221
Epoch: 0003 cost = 17.642307158
Epoch: 0004 cost = 14.601805008
Epoch: 0005 cost = 12.459141713
Epoch: 0006 cost = 11.299387116
Epoch: 0007 cost = 10.221891618
Epoch: 0008 cost = 9.471752096
Epoch: 0009 cost = 8.696619465
Epoch: 0010 cost = 8.249033723
Epoch: 0011 cost = 7.781070447
Epoch: 0012 cost = 7.426614202
Epoch: 0013 cost = 6.991343694
Epoch: 0014 cost = 6.692083751
Epoch: 0015 cost = 6.469527607
Epoch: 0016 cost = 6.211510993
Epoch: 0017 cost = 5.951969195
Epoch: 0018 cost = 5.770207920
Epoch: 0019 cost = 5.567523551
Epoch: 0020 cost = 5.409057073
Epoch: 0021 cost = 5.225529260
Epoch: 0022 cost = 5.115692814
Epoch: 0023 cost = 4.922357229
Epoch: 0024 cost = 4.828075184
Optimization Finished!
Accuracy: 0.7503


In [133]:
# Experiment: two hidden layers of 256 units, Adam optimizer, ReLU activation.
Grapher(train_set, test_set, n_input, n_classes, [256, 256],
        tf.train.AdamOptimizer, activation=tf.nn.relu)

Initialized
Epoch: 0001 cost = 161.324909730
Epoch: 0002 cost = 42.605847155
Epoch: 0003 cost = 27.092446155
Epoch: 0004 cost = 18.928185773
Epoch: 0005 cost = 13.638499924
Epoch: 0006 cost = 10.507979187
Epoch: 0007 cost = 7.581083359
Epoch: 0008 cost = 5.892259366
Epoch: 0009 cost = 4.339253664
Epoch: 0010 cost = 3.284954584
Epoch: 0011 cost = 2.483077058
Epoch: 0012 cost = 1.824827947
Epoch: 0013 cost = 1.411266974
Epoch: 0014 cost = 1.027642392
Epoch: 0015 cost = 0.890238776
Epoch: 0016 cost = 0.588751286
Epoch: 0017 cost = 0.632639819
Optimization Finished!
Accuracy: 0.9439


In [138]:
# Experiment: four hidden layers (512, 256, 256, 128), Adam optimizer, ReLU activation.
Grapher(train_set, test_set, n_input, n_classes, [512, 256, 256, 128],
        tf.train.AdamOptimizer, activation=tf.nn.relu)

Initialized
Epoch: 0001 cost = 11656.389232289
Epoch: 0002 cost = 2576.888508023
Epoch: 0003 cost = 1521.778461540
Epoch: 0004 cost = 921.037624425
Epoch: 0005 cost = 618.953350626
Epoch: 0006 cost = 418.070778136
Epoch: 0007 cost = 276.746967639
Epoch: 0008 cost = 210.410835565
Epoch: 0009 cost = 143.992396416
Epoch: 0010 cost = 114.645118584
Epoch: 0011 cost = 95.811391312
Epoch: 0012 cost = 82.438209940
Epoch: 0013 cost = 77.402258797
Epoch: 0014 cost = 62.501727035
Epoch: 0015 cost = 64.486672124
Epoch: 0016 cost = 60.762682703
Epoch: 0017 cost = 61.763544250
Epoch: 0018 cost = 60.295203682
Epoch: 0019 cost = 51.085164257
Epoch: 0020 cost = 42.559724841
Epoch: 0021 cost = 37.925221639
Epoch: 0022 cost = 47.848343113
Epoch: 0023 cost = 35.715297865
Epoch: 0024 cost = 45.053807521
Epoch: 0025 cost = 47.460952774
Epoch: 0026 cost = 33.219050219
Epoch: 0027 cost = 32.914941593
Epoch: 0028 cost = 41.677827588
Epoch: 0029 cost = 36.751194100
Epoch: 0030 cost = 29.356319802
Epoch: 0031 co

In [151]:
# Experiment: two hidden layers of 800 units, Adam optimizer, tanh activation.
Grapher(train_set, test_set, n_input, n_classes, [800, 800],
        tf.train.AdamOptimizer, activation=tf.tanh)

Initialized
Epoch: 0001 cost = 8.421146493
Epoch: 0002 cost = 3.251908079
Epoch: 0003 cost = 2.204635056
Epoch: 0004 cost = 1.605988110
Epoch: 0005 cost = 1.227234688
Epoch: 0006 cost = 1.022173952
Epoch: 0007 cost = 0.856822639
Epoch: 0008 cost = 0.739583416
Epoch: 0009 cost = 0.612919816
Epoch: 0010 cost = 0.540722586
Optimization Finished!
Accuracy: 0.9149


In [150]:
# Experiment: four hidden layers (512, 256, 256, 128), Adam optimizer, ReLU
# activation, with the exponential learning-rate decay schedule enabled.
Grapher(train_set, test_set, n_input, n_classes, [512, 256, 256, 128],
        tf.train.AdamOptimizer, activation=tf.nn.relu, has_exp_decay=True)

Initialized
Epoch: 0001 cost = 14101.317995384
Epoch: 0002 cost = 2902.710198364
Epoch: 0003 cost = 1645.432493453
Epoch: 0004 cost = 1078.996475599
Epoch: 0005 cost = 707.609671101
Epoch: 0006 cost = 486.602784084
Epoch: 0007 cost = 338.621206574
Epoch: 0008 cost = 232.569961710
Epoch: 0009 cost = 165.607490565
Epoch: 0010 cost = 97.782493746
Epoch: 0011 cost = 76.551032037
Epoch: 0012 cost = 51.486159956
Epoch: 0013 cost = 38.167750156
Epoch: 0014 cost = 35.862590961
Epoch: 0015 cost = 26.271362687
Epoch: 0016 cost = 20.911167633
Epoch: 0017 cost = 17.416731048
Epoch: 0018 cost = 17.935352062
Epoch: 0019 cost = 10.805751886
Epoch: 0020 cost = 10.230374276
Epoch: 0021 cost = 8.220172869
Epoch: 0022 cost = 7.297210685
Epoch: 0023 cost = 5.800839519
Epoch: 0024 cost = 5.076248872
Epoch: 0025 cost = 4.806048712
Epoch: 0026 cost = 3.774042987
Epoch: 0027 cost = 3.612254013
Epoch: 0028 cost = 3.669455304
Optimization Finished!
Accuracy: 0.9445
