In [1]:
'''
DNN to classify MNIST handwritten digits
'''

import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./data/mnist", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./data/mnist/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./data/mnist/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting ./data/mnist/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting ./data/mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/datas

In [2]:
"""
Task #1 & Task #2
"""

# Parameters
learning_rate = 0.001
n_epochs = 20
batch_size = 100
display_step = 1

# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
X = tf.placeholder(tf.float32, [batch_size, n_input])
Y = tf.placeholder(tf.float32, [batch_size, n_classes])


# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model
pred = multilayer_perceptron(X, weights, biases)

# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))

#optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)


# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training 
    for i in range(n_epochs):
        total_loss = 0.
        n_batches = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for j in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, l = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            # Compute average loss
            total_loss += l
        # Display logs per epoch step
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))

    print("Optimization Finished!")


    correct_preds = tf.equal(tf.argmax(pred, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
    
    n_batches = int(mnist.test.num_examples/batch_size)
    total_correct_preds = 0
    
    for i in range(n_batches):
        X_batch, Y_batch = mnist.test.next_batch(batch_size)
        accuracy_batch = sess.run(accuracy, feed_dict={X: X_batch, Y:Y_batch}) 
        total_correct_preds += accuracy_batch   
    
    print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Average loss epoch 0: 135.8106663305109
Average loss epoch 1: 33.07767553502863
Average loss epoch 2: 20.323474855422972
Average loss epoch 3: 13.902497258294712
Average loss epoch 4: 9.965045595610475
Average loss epoch 5: 7.358891784230919
Average loss epoch 6: 5.474251942507508
Average loss epoch 7: 4.163528723437352
Average loss epoch 8: 3.182201073814472
Average loss epoch 9: 2.5385226775052376
Average loss epoch 10: 1.9849248842868366
Average loss epoch 11: 1.5254589312393576
Average loss epoch 12: 1.225964889697639
Average loss epoch 13: 0.9822812628534695
Average loss epoch 14: 0.7658523709361795
Average loss epoch 15: 0.6246198208679925
Average loss epoch 16: 0.5215695654856197
Average loss epoch 17: 0.4649000269089883

In [3]:
"""
Task #3
"""

import pandas as pd
import numpy as np
import time

task3_data_dict = {"Features": np.array([256, 128, 64]),
                   "Accuracy": np.zeros(3),
                   "Num of Hidden Layers": np.zeros(3),
                   "Num of Learning Params": np.zeros(3),
                   "Avg Loss (Last Epoch)": np.zeros(3),
                   "Elapsed Time": np.zeros(3)}

task3_df = pd.DataFrame.from_dict(task3_data_dict)
task3_df.index = task3_df["Features"]
del task3_df["Features"]

In [4]:
"""
Task #3 - 1 : DNN with two hidden layers, each layer has 256 units (as in the given code)
"""

# Pandas Params
row_index = 256

# Parameters
learning_rate = 0.001
n_epochs = 20
batch_size = 100
display_step = 1

# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

task3_df.loc[row_index, "Num of Hidden Layers"] = 2
task3_df.loc[row_index, "Num of Learning Params"] = n_input * n_hidden_1 * n_hidden_2

# tf Graph input
X = tf.placeholder(tf.float32, [batch_size, n_input])
Y = tf.placeholder(tf.float32, [batch_size, n_classes])


# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model
pred = multilayer_perceptron(X, weights, biases)

# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))

#optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)


# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    start = time.time()
    # Training 
    for i in range(n_epochs):
        total_loss = 0.
        n_batches = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for j in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, l = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            # Compute average loss
            total_loss += l
        # Display logs per epoch step
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
        
        if (j == (n_batches - 1)):
            task3_df.loc[row_index, "Avg Loss (Last Epoch)"] = total_loss / n_batches
            

    end = time.time()
    print("Optimization Finished!")

    task3_df.loc[row_index, "Elapsed Time"] = end - start

    correct_preds = tf.equal(tf.argmax(pred, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
    
    n_batches = int(mnist.test.num_examples/batch_size)
    total_correct_preds = 0
    
    for i in range(n_batches):
        X_batch, Y_batch = mnist.test.next_batch(batch_size)
        accuracy_batch = sess.run(accuracy, feed_dict={X: X_batch, Y:Y_batch}) 
        total_correct_preds += accuracy_batch   
    
    print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples))
    
    task3_df.loc[row_index, "Accuracy"] = total_correct_preds / mnist.test.num_examples

Average loss epoch 0: 160.86162128101697
Average loss epoch 1: 34.096188970912586
Average loss epoch 2: 20.73371030742472
Average loss epoch 3: 14.302431708140807
Average loss epoch 4: 10.400523157350042
Average loss epoch 5: 7.721865692366295
Average loss epoch 6: 5.7856823261618855
Average loss epoch 7: 4.424016938563365
Average loss epoch 8: 3.4341916809548785
Average loss epoch 9: 2.6681965720259564
Average loss epoch 10: 2.1047920663636686
Average loss epoch 11: 1.6463948617910695
Average loss epoch 12: 1.3296441090488085
Average loss epoch 13: 1.0270987755473107
Average loss epoch 14: 0.8384445363209733
Average loss epoch 15: 0.6921892414488527
Average loss epoch 16: 0.5857040391102663
Average loss epoch 17: 0.48609939847797395
Average loss epoch 18: 0.4299562169102687
Average loss epoch 19: 0.3741052600311888
Optimization Finished!
Accuracy 0.9475


In [5]:
"""
Task #3 - 2 : DNN with three hidden layers, each layer has 128 units
"""

# Pandas Params
row_index = 128

# Parameters
learning_rate = 0.001
n_epochs = 20
batch_size = 100
display_step = 1

# Network Parameters
n_hidden_1 = 128 # 1st layer number of features
n_hidden_2 = 128 # 2nd layer number of features
n_hidden_3 = 128 # 3rd layer number of features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

task3_df.loc[row_index, "Num of Hidden Layers"] = 3
task3_df.loc[row_index, "Num of Learning Params"] = n_input * n_hidden_1 * n_hidden_2 * n_hidden_3

# tf Graph input
X = tf.placeholder(tf.float32, [batch_size, n_input])
Y = tf.placeholder(tf.float32, [batch_size, n_classes])


# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Hidden layer
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_3, weights['out']) + biases['out']
    return out_layer

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model
pred = multilayer_perceptron(X, weights, biases)

# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))

#optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)


# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    start = time.time()
    # Training 
    for i in range(n_epochs):
        total_loss = 0.
        n_batches = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for j in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, l = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            # Compute average loss
            total_loss += l
        # Display logs per epoch step
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
        
        if (j == (n_batches - 1)):
            task3_df.loc[row_index, "Avg Loss (Last Epoch)"] = total_loss / n_batches
            

    end = time.time()
    print("Optimization Finished!")

    task3_df.loc[row_index, "Elapsed Time"] = end - start

    correct_preds = tf.equal(tf.argmax(pred, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
    
    n_batches = int(mnist.test.num_examples/batch_size)
    total_correct_preds = 0
    
    for i in range(n_batches):
        X_batch, Y_batch = mnist.test.next_batch(batch_size)
        accuracy_batch = sess.run(accuracy, feed_dict={X: X_batch, Y:Y_batch}) 
        total_correct_preds += accuracy_batch   
    
    print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples))
    
    task3_df.loc[row_index, "Accuracy"] = total_correct_preds / mnist.test.num_examples

Average loss epoch 0: 560.8089004100452
Average loss epoch 1: 118.7150104453347
Average loss epoch 2: 71.90258411927657
Average loss epoch 3: 50.22454301183874
Average loss epoch 4: 37.56080601442944
Average loss epoch 5: 29.241417313272304
Average loss epoch 6: 23.035642646764384
Average loss epoch 7: 18.52054607440125
Average loss epoch 8: 15.094883438132026
Average loss epoch 9: 12.36318474125266
Average loss epoch 10: 10.12348026864032
Average loss epoch 11: 8.343385827657224
Average loss epoch 12: 7.263522472793784
Average loss epoch 13: 6.035478309174858
Average loss epoch 14: 5.122128925956597
Average loss epoch 15: 4.454072027382025
Average loss epoch 16: 3.8035533240468564
Average loss epoch 17: 3.2434293466820363
Average loss epoch 18: 2.7647905182688706
Average loss epoch 19: 2.3500110400437055
Optimization Finished!
Accuracy 0.9254


In [6]:
"""
Task #3 - 3 : DNN with four hidden layers, each layer has 64 units
"""

# Pandas Params
row_index = 64

# Parameters
learning_rate = 0.001
n_epochs = 20
batch_size = 100
display_step = 1

# Network Parameters
n_hidden_1 = 64 # 1st layer number of features
n_hidden_2 = 64 # 2nd layer number of features
n_hidden_3 = 64 # 3rd layer number of features
n_hidden_4 = 64 # 4th layer number of features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

task3_df.loc[row_index, "Num of Hidden Layers"] = 4
task3_df.loc[row_index, "Num of Learning Params"] = n_input * n_hidden_1 * n_hidden_2 * n_hidden_3 * n_hidden_4

# tf Graph input
X = tf.placeholder(tf.float32, [batch_size, n_input])
Y = tf.placeholder(tf.float32, [batch_size, n_classes])


# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Hidden layer
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    # Hidden layer
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    layer_4 = tf.nn.relu(layer_4)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
    return out_layer

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model
pred = multilayer_perceptron(X, weights, biases)

# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))

#optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)


# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    start = time.time()
    # Training 
    for i in range(n_epochs):
        total_loss = 0.
        n_batches = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for j in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, l = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            # Compute average loss
            total_loss += l
        # Display logs per epoch step
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
        
        if (j == (n_batches - 1)):
            task3_df.loc[row_index, "Avg Loss (Last Epoch)"] = total_loss / n_batches
            

    end = time.time()
    print("Optimization Finished!")

    task3_df.loc[row_index, "Elapsed Time"] = end - start

    correct_preds = tf.equal(tf.argmax(pred, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
    
    n_batches = int(mnist.test.num_examples/batch_size)
    total_correct_preds = 0
    
    for i in range(n_batches):
        X_batch, Y_batch = mnist.test.next_batch(batch_size)
        accuracy_batch = sess.run(accuracy, feed_dict={X: X_batch, Y:Y_batch}) 
        total_correct_preds += accuracy_batch   
    
    print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples))
    
    task3_df.loc[row_index, "Accuracy"] = total_correct_preds / mnist.test.num_examples

Average loss epoch 0: 1116.4598465798117
Average loss epoch 1: 193.05631949684837
Average loss epoch 2: 104.29994050459429
Average loss epoch 3: 68.4742736937783
Average loss epoch 4: 48.72823608745228
Average loss epoch 5: 37.268853923624214
Average loss epoch 6: 29.310892461863432
Average loss epoch 7: 23.731789291121743
Average loss epoch 8: 19.613103564435786
Average loss epoch 9: 16.473175085891377
Average loss epoch 10: 13.982375352165915
Average loss epoch 11: 11.977295897223733
Average loss epoch 12: 10.445814942717552
Average loss epoch 13: 9.136606003479525
Average loss epoch 14: 8.09996989025311
Average loss epoch 15: 7.326248303001577
Average loss epoch 16: 6.546369616389274
Average loss epoch 17: 5.920628056766635
Average loss epoch 18: 5.359115442895167
Average loss epoch 19: 4.9954118120052255
Optimization Finished!
Accuracy 0.8923


In [7]:
task3_df

Unnamed: 0_level_0,Accuracy,Avg Loss (Last Epoch),Elapsed Time,Num of Hidden Layers,Num of Learning Params
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
256,0.9475,0.374105,12.412959,2.0,51380220.0
128,0.9254,2.350011,13.456423,3.0,1644167000.0
64,0.8923,4.995412,14.470979,4.0,13153340000.0


Feature 의 갯수가 적고, Layer 가 깊어질 수록 

In [13]:
"""
Task #4
"""

import json

In [18]:
"""
Models
"""
all_model_dict = dict()

# Network Parameters
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

# 784 -> 512 -> 256 -> 10
class MyModel_1(object):
    
    def __init__(self, sess, learning_rate, batch_size):
        self.sess = sess
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.hidden_1_size = 512
        self.hidden_2_size = 256
        
        self.X = tf.placeholder(tf.float32, shape=[self.batch_size, n_input])
        self.Y = tf.placeholder(tf.float32, shape=[self.batch_size, n_classes])
        
        self.pred = None
        self.loss = None
        self.optimizer = None
        
        self.correct_preds = None
        self.accuracy = None
        
    def build_model(self):
        with tf.variable_scope("Layer1"):
            W1 = tf.Variable(tf.random_normal([n_input, self.hidden_1_size]))
            b1 = tf.Variable(tf.random_normal([self.hidden_1_size]))
            layer1 = tf.add(tf.matmul(self.X, W1), b1)
            layer1 = tf.nn.relu(layer1)
            
        with tf.variable_scope("Layer2"):
            W2 = tf.Variable(tf.random_normal([self.hidden_1_size, self.hidden_2_size]))
            b2 = tf.Variable(tf.random_normal([self.hidden_2_size]))
            layer2 = tf.add(tf.matmul(layer1, W2), b2)
            layer2 = tf.nn.relu(layer2)
        
        with tf.variable_scope("Layer3"):
            W3 = tf.Variable(tf.random_normal([self.hidden_2_size, n_classes]))
            b3 = tf.Variable(tf.random_normal([n_classes]))
            layer3 = tf.add(tf.matmul(layer2, W3), b3)
        
        self.pred = layer3
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred, labels=self.Y))
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
        
        self.correct_preds = tf.equal(tf.argmax(self.pred, axis=1), tf.argmax(self.Y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_preds, tf.float32))
    
    def train(self, x_batch, y_batch):
        return self.sess.run(fetches=[self.loss, self.optimizer], feed_dict={self.X: x_batch, self.Y: y_batch})
    
    def get_accuracy(self, x_batch, y_batch):
        return self.sess.run(fetches=[self.accuracy], feed_dict={self.X: x_batch, self.Y: y_batch})


# 784 -> 784 -> 512 -> 10
class MyModel_2(object):
    
    def __init__(self, sess, learning_rate, batch_size):
        self.sess = sess
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.hidden_1_size = 784
        self.hidden_2_size = 512 
        
        self.X = tf.placeholder(tf.float32, shape=[self.batch_size, n_input])
        self.Y = tf.placeholder(tf.float32, shape=[self.batch_size, n_classes])
        
        self.pred = None
        self.loss = None
        self.optimizer = None
        
        self.correct_preds = None
        self.accuracy = None
        
    def build_model(self):
        with tf.variable_scope("Layer1"):
            W1 = tf.Variable(tf.random_normal([n_input, self.hidden_1_size]))
            b1 = tf.Variable(tf.random_normal([self.hidden_1_size]))
            layer1 = tf.add(tf.matmul(self.X, W1), b1)
            layer1 = tf.nn.relu(layer1)
            
        with tf.variable_scope("Layer2"):
            W2 = tf.Variable(tf.random_normal([self.hidden_1_size, self.hidden_2_size]))
            b2 = tf.Variable(tf.random_normal([self.hidden_2_size]))
            layer2 = tf.add(tf.matmul(layer1, W2), b2)
            layer2 = tf.nn.relu(layer2)
        
        with tf.variable_scope("Layer3"):
            W3 = tf.Variable(tf.random_normal([self.hidden_2_size, n_classes]))
            b3 = tf.Variable(tf.random_normal([n_classes]))
            layer3 = tf.add(tf.matmul(layer2, W3), b3)
        
        self.pred = layer3
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred, labels=self.Y))
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
        
        self.correct_preds = tf.equal(tf.argmax(self.pred, axis=1), tf.argmax(self.Y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_preds, tf.float32))
        
    def train(self, x_batch, y_batch):
        return self.sess.run(fetches=[self.loss, self.optimizer], feed_dict={self.X: x_batch, self.Y: y_batch})
    
    def get_accuracy(self, x_batch, y_batch):
        return self.sess.run(fetches=[self.accuracy], feed_dict={self.X: x_batch, self.Y: y_batch})
    
# 784 -> 1024 -> 784 -> 10
class MyModel_3(object):
    
    def __init__(self, sess, learning_rate, batch_size):
        self.sess = sess
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.hidden_1_size = 1024
        self.hidden_2_size = 784
        
        self.X = tf.placeholder(tf.float32, shape=[self.batch_size, n_input])
        self.Y = tf.placeholder(tf.float32, shape=[self.batch_size, n_classes])
        
        self.pred = None
        self.loss = None
        self.optimizer = None
        
        self.correct_preds = None
        self.accuracy = None
        
    def build_model(self):
        with tf.variable_scope("Layer1"):
            W1 = tf.Variable(tf.random_normal([n_input, self.hidden_1_size]))
            b1 = tf.Variable(tf.random_normal([self.hidden_1_size]))
            layer1 = tf.add(tf.matmul(self.X, W1), b1)
            layer1 = tf.nn.relu(layer1)
            
        with tf.variable_scope("Layer2"):
            W2 = tf.Variable(tf.random_normal([self.hidden_1_size, self.hidden_2_size]))
            b2 = tf.Variable(tf.random_normal([self.hidden_2_size]))
            layer2 = tf.add(tf.matmul(layer1, W2), b2)
            layer2 = tf.nn.relu(layer2)
        
        with tf.variable_scope("Layer3"):
            W3 = tf.Variable(tf.random_normal([self.hidden_2_size, n_classes]))
            b3 = tf.Variable(tf.random_normal([n_classes]))
            layer3 = tf.add(tf.matmul(layer2, W3), b3)
        
        self.pred = layer3
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred, labels=self.Y))
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
        
        self.correct_preds = tf.equal(tf.argmax(self.pred, axis=1), tf.argmax(self.Y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_preds, tf.float32))
        
    def train(self, x_batch, y_batch):
        return self.sess.run(fetches=[self.loss, self.optimizer], feed_dict={self.X: x_batch, self.Y: y_batch})
    
    def get_accuracy(self, x_batch, y_batch):
        return self.sess.run(fetches=[self.accuracy], feed_dict={self.X: x_batch, self.Y: y_batch})

In [11]:
"""
Model 1 (MyModel_1)
"""

# Parameters
n_epochs_list = [10, 20, 40, 80, 160, 320]
batch_size_list = [100, 200]
learning_rate_list = [1e-1, 1e-2, 1e-3, 1e-4]
display_step = 1

# Launch the graph
model_1_dict = dict()

with tf.Session() as sess:
    #Training
    n_epochs_dict = dict()
    for n_epochs in n_epochs_list:
        batch_size_dict = dict()
        
        for batch_size in batch_size_list:
            learning_rate_dict = dict()
            
            for learning_rate in learning_rate_list:
                accuracy_dict = dict()
                
                model = MyModel_1(sess=sess, learning_rate=learning_rate, batch_size=batch_size)
                model.build_model()
                
                # Start
                sess.run(tf.global_variables_initializer())
                
                print("[ n_epochs : {}, batch_size : {}, learning_rate : {} ]".format(n_epochs, batch_size, learning_rate))
                
                for i in range(n_epochs):
                    total_loss = 0.
                    n_batches = int(mnist.train.num_examples/batch_size)
                    # Loop over all batches
                    for j in range(n_batches):
                        X_batch, Y_batch = mnist.train.next_batch(batch_size)
                        # Run optimization op (backprop) and cost op (to get loss value)
                        l, _ = model.train(x_batch=X_batch, y_batch=Y_batch)
                        # Compute average loss
                        total_loss += l
                    # Display logs per epoch step
                    # print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))

                # print("Optimization Finished!")

                n_batches = int(mnist.test.num_examples/batch_size)
                total_correct_preds = 0

                for i in range(n_batches):
                    X_batch, Y_batch = mnist.test.next_batch(batch_size)
                    accuracy_batch = model.get_accuracy(x_batch=X_batch, y_batch=Y_batch)
                    total_correct_preds += accuracy_batch[0]

                accuracy = total_correct_preds / n_batches
                print('> Accuracy : {0}'.format(accuracy))
                
                learning_rate_dict[learning_rate] = accuracy
            
            batch_size_dict[batch_size] = learning_rate_dict
            
        n_epochs_dict[n_epochs] = batch_size_dict
    
    model_1_dict["model_1"] = n_epochs_dict

[ n_epochs : 10, batch_size : 100, learning_rate : 0.1 ]
> Accuracy : 0.09579999979585409
[ n_epochs : 10, batch_size : 100, learning_rate : 0.01 ]
> Accuracy : 0.8378999978303909
[ n_epochs : 10, batch_size : 100, learning_rate : 0.001 ]
> Accuracy : 0.9503999990224838
[ n_epochs : 10, batch_size : 100, learning_rate : 0.0001 ]
> Accuracy : 0.8909000033140182
[ n_epochs : 10, batch_size : 200, learning_rate : 0.1 ]
> Accuracy : 0.11350000098347664
[ n_epochs : 10, batch_size : 200, learning_rate : 0.01 ]
> Accuracy : 0.9552999913692475
[ n_epochs : 10, batch_size : 200, learning_rate : 0.001 ]
> Accuracy : 0.944399995803833
[ n_epochs : 10, batch_size : 200, learning_rate : 0.0001 ]
> Accuracy : 0.8801999974250794
[ n_epochs : 20, batch_size : 100, learning_rate : 0.1 ]
> Accuracy : 0.11349999975413085
[ n_epochs : 20, batch_size : 100, learning_rate : 0.01 ]
> Accuracy : 0.8800000023841857
[ n_epochs : 20, batch_size : 100, learning_rate : 0.001 ]
> Accuracy : 0.9554000043869019
[ n_

In [14]:
json_data = json.dumps(model_1_dict)
f = open("model_1_dict.json", "w")
f.write(json_data)
f.close()

model_1_dict

{'model_1': {10: {100: {0.0001: 0.8909000033140182,
    0.001: 0.9503999990224838,
    0.01: 0.8378999978303909,
    0.1: 0.09579999979585409},
   200: {0.0001: 0.8801999974250794,
    0.001: 0.944399995803833,
    0.01: 0.9552999913692475,
    0.1: 0.11350000098347664}},
  20: {100: {0.0001: 0.9155000030994416,
    0.001: 0.9554000043869019,
    0.01: 0.8800000023841857,
    0.1: 0.11349999975413085},
   200: {0.0001: 0.9062000036239624,
    0.001: 0.9500999927520752,
    0.01: 0.9692000091075897,
    0.1: 0.19750000059604644}},
  40: {100: {0.0001: 0.9267000037431717,
    0.001: 0.9627000051736831,
    0.01: 0.8383999997377396,
    0.1: 0.10319999925792217},
   200: {0.0001: 0.9235000026226043,
    0.001: 0.9553999948501587,
    0.01: 0.9767000126838684,
    0.1: 0.10319999977946281}},
  80: {100: {0.0001: 0.931900002360344,
    0.001: 0.9616000056266785,
    0.01: 0.8030999958515167,
    0.1: 0.1010999996215105},
   200: {0.0001: 0.93,
    0.001: 0.9607999980449676,
    0.01: 0.9782

In [15]:
"""
Model 2 (MyModel_2)
"""

# Parameters
n_epochs_list = [10, 20, 40, 80, 160, 320]
batch_size_list = [100, 200]
learning_rate_list = [1e-1, 1e-2, 1e-3, 1e-4]

# Launch the graph
model_2_dict = dict()

with tf.Session() as sess:
    #Training
    n_epochs_dict = dict()
    for n_epochs in n_epochs_list:
        batch_size_dict = dict()
        
        for batch_size in batch_size_list:
            learning_rate_dict = dict()
            
            for learning_rate in learning_rate_list:
                accuracy_dict = dict()
                
                model = MyModel_2(sess=sess, learning_rate=learning_rate, batch_size=batch_size)
                model.build_model()
                
                # Start
                sess.run(tf.global_variables_initializer())
                
                print("[ n_epochs : {}, batch_size : {}, learning_rate : {} ]".format(n_epochs, batch_size, learning_rate))
                
                for i in range(n_epochs):
                    total_loss = 0.
                    n_batches = int(mnist.train.num_examples/batch_size)
                    # Loop over all batches
                    for j in range(n_batches):
                        X_batch, Y_batch = mnist.train.next_batch(batch_size)
                        # Run optimization op (backprop) and cost op (to get loss value)
                        l, _ = model.train(x_batch=X_batch, y_batch=Y_batch)
                        # Compute average loss
                        total_loss += l
                    # Display logs per epoch step
                    # print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))

                # print("Optimization Finished!")

                n_batches = int(mnist.test.num_examples/batch_size)
                total_correct_preds = 0

                for i in range(n_batches):
                    X_batch, Y_batch = mnist.test.next_batch(batch_size)
                    accuracy_batch = model.get_accuracy(x_batch=X_batch, y_batch=Y_batch)
                    total_correct_preds += accuracy_batch[0]

                accuracy = total_correct_preds / n_batches
                print('> Accuracy : {0}'.format(accuracy))
                
                learning_rate_dict[learning_rate] = accuracy
            
            batch_size_dict[batch_size] = learning_rate_dict
            
        n_epochs_dict[n_epochs] = batch_size_dict
    
    model_2_dict["model_2"] = n_epochs_dict

[ n_epochs : 10, batch_size : 100, learning_rate : 0.1 ]
> Accuracy : 0.11349999956786633
[ n_epochs : 10, batch_size : 100, learning_rate : 0.01 ]
> Accuracy : 0.9672000062465668
[ n_epochs : 10, batch_size : 100, learning_rate : 0.001 ]
> Accuracy : 0.957900003194809
[ n_epochs : 10, batch_size : 100, learning_rate : 0.0001 ]
> Accuracy : 0.9124000012874603
[ n_epochs : 10, batch_size : 200, learning_rate : 0.1 ]
> Accuracy : 0.0981999995559454
[ n_epochs : 10, batch_size : 200, learning_rate : 0.01 ]
> Accuracy : 0.970900011062622
[ n_epochs : 10, batch_size : 200, learning_rate : 0.001 ]
> Accuracy : 0.9518999934196473
[ n_epochs : 10, batch_size : 200, learning_rate : 0.0001 ]
> Accuracy : 0.8969999992847443
[ n_epochs : 20, batch_size : 100, learning_rate : 0.1 ]
> Accuracy : 0.09820000000298024
[ n_epochs : 20, batch_size : 100, learning_rate : 0.01 ]
> Accuracy : 0.9742000091075897
[ n_epochs : 20, batch_size : 100, learning_rate : 0.001 ]
> Accuracy : 0.9629000037908554
[ n_ep

In [16]:
json_data = json.dumps(model_2_dict)
f = open("model_2_dict.json", "w")
f.write(json_data)
f.close()

model_2_dict

{'model_2': {10: {100: {0.0001: 0.9124000012874603,
    0.001: 0.957900003194809,
    0.01: 0.9672000062465668,
    0.1: 0.11349999956786633},
   200: {0.0001: 0.8969999992847443,
    0.001: 0.9518999934196473,
    0.01: 0.970900011062622,
    0.1: 0.0981999995559454}},
  20: {100: {0.0001: 0.9260000056028366,
    0.001: 0.9629000037908554,
    0.01: 0.9742000091075897,
    0.1: 0.09820000000298024},
   200: {0.0001: 0.9190999972820282,
    0.001: 0.9556999933719635,
    0.01: 0.97910001039505,
    0.1: 0.11350000023841858}},
  40: {100: {0.0001: 0.9334000009298324,
    0.001: 0.9674000060558319,
    0.01: 0.9780000096559525,
    0.1: 0.10279999993741512},
   200: {0.0001: 0.9291000020503998,
    0.001: 0.9645999956130982,
    0.01: 0.9800000143051147,
    0.1: 0.1944999998807907}},
  80: {100: {0.0001: 0.9337000024318695,
    0.001: 0.9682000082731247,
    0.01: 0.977400010228157,
    0.1: 0.10319999972358346},
   200: {0.0001: 0.9389999961853027,
    0.001: 0.9663000011444092,
    0.

In [19]:
"""
Model 3 (MyModel_3)
"""

# Parameters
n_epochs_list = [10, 20, 40, 80, 160, 320]
batch_size_list = [100, 200]
learning_rate_list = [1e-1, 1e-2, 1e-3, 1e-4]

# Launch the graph
model_3_dict = dict()

with tf.Session() as sess:
    #Training
    n_epochs_dict = dict()
    for n_epochs in n_epochs_list:
        batch_size_dict = dict()
        
        for batch_size in batch_size_list:
            learning_rate_dict = dict()
            
            for learning_rate in learning_rate_list:
                accuracy_dict = dict()
                
                model = MyModel_3(sess=sess, learning_rate=learning_rate, batch_size=batch_size)
                model.build_model()
                
                # Start
                sess.run(tf.global_variables_initializer())
                
                print("[ n_epochs : {}, batch_size : {}, learning_rate : {} ]".format(n_epochs, batch_size, learning_rate))
                
                for i in range(n_epochs):
                    total_loss = 0.
                    n_batches = int(mnist.train.num_examples/batch_size)
                    # Loop over all batches
                    for j in range(n_batches):
                        X_batch, Y_batch = mnist.train.next_batch(batch_size)
                        # Run optimization op (backprop) and cost op (to get loss value)
                        l, _ = model.train(x_batch=X_batch, y_batch=Y_batch)
                        # Compute average loss
                        total_loss += l
                    # Display logs per epoch step
                    # print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))

                # print("Optimization Finished!")

                n_batches = int(mnist.test.num_examples/batch_size)
                total_correct_preds = 0

                for i in range(n_batches):
                    X_batch, Y_batch = mnist.test.next_batch(batch_size)
                    accuracy_batch = model.get_accuracy(x_batch=X_batch, y_batch=Y_batch)
                    total_correct_preds += accuracy_batch[0]

                accuracy = total_correct_preds / n_batches
                print('> Accuracy : {0}'.format(accuracy))
                
                learning_rate_dict[learning_rate] = accuracy
            
            batch_size_dict[batch_size] = learning_rate_dict
            
        n_epochs_dict[n_epochs] = batch_size_dict
    
    model_3_dict["model_3"] = n_epochs_dict

[ n_epochs : 10, batch_size : 100, learning_rate : 0.1 ]
> Accuracy : 0.1135000004991889
[ n_epochs : 10, batch_size : 100, learning_rate : 0.01 ]
> Accuracy : 0.9681000036001205
[ n_epochs : 10, batch_size : 100, learning_rate : 0.001 ]
> Accuracy : 0.9594000017642975
[ n_epochs : 10, batch_size : 100, learning_rate : 0.0001 ]
> Accuracy : 0.9148000001907348
[ n_epochs : 10, batch_size : 200, learning_rate : 0.1 ]
> Accuracy : 0.1010000005364418
[ n_epochs : 10, batch_size : 200, learning_rate : 0.01 ]
> Accuracy : 0.9716000044345856
[ n_epochs : 10, batch_size : 200, learning_rate : 0.001 ]
> Accuracy : 0.9535999965667724
[ n_epochs : 10, batch_size : 200, learning_rate : 0.0001 ]
> Accuracy : 0.9085000014305115
[ n_epochs : 20, batch_size : 100, learning_rate : 0.1 ]
> Accuracy : 0.20000000044703484
[ n_epochs : 20, batch_size : 100, learning_rate : 0.01 ]
> Accuracy : 0.9774000084400177
[ n_epochs : 20, batch_size : 100, learning_rate : 0.001 ]
> Accuracy : 0.9638000065088272
[ n_e

In [20]:
json_data = json.dumps(model_3_dict)
f = open("model_3_dict.json", "w")
f.write(json_data)
f.close()

model_3_dict

{'model_3': {10: {100: {0.0001: 0.9148000001907348,
    0.001: 0.9594000017642975,
    0.01: 0.9681000036001205,
    0.1: 0.1135000004991889},
   200: {0.0001: 0.9085000014305115,
    0.001: 0.9535999965667724,
    0.01: 0.9716000044345856,
    0.1: 0.1010000005364418}},
  20: {100: {0.0001: 0.9283000028133392,
    0.001: 0.9638000065088272,
    0.01: 0.9774000084400177,
    0.1: 0.20000000044703484},
   200: {0.0001: 0.9277000045776367,
    0.001: 0.9623999953269958,
    0.01: 0.9757000100612641,
    0.1: 0.20490000009536743}},
  40: {100: {0.0001: 0.9345999991893769,
    0.001: 0.9703000110387802,
    0.01: 0.9784000092744827,
    0.1: 0.1947000003606081},
   200: {0.0001: 0.9309000015258789,
    0.001: 0.9637999951839447,
    0.01: 0.9783000123500823,
    0.1: 0.10279999919235706}},
  80: {100: {0.0001: 0.9343000000715256,
    0.001: 0.9725000065565109,
    0.01: 0.9798000103235245,
    0.1: 0.11349999971687794},
   200: {0.0001: 0.9385999989509582,
    0.001: 0.9711000049114227,
  

In [24]:
entire_model_dict = dict()

entire_model_dict["model_1"] = model_1_dict["model_1"]
entire_model_dict["model_2"] = model_2_dict["model_2"]
entire_model_dict["model_3"] = model_3_dict["model_3"]

In [27]:
best_acc_list = list()

for m_key, m_val in entire_model_dict.items():
    for e_key, n_val in m_val.items():
        for b_key, b_val in n_val.items():
            for l_key, accuracy in b_val.items():
                best_acc_list.append({"model": m_key, "n_epochs": e_key, "batch_size": b_key, "learning_rate": l_key, "accuracy": accuracy})

best_acc_list = sorted(best_acc_list, key=lambda element: element["accuracy"], reverse=True)

In [30]:
best_acc_list[:20]

[{'accuracy': 0.9837000095844268,
  'batch_size': 100,
  'learning_rate': 0.01,
  'model': 'model_3',
  'n_epochs': 160},
 {'accuracy': 0.9833000099658966,
  'batch_size': 200,
  'learning_rate': 0.01,
  'model': 'model_3',
  'n_epochs': 320},
 {'accuracy': 0.9831000089645385,
  'batch_size': 100,
  'learning_rate': 0.01,
  'model': 'model_3',
  'n_epochs': 320},
 {'accuracy': 0.9828000098466874,
  'batch_size': 100,
  'learning_rate': 0.01,
  'model': 'model_2',
  'n_epochs': 160},
 {'accuracy': 0.9824000132083893,
  'batch_size': 200,
  'learning_rate': 0.01,
  'model': 'model_1',
  'n_epochs': 320},
 {'accuracy': 0.9822000104188919,
  'batch_size': 100,
  'learning_rate': 0.01,
  'model': 'model_2',
  'n_epochs': 320},
 {'accuracy': 0.9815000128746033,
  'batch_size': 200,
  'learning_rate': 0.01,
  'model': 'model_3',
  'n_epochs': 160},
 {'accuracy': 0.9812000143527985,
  'batch_size': 200,
  'learning_rate': 0.01,
  'model': 'model_2',
  'n_epochs': 320},
 {'accuracy': 0.98110001