In [1]:
%%capture
# Import libraries
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import time
import pandas as pd

# Read data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
TESTING = False

# Usefull functions

In [2]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')


# Architecture

In [3]:
def create_architecture(filter1, filter2, feature_map1, feature_map2, one_fc_only):
    """Function that creates our architecture given various options.
    """
    # Input layer
    x  = tf.placeholder(tf.float32, [None, 784], name='x')
    y_ = tf.placeholder(tf.float32, [None, 10],  name='y_')
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # Convolutional layer 1
    W_conv1 = weight_variable([filter1, filter1, 1, feature_map1])
    b_conv1 = bias_variable([feature_map1])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Convolutional layer 2
    W_conv2 = weight_variable([filter2, filter2, feature_map1, feature_map2])
    b_conv2 = bias_variable([feature_map2])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected logic
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*feature_map2])
    if one_fc_only:
        W_fc2 = weight_variable([7 * 7 * feature_map2, 10])
        b_fc2 = bias_variable([10])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc2) + b_fc2)
        keep_prob = None
    else:
        # Fully connected layer 1
        W_fc1 = weight_variable([7 * 7 * feature_map2, 1024])
        b_fc1 = bias_variable([1024])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

        # Fully connected layer 2 (Output layer)
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])
        
        # Dropout
        keep_prob  = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    
    if one_fc_only:
        y = tf.nn.softmax(h_fc1, name='y')
    else:
        y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, name='y')
        
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

    # Training algorithm
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    
    return x, y_, train_step, accuracy, keep_prob

# Run Network

In [4]:
def run_experiment(filter1=5, filter2=5, feature_map1=32, feature_map2=64,
                   one_fc_only=False, testing=False, log=False):
    """Runs a full experiment on our data, creating a architecture as 
    specified by the user.
    """
    x, y_, train_step, accuracy, keep_prob = create_architecture(filter1, filter2,
                                                                feature_map1,
                                                                feature_map2,
                                                                one_fc_only)
    max_steps = 1 if testing else 1000
    test_dict = {x: mnist.test.images, y_: mnist.test.labels}
    if not one_fc_only:
        test_dict[keep_prob] = 1.0

    with tf.Session() as sess:
        start_time = time.time()
        sess.run(tf.initialize_all_variables())

        for step in range(max_steps):
            batch_xs, batch_ys = mnist.train.next_batch(50)
            train_dict = {x: batch_xs, y_: batch_ys}
            if not one_fc_only:
                train_dict[keep_prob] = 0.5
                
            if (step % 100) == 0:
                acc = sess.run(accuracy, feed_dict=test_dict)
                if log:
                    print(step, acc)
            sess.run(train_step, feed_dict=train_dict)
            
        total_acc = sess.run(accuracy, feed_dict=test_dict)
        if log:
            print(max_steps, total_acc)
            
    return total_acc, time.time() - start_time

## Default

In [5]:
results = pd.DataFrame(columns=["Nome", "Acurácia", "Tempo(s)"])

In [6]:
g = tf.Graph()
with g.as_default():
    temp = list(run_experiment(testing=TESTING))
    results.loc[len(results)] = ["Padrão"] + list(temp)

## 25 and 50 for the features maps of each convolutional layer respectively

In [7]:
g = tf.Graph()
with g.as_default():
    temp = run_experiment(feature_map1=25, feature_map2=50, testing=TESTING)
    results.loc[len(results)] = ["25-50 maps"] +  list(temp)

## filter 1 with size 3 and filter 2 with size 4

In [8]:
g = tf.Graph()
with g.as_default():
    temp = run_experiment(feature_map1=25, feature_map2=50, 
                   filter1=3, filter2=4, testing=TESTING)
    results.loc[len(results)] = ["3-4 filters"] +  list(temp)

## Remove 1 FC layer

In [9]:
g = tf.Graph()
with g.as_default():
    temp = run_experiment(feature_map1=25, feature_map2=50, 
                   filter1=3, filter2=4, one_fc_only=True, testing=TESTING)
    results.loc[len(results)] = ["1 FC"] +  list(temp)



- - - 

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

Resultados

In [10]:
results

Unnamed: 0,Nome,Acurácia,Tempo(s)
0,Padrão,0.9646,107.369723
1,25-50 maps,0.9627,94.950684
2,3-4 filters,0.9609,71.079185
3,1 FC,0.564,52.530506
