# Fully Connected Neural Networks

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

The above code is for compatibility with Python 2. If you are using Python 3 (recommended), you may ignore this.

In [2]:
import tensorflow as tf
import numpy as np

We must import the TensorFlow and NumPy packages to be able to use them! We import TensorFlow under the alias `tf` so that we don't have to type out the full name every time we want to use a TensorFlow command. Likewise, NumPy is imported under the alias `np`.

In [3]:
# Use one fixed seed for both libraries so that random draws are repeatable.
seed = 1337
np.random.seed(seed)      # Seed NumPy's global random number generator
tf.set_random_seed(seed)  # Seed TensorFlow's default graph

## Loading CIFAR-10 Dataset

In [4]:
import sys
def unpickle(file):
    """Load and return the object stored in the pickle file at path `file`.

    On Python 3 the file is read with ``encoding='bytes'`` so that pickles
    written by Python 2 can be loaded; their string keys and values come back
    as ``bytes`` (e.g. ``b'data'``). On Python 2 the file is loaded as-is.

    NOTE: unpickling can execute arbitrary code. Only call this on files that
    come from a trusted source.

    Args:
        file:  Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    import pickle
    with open(file, 'rb') as fo:
        # Compare the numeric major version so every Python >= 3 takes this
        # branch and the function can never fall through without a result.
        if sys.version_info[0] >= 3:
            return pickle.load(fo, encoding='bytes')
        return pickle.load(fo)

In [5]:
# Load the pickled data file; the path is relative to the current working directory.
dataset = unpickle('../cifar-10-data')
# Display the available keys. On Python 3 they are bytes because unpickle() reads with encoding='bytes'.
dataset.keys()

dict_keys([b'batch_label', b'labels', b'data', b'filenames'])

In [6]:
# Pull the images (b'data') and their labels (b'labels') out of the loaded
# dictionary as NumPy arrays, then report their shapes.
x_data, y_data = (np.array(dataset[key]) for key in (b'data', b'labels'))
print("x_data shape:", x_data.shape)
print("y_data shape:", y_data.shape)

x_data shape: (10000, 3072)
y_data shape: (10000,)


In [7]:
# Shuffle the sample indices, then cut the shuffled order once: the first 90%
# of it becomes the training set and the remainder the test set.
n_samples = x_data.shape[0]
indices = np.random.permutation(n_samples)
training_idx, test_idx = np.split(indices, [int(0.9*n_samples)])

# Index images and labels with the same index arrays so they stay aligned.
x_train, y_train = x_data[training_idx], y_data[training_idx]
x_test, y_test = x_data[test_idx], y_data[test_idx]

print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)
print()
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

x_train shape: (9000, 3072)
x_test shape: (1000, 3072)

y_train shape: (9000,)
y_test shape: (1000,)


In [8]:
# Model dimensions: the number of input values per sample (the second
# dimension of x_data) and the number of output classes.
input_length, num_classes = 3072, 10

In [9]:
def fc(input_tensor, output_features, name='FC', func=tf.nn.relu):
    """Creates a Fully Connected Layer

    Args:
        input_tensor:  Tensor of shape `[batch, features]` that this FC layer uses as its input features.
        output_features:  The number of features that the layer will output.
        name:  The name of the Fully Connected layer. Will use this to define the `tf.variable_scope()`.
        func:  The activation function to use. It is called as `func(tensor, name=...)`.
            If `None`, uses ReLU.

    Returns:
        A tuple of (activations, weights, bias). The weights and bias are returned so that a regularizer
        can operate directly on them if needed. The activation Tensor represents the output feature
        activations. Will have shape `[None, output_features]`.
    """
    if func is None:
        func = tf.nn.relu  # Documented fallback; calling `None` below would raise a TypeError

    input_features = int(input_tensor.shape[1])  # Get the number of features for the input tensor
    with tf.variable_scope(name):
        # NOTE(review): the stddev divides by the product of both layer sizes. He initialization
        # would divide by `input_features` only -- confirm that this scaling is intended.
        w = tf.get_variable('W', initializer=tf.truncated_normal(
            shape=[input_features, output_features],
            stddev=np.sqrt(2/(input_features*output_features))))  # Set stddev of random distribution for weights
        b = tf.get_variable('B', initializer=tf.zeros([output_features]))  # Bias starts at zero

        return func(tf.matmul(input_tensor, w) + b, name='Activations'), w, b

In [16]:
def testing_error(hidden_features, lmbda):
    """Trains a fully connected network and returns its error on the test set.

    Builds a fresh graph (three `fc` hidden layers of `hidden_features` units each,
    followed by a softmax layer), trains it on `x_train`/`y_train` with Adam and an
    L2 weight penalty, and evaluates it on `x_test`/`y_test`. Progress and the final
    training/testing accuracies are printed.

    Args:
        hidden_features:  Number of output features of each of the three hidden layers.
        lmbda:  Coefficient of the L2 regularization penalty that is added to the loss.

    Returns:
        The test error, i.e. `1 - test accuracy`, as a float. An error (lower is
        better) is returned rather than the accuracy because this function is handed
        to a minimizer.
    """
    print('hidden_features =', hidden_features, 'lambda =', lmbda)
    tf.reset_default_graph()  # Clear the graph to avoid errors from reusing variables
    # The graph-level seed is stored on the graph that was just discarded, so set it
    # again; otherwise the weight initialization below is not reproducible.
    tf.set_random_seed(seed)

    with tf.variable_scope('Inputs'):
        x = tf.placeholder(tf.float32, [None, input_length], name='x')
        y = tf.placeholder(tf.int64, [None,], name='y')  # Integer class labels, not one-hot encoded

    with tf.variable_scope('Hidden-Layers'):
        hidden1, w1, _ = fc(x, hidden_features, 'FC1')
        hidden2, w2, _ = fc(hidden1, hidden_features, 'FC2')
        hidden3, w3, _ = fc(hidden2, hidden_features, 'FC3')

    with tf.variable_scope('Softmax'):
        w = tf.get_variable('W', initializer=tf.truncated_normal(
            shape=[hidden_features, num_classes],
            stddev=np.sqrt(1/(hidden_features*num_classes)))) # Set stddev of random distribution for weights
        b = tf.get_variable('B', initializer=tf.zeros([num_classes]))

        scores = tf.matmul(hidden3, w) + b
        # Predicted probability vectors for each sample in the batch, shape = `[None, 10]`
        pred = tf.nn.softmax(scores)

    with tf.variable_scope('Optimization'):
        # The "sparse" cross entropy takes integer class labels directly, so the labels
        # do not have to be one-hot encoded first.
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=scores))

        regularizer = (tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2) + tf.nn.l2_loss(w3) + tf.nn.l2_loss(w))  # L2 Regularizer
        loss += lmbda*regularizer  # Add regularization penalty to the loss function

        correct = tf.equal(tf.argmax(pred, axis=1), y)           # boolean 1-D Tensor of if pred was correct
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))  # scalar (0-D) Tensor of the average accuracy

        train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)  # Op that steps loss towards minimum

    init = tf.global_variables_initializer()  # Op that initializes variables

    n_epochs = 1      # The number of full passes through the dataset before we quit training
    batch_size = 256  # Feed in only 256 images in a single batch instead of the whole training set
    training_size = x_train.shape[0]

    # The context manager closes the session on exit, so calling this function many
    # times (once per hyperparameter setting) does not leak sessions.
    with tf.Session() as sess:
        sess.run(init)

        for j in range(n_epochs):
            perm = np.random.permutation(training_size)  # Every epoch, get a new set of batches
            for i in range(0, training_size, batch_size):
                idx = perm[i:i+batch_size]  # Select indices for batch
                x_batch = x_train[idx]
                y_batch = y_train[idx]
                sess.run(train_step, feed_dict={x:x_batch, y:y_batch})
            # Report on the first epoch and on every 50th epoch after that
            if j%50 == 49 or j==0:
                l, r, a = sess.run([loss, regularizer, accuracy], feed_dict={x:x_train, y:y_train})
                print("epoch %6d, loss=%6f, regularizer=%0.4f, accuracy=%.2f%%" % (j+1, l, round(r, 4), 100*round(a, 4)))

        # Evaluate each split once and reuse the values for printing and for the result
        train_accuracy = sess.run(accuracy, feed_dict={x:x_train, y:y_train})
        test_accuracy = sess.run(accuracy, feed_dict={x:x_test, y:y_test})

    print('Training Accuracy:', train_accuracy)
    print('Testing Accuracy:', test_accuracy)
    print('')

    return float(1.0 - test_accuracy)

In [21]:
import pysmac

In [22]:
# Hyperparameter search space. The keys match the argument names of
# testing_error; each value gives the parameter type, its range and the default.
parameters = {
    'hidden_features': ('integer', [256, 4096], 512),
    'lmbda': ('real', [0, 1], 0.04),
}

In [23]:
opt = pysmac.SMAC_optimizer()

# Minimize testing_error over the search space with a budget of 50 evaluations.
# The result is bound to `best_parameters` rather than `parameters`, so the search
# space definition is not overwritten and this cell can be re-run.
value, best_parameters = opt.minimize(testing_error, 50, parameters)

print('Lowest function value found: %f' % value)
print('Parameter setting %s' % (best_parameters,))

hidden_features = 512 lambda = 0.04


KeyboardInterrupt: 