In [1]:
import sys
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [138]:
# image shape
HEIGHT = 96
WIDTH = 96
DEPTH = 3

# size of a single image in bytes
SIZE = HEIGHT * WIDTH * DEPTH

# path to the directory with the data
DATA_DIR = './data'

# url of the binary data
DATA_URL = 'http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz'

In [139]:
def read_labels(path_to_labels):
    """
    :param path_to_labels: path to the binary file containing labels from the STL-10 dataset
    :return: an array containing the labels
    """
    with open(path_to_labels, 'rb') as f:
        labels = np.fromfile(f, dtype=np.uint8)
        return labels

In [140]:
def read_all_images(path_to_data):
    """
    :param path_to_data: the file containing the binary images from the STL-10 dataset
    :return: an array containing all the images
    """

    with open(path_to_data, 'rb') as f:
        # read whole file in uint8 chunks
        everything = np.fromfile(f, dtype=np.uint8)

        # We force the data into 3x96x96 chunks, since the
        # images are stored in "column-major order", meaning
        # that "the first 96*96 values are the red channel,
        # the next 96*96 are green, and the last are blue."
        # The -1 is since the size of the pictures depends
        # on the input file, and this way numpy determines
        # the size on its own.

        images = np.reshape(everything, (-1, 3, 96, 96))

        # Now transpose the images into a standard image format
        # readable by, for example, matplotlib.imshow
        # You might want to comment this line or reverse the shuffle
        # if you will use a learning algorithm like CNN, since they like
        # their channels separated.
        images = np.transpose(images, (0, 3, 2, 1))
        return images

In [142]:
# path to the binary train file with image data
TRAIN_DATA_PATH = './data/stl10_binary/train_X.bin'

# path to the binary train file with labels
TRAIN_LABEL_PATH = './data/stl10_binary/train_y.bin'

# test to check if the whole dataset is read correctly
X_train = read_all_images(TRAIN_DATA_PATH)
print(X_train.shape)

y_train = read_labels(TRAIN_LABEL_PATH)
print(y_train.shape)


(5000, 96, 96, 3)
(5000,)


In [143]:
# path to the binary train file with image data
TEST_DATA_PATH = './data/stl10_binary/test_X.bin'

# path to the binary train file with labels
TEST_LABEL_PATH = './data/stl10_binary/test_y.bin'

# test to check if the whole dataset is read correctly
X_test = read_all_images(TEST_DATA_PATH)
print(X_test.shape)

y_test = read_labels(TEST_LABEL_PATH)
print(y_test.shape)

(8000, 96, 96, 3)
(8000,)


In [144]:
X_test, X_valid, y_test, y_valid = train_test_split(X_test, y_test, test_size=0.33, random_state=42)
print(X_test.shape, X_valid.shape, y_test.shape, y_valid.shape)

(5360, 96, 96, 3) (2640, 96, 96, 3) (5360,) (2640,)


In [127]:
import tensorflow as tf
# Parameters
learning_rate = 0.1
num_steps = 500
batch_size = 128
display_step = 100

# Network Parameters
n_hidden_1 = 512 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
n_hidden_3 = 128 # 3rd layer number of neurons
n_hidden_4 = 64 # 4th layer number of neurons
n_hidden_5 = 32 # 5th layer number of neurons
n_hidden_6 = 16 # 6th layer number of neurons
num_input = SIZE # data input (img shape: 96 * 96 * 3)
num_classes = 10 # total classes 1 - 10 mapped to 0 - 9

# tf Graph input
X = tf.placeholder("float", [None, num_input])
Y = tf.placeholder("float", [None, num_classes])

In [128]:

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4])),
    'h5': tf.Variable(tf.random_normal([n_hidden_4, n_hidden_5])),
    'h6': tf.Variable(tf.random_normal([n_hidden_5, n_hidden_6])),
    'out': tf.Variable(tf.random_normal([n_hidden_6, num_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'b5': tf.Variable(tf.random_normal([n_hidden_5])),
    'b6': tf.Variable(tf.random_normal([n_hidden_6])),
    'out': tf.Variable(tf.random_normal([num_classes]))
}

In [131]:
# Create model
def neural_net(x):
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    layer_5 = tf.add(tf.matmul(layer_4, weights['h5']), biases['b5'])
    layer_6 = tf.add(tf.matmul(layer_5, weights['h6']), biases['b6'])
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_6, weights['out']) + biases['out']
    return out_layer

In [132]:

# Construct model
logits = neural_net(X)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [133]:
MAX_LABELS = len(set(y_train))
MAX_LABELS

10

In [134]:
import random
def onehotencoder(label_out):
    batch_size = len(label_out)
    encoded = np.zeros((batch_size, MAX_LABELS))
    encoded[np.arange(batch_size), label_out] = 1
    return encoded

def getbatch(X, y, batchsize=32):
    arraylength = len(X);
    count = 0 
    while count < arraylength/batchsize:
        randindex = random.sample(range(0, arraylength), batchsize)
        count += 1
        image_path_out = [X[i] for i in randindex]
        label_out = [y[i] for i in randindex]
        one_hot_encoded_label_out = onehotencoder(label_out)
        yield (image_path_out, one_hot_encoded_label_out)

In [135]:
#
#batch_x, batch_y = getbatch(X_train, y_train, batchsize=8)

IndexError: index 10 is out of bounds for axis 1 with size 10

In [136]:
def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels. 
    '''
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i].reshape(-1) for i in idx]
    labels_shuffle = [labels[i]-1 for i in idx]

    return np.asarray(data_shuffle), onehotencoder(np.asarray(labels_shuffle))

batch_x, batch_y = next_batch(32, X_train, y_train)
batch_y.shape

(32, 10)

In [137]:
# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, num_steps+1):
        batch_x, batch_y = next_batch(32, X_train, y_train)
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                 Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for MNIST test images
    batch_x, batch_y = next_batch(32, X_test, y_test)
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: batch_x,
                                      Y: batch_y}))

Step 1, Minibatch Loss= 1312885571584.0000, Training Accuracy= 0.156
Step 100, Minibatch Loss= 17464434688.0000, Training Accuracy= 0.156
Step 200, Minibatch Loss= 3902156288.0000, Training Accuracy= 0.219
Step 300, Minibatch Loss= 1783213696.0000, Training Accuracy= 0.250
Step 400, Minibatch Loss= 1240479744.0000, Training Accuracy= 0.250
Step 500, Minibatch Loss= 817291392.0000, Training Accuracy= 0.406
Optimization Finished!
Testing Accuracy: 0.3125
