In [None]:
# Tensorflow and numpy to create the neural network
import tensorflow as tf
import numpy as np

# Matplotlib to plot info to show our results
import matplotlib.pyplot as plt

# OS to load files and save checkpoints
import os

%matplotlib inline

In [None]:
# Load the MNIST digits that ship with the TensorFlow example datasets.

model_name = "mnist"

# Image geometry used to reshape the flat pixel rows below.
image_height, image_width = 28, 28
color_channels = 1

# Display names for the ten classes: the digits "0" through "9".
category_names = [str(digit) for digit in range(10)]

mnist = tf.contrib.learn.datasets.load_dataset("mnist")

# Labels for both splits are held as int32 arrays.
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

# Reshape the images into (N, height, width, channels) batches; the training
# shape is printed before and after so the change is visible.
image_batch_shape = (-1, image_height, image_width, color_channels)

train_data = mnist.train.images
print(train_data.shape)

train_data = np.reshape(train_data, image_batch_shape)
print(train_data.shape)

eval_data = np.reshape(mnist.test.images, image_batch_shape)

In [None]:
# Load cifar data from file

# CIFAR image geometry: square 32-pixel images with three color channels.
image_height = image_width = 32
color_channels = 3

# Used later to name this model's checkpoint directory and files.
model_name = "cifar"

def unpickle(file):
    """Load a pickled object from ``file`` and return it.

    The file is opened in binary mode and unpickled with ``encoding='bytes'``,
    so 8-bit strings written by Python 2 come back as ``bytes`` objects
    (callers in this notebook index the result with keys such as ``b'data'``).

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.

    NOTE(review): ``pickle.load`` can execute arbitrary code while loading;
    only call this on files that come from a trusted source.
    """
    import pickle

    with open(file, 'rb') as fo:
        # Named ``contents`` (not ``dict``) so the builtin is not shadowed.
        contents = pickle.load(fo, encoding='bytes')
    return contents

# Directory containing the CIFAR files read by this cell.
cifar_path = '../cifar-10-data/'

# The metadata file stores the English category names as bytes; decode them to str.
category_names_bytes = unpickle(cifar_path + 'batches.meta')[b'label_names']
category_names = [raw_name.decode("utf-8") for raw_name in category_names_bytes]

# TODO: Process Cifar data

def process_data(data):
    """Convert flat pixel rows into float images laid out channels-last.

    Args:
        data: Array-like of raw pixel values. The reshape below reads each
            image as (channel, row, column), i.e. one full channel after
            another.

    Returns:
        A float32 array of shape (N, image_height, image_width,
        color_channels) whose values are the inputs divided by 255.
    """
    scaled = np.array(data, dtype=np.float32) / 255.0

    # Interpret the pixels as (N, channel, row, column), then move the channel
    # axis to the end.
    channels_first = scaled.reshape(-1, color_channels, image_height, image_width)
    return channels_first.transpose(0, 2, 3, 1)


def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` holding an int64 list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` holding a bytes list."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)


train_filename = "../cifar-10-data/train.tfrecords"

if not os.path.isfile(train_filename):
    # The training data has not been converted to a TFRecord file yet, so do it now.

    # Gather every batch first and concatenate once; growing an array with
    # np.append inside the loop would re-copy all previous batches each time.
    batch_images = []
    batch_labels = []
    for i in range(1, 6):
        data_batch = unpickle(cifar_path + 'data_batch_' + str(i))
        batch_images.append(data_batch[b'data'])
        batch_labels.append(data_batch[b'labels'])

    train_data = process_data(np.concatenate(batch_images))
    train_labels = np.concatenate(batch_labels).astype(np.int64)

    # Write to a temporary name and rename only after the writer has closed
    # successfully. Otherwise an interrupted run would leave a truncated file
    # that the isfile() check above would accept on every later run.
    partial_filename = train_filename + ".partial"
    with tf.python_io.TFRecordWriter(partial_filename) as writer:
        for image, label in zip(train_data, train_labels):
            # Each image is stored as its raw float32 bytes (tobytes replaces
            # the deprecated tostring) next to its integer label.
            feature = {'image': _bytes_feature(image.tobytes()),
                       'label': _int64_feature(int(label))}
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
    os.replace(partial_filename, train_filename)

    print("Training data generated in " + train_filename)


# Load the evaluation batch.
eval_batch = unpickle(cifar_path + 'test_batch')

# Convert the images to the same channels-last float layout as the training images.
eval_data = process_data(eval_batch[b'data'])

# Keep the labels as an int32 array, matching what the MNIST loading cell
# produces, so later cells can index and slice them the same way for either dataset.
eval_labels = np.asarray(eval_batch[b'labels'], dtype=np.int32)

# Function to parse a training tfrecord
def parse_cifar_record(record):
    """Decode one serialized training example into an ``(image, label)`` pair.

    Args:
        record: A serialized ``tf.train.Example`` with a bytes feature
            ``'image'`` and an int64 feature ``'label'``.

    Returns:
        image: float32 tensor of shape (image_height, image_width,
            color_channels), decoded from the raw bytes.
        label: the label cast to float32.
    """
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    # The image bytes are raw float32 values; pin the flat length before
    # reshaping to the image layout.
    flat_pixels = tf.decode_raw(parsed['image'], tf.float32)
    flat_pixels.set_shape([color_channels * image_height * image_width])
    image = tf.reshape(flat_pixels, [image_height, image_width, color_channels])

    label = tf.cast(parsed['label'], tf.float32)
    return image, label

In [None]:
# TODO: The neural network
class ConvNet:
    """Convolutional image classifier expressed as a TensorFlow graph.

    Convolutional networks are built from a few basic layers: convolutional
    layers to learn features, pooling layers to simplify the data, and dense
    layers at the end to output a decision. Here that is two 5x5 convolution +
    2x2 max-pool stages (32 then 64 filters), a 1024-unit dense layer, dropout,
    and a final dense layer producing one logit per class.

    Dropout is applied only on the path that feeds the loss. ``choice``,
    ``probability`` and the accuracy metric read the same output layer without
    dropout, so evaluation and inference are not randomly perturbed.

    Attributes:
        input_layer: float32 placeholder of shape
            (None, image_height, image_width, channels) for the input images.
        labels: float32 placeholder for the class indices of the fed images.
        choice: index of the highest logit per image (the predicted class).
        probability: softmax of the logits (per-class probabilities).
        accuracy: value tensor of the streaming accuracy metric.
        accuracy_op: update op that folds the fed batch into that metric.
        loss: softmax cross-entropy between the one-hot labels and the logits.
        train_operation: one gradient-descent step (learning rate 1e-2) on ``loss``.
    """

    def __init__(self, image_height, image_width, channels, num_classes):
        """Build the graph.

        Args:
            image_height: Height of the input images in pixels.
            image_width: Width of the input images in pixels.
            channels: Number of color channels per pixel.
            num_classes: Number of output classes (size of the logit vector).
        """
        self.input_layer = tf.placeholder(dtype=tf.float32, shape=[None, image_height, image_width, channels], name="inputs")
        print(self.input_layer.shape)  # it's important to print each tensor for debugging

        conv_layer_1 = tf.layers.conv2d(self.input_layer, filters=32, kernel_size=[5, 5], padding="same", activation=tf.nn.relu)
        print(conv_layer_1.shape)

        pooling_layer_1 = tf.layers.max_pooling2d(conv_layer_1, pool_size=[2, 2], strides=2)
        print(pooling_layer_1.shape)

        conv_layer_2 = tf.layers.conv2d(pooling_layer_1, filters=64, kernel_size=[5, 5], padding="same", activation=tf.nn.relu)
        print(conv_layer_2.shape)

        pooling_layer_2 = tf.layers.max_pooling2d(conv_layer_2, pool_size=[2, 2], strides=2)
        print(pooling_layer_2.shape)

        flattened_pooling = tf.layers.flatten(pooling_layer_2)
        dense_layer = tf.layers.dense(flattened_pooling, 1024, activation=tf.nn.relu)
        print(dense_layer.shape)

        # The output layer is created once and applied twice so both paths
        # share the same weights:
        #   * training path:  dense -> dropout -> output layer (feeds the loss)
        #   * inference path: dense -> output layer (feeds predictions/metrics)
        # Previously dropout was hard-wired on for every use of the network,
        # which also randomised predictions at evaluation time.
        # NOTE(review): the layer should still be auto-named like the former
        # tf.layers.dense call, keeping old checkpoints loadable -- confirm.
        dropout = tf.layers.dropout(dense_layer, rate=0.4, training=True)
        output_layer = tf.layers.Dense(num_classes)
        training_outputs = output_layer(dropout)
        outputs = output_layer(dense_layer)
        print(outputs.shape)

        # argmax picks the index of the largest logit, i.e. the chosen class;
        # softmax turns the logits into per-class probabilities.
        self.choice = tf.argmax(outputs, axis=1)
        self.probability = tf.nn.softmax(outputs)

        self.labels = tf.placeholder(dtype=tf.float32, name="labels")

        # tf.metrics.accuracy returns (value, update_op): running accuracy_op
        # accumulates the fed batch, reading accuracy returns the running result.
        self.accuracy, self.accuracy_op = tf.metrics.accuracy(self.labels, self.choice)

        # Softmax cross-entropy against one-hot labels is the training loss.
        one_hot_labels = tf.one_hot(indices=tf.cast(self.labels, dtype=tf.int32), depth=num_classes)
        self.loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=training_outputs)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-2)
        # NOTE(review): nothing in this class creates a global step, so
        # get_global_step() presumably returns None here and no step counter
        # is incremented -- confirm before relying on it.
        self.train_operation = optimizer.minimize(loss=self.loss, global_step=tf.train.get_global_step())

In [None]:
# Training configuration.
batch_size = 32          # images passed to the network at each training step (alternative noted by the author: 64)
training_steps = 10000   # number of iterations of the training loop (alternative noted by the author: 20000)

load_checkpoint = False  # set to True to resume from a previously saved model

# Directory where this model's checkpoints are saved.
path = "./{}-cnn/".format(model_name)

# Accuracy samples collected during training; plotted after the training loop.
performance_graph = np.empty(0)

In [None]:
# Build the input pipeline and the network, then run the training loop.
tf.reset_default_graph()

if model_name == "mnist":
    # In-memory arrays: shuffle over the whole training set, batch, and loop forever.
    dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
    dataset = dataset.shuffle(buffer_size=train_labels.shape[0])
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
else:
    # Stream examples from the TFRecord file written by the CIFAR loading cell.
    dataset = tf.data.TFRecordDataset([train_filename]).repeat()
    dataset = dataset.map(parse_cifar_record)
    dataset = dataset.shuffle(buffer_size=45000)
    dataset = dataset.batch(batch_size)

dataset_iterator = dataset.make_initializable_iterator()
next_element = dataset_iterator.get_next()  # op that yields the next (images, labels) batch

# One output per category name instead of a hard-coded class count.
cnn = ConvNet(image_height, image_width, color_channels, len(category_names))

# The saver stores the graph's variables between runs; max_to_keep limits how
# many checkpoints stay on disk.
saver = tf.train.Saver(max_to_keep=2)

os.makedirs(path, exist_ok=True)

with tf.Session() as sess:
    # Either resume from the latest checkpoint or start from fresh weights.
    if load_checkpoint:
        checkpoint = tf.train.get_checkpoint_state(path)
        if checkpoint is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError("load_checkpoint is True but no checkpoint was found in " + path)
        saver.restore(sess, checkpoint.model_checkpoint_path)
    else:
        sess.run(tf.global_variables_initializer())

    # Local variables back the accuracy metric; the iterator also needs initializing.
    sess.run(tf.local_variables_initializer())
    sess.run(dataset_iterator.initializer)

    step = 0  # keeps the final save below valid even when training_steps is 0
    for step in range(training_steps):
        batch_inputs, batch_labels = sess.run(next_element)

        # One optimisation step, plus folding this batch into the accuracy metric.
        sess.run((cnn.train_operation, cnn.accuracy_op),
                 feed_dict={cnn.input_layer: batch_inputs, cnn.labels: batch_labels})

        if step % 10 == 0:
            # cnn.accuracy is a streaming metric: it covers every batch seen
            # since the local variables were initialized above.
            current_acc = sess.run(cnn.accuracy)
            performance_graph = np.append(performance_graph, current_acc)

            # Every multiple of 1000 is also a multiple of 10, so the value
            # fetched above is reused for the progress report.
            if step % 1000 == 0 and step > 0:
                print("Accuracy at step " + str(step) + ": " + str(current_acc))
                print("Saving checkpoint")
                saver.save(sess, path + model_name, step)

    print("Saving final checkpoint for training session.")
    saver.save(sess, path + model_name, step)
    # No explicit sess.close(): leaving the `with` block closes the session.

In [None]:
# Plot the accuracy recorded during training.
# The training loop records one sample every 10 steps (`step % 10 == 0`), so the
# sample index is scaled back to steps; otherwise the "Steps" axis is off by 10x.
sample_interval = 10
steps_axis = np.arange(len(performance_graph)) * sample_interval

fig, ax = plt.subplots()
fig.set_facecolor('white')
ax.set_xlabel("Steps")
ax.set_ylabel("Accuracy")
ax.plot(steps_axis, performance_graph);

In [None]:
# Run the whole evaluation set through the trained model and print its accuracy.
eval_batch_size = 100  # examples scored per session call

with tf.Session() as sess:
    checkpoint = tf.train.get_checkpoint_state(path)
    if checkpoint is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("No checkpoint found in " + path + "; run the training cell first.")
    saver.restore(sess, checkpoint.model_checkpoint_path)  # load saved checkpoint

    # Re-initializing the local variables resets the accuracy metric's counters.
    sess.run(tf.local_variables_initializer())

    # Score the data in batches rather than one image per session call. Images
    # and labels are sliced identically so their leading dimensions match. The
    # metric accumulates across calls, so the final value covers the full set.
    for start in range(0, len(eval_data), eval_batch_size):
        stop = start + eval_batch_size
        sess.run(cnn.accuracy_op,
                 feed_dict={cnn.input_layer: eval_data[start:stop],
                            cnn.labels: eval_labels[start:stop]})

    print(sess.run(cnn.accuracy))

In [None]:
# Show randomly chosen evaluation images with the network's guess (G) and the actual label (A).

rows = 5
cols = 2

with tf.Session() as sess:
    checkpoint = tf.train.get_checkpoint_state(path)
    if checkpoint is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("No checkpoint found in " + path + "; run the training cell first.")
    saver.restore(sess, checkpoint.model_checkpoint_path)

    # Pick one distinct evaluation image per grid cell.
    indexes = np.random.choice(len(eval_data), rows * cols, replace=False)

    # Classify all selected images in a single batched call.
    guesses = sess.run(cnn.choice, feed_dict={cnn.input_layer: eval_data[indexes]})

fig, axes = plt.subplots(rows, cols, figsize=(5, 5))
fig.patch.set_facecolor('white')

# Draw on the axes created by plt.subplots (row-major order) instead of
# re-creating each one with plt.subplot.
for sub, idx, guess in zip(axes.flat, indexes, guesses):
    img = eval_data[idx]
    if color_channels == 1:
        # Single-channel images are drawn as 2-D arrays, so drop the channel axis.
        img = img.reshape(image_height, image_width)
    sub.imshow(img)

    # category_names holds the display name of every class for both datasets.
    guess_name = category_names[guess]
    actual_name = category_names[eval_labels[idx]]
    sub.set_title("G: " + guess_name + " A: " + actual_name)

fig.tight_layout()

In [None]:
# Creates a brand-new session and immediately closes it. The sessions used in
# the cells above are opened in `with` blocks, so this line does not affect them.
# NOTE(review): presumably intended as cleanup; confirm whether it is still needed.
tf.Session().close()
