Austin Feydt (apf31)

March 30 2018

EECS 531: Assignment 3

# Simple Network

I began by constructing a very simple neural network as a baseline.  This network flattens each image into a one-dimensional vector of 64x64x3 = 12,288 values and passes it through a fully connected layer, which reduces those 12,288 input nodes to just 128 nodes in the hidden layer (with the rectified linear unit as the activation function). The hidden layer is then fully connected to the output layer, which has just 3 nodes, one for each of the 3 labels. The network chooses the largest output as the predicted label of the input object. 

Below, we can see the implementation of this simple network.

In [1]:
import tensorflow as tf
import numpy as np
import os


# Directory that net_input() joins with the TFRecord shard file names.
DATA_DIR = "data/"
# Number of training examples; not referenced by the code in this section.
TRAINING_SET_SIZE = 13718
# Examples per batch; also used as the leading dimension of the fixed tensor shapes.
BATCH_SIZE = 16
# Side length, in pixels, that each decoded image is cropped or padded to.
IMAGE_SIZE = 64
# Number of colour channels requested when decoding each JPEG.
CHANNELS = 3


def _int64_feature(value):
    """Wrap one integer, or an iterable of integers, in a tf.train.Feature.

    A non-iterable argument (a plain int, for example) is promoted to a
    one-element list, so this helper can be called with a single value in the
    same way as _bytes_feature. Iterables are passed through unchanged.
    """
    try:
        iter(value)
    except TypeError:
        # Int64List needs a sequence; a bare scalar would be rejected.
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def _bytes_feature(value):
    """Wrap a single bytes value in a tf.train.Feature holding a one-element BytesList."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

class _image_object:
    """Mutable record bundling the fields read from one TFRecord example."""

    def __init__(self):
        # Every attribute starts out as an empty tf.Variable of the dtype given
        # here; read_and_decode() reassigns each of them to a tensor.
        # NOTE(review): these are real tf.Variable objects, so they presumably
        # take part in automatic variable naming and in any tf.train.Saver
        # built afterwards -- confirm before removing or reordering them.
        self.image = tf.Variable([], dtype = tf.string)
        self.height = tf.Variable([], dtype = tf.int64)
        self.width = tf.Variable([], dtype = tf.int64)
        self.filename = tf.Variable([], dtype = tf.string)
        # Declared int32 here, but read_and_decode() stores an int64 tensor.
        self.label = tf.Variable([], dtype = tf.int32)

def read_and_decode(filename_queue):
    """Read one serialized example from the queue and unpack it into an _image_object.

    The JPEG payload is decoded with CHANNELS channels and then cropped or
    padded to IMAGE_SIZE x IMAGE_SIZE. Height, width, filename and label are
    copied from the parsed features, with the label cast to int64.
    """
    feature_spec = {
        "image/encoded": tf.FixedLenFeature([], tf.string),
        "image/height": tf.FixedLenFeature([], tf.int64),
        "image/width": tf.FixedLenFeature([], tf.int64),
        "image/filename": tf.FixedLenFeature([], tf.string),
        "image/class/label": tf.FixedLenFeature([], tf.int64),
    }

    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    decoded = tf.image.decode_jpeg(parsed["image/encoded"], channels=CHANNELS)
    # Prints the symbolic tensor (name, shape, dtype), not pixel data.
    print(decoded)

    record = _image_object()
    record.image = tf.image.resize_image_with_crop_or_pad(decoded, IMAGE_SIZE, IMAGE_SIZE)
    record.height = parsed["image/height"]
    record.width = parsed["image/width"]
    record.filename = parsed["image/filename"]
    record.label = tf.cast(parsed["image/class/label"], tf.int64)
    return record

def net_input(if_random = True, if_training = True):
    """Build the queue-based input pipeline and return batched tensors.

    Args:
        if_random: accepted by the signature but not consulted by this function.
        if_training: selects the "train-" shard when true, otherwise the
            "validation-" shard.

    Returns:
        A tuple (image_batch, label_batch, filename_batch) of BATCH_SIZE examples,
        with each image passed through per-image standardization.

    Raises:
        ValueError: if the selected shard file does not exist.
    """
    if if_training:
        shard_pattern = "train-0000%d-of-00002.tfrecord"
    else:
        shard_pattern = "validation-0000%d-of-00002.tfrecord"

    # Only shard index 0 is listed, although the file names mention two shards.
    filenames = [os.path.join(DATA_DIR, shard_pattern % shard) for shard in range(1)]

    missing = next((path for path in filenames if not tf.gfile.Exists(path)), None)
    if missing is not None:
        raise ValueError("Failed to find file: " + missing)

    record = read_and_decode(tf.train.string_input_producer(filenames))
    standardized = tf.image.per_image_standardization(record.image)

    batched = tf.train.batch(
        [standardized, record.label, record.filename],
        batch_size=BATCH_SIZE,
        num_threads=1,
    )
    image_batch, label_batch, filename_batch = batched
    return image_batch, label_batch, filename_batch


def weight_variable(shape):
    """Return a tf.Variable of the given shape initialised from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(val, shape):
    """Return a float32 tf.Variable of the given shape filled with val.

    Args:
        val: fill value; 0 produces an all-zeros bias.
        shape: shape of the bias tensor.

    The constant branch pins dtype to float32 so that an integer val (e.g. 1)
    yields the same dtype as the zero branch, not an int32 variable that
    cannot be added to float32 activations.
    """
    if val == 0:
        return tf.Variable(tf.zeros(shape))
    return tf.Variable(tf.constant(val, shape=shape, dtype=tf.float32))

def conv2d(x, W, padding):
    """Apply tf.nn.conv2d to x with filter W, a stride of 1 in every dimension, and the given padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding=padding)

def max_pool_2x2(x, kernel_size):
    """Max-pool x with a 2x2 window, stride 2 and 'VALID' padding.

    kernel_size is part of the signature but is not used: the window is
    always 2x2.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='VALID')

In [2]:
def simplenet_inference(image_batch):
    """Build the two-layer fully connected network and return its logits.

    The batch is flattened to [BATCH_SIZE, IMAGE_SIZE*IMAGE_SIZE*CHANNELS],
    passed through a 128-unit ReLU hidden layer, and projected to 3 outputs.
    """
    hidden_units = 128
    num_outputs = 3
    flat_pixels = IMAGE_SIZE*IMAGE_SIZE*CHANNELS

    flat_images = tf.reshape(image_batch, [BATCH_SIZE, flat_pixels])

    print(flat_images.shape)

    # Variables are created in the order: hidden weights, hidden bias,
    # output weights, output bias.
    # NOTE(review): presumably this order fixes the auto-generated variable
    # names stored in checkpoints -- keep it stable.
    hidden_weights = weight_variable([flat_pixels, hidden_units])
    hidden_bias = bias_variable(0, [hidden_units])

    output_weights = weight_variable([hidden_units, num_outputs])
    output_bias = bias_variable(0, [num_outputs])

    # Hidden layer: affine transform followed by ReLU.
    hidden = tf.nn.relu(tf.matmul(flat_images, hidden_weights) + hidden_bias)

    # Output layer: plain affine transform, no activation.
    return tf.matmul(hidden, output_weights) + output_bias


def simple_train():
    """Train the simple fully connected network and checkpoint its weights.

    Batches are pulled from the training input queue and fed to the network
    through placeholders. Training minimises the mean squared error between
    the logits and the one-hot labels with plain gradient descent.

    Training resumes from "simpleoutput/checkpoint-train" when that checkpoint
    exists and otherwise starts from freshly initialised variables. The loss
    is printed and a checkpoint written every 25 steps. Training stops after
    10000 steps or as soon as the batch loss drops below 0.02, and the final
    weights are always saved.
    """
    checkpoint_path = "simpleoutput/checkpoint-train"
    max_steps = 10000
    report_every = 25
    target_loss = 0.02

    image_batch_out, label_batch_out, _ = net_input(if_random = False, if_training = True)

    image_batch_placeholder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS])
    image_batch = tf.reshape(image_batch_out, (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS))

    label_batch_placeholder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 3])
    # Shift every label down by one before one-hot encoding into 3 slots.
    label_offset = -tf.ones([BATCH_SIZE], dtype=tf.int64, name="label_batch_offset")
    label_batch_one_hot = tf.one_hot(tf.add(label_batch_out, label_offset), depth=3, on_value=1.0, off_value=0.0)

    logits_out = simplenet_inference(image_batch_placeholder)
    loss = tf.losses.mean_squared_error(labels=label_batch_placeholder, predictions=logits_out)
    train_step = tf.train.GradientDescentOptimizer(0.00005).minimize(loss)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        file_writer = tf.summary.FileWriter("./logs", sess.graph)

        sess.run(tf.global_variables_initializer())
        if tf.train.checkpoint_exists(checkpoint_path):
            # Resume from the previous run.
            saver.restore(sess, checkpoint_path)
        else:
            # First run: make sure the checkpoint directory exists before saving.
            tf.gfile.MakeDirs(os.path.dirname(checkpoint_path))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess = sess)

        try:
            for i in range(max_steps):
                # First materialise a batch from the queue, then feed it back
                # through the placeholders for the training step.
                image_out, label_batch_one_hot_out = sess.run([image_batch, label_batch_one_hot])
                _, loss_out = sess.run(
                    [train_step, loss],
                    feed_dict={image_batch_placeholder: image_out, label_batch_placeholder: label_batch_one_hot_out},
                )

                if i % report_every == 0:
                    print(loss_out)
                    saver.save(sess, checkpoint_path)

                if loss_out < target_loss:
                    break

            # Persist the final weights even when the loop ends between
            # periodic saves.
            saver.save(sess, checkpoint_path)
        finally:
            # Always stop the queue-runner threads and flush the summary
            # writer, including when a step raises.
            coord.request_stop()
            coord.join(threads)
            file_writer.close()

#simple_train()

In [3]:
def simple_evaluate():
    """Evaluate the trained simple network on the validation input queue.

    Restores the weights from "simpleoutput/checkpoint-train", runs 50 batches
    of BATCH_SIZE validation images, prints the real and predicted labels for
    every tenth batch, and finally prints the mean per-batch accuracy.
    """
    num_batches = 50
    report_every = 10

    image_batch_out, label_batch_out, _ = net_input(if_random = False, if_training = False)

    image_batch_placeholder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS])
    image_batch = tf.reshape(image_batch_out, (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS))

    label_tensor_placeholder = tf.placeholder(tf.int64, shape=[BATCH_SIZE])
    # Shift every label down by one so it is comparable with the argmax index.
    label_offset = -tf.ones([BATCH_SIZE], dtype=tf.int64, name="label_batch_offset")
    label_batch = tf.add(label_batch_out, label_offset)

    logits_out = simplenet_inference(image_batch_placeholder)
    # tf.argmax replaces the deprecated tf.arg_max and already yields int64.
    logits_batch = tf.argmax(logits_out, axis=1)

    correct_prediction = tf.equal(logits_batch, label_tensor_placeholder)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess,  "simpleoutput/checkpoint-train")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess = sess)

        try:
            accuracy_accu = 0

            for i in range(num_batches):
                # Materialise a batch from the queue, then feed it back through
                # the placeholders for inference.
                image_out, label_out = sess.run([image_batch, label_batch])

                accuracy_out, logits_batch_out = sess.run(
                    [accuracy, logits_batch],
                    feed_dict={image_batch_placeholder: image_out, label_tensor_placeholder: label_out},
                )
                accuracy_accu += accuracy_out
                if i % report_every == 0:
                    print('real labels:')
                    print(label_out)
                    print('predicted labels:')
                    print(logits_batch_out)
                    print('')

            print("Accuracy: ")
            print(accuracy_accu / num_batches)
        finally:
            # Always stop the queue-runner threads, including when a batch raises.
            coord.request_stop()
            coord.join(threads)

# Run the evaluation pass for this cell.
simple_evaluate()

Tensor("DecodeJpeg:0", shape=(?, ?, 3), dtype=uint8)
(16, 12288)
Instructions for updating:
Use `argmax` instead
INFO:tensorflow:Restoring parameters from simpleoutput/checkpoint-train
real labels:
[2 0 0 0 2 2 2 2 1 2 1 2 0 1 0 2]
predicted labels:
[2 0 0 0 2 2 2 2 2 2 2 2 0 2 0 2]

real labels:
[0 1 0 2 2 2 1 0 2 0 1 0 1 0 1 2]
predicted labels:
[0 2 0 2 2 2 2 0 2 2 2 0 2 0 2 2]

real labels:
[1 0 0 0 0 2 1 0 0 0 2 2 2 0 1 2]
predicted labels:
[2 0 0 0 0 2 2 0 0 0 2 2 2 0 2 2]

real labels:
[1 1 2 1 2 1 0 0 0 2 1 2 2 2 0 1]
predicted labels:
[2 2 2 2 2 2 0 0 0 2 2 2 2 2 0 2]

real labels:
[1 0 0 1 0 2 1 2 0 1 1 1 0 2 2 2]
predicted labels:
[2 0 0 2 0 2 2 2 0 2 2 2 0 2 2 2]

Accuracy: 
0.66875


### Analysis
From above, we can see that the accuracy is only around 67%.  Chance level for three labels is roughly 33% (assuming the labels are about evenly represented), so the network has learned something, but it is still a weak classifier: in the printed batches it never predicts label 1, so it is effectively only separating two of the three classes.  This is mainly because of the structure of the network.  We know that there is almost always an underlying structure to the pixels in an image.  However, this network strings the entire image out into a single line of pixels, getting rid of any structure whatsoever.  Thus, the network is just attempting to predict the output based on a sequence of 12,288 pixel values, with no knowledge of the original spatial features of the image. We will next construct a convolutional neural network, which will take advantage of the structure of the input images.