# Section 7

### Today
<ul>
<li><a href='https://www.tensorflow.org/versions/master/api_docs/python/tf/data/TFRecordDataset'>TensorFlow Datasets</a></li>
<li>Fully Convolutional Networks</li>
<li><a href='https://www.tensorflow.org/api_docs/python/tf/image'>Up-Convolutions</a></li>
<li><a href='https://www.tensorflow.org/api_docs/python/tf/layers/conv2d'>Skip Connections</a></li>
</ul>

### Related Reading
<ul>
<li><a href='https://distill.pub/2016/deconv-checkerboard/'>Deconvolution Visualization</a></li>
<li><a href='https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf'>Fully Convolutional Networks for Semantic Segmentation</a></li>
<li><a href='https://arxiv.org/pdf/1603.07285v1.pdf'>A guide to convolution arithmetic for deep
learning</a></li>
<li><a href='http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf'>Deconvolutional Networks</a></li>
<li><a href='https://www.tensorflow.org/programmers_guide/datasets'>TensorFlow Programmer's Guide on Importing Data</a> - focuses on the newer Dataset API tf.contrib.data.Dataset (not what we are using for now, but it has a similar API).</li>
</ul>

In [1]:
import tensorflow as tf
import numpy as np
import util

## Datasets - <a href='https://www.tensorflow.org/versions/master/api_docs/python/tf/data/TFRecordDataset'>tf.data.TFRecordDataset</a>
"The TFRecord file format is a simple record-oriented binary format that many TensorFlow applications use for training data. The tf.contrib.data.TFRecordDataset class enables you to stream over the contents of one or more TFRecord files as part of an input pipeline."

In [2]:
def parser(record):
    """Decode one serialized TFRecord example into an (image, label) tensor pair.

    The record is expected to carry the scalar int64 features 'height' and
    'width' plus the byte strings 'image_raw' and 'label_raw'.  Both byte
    strings are decoded as uint8; the image is reshaped to [height, width, 3]
    and the label to [height, width].
    """
    # Description of the fields stored in every record
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label_raw': tf.FixedLenFeature([], tf.string),
    }
    example = tf.parse_single_example(record, features=feature_spec)

    # The stored dimensions are int64; they are cast to int32 before being used as reshape targets
    rows = tf.cast(example['height'], tf.int32)
    cols = tf.cast(example['width'], tf.int32)

    # Raw bytes -> uint8 tensors of the recorded size
    image_bytes = tf.decode_raw(example['image_raw'], tf.uint8)
    image = tf.reshape(image_bytes, [rows, cols, 3])
    label_bytes = tf.decode_raw(example['label_raw'], tf.uint8)
    label = tf.reshape(label_bytes, [rows, cols])

    # No further preprocessing is applied here
    return image, label

def data_augmentation(image, label):
    """Augmentation hook mapped over the parsed dataset.

    Currently a pass-through: the (image, label) pair is returned unchanged.

    TODO: Apply some data augmentation, namely cropping and mirroring for faster training
    """
    augmented_pair = (image, label)
    return augmented_pair

def load_dataset(tfrecord, augment=True, batch_size=32, shuffle_buffer_size=10000):
    """Build a shuffled, batched and repeated input pipeline from a TFRecord file.

    Args:
        tfrecord: TFRecord filename(s), passed straight to TFRecordDataset.
        augment: when True, `data_augmentation` is mapped over every parsed
            (image, label) pair.
        batch_size: number of examples per batch (default 32, the value that
            used to be hard-coded).
        shuffle_buffer_size: size of the shuffle buffer (default 10000, the
            value that used to be hard-coded).

    Returns:
        A dataset yielding (image, label) batches.
    """
    # Load the dataset
    dataset = tf.contrib.data.TFRecordDataset(tfrecord)

    # Parse the tf record entries
    dataset = dataset.map(parser)
    if augment:
        dataset = dataset.map(data_augmentation)

    # Shuffle the data, batch it and run this for multiple epochs
    # (repeat() is called without a count)
    dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    return dataset

# Let's clear the tensorflow graph, so that you don't have to restart the notebook every time you change the network
tf.reset_default_graph()

# Augmentation is a training-time technique: the validation set is loaded without it,
# so validation scores are measured on unmodified data
train_data = load_dataset('train.tfrecord')
valid_data = load_dataset('valid.tfrecord', augment=False)

# Create an iterator for the datasets
# The iterator allows us to quickly switch between training and validation
iterator = tf.contrib.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)

# and fetch the next images from the dataset (every time next_image is evaluated a new image set of 32 images is returned)
next_image, next_label = iterator.get_next()

# Define operations that switch between train and valid
# (running one of them re-initializes the shared iterator on that dataset)
switch_train_op = iterator.make_initializer(train_data)
switch_valid_op = iterator.make_initializer(valid_data)


## Fully Convolutional Networks (FCNs)
<b>Key Idea</b>: Whole Image Inputs -- Whole Image Ground Truths

In [3]:
# Input placeholder: a batch of 64x64 images with 3 channels
inputs = tf.placeholder(tf.float32, (None, 64, 64, 3))
print(inputs)

# Strided convolution: stride 2 halves the spatial resolution (64x64 -> 32x32, see the printed shapes)
h = tf.contrib.layers.conv2d(inputs, num_outputs=30, kernel_size=(5, 5), stride=(2, 2))
print(h)

# Up-Convolution: the stride-2 transposed convolution brings the resolution back to 64x64 with 3 channels
output = tf.contrib.layers.conv2d_transpose(h, num_outputs=3, kernel_size=(5, 5), stride=(2, 2))
print(output)

Tensor("Placeholder:0", shape=(?, 64, 64, 3), dtype=float32)
Tensor("Conv/Relu:0", shape=(?, 32, 32, 30), dtype=float32)
Tensor("Conv2d_transpose/Relu:0", shape=(?, 64, 64, 3), dtype=float32)


In [5]:
# FCN as a Convolutional Auto-encoder
# The network is trained to reproduce its own input: the loss is the mean squared
# difference between the input placeholder and the network output
loss = tf.reduce_mean((inputs - output) ** 2)
# Adam with its default settings; `opt` is the op that minimizes the loss
opt = tf.train.AdamOptimizer().minimize(loss)

# Get your favorite images
def load(filename, W=64, H=64):
    """Read a flat uint8 file of labelled images.

    Every record in the file holds W*H*3 image bytes followed by one label byte.

    Returns:
        (images, labels): images has shape (N, H, W, 3) and labels has shape
        (N,), both uint8, where N is the number of records in the file.
    """
    pixels_per_image = W * H * 3
    # One row per record: the pixels first, the label in the last column
    records = np.fromfile(filename, dtype=np.uint8)
    records = records.reshape((-1, pixels_per_image + 1))
    images = records[:, :pixels_per_image].reshape((-1, H, W, 3))
    labels = records[:, pixels_per_image]
    return images, labels

# Read the stored images (the labels are not needed by the auto-encoder)
image_data, label_data = load('tux_train.dat')

# Open a session and initialise all the network variables
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Fit the auto-encoder on the first five images only:
# the very same feed is reused for each of the 1000 optimizer steps
first_five_feed = {inputs: image_data[0:5]}
for e in range(1000):
    sess.run(opt, feed_dict=first_five_feed)


## Skip Connections
"Combining fine layers and coarse layers lets the model make local predictions that respect global structure." - <a href='https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf'>Fully Convolutional Networks for Semantic Segmentation</a>


In [7]:
# Option 1: Use tensors of same shape and add
inputs = tf.placeholder(tf.float32, (None, 32, 32, 3))

# Encoder: three stride-2 convolutions, each halving the resolution
# (32 -> 16 -> 8 -> 4, see the printed shapes). `h` is overwritten at every step.
h = tf.contrib.layers.conv2d(inputs, num_outputs=30, kernel_size=(5, 5), stride=(2, 2))
print('h1: ' + str(h))

h = tf.contrib.layers.conv2d(h, num_outputs=20, kernel_size=(5, 5), stride=(2, 2))
print('h2: ' + str(h))

h = tf.contrib.layers.conv2d(h, num_outputs=10, kernel_size=(5, 5), stride=(2, 2))
print('h3: ' + str(h))
print('')

# Decoder step 1: up-convolve the 4x4 bottleneck to 8x8.
# Note that the labels h1/h2/h3 printed below now refer to the decoder tensors.
h1 = tf.contrib.layers.conv2d_transpose(h, num_outputs=10, kernel_size=(5, 5), stride=(2, 2))
# Skip branch from the bottleneck: one stride-8 up-convolution goes straight to a 32x32x3 tensor
h1_skip = tf.contrib.layers.conv2d_transpose(h, num_outputs=3, kernel_size=(5, 5), stride=(8, 8))
# change kernel size to 8,8 or add more skips
print('h1: ' + str(h1))
print('h1_skip: ' + str(h1_skip) + '\n')

# Decoder step 2: 8x8 -> 16x16, plus a skip branch that up-convolves h2 to 32x32x3
h2 = tf.contrib.layers.conv2d_transpose(h1, num_outputs=20, kernel_size=(5, 5), stride=(2, 2))
h2_skip = tf.contrib.layers.conv2d_transpose(h2, num_outputs=3, kernel_size=(5, 5), stride=(2, 2))
print('h2: ' + str(h2))
print('h2_skip: ' + str(h2_skip) + '\n')

# Decoder step 3: 16x16 -> 32x32 with 3 channels
h3 = tf.contrib.layers.conv2d_transpose(h2, num_outputs=3, kernel_size=(5, 5), stride=(2, 2))
print('h3: ' + str(h3) + '\n')

# All three branches have shape (?, 32, 32, 3), so they can be summed element-wise
output = h3 + h2_skip + h1_skip
print('output: ' + str(output))


h1: Tensor("Conv_4/Relu:0", shape=(?, 16, 16, 30), dtype=float32)
h2: Tensor("Conv_5/Relu:0", shape=(?, 8, 8, 20), dtype=float32)
h3: Tensor("Conv_6/Relu:0", shape=(?, 4, 4, 10), dtype=float32)

h1: Tensor("Conv2d_transpose_6/Relu:0", shape=(?, 8, 8, 10), dtype=float32)
h1_skip: Tensor("Conv2d_transpose_7/Relu:0", shape=(?, 32, 32, 3), dtype=float32)

h2: Tensor("Conv2d_transpose_8/Relu:0", shape=(?, 16, 16, 20), dtype=float32)
h2_skip: Tensor("Conv2d_transpose_9/Relu:0", shape=(?, 32, 32, 3), dtype=float32)

h3: Tensor("Conv2d_transpose_10/Relu:0", shape=(?, 32, 32, 3), dtype=float32)

output: Tensor("add_3:0", shape=(?, 32, 32, 3), dtype=float32)


In [None]:
# Option 2: Use tensors with different numbers of channels and concatenate them
# (the placeholder is created only once, so no unused duplicate is left in the graph)
inputs = tf.placeholder(tf.float32, (None, 32, 32, 3))

# Encoder: three stride-2 convolutions; `h` is overwritten at every step
h = tf.contrib.layers.conv2d(inputs, num_outputs=30, kernel_size=(5, 5), stride=(2, 2))
print('h1: ' + str(h))

h = tf.contrib.layers.conv2d(h, num_outputs=20, kernel_size=(5, 5), stride=(2, 2))
print('h2: ' + str(h))

h = tf.contrib.layers.conv2d(h, num_outputs=10, kernel_size=(5, 5), stride=(2, 2))
print('h3: ' + str(h))
print('')

# Decoder step 1, plus a stride-8 skip branch taken directly from the bottleneck `h`.
# Note that the labels h1/h2/h3 printed below now refer to the decoder tensors.
h1 = tf.contrib.layers.conv2d_transpose(h, num_outputs=10, kernel_size=(5, 5), stride=(2, 2))
h1_skip = tf.contrib.layers.conv2d_transpose(h, num_outputs=10, kernel_size=(5, 5), stride=(8, 8))
# Print the decoder tensor h1 (not the bottleneck h), as Option 1 does
print('h1: ' + str(h1))
print('h1_skip: ' + str(h1_skip) + '\n')

# Decoder step 2, plus a stride-2 skip branch taken from h2
h2 = tf.contrib.layers.conv2d_transpose(h1, num_outputs=20, kernel_size=(5, 5), stride=(2, 2))
h2_skip = tf.contrib.layers.conv2d_transpose(h2, num_outputs=10, kernel_size=(5, 5), stride=(2, 2))
print('h2: ' + str(h2))
print('h2_skip: ' + str(h2_skip) + '\n')

# Decoder step 3: final up-convolution to 3 channels
h3 = tf.contrib.layers.conv2d_transpose(h2, num_outputs=3, kernel_size=(5, 5), stride=(2, 2))
print('h3: ' + str(h3) + '\n')

# The branches differ in channel count (10, 10 and 3), so they cannot be added;
# instead they are stacked along the last (channel) axis, giving 10 + 10 + 3 = 23 channels
filter_concat = tf.concat([h1_skip, h2_skip, h3], axis=-1)
print('filter_concat: ' + str(filter_concat))

# A 1x1 convolution mixes the concatenated channels down to the 3 output channels
output = tf.contrib.layers.conv2d(filter_concat, num_outputs=3, kernel_size=(1, 1), stride=(1, 1))
print('output: ' + str(output))
