In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# Homework 7
This homework focuses on fully convolutional networks.

## Part 0: Setup

In [2]:
import tensorflow as tf
import numpy as np
import util

# RGB palette used to render a label map as an image: row k is the colour drawn for class id k.
COLORS = np.array(
    [
        (0, 0, 0),        # class 0 -> black
        (255, 0, 0),      # class 1 -> red
        (0, 255, 0),      # class 2 -> green
        (255, 255, 0),    # class 3 -> yellow
        (0, 0, 255),      # class 4 -> blue
        (255, 255, 255),  # class 5 -> white
    ],
    dtype=np.uint8,
)
# Side length (in pixels) of the square patches cut out by the parser's random crop.
CROP_SIZE = 64

def parser(record):
    """Decode one serialized TFRecord example into an augmented (image, label) pair.

    The record is expected to carry four features: scalar int64 'height' and
    'width', and the raw uint8 bytes of the image ('image_raw', reshaped to
    [height, width, 3]) and of the label map ('label_raw', reshaped to
    [height, width]).

    Augmentation: image and label are stacked into one 4-channel tensor so that
    the random CROP_SIZE x CROP_SIZE crop and the random left/right flip hit
    both with identical parameters.

    Returns:
        A tuple (image, label): the first three channels of the augmented
        stack, and its last channel.
    """
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label_raw': tf.FixedLenFeature([], tf.string)
    }
    example = tf.parse_single_example(record, features=feature_spec)

    # Image dimensions are stored per record; the raw byte strings are flat.
    height = tf.cast(example['height'], tf.int32)
    width = tf.cast(example['width'], tf.int32)
    image = tf.reshape(tf.decode_raw(example["image_raw"], tf.uint8), [height, width, 3])
    label = tf.reshape(tf.decode_raw(example["label_raw"], tf.uint8), [height, width])

    # Glue the label on as a fourth channel so crop and flip stay in sync.
    stacked = tf.concat([image, tf.expand_dims(label, axis=-1)], axis=-1)
    stacked = tf.random_crop(stacked, [CROP_SIZE, CROP_SIZE, 4])
    stacked = tf.image.random_flip_left_right(stacked)

    # Split the stack back into its image and label parts.
    return stacked[:,:,:-1], stacked[:,:,-1]

def load_dataset(tfrecord):
    """Build the input pipeline for one TFRecord file.

    Every record is decoded and augmented by `parser` (8 threads, output
    buffer of 1024), the stream is shuffled through a 10000-element buffer,
    grouped into batches of 32 and then repeated.

    Args:
        tfrecord: path of the TFRecord file to read.

    Returns:
        The resulting dataset of (image, label) batches.
    """
    records = tf.contrib.data.TFRecordDataset(tfrecord)
    examples = records.map(parser, num_threads=8, output_buffer_size=1024)
    return examples.shuffle(buffer_size=10000).batch(32).repeat()

# Number of label classes. This has to agree with the number of rows in COLORS
# and with the number of entries in the per-class loss weights defined in Part 1.
num_classes = 6

## Part 1: Define your convnet
Important note: the label frequencies are severely imbalanced for this task. On the training set they are:
```[ 0.66839117, 0.00382957, 0.00092516, 0.00345217, 0.00339063, 0.3200113 ]```
On the validation set
```[ 0.68367316, 0.00392016, 0.00165766, 0.00194697, 0.0034067, 0.30539535]```
Tux, bonus, objects and enemies together make up less than 1.5% of all labels.
You should reweight the loss to address this; if you don't, your model will likely ignore every class except the background and tile labels.

In [3]:
# Pick a fresh log directory: the first 'log/model_<n>' (n = 0..999) that does not exist yet.
# If all of them exist, 'log/model_999' is reused. Delete old logs if you run low on disk space.
# run: `tensorboard --logdir log` to see all the nice summaries
from os import path

n_model = 0
LOG_DIR = 'log/model_%d'%n_model
while n_model < 999 and path.exists(LOG_DIR):
    n_model += 1
    LOG_DIR = 'log/model_%d'%n_model

# Let's clear the tensorflow graph, so that you don't have to restart the notebook every time you change the network
tf.reset_default_graph()

# Graph-side copy of the colour palette, used below to colour-code label maps for tensorboard
TF_COLORS = tf.constant(COLORS)

# NOTE: both datasets go through load_dataset, so the validation data also gets the
# random crop and random flip applied by parser.
train_data = load_dataset('train.tfrecord')
valid_data = load_dataset('valid.tfrecord')

# Create an iterator for the datasets
# The iterator is defined by structure only (types and shapes), which lets us quickly
# switch between training and validation data by re-initializing it.
# Declared shapes: image batches are (?, ?, ?, 3), label batches are (?, ?, ?).
iterator = tf.contrib.data.Iterator.from_structure(train_data.output_types, ((None,None,None,3), (None,None,None)))

# and fetch the next images from the dataset (every time next_image is evaluated a new image set of 32 images is returned)
next_image, next_label = iterator.get_next()

# Define operations that switch between train and valid
# (running one of them points the shared iterator at that dataset)
switch_train_op = iterator.make_initializer(train_data)
switch_valid_op = iterator.make_initializer(valid_data)

# Convert the input
image = tf.cast(next_image, tf.float32)
label = tf.cast(next_label, tf.int32)

# Whiten the input
# The tensor is explicitly named 'inputs' so that it can be fed by name ('inputs:0'),
# which is how the evaluation cell pushes full images through the network.
inputs = tf.identity(image, name='inputs')
# presumably 100 and 72 approximate the pixel mean and standard deviation of the data -- TODO confirm
white_inputs = (inputs - 100.) / 72.

# TODO: Define your convnet here
# Encoder/decoder network with skip connections:
#   * D stride-2 3x3 convolutions shrink the feature map step by step,
#   * D stride-2 3x3 transposed convolutions grow it back, and after each one the
#     tensor that was fed into the matching encoder layer is concatenated on.
C0 = 25  # base number of channels
D = 5    # number of down-sampling (and up-sampling) steps
h = white_inputs
hs = []  # hs[i] is the INPUT of encoder layer i+1 (hs[0] is the whitened image itself)
for i in range(D):
    hs.append(h)
    # NOTE(review): int() is applied before the multiplication, so the widths are
    # 25, 25, 50, 75, 125; int(C0*1.5**i) would give 25, 37, 56, 84, 126 -- confirm which was intended.
    h = tf.contrib.layers.conv2d(h, C0*int(1.5**i), (3,3), stride=2, scope='conv%d'%(i+1))

# Walk the levels in reverse (deepest first)
for i in range(D)[::-1]:
    h = tf.contrib.layers.conv2d_transpose(h, C0*int(1.5**i), (3,3), stride=2, scope='upconv%d'%(i+1))
    # Skip connection along the channel axis; this needs the spatial sizes of h and hs[i] to match.
    # Assumes the layers' default padding halves/doubles the size exactly, i.e. the input height
    # and width are multiples of 2**D -- TODO confirm.
    h = tf.concat([h, hs[i]], axis=-1)
# 1x1 convolution without activation: one raw score per class and pixel (used as logits by the loss)
h = tf.contrib.layers.conv2d(h, num_classes, (1,1), scope='cls', activation_fn=None)

# Let's compute the output labeling
# (class with the highest score per pixel; named 'output' so it can be fetched as 'output:0')
output = tf.identity(tf.argmax(h, axis=-1), name='output')

# Class-balanced loss.
# Each class k is weighted with freq_k ** -0.9 + 1, where freq_k is its label frequency
# (the numbers are the training-set frequencies quoted in the Part 1 notes), so rare
# classes weigh much more than background and tiles.
TRAIN_LABEL_FREQ = [ 0.66839117, 0.00382957, 0.00092516, 0.00345217, 0.00339063, 0.3200113 ]
loss_weight = tf.constant(TRAIN_LABEL_FREQ) ** -0.9 + 1

# Look up the weight of every pixel from its label, then take the weighted mean
# of the per-pixel cross entropy.
weight = tf.gather_nd(loss_weight,label[:,:,:,None])
pixel_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=h, labels=label)
loss = tf.reduce_sum(weight * pixel_xent) / tf.reduce_sum(weight)

# The regularization term is scaled down heavily so that it does not hurt the
# model performance; tune the factor if you wish.
regularization_loss = tf.losses.get_regularization_loss()
total_loss = loss + 1e-6 * regularization_loss

# Adam will likely converge much faster than SGD for this assignment.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)

# Tie the training step to the collected update ops, so that any batch_norm
# parameters are properly updated whenever `opt` runs.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    opt = optimizer.minimize(total_loss)

# Confusion matrix over all pixels of the batch (labels vs. predictions, both flattened).
flat_label = tf.reshape(label,[-1])
flat_output = tf.reshape(output,[-1])
confusion = tf.confusion_matrix(labels=flat_label, predictions=flat_output, num_classes=num_classes)

# Tensorboard summaries.
# Turn label maps into RGB images by looking every class id up in the palette.
colored_label = tf.gather_nd(TF_COLORS, label[:,:,:,None])
colored_output = tf.gather_nd(TF_COLORS, output[:,:,:,None])

# The confusion matrix is shown as a single one-channel image ...
confusion_image = tf.cast(confusion[None,:,:,None], tf.float32)
tf.summary.image('confusion', confusion_image, max_outputs=1)
# ... and up to three inputs / ground truths / predictions are shown per summary.
for image_tag, image_tensor in (('image', next_image), ('label', colored_label), ('output', colored_output)):
    tf.summary.image(image_tag, image_tensor, max_outputs=3)

# Scalar metrics are computed in numpy by the training loop and fed in through
# placeholders named after the metric (feed them as '<name>:0').
for metric_name in ('loss', 'accuracy', 'class_accuracy', 'jaccard',
                    'val_accuracy', 'val_class_accuracy', 'val_jaccard'):
    tf.summary.scalar(metric_name, tf.placeholder(tf.float32, name=metric_name))

merged_summary = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(LOG_DIR, tf.get_default_graph())

# Report the model size: total element count over all trainable variables.
n_trainable = np.sum([v.get_shape().num_elements() for v in tf.trainable_variables()])
print( "Total number of variables used ", n_trainable )

Total number of variables used  496449


## Part 2: Training

Training might take up to 20 minutes, depending on your architecture and on whether you have a GPU.

In [5]:
def accuracy(confusion):
    """Overall pixelwise accuracy: sum of the diagonal divided by the sum of all entries.

    This metric heavily favors tiles and background (as they are most frequent).

    Args:
        confusion: square confusion matrix (labels along one axis, predictions
            along the other).

    Returns:
        The fraction of correctly classified samples, or 0.0 when the matrix
        holds no samples at all. Without that guard an all-zero matrix would
        produce 0/0 = NaN, whereas class_accuracy and jaccard return 0.0 for it.
    """
    total = np.sum(confusion)
    if total == 0:
        # Nothing was counted: report 0 instead of dividing by zero.
        return 0.0
    return np.sum(np.diag(confusion)) / total

def class_accuracy(confusion):
    """Mean of the per-class accuracies.

    For every row of the confusion matrix the diagonal entry is divided by the
    row sum (plus 1e-10 to avoid dividing by zero); the result is the average
    of these ratios. Unlike the overall accuracy this normalizes for class
    frequencies, so small classes count as much as large ones.
    """
    hits_per_class = np.diag(confusion)
    samples_per_class = np.sum(confusion, axis=1) + 1e-10
    per_class = hits_per_class / samples_per_class
    return np.mean(per_class)

def jaccard(confusion):
    """Mean Jaccard index (intersection over union) across classes.

    Per class, the intersection is the diagonal entry and the union is
    row sum + column sum - diagonal (plus 1e-10 to avoid dividing by zero).
    A mix of overall and class-wise accuracy: it neither favors small nor
    large classes much.
    """
    intersection = np.diag(confusion)
    row_totals = np.sum(confusion, axis=1)
    col_totals = np.sum(confusion, axis=0)
    union = row_totals + col_totals - intersection + 1e-10
    return np.mean(intersection / union)

# Open a session and initialize all variables of the graph
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# 100 rounds; each round is 10 training steps followed by a single validation batch
for round_idx in range(100):
    # --- training: point the iterator at the training data and accumulate statistics
    sess.run(switch_train_op)
    total_confusion = np.zeros((num_classes, num_classes))
    loss_vals = []
    for step in range(10):
        confusion_val, loss_val, _ = sess.run([confusion, loss, opt])
        total_confusion += confusion_val
        loss_vals.append(loss_val)

    # --- validation: one batch, no optimizer step
    sess.run(switch_valid_op)
    confusion_val = sess.run(confusion)

    # Compute every metric once; the dict doubles as the feed for the summary placeholders
    summary_feed = {
        'loss:0': np.mean(loss_vals),
        'accuracy:0': accuracy(total_confusion),
        'class_accuracy:0': class_accuracy(total_confusion),
        'jaccard:0': jaccard(total_confusion),
        'val_accuracy:0': accuracy(confusion_val),
        'val_class_accuracy:0': class_accuracy(confusion_val),
        'val_jaccard:0': jaccard(confusion_val),
    }

    # Let's update tensorboard and print the same numbers to the notebook
    summary_writer.add_summary( sess.run(merged_summary, summary_feed), round_idx )
    print('[%3d] Loss: %0.3f  \t  A.: %0.3f  CA.: %0.3f  J.: %0.3f  \t  Val A.: %0.3f  CA.: %0.3f  J.: %0.3f'%(
        round_idx,
        summary_feed['loss:0'],
        summary_feed['accuracy:0'],
        summary_feed['class_accuracy:0'],
        summary_feed['jaccard:0'],
        summary_feed['val_accuracy:0'],
        summary_feed['val_class_accuracy:0'],
        summary_feed['val_jaccard:0'],
    ))


[  0] Loss: 1.764  	  A.: 0.126  CA.: 0.197  J.: 0.035  	  Val A.: 0.299  CA.: 0.198  J.: 0.075
[  1] Loss: 1.666  	  A.: 0.483  CA.: 0.231  J.: 0.108  	  Val A.: 0.651  CA.: 0.218  J.: 0.132
[  2] Loss: 1.511  	  A.: 0.622  CA.: 0.281  J.: 0.132  	  Val A.: 0.538  CA.: 0.260  J.: 0.105
[  3] Loss: 1.520  	  A.: 0.607  CA.: 0.283  J.: 0.128  	  Val A.: 0.784  CA.: 0.305  J.: 0.157
[  4] Loss: 1.393  	  A.: 0.610  CA.: 0.323  J.: 0.139  	  Val A.: 0.607  CA.: 0.321  J.: 0.131
[  5] Loss: 1.452  	  A.: 0.533  CA.: 0.329  J.: 0.116  	  Val A.: 0.702  CA.: 0.232  J.: 0.136
[  6] Loss: 1.397  	  A.: 0.570  CA.: 0.338  J.: 0.135  	  Val A.: 0.518  CA.: 0.357  J.: 0.134
[  7] Loss: 1.417  	  A.: 0.587  CA.: 0.352  J.: 0.145  	  Val A.: 0.716  CA.: 0.317  J.: 0.156
[  8] Loss: 1.384  	  A.: 0.638  CA.: 0.366  J.: 0.151  	  Val A.: 0.593  CA.: 0.241  J.: 0.132
[  9] Loss: 1.354  	  A.: 0.603  CA.: 0.387  J.: 0.142  	  Val A.: 0.589  CA.: 0.311  J.: 0.137
[ 10] Loss: 1.329  	  A.: 0.591  CA.: 0.

## Part 3: Evaluation
### Compute the validation accuracy

In [6]:
# Mean class accuracy over the full (uncropped) validation images.
# total_lbl[k]: number of pixels labelled k (starts at 1e-10 so that a class that
#               never occurs does not cause a division by zero)
# total_cor[k]: number of those pixels the network predicted correctly
total_lbl, total_cor = np.zeros(num_classes)+1e-10, np.zeros(num_classes)
for record in tf.python_io.tf_record_iterator('valid.tfrecord'):
    example = tf.train.Example()
    example.ParseFromString(record)
    feature = example.features.feature

    # Read the image size from the record (the same 'height'/'width' features that
    # parser uses) instead of assuming 256x256.
    # NOTE: the skip connections of the network presumably still require both sizes
    # to be multiples of 32 -- TODO confirm before evaluating other image sizes.
    H = feature['height'].int64_list.value[0]
    W = feature['width'].int64_list.value[0]
    I = np.frombuffer(feature['image_raw'].bytes_list.value[0], dtype=np.uint8).reshape(H, W, 3)
    L = np.frombuffer(feature['label_raw'].bytes_list.value[0], dtype=np.uint8).reshape(H, W)

    # Feed the image as a batch of one directly into the named 'inputs' tensor
    # (bypassing the dataset iterator) and fetch the predicted labels by name.
    P = sess.run('output:0', {'inputs:0':I[None]})
    correct = (P[0] == L)
    total_lbl += np.bincount(L.ravel(), minlength=num_classes)
    # Weighting each pixel by 1 (correct) or 0 (wrong) counts the correct pixels per class
    total_cor += np.bincount(L.ravel(), weights=correct.ravel().astype(np.float64), minlength=num_classes)
print( 'Mean class accuracy', np.mean(total_cor / total_lbl) )

Mean class accuracy 0.71041832503


## Part 4: Save Model
Please note that we also want you to turn in your notebook (`.ipynb`) for this assignment. Zip up the `.ipynb` file along with the saved `.tfg` model file for your submission.

In [7]:
# Write the trained model held by `sess` to 'assignment7.tfg' for submission.
# (`util` is the course-provided helper module; presumably it stores the graph
# together with the trained weights -- see util.py to confirm.)
util.save('assignment7.tfg', session=sess)