*NOTES*
- Data augmentation: generare more inpute
- Drop out... MAYBE
- Rule of thumb: per layer, halve dimensions X*Y and double depth
- To reduce overfitting, we will apply dropout before the readout layer.
  TODO mention paper:Dropout: A Simple Way to Prevent Neural Networks from Overfitting
  https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf

In [1]:
%matplotlib inline
"""
Baseline for machine learning project on road segmentation.
This simple baseline consits of a CNN with two convolutional+pooling layers with a soft-max loss
Credits: Aurelien Lucchi, ETH Zürich
"""
import gzip
import os
import sys
import urllib
import matplotlib.image as mpimg
from PIL import Image
import code
import tensorflow.python.platform
import numpy
import tensorflow as tf
from types import * #for assert XXX is IntTYpe

In [2]:
NUM_CHANNELS = 3 # RGB images
PIXEL_DEPTH = 255
NUM_LABELS = 2
TRAINING_SIZE = 100
VALIDATION_SIZE = 5  # Size of the validation set.
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 16 # 64
RESTORE_MODEL = False # If True, restore existing model instead of training a new one
RECORDING_STEP = 1000
DATA_AUGMENTATION = True

tf.app.flags.DEFINE_string('train_dir', './tmp/', "Directory where to write event logs and checkpoint.")
FLAGS = tf.app.flags.FLAGS

# Pre-processing: Data augmentation

In [None]:
from scipy import ndimage

EXTRA_IMAGES_ID = [23,26,27,28,30,32,33,38,42,69,72,73,75,83,88,91]

if (DATA_AUGMENTATION):
    for i in range(0, len(EXTRA_IMAGES_ID)):
        

In [3]:
# Extract patches from a given image
def img_crop(im, w, h):
    list_patches = []
    imgwidth = im.shape[0]
    imgheight = im.shape[1]
    is_2d = len(im.shape) < 3
    for i in range(0,imgheight,h):
        for j in range(0,imgwidth,w):
            if is_2d:
                im_patch = im[j:j+w, i:i+h]
            else:
                im_patch = im[j:j+w, i:i+h, :]
            list_patches.append(im_patch)
    return list_patches

#we are training stoch gradient descent but with batches, batch size of BATCH_SIZE

#return matrix of image patches
def extract_data(filename, num_images, phase):
    """Extract the images into a 4D tensor [image index, y, x, channels].
    Values are rescaled from [0, 255] down to [-0.5, 0.5].
    """
    imgs = []
    if phase==1:
        for i in range(1, num_images+1):
            image_filename = filename + "satImage_%.3d" % i + ".png"
            if os.path.isfile(image_filename):
                #print ('Loading ' + image_filename)
                img = mpimg.imread(image_filename)
                imgs.append(img)
            else:
                print ('File ' + image_filename + ' does not exist')
    if phase==2:
        for i in range(1, num_images+1):
            image_filename = filename + "prediction_raw_" + str(i) + ".png"
            if os.path.isfile(image_filename):
                #print ('Loading ' + image_filename)
                img = mpimg.imread(image_filename)
                imgs.append(img)
            else:
                print ('File ' + image_filename + ' does not exist')
                
    num_images = len(imgs)
    IMG_WIDTH = imgs[0].shape[0]
    IMG_HEIGHT = imgs[0].shape[1]
    N_PATCHES_PER_IMAGE = (IMG_WIDTH/IMG_PATCH_SIZE)*(IMG_HEIGHT/IMG_PATCH_SIZE)

    img_patches = [img_crop(imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)]
    data = [img_patches[i][j] for i in range(len(img_patches)) for j in range(len(img_patches[i]))]

    return numpy.asarray(data)
        
# Assign a label to a patch v
def value_to_class(v):
    foreground_threshold = 0.25 # percentage of pixels > 1 required to assign a foreground label to a patch
    df = numpy.sum(v)
    if df > foreground_threshold:
        return [0, 1]
    else:
        return [1, 0]

# Extract label images
def extract_labels(filename, num_images):
    """Extract the labels into a 1-hot matrix [image index, label index]."""
    gt_imgs = []
    for i in range(1, num_images+1):
        image_filename = filename + "satImage_%.3d" % i + ".png"
        if os.path.isfile(image_filename):
            #print ('Loading ' + image_filename)
            img = mpimg.imread(image_filename)
            gt_imgs.append(img)
        else:
            print ('File ' + image_filename + ' does not exist')

    num_images = len(gt_imgs)
    gt_patches = [img_crop(gt_imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)]
    data = numpy.asarray([gt_patches[i][j] for i in range(len(gt_patches)) for j in range(len(gt_patches[i]))])
    labels = numpy.asarray([value_to_class(numpy.mean(data[i])) for i in range(len(data))])

    # Convert to dense 1-hot representation.
    return labels.astype(numpy.float32)


#returns percentage of WRONG labels (right ones stored in predictions)
def error_rate(predictions, labels):
    """Return the error rate based on dense predictions and 1-hot labels."""
    return 100.0 - (
        100.0 *
        numpy.sum(numpy.argmax(predictions, 1) == numpy.argmax(labels, 1)) /
        predictions.shape[0])

# Write predictions from neural network to a file
def write_predictions_to_file(predictions, labels, filename):
    max_labels = numpy.argmax(labels, 1)
    max_predictions = numpy.argmax(predictions, 1)
    file = open(filename, "w")
    n = predictions.shape[0]
    for i in range(0, n):
        file.write(max_labels(i) + ' ' + max_predictions(i))
    file.close()

# Print predictions from neural network
def print_predictions(predictions, labels):
    max_labels = numpy.argmax(labels, 1)
    max_predictions = numpy.argmax(predictions, 1)
    print (str(max_labels) + ' ' + str(max_predictions))

# Convert array of labels to an image
def label_to_img(imgwidth, imgheight, w, h, labels):
    array_labels = numpy.zeros([imgwidth, imgheight])
    idx = 0
    for i in range(0,imgheight,h):
        for j in range(0,imgwidth,w):
            if labels[idx][0] > 0.5:
                l = 1
            else:
                l = 0
            array_labels[j:j+w, i:i+h] = l
            idx = idx + 1
    return array_labels

def img_float_to_uint8(img):
    rimg = img - numpy.min(img)
    rimg = (rimg / numpy.max(rimg) * PIXEL_DEPTH).round().astype(numpy.uint8)
    return rimg

def concatenate_images(img, gt_img):
    nChannels = len(gt_img.shape)
    w = gt_img.shape[0]
    h = gt_img.shape[1]
    if nChannels == 3:
        cimg = numpy.concatenate((img, gt_img), axis=1)
    else:
        gt_img_3c = numpy.zeros((w, h, 3), dtype=numpy.uint8)
        gt_img8 = img_float_to_uint8(gt_img)          
        gt_img_3c[:,:,0] = gt_img8
        gt_img_3c[:,:,1] = gt_img8
        gt_img_3c[:,:,2] = gt_img8
        img8 = img_float_to_uint8(img)
        cimg = numpy.concatenate((img8, gt_img_3c), axis=1)
    return cimg

def make_img_overlay(img, predicted_img):
    w = img.shape[0]
    h = img.shape[1]
    color_mask = numpy.zeros((w, h, 3), dtype=numpy.uint8)
    color_mask[:,:,0] = predicted_img*PIXEL_DEPTH

    img8 = img_float_to_uint8(img)
    background = Image.fromarray(img8, 'RGB').convert("RGBA")
    overlay = Image.fromarray(color_mask, 'RGB').convert("RGBA")
    new_img = Image.blend(background, overlay, 0.2)
    return new_img

MAIN STARTS HERE

In [23]:
class CNN:

        def __init__(self):
            self.cdata = []

        def run(self, phase, conv_layers=2):

            # Make an image summary for 4d tensor image with index idx
            def get_image_summary(img, idx = 0):
                #Take img BATCHx16x16x3 --> slice 1x16x16x1 (-1 means "to all")
                #ie a single patch, all HxV pixels, single column
                V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
                img_w = img.get_shape().as_list()[1] #16: data was BATCH_SIZEx16x16x3
                img_h = img.get_shape().as_list()[2]
                min_value = tf.reduce_min(V) #gives min number across all dimensions 
                V = V - min_value  #TRANSLATION: we translate all data (start from 0)
                max_value = tf.reduce_max(V)
                V = V / (max_value*PIXEL_DEPTH)  #NORMALIZATION: values in 0 to 1
                V = tf.reshape(V, (img_w, img_h, 1))
                V = tf.transpose(V, (2, 0, 1))
                V = tf.reshape(V, (-1, img_w, img_h, 1))
                return V

            # Make an image summary for 3d tensor image with index idx
            def get_image_summary_3d(img):
                V = tf.slice(img, (0, 0, 0), (1, -1, -1))
                img_w = img.get_shape().as_list()[1]
                img_h = img.get_shape().as_list()[2]
                V = tf.reshape(V, (img_w, img_h, 1))
                V = tf.transpose(V, (2, 0, 1))
                V = tf.reshape(V, (-1, img_w, img_h, 1))
                return V         
            
            # Get prediction for given input image 
            def get_prediction(img,phase, conv_layers):
                data = numpy.asarray(img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE))
                data_node = tf.constant(data)
                output = tf.nn.softmax(model(data_node,phase,conv_layers))
                output_prediction = s.run(output)
                img_prediction = label_to_img(img.shape[0], img.shape[1], IMG_PATCH_SIZE, IMG_PATCH_SIZE, output_prediction)
                return img_prediction

            # Get a concatenation of the prediction and groundtruth for given input file
            def get_prediction_with_groundtruth(filename, image_idx,phase,conv_layers):
                if phase ==1:
                    image_filename = filename + "satImage_%.3d" % image_idx + ".png"
                if phase == 2:
                    image_filename = filename + "prediction_raw_" + str(image_idx) + ".png" 
                img = mpimg.imread(image_filename)
                img_prediction = get_prediction(img,phase,conv_layers)
                return concatenate_images(img, img_prediction)
                
            # Get prediction overlaid on the original image for given input file
            def get_prediction_with_overlay(filename, image_idx,phase,conv_layers):
                if phase ==1:
                    image_filename = filename + "satImage_%.3d" % image_idx + ".png"
                if phase == 2:
                    image_filename = filename + "prediction_raw_" + str(image_idx) + ".png"
                    #image_filename = filename + "satImage_%.3d" % image_idx + ".png" 
                img = mpimg.imread(image_filename)

                img_prediction = get_prediction(img,phase, conv_layers)
                oimg = make_img_overlay(img, img_prediction)

                return oimg

            # We will replicate the model structure for the training subgraph, as well
            # as the evaluation subgraphs, while sharing the trainable parameters.
            def model(data, phase, conv_layers, dropout=False, train=False):
                """The Model definition."""
                convs = [None] * conv_layers
                relus = [None] * conv_layers
                pools = [None] * conv_layers
            
                #define all convolational networks layers
                for i in range (0, conv_layers):
                    #2D convolution: w.T*x, x is data
                    if i==0:
                        convs[i] = tf.nn.conv2d(data,    ###input is data : BATCH_SIZEx16x16x3
                                        conv_weights[i], #### 5x5x3x32
                                        strides=[1, 1, 1, 1],
                                        padding='SAME')
                    else:
                        convs[i] = tf.nn.conv2d(pools[i-1], #input is previous layers output
                                    conv_weights[i],
                                    strides=[1, 1, 1, 1],
                                    padding='SAME')
                
                    # activity funtion: bias and rectified linear non-linearity. relu(w.T*x+b)
                    relus[i] = tf.nn.relu(tf.nn.bias_add(convs[i], conv_biases[i]))
                
                    #pooling: best of CONV_POOLING_STRIDE results from every X and Y output
                    pools[i] = tf.nn.max_pool(relus[i],
                                      ksize  =[1, POOL_FILTER_STRIDES[i], POOL_FILTER_STRIDES[i], 1],
                                      strides=[1, POOL_FILTER_STRIDES[i], POOL_FILTER_STRIDES[i], 1],
                                      padding='SAME') 

                # Reshape the feature map cuboid into a 2D matrix to feed it to the fully connected layers.
                last_pool = pools[conv_layers-1];
                pool_shape = last_pool.get_shape().as_list()
                reshape = tf.reshape(
                    last_pool, #16x4x4x64
                    [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]]) #[16, 16*16*64]

                # Fully connected layer. Note that the '+' operation automatically broadcasts the biases.
                hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
                
                # Add a 50% dropout during training only. Dropout also scales
                # activations such that no rescaling is needed at evaluation time.
                if train and dropout:
                    hidden = tf.nn.dropout(hidden, DROPOUT_RATE, seed=SEED)
                out = tf.matmul(hidden, fc2_weights) + fc2_biases

                # During training, output data types and sizes
                if train==True:
                    print ("== INFORMATION ON DIMENSIONALITY (train =", str(train),"):")
                    print ("-- data: ", str(data.get_shape()))
                    for i in range(0,conv_layers):
                        print ("-- convs["+str(i)+"]:", str(convs[i].get_shape()))
                        print ("-- conv_biases["+str(i)+"]:", str(conv_biases[i].get_shape()))
                        print ("-- conv_weights["+str(i)+"]:", str(conv_weights[i].get_shape()))
                        print ("-- relus["+str(i)+"]:", str(relus[i].get_shape()))
                        print ("-- relus["+str(i)+"]:", str(relus[i].get_shape()))
                        print ("-- pools["+str(i)+"]:", str(pools[i].get_shape()))
                    print ("-- reshape:", str(reshape.get_shape()))
                    print ("-- fc1_weights:", str(fc1_weights.get_shape()))
                    print ("-- hidden:", str(hidden.get_shape()))
                    print ("-- fc2_weights:", str(fc2_weights.get_shape()))
                    print ("-- out:", str(out.get_shape()))

                if train == True:
                    summary_id = '_0'
                    s_data = get_image_summary(data) #from docs: 3 channels so it's interpreted as RGB
                    filter_summary0 = tf.image_summary('summary_data' + summary_id, s_data)
                    s_convs = [None] * conv_layers
                    filter_summaries = [None] * conv_layers *2
                    s_pools = [None] * conv_layers
                    for i in range (0, conv_layers):
                        s_convs[i] = get_image_summary(convs[i])
                        filter_summaries[i]   = tf.image_summary('summary_conv' + str(i) + summary_id, s_convs[i])
                        s_pools[i] = get_image_summary(pools[i])
                        filter_summaries[i+1] = tf.image_summary('summary_pool' + str(i) + summary_id, s_pools[i])
                return out
            
            #create all convolutional network layers
            conv_weights = [None] * conv_layers
            conv_biases  = [None] * conv_layers
            for i in range (0, conv_layers):
                if i == 0 :
                    conv_weights[i] = tf.Variable(
                        tf.truncated_normal([CONV_FILTER_SIZES[i], CONV_FILTER_SIZES[i], NUM_CHANNELS, CONV_FILTER_DEPTHS[i]],
                                stddev=0.1,
                                seed=SEED)) #NOTE: this randomness allows the weights not to be started as zero (so that we can start training.. otherwise derivative is 0)
                    conv_biases[i] = tf.Variable(tf.zeros([CONV_FILTER_DEPTHS[i]]))  #the +b in the equation above

                else:
                    conv_weights[i] = tf.Variable(
                        tf.truncated_normal([CONV_FILTER_SIZES[i], CONV_FILTER_SIZES[i], CONV_FILTER_DEPTHS[i-1], CONV_FILTER_DEPTHS[i]],
                                stddev=0.1,
                                seed=SEED))  #each of 64 outputs of conv2 will be connected to 64 nodes in upper layer
                    conv_biases[i] = tf.Variable(tf.constant(0.1, shape=[CONV_FILTER_DEPTHS[i]]))  #TODO why is it a constant?
            
            #create the two fully connected layers
            fc1_pixel_size = IMG_PATCH_SIZE
            for i in range(0, conv_layers):
                #make sure strides and patches size are divisible
                assert IMG_PATCH_SIZE / POOL_FILTER_STRIDES[i] % 1 == 0, "IMG_PATCH_SIZE / POOL_FILTER_STRIDES[%r] is not an integer!" % i
                fc1_pixel_size /= POOL_FILTER_STRIDES[i]
                
            fc1_weights = tf.Variable( 
                tf.truncated_normal([int(fc1_pixel_size*fc1_pixel_size*CONV_FILTER_DEPTHS[conv_layers-1]), FC1_WEIGHTS_DEPTH],
                                    stddev=0.1,
                                    seed=SEED))
            fc1_biases = tf.Variable(tf.constant(0.1, shape=[FC1_WEIGHTS_DEPTH]))
            fc2_weights = tf.Variable(
                tf.truncated_normal([FC1_WEIGHTS_DEPTH, NUM_LABELS],
                                    stddev=0.1,
                                    seed=SEED))
            fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

            print(phase, ": extract_data...")
            if phase == 1:
                train_data_filename = 'training/images/'
                train_data = extract_data(train_data_filename, TRAINING_SIZE,phase)

            if phase == 2:
                train_data_filename = "predictions_training/"
                train_data = extract_data(train_data_filename, TRAINING_SIZE,phase)

            # Extract labels into numpy arrays.
            print(phase, ": extract_labels...")
            train_labels_filename = 'training/groundtruth/' 
            train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)

            num_epochs = NUM_EPOCHS #iterations count

            c0 = 0 #count of tiles labelled as 0
            c1 = 0 #... as 1
            for i in range(len(train_labels)):
                if train_labels[i][0] == 1:
                    c0 = c0 + 1
                else:
                    c1 = c1 + 1

            #We are training on the same number of 1s and 0s, to avoid training data being biased!
            print (phase,': before balancing: number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))
            print("-- train_data (before): ", numpy.shape(train_data))
            print("-- train_labels (before): ", numpy.shape(train_labels.shape))
            min_c = min(c0, c1)
            idx0 = [i for i, j in enumerate(train_labels) if j[0] == 1]
            idx1 = [i for i, j in enumerate(train_labels) if j[1] == 1]
            new_indices = idx0[0:min_c] + idx1[0:min_c]
            train_data = train_data[new_indices,:,:,:]
            print("-- train_data (after): ", numpy.shape(train_data))
            train_labels = train_labels[new_indices]
            print("-- train_labels (after): ", numpy.shape(train_labels.shape))
            train_size = train_labels.shape[0]

            #TODO we should alternate it: it's training zeros and then ones!
            #TODO try to randomize the picking of patches (its discarding "non-road" of last pics only...)
            c0 = 0
            c1 = 0
            for i in range(len(train_labels)):
                if train_labels[i][0] == 1:
                    c0 = c0 + 1
                else:
                    c1 = c1 + 1
            print (phase, ': after balancing: Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

            # This is where training samples and labels are fed to the graph.
            # These placeholder nodes will be fed a batch of training data at each
            # training step using the {feed_dict} argument to the Run() call below.
            train_data_node = tf.placeholder(
                tf.float32,
                shape=(BATCH_SIZE, IMG_PATCH_SIZE, IMG_PATCH_SIZE, NUM_CHANNELS))
            train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, NUM_LABELS))
            train_all_data_node = tf.constant(train_data) #converting train_data to tensorflow variable
            print("-- train_all_data_node:", str(train_all_data_node.get_shape()))

            # Training computation: logits + cross-entropy loss.
            print("-- train_data_node:", train_data_node.get_shape())
            logits = model(train_data_node, phase, conv_layers, True, True) # BATCH_SIZE*16x16x3
            print("-- logits =", str(logits.get_shape()))
            print("-- train_labels_node = ", str(train_labels_node.get_shape()))
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(   #softmax cross entropy is the loss function
                logits, train_labels_node))
            tf.scalar_summary('loss', loss)

            #set up parameters for nodes
            all_params_node  = []
            for i in range (0, conv_layers):
                all_params_node.append(conv_weights[i])
                all_params_node.append(conv_biases[i])
            all_params_node.append(fc1_weights)
            all_params_node.append(fc1_biases)
            all_params_node.append(fc2_weights)
            all_params_node.append(fc2_biases)

            all_params_names = []
            for i in range (0, conv_layers):
                all_params_names.append('conv_weights['+str(i)+']')
                all_params_names.append('conv_biases['+str(i)+']')
            all_params_names.append('fc1_weights')
            all_params_names.append('fc1_biases')
            all_params_names.append('fc2_weights')
            all_params_names.append('fc2_biases')
            all_grads_node = tf.gradients(loss, all_params_node)
            all_grad_norms_node = [None] * conv_layers
            for i in range(0, len(all_grads_node)):
                norm_grad_i = tf.global_norm([all_grads_node[i]])
                all_grad_norms_node.append(norm_grad_i)
                tf.scalar_summary(all_params_names[i], norm_grad_i)

            # L2 regularization for the fully connected parameters.
            #### avoid extrploding weights ("it only makes changes to the weights if they will really make a difference")
            regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                            tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))

            # Add the regularization term to the loss.
            loss += 5e-4 * regularizers

            # Optimizer: set up a variable that's incremented once per batch and controls the learning rate decay.
            batch = tf.Variable(0)
            # Decay once per epoch, using an exponential schedule starting at 0.01.
            learning_rate = tf.train.exponential_decay(
                LEARNING_RATE,       # Base learning rate.
                batch * BATCH_SIZE,  # Current index into the dataset.
                train_size,          # Decay step.
                DECAY_RATE,          # Decay of the step size
                staircase=True)
            tf.scalar_summary('learning_rate', learning_rate)

            # Use simple momentum for the optimization.
            optimizer = tf.train.MomentumOptimizer(learning_rate,0.0).minimize(loss, global_step=batch)

            # Predictions for the minibatch, validation set and test set.
            train_prediction = tf.nn.softmax(logits)
            
            # We'll compute them only once in a while by calling their {eval()} method.
            train_all_prediction = tf.nn.softmax(model(train_all_data_node,phase, conv_layers))

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

            # Create a local session to run this computation.
            with tf.Session() as s:

                if RESTORE_MODEL:
                    # Restore variables from disk.
                    saver.restore(s, FLAGS.train_dir + "/model.ckpt")
                    print("Model restored.")

                else:
                    # Run all the initializers to prepare the trainable parameters.
                    #tf.initialize_all_variables().run()
                    tf.global_variables_initializer().run()

                    # Build the summary operation based on the TF collection of Summaries.
                    summary_op = tf.merge_all_summaries()
                    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                            graph=s.graph)
                                                            #graph_def=s.graph_def)
                    # Loop through training steps.
                    print ('Initialized: total number of iterations = ' + str(int(num_epochs * train_size / BATCH_SIZE)))

                    training_indices = range(train_size)

                    for iepoch in range(num_epochs):

                        # Permute training indices
                        perm_indices = numpy.random.permutation(training_indices)

                        for step in range (int(train_size / BATCH_SIZE)):

                            offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                            batch_indices = perm_indices[offset:(offset + BATCH_SIZE)]

                            # Compute the offset of the current minibatch in the data.
                            # Note that we could use better randomization across epochs.
                            batch_data = train_data[batch_indices, :, :, :]
                            batch_labels = train_labels[batch_indices]
                            # This dictionary maps the batch data (as a numpy array) to the
                            # node in the graph is should be fed to.
                            feed_dict = {train_data_node: batch_data,
                                         train_labels_node: batch_labels}

                            if step % RECORDING_STEP == 0:
                                summary_str, _, l, lr, predictions = s.run(
                                    [summary_op, optimizer, loss, learning_rate, train_prediction],
                                    feed_dict=feed_dict)
                                #summary_str = s.run(summary_op, feed_dict=feed_dict) #TODO uncomment this? what does it do?
                                summary_writer.add_summary(summary_str, step)
                                summary_writer.flush()

                                # print_predictions(predictions, batch_labels)

                                print ('Epoch: ', iepoch,'.',step,', minibatch loss: %.3f' % (l), ', Minibatch error: %.1f%%' % error_rate(predictions, batch_labels))

                                sys.stdout.flush()
                            else:
                                # Run the graph and fetch some of the nodes.
                                _, l, lr, predictions = s.run(
                                    [optimizer, loss, learning_rate, train_prediction],
                                    feed_dict=feed_dict)

                        # Save the variables to disk.
                        save_path = saver.save(s, FLAGS.train_dir + "/model.ckpt")
                        print("Model saved in file: %s" % save_path)


                print ("Running prediction on training set, outputing", TRAINING_SIZE,"files")
                prediction_training_dir = "predictions_training/"
                if not os.path.isdir(prediction_training_dir):
                    os.mkdir(prediction_training_dir)
                for i in range(1, TRAINING_SIZE+1):

                    if phase ==1:
                        image_filename = train_data_filename + "satImage_%.3d" % i + ".png"
                    if phase == 2:
                        image_filename = train_data_filename + "prediction_raw_" + str(i) + ".png" 

                    pimg = get_prediction_with_groundtruth(train_data_filename,i,phase,conv_layers)

                    rimg = mpimg.imread(image_filename)
                    rimg_prediction = get_prediction(rimg,phase, conv_layers)
                    #convert from 2D array 1/0 to RGB
                    w = rimg_prediction.shape[0]
                    h = rimg_prediction.shape[1]
                    rimg_mask = numpy.zeros((w, h, 3), dtype=numpy.uint8)
                    rimg_mask[:,:,0] = rimg_prediction*PIXEL_DEPTH
                    rimg_mask[:,:,1] = rimg_prediction*PIXEL_DEPTH
                    rimg_mask[:,:,2] = rimg_prediction*PIXEL_DEPTH
                    rimg_final = Image.fromarray(rimg_mask, 'RGB')    
                    
                    oimg = get_prediction_with_overlay(train_data_filename,i,phase,conv_layers)
                    
                    if phase == 1:
                        Image.fromarray(pimg).save(prediction_training_dir + "prediction_" + str(i) + ".png")
                        rimg_final.save(prediction_training_dir + "prediction_raw_" + str(i) + ".png")
                        oimg.save(prediction_training_dir + "overlay_" + str(i) + ".png")
                    if phase == 2:
                        Image.fromarray(pimg).save(prediction_training_dir + "prediction_2_" + str(i) + ".png")
                        rimg_final.save(prediction_training_dir + "prediction_raw_2_" + str(i) + ".png")
                        oimg.save(prediction_training_dir + "overlay_2_" + str(i) + ".png")

            print("-- job done --")

# PHASE 1

In [24]:
## CNN settings:
IMG_PATCH_SIZE = 16 #should be a multiple of 4 and img_size (400)
CONV_LAYERS=3
CONV_FILTER_SIZES = [5, 5, 5, 5] #size of X*Y filter in conv[i]
CONV_FILTER_DEPTHS = [32, 64, 128, 256] #depth of conv_weights[i]
POOL_FILTER_STRIDES = [2, 2, 2, 2] #stride for pooling
FC1_WEIGHTS_DEPTH = 512 #depth of weights in fully connected 1 (before out)

#Learning settings
DROPOUT_RATE = 0.5 #amount of nodes we drop during training
LEARNING_RATE = 0.01
DECAY_RATE = 0.95 #decay of step size of gradient descent
NUM_EPOCHS = 3

#execute phase 1 (train inputs)
cnn1 = CNN()
cnn1.run(phase=1, conv_layers=CONV_LAYERS)

1 : extract_data...
1 : extract_labels...
1 : before balancing: number of data points per class: c0 = 46309 c1 = 16191
-- train_data (before):  (62500, 16, 16, 3)
-- train_labels (before):  (2,)
-- train_data (after):  (32382, 16, 16, 3)
-- train_labels (after):  (2,)
1 : after balancing: Number of data points per class: c0 = 16191 c1 = 16191
-- train_all_data_node: (32382, 16, 16, 3)
-- train_data_node: (16, 16, 16, 3)
== INFORMATION ON DIMENSIONALITY (train = True ):
-- data:  (16, 16, 16, 3)
-- convs[0]: (16, 16, 16, 32)
-- conv_biases[0]: (32,)
-- conv_weights[0]: (5, 5, 3, 32)
-- relus[0]: (16, 16, 16, 32)
-- relus[0]: (16, 16, 16, 32)
-- pools[0]: (16, 8, 8, 32)
-- convs[1]: (16, 8, 8, 64)
-- conv_biases[1]: (64,)
-- conv_weights[1]: (5, 5, 32, 64)
-- relus[1]: (16, 8, 8, 64)
-- relus[1]: (16, 8, 8, 64)
-- pools[1]: (16, 4, 4, 64)
-- convs[2]: (16, 4, 4, 128)
-- conv_biases[2]: (128,)
-- conv_weights[2]: (5, 5, 64, 128)
-- relus[2]: (16, 4, 4, 128)
-- relus[2]: (16, 4, 4, 128)
--

# PHASE 2

In [None]:
## CNN settings:
IMG_PATCH_SIZE = 16 #should be a multiple of 4 and img_size (400)
CONV_LAYERS=2
CONV_FILTER_SIZES = [5, 5, 5, 5] #size of X*Y filter in conv[i]
CONV_FILTER_DEPTHS = [32, 64, 128, 256] #depth of conv_weights[i]
POOL_FILTER_STRIDES = [2,2, 2, 2] #stride for pooling
FC1_WEIGHTS_DEPTH = 512 #depth of weights in fully connected 1 (before out)

#Learning settings
DROPOUT_RATE = 0.5 #amount of nodes we drop during training
LEARNING_RATE = 0.05
DECAY_RATE = 0.95 #decay of step size of gradient descent
NUM_EPOCHS = 1

#execute phase 1 (train inputs)
cnn1 = CNN()
cnn1.run(phase=2, conv_layers=CONV_LAYERS)

mask_to_submission.py

In [None]:
#!/usr/bin/env python3

import os
import numpy as np
import matplotlib.image as mpimg
import re

foreground_threshold = 0.25 # percentage of pixels > 1 required to assign a foreground label to a patch

# assign a label to a patch
def patch_to_label(patch):
    df = np.mean(patch)
    if df > foreground_threshold:
        return 1
    else:
        return 0

def mask_to_submission_strings(image_filename):
    """Reads a single image and outputs the strings that should go into the submission file"""
    img_number = int(re.search(r"\d+", image_filename).group(0))
    im = mpimg.imread(image_filename)
    patch_size = IMG_PATCH_SIZE
    for j in range(0, im.shape[1], patch_size):
        for i in range(0, im.shape[0], patch_size):
            patch = im[i:i + patch_size, j:j + patch_size]
            label = patch_to_label(patch)
            yield("{:03d}_{}_{},{}".format(img_number, j, i, label))


def masks_to_submission(submission_filename, *image_filenames):
    """Converts images into a submission file"""
    with open(submission_filename, 'w') as f:
        f.write('id,prediction\n')
        for fn in image_filenames[0:]:
            f.writelines('{}\n'.format(s) for s in mask_to_submission_strings(fn))


submission_filename = 'dummy_submission.csv'
image_filenames = []
for i in range(1, 51):
    image_filename = 'training/groundtruth/satImage_' + '%.3d' % i + '.png'
    print image_filename
    image_filenames.append(image_filename)
    
masks_to_submission(submission_filename, *image_filenames)


In [None]:
######################## from Log regression code ##########