In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

# Root folder of the dataset; later cells append "train_v2/" (image folder) and
# "train_ship_segmentations_v2.csv" (label table) to this prefix.
PATH = "../data/Airbus/"

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
#import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display 
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Any results you write to the current directory are saved as output.

In [2]:
# number of image files present in the training folder
print("dataset size: ", len(os.listdir(PATH+"train_v2/")))

# label table: one row per (image, encoded segment); EncodedPixels is NaN when no segment is given
trainLabels = pd.read_csv(PATH+"train_ship_segmentations_v2.csv")

# keep only the images that have at least one encoded segment
annotatedRows = trainLabels[trainLabels["EncodedPixels"].notna()]
trainImgFileNames = np.unique(annotatedRows["ImageId"].values)
print(trainLabels.count(), len(trainImgFileNames))

# output resolution of the label masks produced by getLabelData
X = 256
Y = 256

dataset size:  192556
ImageId          231723
EncodedPixels     81723
dtype: int64 42556


In [16]:

# returns one standardized rgb input image
def getImageData(fileName):
    """Load a single training image and standardize it.

    Args:
        fileName: name of the image file inside PATH/train_v2/.

    Returns:
        float32 array with the image in RGB channel order, shifted and scaled
        to zero mean / unit standard deviation over the whole image.

    Raises:
        IOError: if OpenCV cannot read the file.
    """
    path = os.path.join(PATH, "train_v2", fileName)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError("could not read image: " + path)

    # OpenCV loads BGR; the rest of the pipeline works on RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    std = img.std()
    if std == 0:
        # constant image: dividing by zero would fill the tensor with NaNs
        std = 1.0

    return ((img - img.mean()) / std).astype(np.float32)


# returns the label mask of one image, decoded from its run-length encodings
def getLabelData(fileName, srcShape=(768, 768)):
    """Decode the run-length encoded segments of one image into a binary mask.

    The encodings address pixels of the full-resolution image, so the mask is
    decoded at that resolution first and only afterwards reduced to the
    (Y, X) output size by nearest-neighbour subsampling. Decoding straight
    into an X*Y buffer would silently drop every run that starts beyond X*Y.

    Args:
        fileName: value to look up in the "ImageId" column of trainLabels.
        srcShape: (height, width) of the image the encodings refer to.
            The default assumes the 768x768 images of the Airbus ship
            detection data. NOTE(review): confirm against the files on disk.

    Returns:
        uint8 array of shape (Y, X); 1 where a segment covers the pixel,
        0 elsewhere. All zeros when the image has no encoded segment.
    """
    srcH, srcW = srcShape
    flat = np.zeros(srcH * srcW, dtype=np.uint8)

    rLen = trainLabels[trainLabels["ImageId"] == fileName]["EncodedPixels"]
    if not rLen.isnull().values.any():
        # an image may have several rows, one per segment
        for encoding in rLen.values:
            vals = np.array(encoding.split(), dtype=np.int64)
            # pairs of (start, length); starts are 1-indexed in the encoding
            starts = vals[::2] - 1
            ends = starts + vals[1::2]
            for lo, hi in zip(starts, ends):
                flat[lo:hi] = 1

    # runs go down the columns first, so reshape as (width, height) and transpose
    mask = flat.reshape((srcW, srcH)).T

    # nearest-neighbour reduction to the output size
    rows = (np.arange(Y) * srcH) // Y
    cols = (np.arange(X) * srcW) // X
    return mask[np.ix_(rows, cols)]

#MAX_IMAGES = 12

#x_valid = getImageData(trainImgFileNames[66:66+MAX_IMAGES])
#y_valid = getLabelData(trainImgFileNames[66:66+MAX_IMAGES])
    
#print(x_valid.shape, y_valid.shape)



In [17]:
import tensorflow as tf
tf.VERSION

'1.13.1'

In [18]:

# Upsampling factor; net() multiplies feature-map height and width by it when sizing each deconv output.
STRIDE = 2

def weight_variable(shape, name):
    """Fetch the weight variable called `name`, creating it on first use.

    Args:
        shape: shape of the variable.
        name: variable name, looked up in the root variable scope.

    Returns:
        The variable returned by tf.get_variable, set up with a Xavier initializer.
    """
    xavier = tf.contrib.layers.xavier_initializer()
    # AUTO_REUSE: create the variable the first time, hand back the existing one afterwards
    with tf.variable_scope("", reuse=tf.AUTO_REUSE):
        weights = tf.get_variable(name, shape=shape, initializer=xavier)
    return weights

def bias_variable(shape, name):
    """Fetch the bias variable called `name`, creating it on first use.

    Created through tf.get_variable so that, like weight_variable, the
    AUTO_REUSE scope actually applies and the variable carries `name`.
    (tf.Variable always creates a fresh variable and ignores scope reuse.)

    Args:
        shape: shape of the bias vector.
        name: variable name, looked up in the root variable scope.

    Returns:
        The variable returned by tf.get_variable, initialized to 0.1.
    """
    with tf.variable_scope("", reuse=tf.AUTO_REUSE):
        return tf.get_variable(name, shape=shape,
                               initializer=tf.constant_initializer(0.1))

def conv(input, filter, name, pad="SAME", dilation=0, dropR=0.3):
    """Convolution followed by bias, batch normalization and ReLU.

    Args:
        input: tensor to convolve.
        filter: kernel shape; its 4th entry is used as the size of the bias vector.
        name: base name; the kernel variable is called name+"f1", the bias name+"b1".
        pad: padding mode handed to the convolution.
        dilation: when > 0 an atrous convolution with this rate is used,
            otherwise a plain stride-1 convolution.
        dropR: currently unused (the dropout call is disabled).

    Returns:
        The ReLU-activated tensor.
    """
    kernel = weight_variable(filter, name+"f1")

    if dilation > 0:
        features = tf.nn.atrous_conv2d(input, kernel, dilation, padding=pad)
    else:
        features = tf.nn.conv2d(input, kernel, strides=[1,1,1,1], padding=pad, name=name)

    bias = bias_variable([filter[3]], name=name+"b1")
    # batch_norm is called with its default arguments
    normalized = tf.contrib.layers.batch_norm(tf.nn.bias_add(features, bias))

    return tf.nn.relu(normalized)
    
def pool(input, window, stride, poolIndices=False, name="POOL"):
    """Max-pool `input` with a square window and SAME padding.

    Args:
        input: tensor to pool.
        window: side length of the pooling window.
        stride: step of the window along height and width.
        poolIndices: when True, tf.nn.max_pool_with_argmax is used instead of
            tf.nn.max_pool and its result is returned unchanged.
        name: name given to the pooling op.

    Returns:
        Whatever the selected TensorFlow pooling function returns.
    """
    pool_fn = tf.nn.max_pool_with_argmax if poolIndices else tf.nn.max_pool
    return pool_fn(
        input,
        ksize=[1, window, window, 1],
        strides=[1, stride, stride, 1],
        padding="SAME",
        name=name
    )
    
def deconv_filter(shape, name):
    """Fetch the transposed-convolution kernel called `name`, creating it on first use.

    Uses the same AUTO_REUSE root scope as weight_variable, so building the
    graph a second time reuses the kernel instead of failing because the
    variable already exists.

    Args:
        shape: kernel shape as [height, width, out channels, in channels].
        name: variable name, looked up in the root variable scope.

    Returns:
        The variable returned by tf.get_variable, set up with a Xavier initializer.
    """
    with tf.variable_scope("", reuse=tf.AUTO_REUSE):
        return tf.get_variable(name, shape=shape,
               initializer=tf.contrib.layers.xavier_initializer())

def deconv(layer, outputShape, filterShape, name, stride=2):
    """Upsample `layer` with a transposed convolution (SAME padding).

    Args:
        layer: input tensor; its name is used to derive the kernel variable name.
        outputShape: shape of the resulting tensor.
        filterShape: kernel shape handed to deconv_filter.
        name: name given to the transposed-convolution op.
        stride: upsampling step along height and width.

    Returns:
        The output tensor of tf.nn.conv2d_transpose.
    """
    # drop the last two characters of the tensor name (presumably the ":0" output suffix)
    kernelName = "deconvF" + layer.name[:-2]
    kernel = deconv_filter(filterShape, kernelName)

    return tf.nn.conv2d_transpose(
        layer,
        kernel,
        outputShape,
        strides=[1, stride, stride, 1],
        padding="SAME",
        name=name
    )


In [6]:

# Upsampling factor read by net() when it computes the deconv output sizes
# (the layer-helper cell defines the same constant with the same value).
STRIDE = 2

# uNet according to:
# Ronneberger et al. - U-Net: Convolutional Networks for Biomedical Image Segmentation
# https://arxiv.org/pdf/1505.04597.pdf 

def net(image, classes):
    """Build the U-Net style encoder/decoder graph on top of `image`.

    Five encoder levels separated by 2x2 max pooling, four transposed-
    convolution upsampling steps, and a skip connection from the last
    convolution of each encoder level into the decoder level of the same
    resolution. The class scores come from a plain 1x1 convolution, so the
    logits are not squeezed through batch-norm/ReLU before the softmax.

    Args:
        image: input tensor; batch size, height, width and channel count are
            read from its static shape and therefore must be known.
        classes: number of output classes (channels of the logits).

    Returns:
        (logits, prediction, softmax): the raw per-pixel class scores, their
        argmax over the channel axis, and their softmax.
    """
    bsize = image.get_shape()[0].value
    inChannels = image.get_shape()[3].value
    f = 3 # kernel size

    def upsampledShape(layer, channels):
        # static output shape of the next deconv: spatial size times STRIDE
        dims = layer.get_shape()
        return [bsize, dims[1].value*STRIDE, dims[2].value*STRIDE, channels]

    def skip(encoded, deOut):
        # crop the encoder features to the spatial size of the upsampled tensor
        return tf.slice(encoded, [0,0,0,0], [-1, deOut[1], deOut[2], -1])

    # encoding - downsampling
    # encoding level 1
    # NOTE(review): the first layer reduces the input to a single channel;
    # kept as in the original, confirm this bottleneck is intended.
    e1_c1 = conv(image, [f,f,inChannels,1], "e1_c1", "SAME")
    e1_c2 = conv(e1_c1, [f,f,1,64], "e1_c2", "SAME")
    e1_c3 = conv(e1_c2, [f,f,64,64], "e1_c3", "SAME")
    pool1 = pool(e1_c3, 2, 2, name="pool1")

    # encoding level 2
    e2_c1 = conv(pool1, [f,f,64,128], "e2_c1", "SAME")
    e2_c2 = conv(e2_c1, [f,f,128,128], "e2_c2", "SAME")
    e2_c3 = conv(e2_c2, [f,f,128,128], "e2_c3", "SAME")
    pool2 = pool(e2_c3, 2, 2, name="pool2")

    tf.summary.scalar("e2_c1", tf.reduce_sum(e2_c1))

    # encoding level 3
    e3_c1 = conv(pool2, [f,f,128,256], "e3_c1", "SAME")
    e3_c2 = conv(e3_c1, [f,f,256,256], "e3_c2", "SAME")
    e3_c3 = conv(e3_c2, [f,f,256,256], "e3_c3", "SAME")
    pool3 = pool(e3_c3, 2, 2, name="pool3")

    # encoding level 4
    e4_c1 = conv(pool3, [f,f,256,512], "e4_c1", "SAME")
    e4_c2 = conv(e4_c1, [f,f,512,512], "e4_c2", "SAME")
    e4_c3 = conv(e4_c2, [f,f,512,512], "e4_c3", "SAME")
    pool4 = pool(e4_c3, 2, 2, name="pool4")

    # encoding level 5 (bottom of the U)
    e5_c1 = conv(pool4, [f,f,512,1024], "e5_c1", "SAME")
    e5_c2 = conv(e5_c1, [f,f,1024,1024], "e5_c2", "SAME")
    deOut = upsampledShape(e5_c2, 512)
    de_dc1 = deconv(e5_c2, deOut, [f, f, 512, 1024], "de_dc1")

    # decoding - upsampling
    # decoding level 1
    de1_c1 = conv(tf.concat([skip(e4_c3, deOut), de_dc1], 3), [f,f,1024,512], "de1_c1", "SAME")
    de1_c2 = conv(de1_c1, [f,f,512,512], "de1_c2", "SAME")
    deOut = upsampledShape(de1_c2, 256)
    de1_dc1 = deconv(de1_c2, deOut, [f,f, 256, 512], "de1_dc1")

    # decoding level 2
    de2_c1 = conv(tf.concat([skip(e3_c3, deOut), de1_dc1], 3), [f,f,512,256], "de2_c1", "SAME")
    de2_c2 = conv(de2_c1, [f,f,256,256], "de2_c2", "SAME")
    deOut = upsampledShape(de2_c2, 128)
    de2_dc1 = deconv(de2_c2, deOut, [f,f, 128, 256], "de2_dc1")

    # decoding level 3
    # skip from the last convolution of the level, like levels 1 and 2 above
    de3_c1 = conv(tf.concat([skip(e2_c3, deOut), de2_dc1], 3), [f,f,256,128], "de3_c1", "SAME")
    de3_c2 = conv(de3_c1, [f,f,128,128], "de3_c2", "SAME")
    deOut = upsampledShape(de3_c2, 64)
    de3_dc1 = deconv(de3_c2, deOut, [f,f, 64, 128], "de3_dc1")

    # decoding level 4
    de4_c1 = conv(tf.concat([skip(e1_c3, deOut), de3_dc1], 3), [f,f,128,64], "de4_c1", "SAME")
    de4_c2 = conv(de4_c1, [f,f,64,64], "de4_c2", "SAME")
    de4_c3 = conv(de4_c2, [f,f,64,64], "de4_c3", "SAME")
    de4_c4 = conv(de4_c3, [f,f,64,64], "de4_c4", "SAME")

    # class scores: linear 1x1 convolution. conv() is not used here because
    # its batch-norm + ReLU would force every logit to be non-negative.
    finalKernel = weight_variable([1,1,64,classes], "finalf1")
    final = tf.nn.conv2d(de4_c4, finalKernel, strides=[1,1,1,1], padding="SAME", name="final")
    final = tf.nn.bias_add(final, bias_variable([classes], name="finalb1"))

    softmax = tf.nn.softmax(final)

    return final, tf.argmax(softmax, axis=3), softmax

In [7]:
def buildGraph(sess, data, config):
    """Create placeholders, network, loss, optimizer, metric and input pipelines.

    Args:
        sess: TensorFlow session (not used while building the graph).
        data: dataset object providing `config` (keys "x", "y",
            "imageChannels", "classes", "classWeights"), `imageData` (file
            name arrays per split) and `getImageTuple`.
        config: training settings; reads "batchSize", "learningRate",
            "threadCount" and "epochs".

    Returns:
        dict with the tensors/ops needed for training and inference:
        "logits", "loss", "prediction", "softmaxOut", "learningRate",
        "imagePlaceholder", "labelPlaceholder", "trainOp", "accuracy" and
        "preFetchIterators" (one-shot iterators for train, validation, test,
        in that order).
    """
    # placeholders that are filled through feed_dict at run time
    imageShape = [config["batchSize"], data.config["y"], data.config["x"], data.config["imageChannels"]]
    image = tf.placeholder(tf.float32, shape=imageShape, name="input_image")

    labelsShape = [config["batchSize"], data.config["y"], data.config["x"]]
    labels = tf.placeholder(tf.int32, labelsShape, name="labels")

    # per-pixel loss weights against class imbalance:
    # every pixel gets the weight of its ground-truth class
    onehot_labels = tf.one_hot(labels, data.config["classes"])
    weights = onehot_labels * data.config["classWeights"]
    weights = tf.reduce_sum(weights, 3)

    # the network itself is defined by net()
    logits, predictionNet, softmaxNet = net(image, data.config["classes"])

    # sparse: labels hold the class index per pixel instead of one-hot vectors
    loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits, weights=weights))
    tf.summary.scalar("loss", loss)

    # learning rate lives in a variable so it can be changed during training
    LR = tf.Variable(config["learningRate"], name="learningRate")
    tf.summary.scalar("learning_rate", LR)

    optimizer = tf.train.AdamOptimizer(learning_rate=LR, name="AdamOpt")
    train_op = optimizer.minimize(loss, global_step=tf.Variable(0, trainable=False))

    # pixel accuracy of the current batch
    correct_prediction = tf.equal(tf.cast(predictionNet, tf.int32), labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # tf.data input pipelines, loading and decoding on the CPU in parallel
    with tf.device('/cpu:0'):
        iterators = []
        for _type in ["train", "validation", "test"]:

            print("Creating ", _type, " dataset...")
            imageFilenames = tf.constant(data.imageData[_type])
            labelsFileNames = tf.constant(data.imageData[_type+"Label"])

            # pair every image name with its own label name
            dataset = tf.data.Dataset.from_tensor_slices((imageFilenames, labelsFileNames))
            dataset = dataset.map(lambda filename, label: tf.py_func(
                                          data.getImageTuple,
                                          [filename, label],
                                          [tf.float32, tf.uint8]
                                       ),  num_parallel_calls=config["threadCount"])
            if _type == "train":
                # buffer_size must be at least 1, also for batch sizes above 100
                dataset = dataset.shuffle(buffer_size=max(1, int(100/config["batchSize"])))

            dataset = dataset.batch(config["batchSize"])
            dataset = dataset.prefetch(4)
            dataset = dataset.repeat(config["epochs"])
            iterators.append(dataset.make_one_shot_iterator())

    return {
        "logits": logits,
        "loss": loss,
        "prediction": predictionNet,
        "softmaxOut": softmaxNet,
        "learningRate": LR,
        "imagePlaceholder": image,
        "labelPlaceholder": labels,
        "trainOp": train_op,
        "preFetchIterators": iterators,
        "accuracy": accuracy
    }

In [19]:
# Author: https://github.com/Aequalitas
# This class provides some utility functions to work with a dataset
import sys 
import random
import json
from time import sleep

class Data:
    """Utility wrapper around the dataset: split bookkeeping and sample loading.

    Config keys read by this class: "path", "images", "labels", "trainSize",
    "testSize", "batchSize", "name", "downsize", "x", "y".
    """

    # dict _config - settings describing the dataset
    def __init__(self, _config):
        # work on a copy: loadDataset replaces the fractional split sizes with
        # element counts, which must not leak back into the caller's dict
        self.config = dict(_config)

        self.loadDataset()

    # builds the train/test/validation splits from the module-level trainImgFileNames
    def loadDataset(self):
        # folder of every split: dataset root + image or label sub folder
        self.pathImages = {
            "train": self.config["path"] + self.config["images"],
            "trainLabel": self.config["path"] + self.config["labels"],
            "test": self.config["path"] + self.config["images"],
            "testLabel": self.config["path"] + self.config["labels"],
            "validation": self.config["path"] + self.config["images"],
            "validationLabel": self.config["path"] + self.config["labels"]
        }

        # labels are looked up by image name, so both lists hold the same names
        trainDataFiles = trainImgFileNames
        trainLabelDataFiles = trainDataFiles

        # split sizes are given as fractions of the whole file list
        trainElements = int(self.config["trainSize"]*len(trainDataFiles))
        testElements = int(self.config["testSize"]*len(trainDataFiles))

        # drop the remainder so that every batch of these splits is complete
        trainElements -= trainElements % self.config["batchSize"]
        testElements -= testElements % self.config["batchSize"]

        # shuffle the file names with a seed derived from the dataset name, so
        # the splits are identical between sessions.
        # Calc function taken from: https://codereview.stackexchange.com/q/13863
        random.seed(sum(ord(c) - 64 for c in self.config["name"]))
        randomIndices = np.arange(len(trainDataFiles), dtype=np.int32)
        random.shuffle(randomIndices)
        trainDataFiles = np.take(trainDataFiles, randomIndices)
        trainLabelDataFiles = np.take(trainLabelDataFiles, randomIndices)

        # train first, then test, everything that is left is validation
        testEnd = trainElements + testElements
        self.imageData = {
            "train": trainDataFiles[:trainElements],
            "trainLabel": trainLabelDataFiles[:trainElements],
            "test": trainDataFiles[trainElements:testEnd],
            "testLabel": trainLabelDataFiles[trainElements:testEnd],
            "validation": trainDataFiles[testEnd:],
            "validationLabel": trainLabelDataFiles[testEnd:]
        }

        # from here on the sizes are element counts instead of fractions
        self.config["trainSize"] = len(self.imageData["train"])
        self.config["testSize"] = len(self.imageData["test"])
        self.config["validationSize"] = len(self.imageData["validation"])
        print("trainSize: ", self.config["trainSize"], " Testsize: ", self.config["testSize"], "Validationsize: ", self.config["validationSize"])

    # gets a value from the config with its given name
    def getConfig(self, name):
        return self.config[name]

    # reads one image with its label mask and pre-processes both for training/testing
    # bytes imageFilename name of the image file
    # bytes labelFilename name used to look up the label mask
    def getImageTuple(self, imageFilename, labelFilename):
        img = getImageData(imageFilename.decode())
        labelImg = getLabelData(labelFilename.decode())

        if self.config["downsize"]:
            # nearest neighbour keeps the label values intact
            targetSize = (self.config["x"], self.config["y"])
            img = cv2.resize(img, targetSize, interpolation=cv2.INTER_NEAREST)
            labelImg = cv2.resize(labelImg, targetSize, interpolation=cv2.INTER_NEAREST)

        return img, labelImg


In [20]:

def predict(sess, config, data, graph, imagePath=None):
    """Run the network on one image and display the predicted class mask.

    The image is pre-processed like the training samples (RGB, standardized,
    nearest-neighbour resize to the network input size).

    Args:
        sess: session holding the trained variables.
        config: training settings; reads "batchSize".
        data: dataset object; reads data.config "x", "y" and "classes".
        graph: dict returned by buildGraph.
        imagePath: image to predict; defaults to a fixed sample from train_v2.

    Raises:
        IOError: if OpenCV cannot read the image.
    """
    if imagePath is None:
        imagePath = PATH+"train_v2/0005d01c8.jpg"

    img = cv2.imread(imagePath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError("could not read image: " + imagePath)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # same order as the training pipeline: standardize, then resize
    std = img.std()
    imgRes = ((img - img.mean()) / (std if std > 0 else 1.0)).astype(np.float32)
    imgRes = cv2.resize(imgRes, (data.config["x"], data.config["y"]), interpolation=cv2.INTER_NEAREST)

    # The placeholder has a fixed batch size, so the batch is filled with
    # copies of the image. Copies instead of zeros, because conv() calls
    # batch_norm with its default arguments (training mode in the TF docs),
    # so all-zero filler images would distort the batch statistics.
    inputData = np.repeat(np.expand_dims(imgRes, axis=0), config["batchSize"], axis=0)

    feed_dict = {
            graph["imagePlaceholder"]: inputData
        }

    predClasses = sess.run(graph["prediction"], feed_dict=feed_dict)

    # first batch entry as 2-D (y, x) mask; spread the class ids over 0..255
    # so that class 1 is visible in the grey-scale image
    mask = predClasses[0].astype(np.uint8)
    scale = 255 // max(data.config["classes"] - 1, 1)
    display(Image.fromarray(mask * scale, "L"))

In [21]:
# Author: https://github.com/Aequalitas/
# This file contains the training routine
# One epoch is trained and at the end the validation set is evaluated

import time 
import datetime

def doTrain(epoch, sess, graph, config, data, modelFileName):
    """Train for one epoch, then measure the accuracy on the validation split.

    Args:
        epoch: epoch number, only used in the status line.
        sess: session in which all ops are run.
        graph: dict returned by buildGraph.
        config: training settings; reads "batchSize".
        data: dataset object; reads data.config "trainSize",
            "validationSize" and "imageChannels".
        modelFileName: currently unused.

    Returns:
        Mean validation pixel accuracy in percent, rounded to 5 digits
        (0.0 when no complete validation batch was available).
    """
    batchSize = config["batchSize"]
    singleChannel = data.config["imageChannels"] == 1

    def makeFeed(batch):
        # batch is the (images, labels) pair delivered by the input pipeline
        images, labels = batch[0], batch[1]
        if singleChannel:
            # add the channel axis the image placeholder expects
            images = np.expand_dims(images, axis=3)
        return {
            graph["imagePlaceholder"]: images,
            graph["labelPlaceholder"]: labels
        }

    loss = []
    train_acc = []
    epochSize = int(data.config["trainSize"]/batchSize)
    nextImgData = graph["preFetchIterators"][0].get_next()

    for step in range(1, epochSize+1):
        start = time.time()
        try:
            imgData = sess.run(nextImgData)
        except tf.errors.OutOfRangeError:
            break
        # in case the last rest does not fit into a batch
        if imgData[0].shape[0] != batchSize:
            break

        # one run for update and metrics; avoids a second forward pass per step
        _, _loss, _train_acc = sess.run(
            [graph["trainOp"], graph["loss"], graph["accuracy"]],
            feed_dict=makeFeed(imgData)
        )
        end = time.time()

        train_acc.append(_train_acc*100)
        loss.append(_loss)
        status = "Epoch: "+str(epoch)+" || Step: "+str(step)+"/"+ str(epochSize)
        status += " || loss: "+str(round(np.mean(np.array(loss)), 5))+" || train_acc: "+ str(round(np.mean(np.array(train_acc)), 5))
        status += "% || ETA: "+str(datetime.timedelta(seconds=((end-start)*((epochSize)-step))))
        # ends with \r so the next status overwrites this line
        print(status, end="\r")

    # validate trained model after one epoch
    acc = []
    nextImgData = graph["preFetchIterators"][1].get_next()
    valSize = int(data.config["validationSize"]/batchSize)
    for r in range(valSize):
        try:
            imgData = sess.run(nextImgData)
        except tf.errors.OutOfRangeError:
            break
        if imgData[0].shape[0] == batchSize:
            _acc = 100*sess.run(graph["accuracy"], feed_dict=makeFeed(imgData))
            acc.append(_acc)

    # the mean of an empty list would be NaN
    acc = round(np.mean(np.array(acc)), 5) if acc else 0.0
    print("\nvalidation_accuracy: "+str(acc))
    return acc


In [None]:
# training settings
config = {
    "neuralNetwork":"uNet",
    "batchSize":4,
    "threadCount":8,
    "learningRate":0.01,
    "steps":9999999,
    "epochs":40
}

# dataset settings, consumed by the Data class and buildGraph
dataConfig = {
    "x": 256,
    "y": 256,
    "imageChannels": 3,
    "batchSize":4,
    "tfPrefetch":True,
    "downsize":True,
    "name":"Airbus",
    "classWeights":[0.01, 1.0],
    "classes":2,
    "path":PATH,
    "images":"train_v2/",
    "labels":"train_v2/",
    "trainSize":0.9,
    "testSize":0.005

}

tf.logging.set_verbosity(tf.logging.INFO)
# GPU configuration
tfConfig = tf.ConfigProto()

with tf.Session(config=tfConfig) as sess:
    data = Data(dataConfig)
    # create the static tensorflow graph
    graph = buildGraph(sess, data, config)
    sess.run(tf.global_variables_initializer())
    modelFileName = "DAS"
    print("Starting training...")
    best_acc = 0
    LRcounter = 0
    bestMeanIoU = 0
    for e in range(1, config["epochs"]+1):
        curr_acc = doTrain(e, sess, graph, config, data, modelFileName)
        predict(sess, config, data, graph)
        # when the validation accuracy stops improving, the learning rate is multiplied by 0.1
        if best_acc < curr_acc:
            print("val acc of ", curr_acc, " better than ", best_acc)
            best_acc = curr_acc
            LRcounter = 0
        else:
            print("val acc of ", curr_acc, " NOT better than ", best_acc)
            if LRcounter >= 4:
                lr = float(sess.run(graph["learningRate"]))
                newLr = lr * 0.1
                # the assign op has to be run to take effect; graph["learningRate"]
                # keeps pointing at the variable so later reads see the new value
                sess.run(graph["learningRate"].assign(newLr))
                print("Learning rate of ", lr ," is now decreased to ", newLr)
                LRcounter = 0

            LRcounter = LRcounter + 1



trainSize:  38300  Testsize:  212 Validationsize:  4044
Creating  train  dataset...
Creating  validation  dataset...
Creating  test  dataset...
Starting training...
