In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from tensorflow.keras.layers import Dense, Conv2D
from tensorflow.keras.layers import BatchNormalization, Activation
from tensorflow.keras.layers import AveragePooling2D, Input, Flatten
from tensorflow.keras.layers import Add
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model

from tensorflow.keras.layers import Activation, Input
from tensorflow.keras.layers import Conv2D, Conv2DTranspose
from tensorflow.keras.layers import BatchNormalization, Concatenate
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Softmax, UpSampling2D
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np

import numpy as np
import os

import skimage
from skimage.io import imread

In [2]:
# Number of channels produced by the instance-embedding head of build_fcn;
# also the exclusive upper bound of the placeholder random instance ids
# generated in the training cell.
FEATURE_DIM = 4
# Margin subtracted from every pixel-to-mean distance in lanenet_loss.
delta_v = 0.5
# lanenet_loss penalises pairs of distinct means closer than 2 * delta_d.
delta_d = 0.5
# Weights of the variance, distance and regularisation terms of lanenet_loss.
param_var = 0.5
param_dist = 0.5
param_reg = 0.5
# Number of image/label pairs loaded by the training cell.
num_images = 4
# Directories the training cell passes to data_generation for the input images
# and the label images (presumably binary lane masks, going by the folder
# name -- verify against the dataset).
data_path = "./data/image"
label_path = "./data/gt_binary_image"

In [3]:
def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    """Build one Conv2D / BatchNormalization / Activation unit.

    Arguments:
        inputs (tensor): Tensor coming from the input image or the
            previous layer
        num_filters (int): Number of Conv2D filters
        kernel_size (int): Side of the square Conv2D kernel
        strides (int): Stride of the Conv2D (same in both directions)
        activation (string): Name of the activation, or None to skip it
        batch_normalization (bool): Whether BatchNormalization is applied
        conv_first (bool): True gives conv -> bn -> activation,
            False gives bn -> activation -> conv

    Returns:
        x (tensor): Output tensor, to be fed to the next layer
    """
    # 'same' padding, He-normal init and a small L2 penalty on the kernel
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    def _norm_then_activate(tensor):
        # optional BN followed by the optional activation
        if batch_normalization:
            tensor = BatchNormalization()(tensor)
        if activation is not None:
            tensor = Activation(activation)(tensor)
        return tensor

    if conv_first:
        return _norm_then_activate(conv(inputs))
    return conv(_norm_then_activate(inputs))

In [4]:
def resnet_v1(input_shape, depth, num_classes=10, n_layers=4):
    """ResNet Version 1 backbone builder [a]

    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
    Last ReLU is after the shortcut connection.
    At the beginning of each stage (except the first), the feature map size
    is halved (downsampled) by a convolutional layer with strides=2, while
    the number of filters is doubled. Within each stage, the layers have the
    same number of filters and the same feature map sizes.
    Features maps sizes (for a 32x32 input):
    stage 0: 32x32, 16
    stage 1: 16x16, 32
    stage 2:  8x8,  64

    The output of the last stage is not classified; it is handed to
    features_pyramid and the resulting list of feature maps becomes the
    model output.

    # Arguments
        input_shape (tensor): Shape of input image tensor
        depth (int): Number of core convolutional layers, must be 6n+2
        num_classes (int): Unused. Kept only so that existing positional
            callers keep working.
        n_layers (int): Forwarded to features_pyramid to control how many
            pyramid levels are generated

    # Returns
        model (Model): Keras model instance whose outputs are the
            features pyramid

    # Raises
        ValueError: if depth is not of the form 6n+2
    """
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    # Instantiate the stack of residual units
    for stack in range(3):
        for res_block in range(num_res_blocks):
            # only the first block of stages 1 and 2 downsamples
            downsample = stack > 0 and res_block == 0
            strides = 2 if downsample else 1
            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            if downsample:
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = Add()([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # feature maps; n_layers used to be an undefined name here, it is now
    # a keyword parameter of this function
    outputs = features_pyramid(x, n_layers)

    # instantiate model
    name = 'ResNet%dv1' % (depth)
    model = Model(inputs=inputs,
                  outputs=outputs,
                  name=name)
    return model

In [5]:
def resnet_v2(input_shape, depth, n_layers=4):
    """ResNet Version 2 backbone builder [b]

    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as
    bottleneck layer
    First shortcut connection per stage is 1 x 1 Conv2D.
    Second and onwards shortcut connection is identity.
    At the beginning of each stage (except the first), the feature map size
    is halved (downsampled) by a convolutional layer with strides=2, while
    the number of filter maps is doubled. Within each stage, the layers have
    the same number of filters and the same feature map sizes.
    Features maps sizes (for a 32x32 input):
    conv1  : 32x32,  16
    stage 0: 32x32,  64
    stage 1: 16x16, 128
    stage 2:  8x8,  256

    The output of the last stage goes through BN-ReLU and is then handed to
    features_pyramid; the resulting list of feature maps is the model output.

    # Arguments
        input_shape (tensor): Shape of input image tensor
        depth (int): Number of core convolutional layers, must be 9n+2
            with n >= 1
        n_layers (int): Forwarded to features_pyramid to control how many
            pyramid levels are generated

    # Returns
        model (Model): Keras model instance whose outputs are the
            features pyramid

    # Raises
        ValueError: if depth is not of the form 9n+2 or is smaller than 11
    """
    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
    if depth < 11:
        # n <= 0 builds no residual block, which would leave
        # num_filters_out undefined further down
        raise ValueError('depth should be at least 11 (9n+2 with n >= 1)')
    # Start model definition.
    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9)

    inputs = Input(shape=input_shape)
    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in,
                     conv_first=True)

    # Instantiate the stack of residual units
    for stage in range(3):
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0:
                num_filters_out = num_filters_in * 4
                if res_block == 0:  # first layer and first stage
                    # the stem already ended with BN-ReLU
                    activation = None
                    batch_normalization = False
            else:
                num_filters_out = num_filters_in * 2
                if res_block == 0:  # first layer but not first stage
                    strides = 2    # downsample

            # bottleneck residual unit
            y = resnet_layer(inputs=x,
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = Add()([x, y])

        num_filters_in = num_filters_out

    # v2 has BN-ReLU before the feature maps are extracted
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # x is the main feature map; features_pyramid appends progressively
    # smaller maps derived from it
    outputs = features_pyramid(x, n_layers)

    # instantiate model.
    name = 'ResNet%dv2' % (depth)
    model = Model(inputs=inputs,
                  outputs=outputs,
                  name=name)
    return model

In [6]:
def features_pyramid(x, n_layers):
    """Derive a list of progressively smaller feature maps from the
    output of a backbone network (e.g. ResNetv1 or v2)

    The list starts with x itself, followed by a 2x2 average-pooled copy,
    followed by (n_layers - 1) maps each produced by a stride-2 conv_layer
    applied to the previous entry.

    Arguments:
        x (tensor): Output feature maps of a backbone network
        n_layers (int): Controls the number of pyramid levels; the
            returned list holds n_layers + 1 tensors when n_layers >= 1

    Return:
        outputs (list): Features pyramid
    """
    pooled = AveragePooling2D(pool_size=2, name='pool1')(x)
    outputs = [x, pooled]
    n_filters = 512

    # remaining levels: each one halves the previous map with a strided conv
    for level in range(2, n_layers + 1):
        outputs.append(conv_layer(outputs[-1],
                                  n_filters,
                                  kernel_size=3,
                                  strides=2,
                                  use_maxpool=False,
                                  postfix="_layer" + str(level)))

    return outputs

In [7]:
def build_resnet(input_shape,
                 n_layers=4,
                 version=2,
                 n=6):
    """Build a resnet as backbone

    # Arguments:
        input_shape (list): Input image size and channels, (h, w, 3)
        n_layers (int): Number of feature layers, forwarded to the
            selected builder
        version (int): 1 for ResNetv1, 2 for ResNetv2 (default)
        n (int): Determines the network depth: 6n+2 for v1 and
                 9n+2 for v2 (the default n=6 gives depth 56 for v2)

    # Returns
        model (Keras Model)

    # Raises
        ValueError: if version is neither 1 nor 2
    """
    # reject unknown versions up front instead of failing later on an
    # undefined depth
    if version not in (1, 2):
        raise ValueError('version should be 1 or 2, got %r' % (version,))

    # computed depth from supplied model parameter n
    if version == 1:
        return resnet_v1(input_shape=input_shape,
                         depth=n * 6 + 2,
                         n_layers=n_layers)
    return resnet_v2(input_shape=input_shape,
                     depth=n * 9 + 2,
                     n_layers=n_layers)

In [8]:
import cv2

def data_generation(keys, data_path, label_path, image_size=(640, 480)):
    """Generate train data: images and
    segmentation ground truth labels

    Arguments:
        keys (array): Image filenames; the same filename is looked up
            in both data_path and label_path
        data_path (str): Directory holding the input images
        label_path (str): Directory holding the label images
        image_size (tuple): (width, height) every image and label is
            resized to, in the order cv2.resize expects

    Returns:
        x (array): Batch of RGB images
        y (array): Batch of label images converted to float

    Raises:
        IOError: if an input image cannot be read
    """
    # a batch of images
    x = []
    # and their corresponding segmentation masks
    y = []

    for key in keys:
        image_path = os.path.join(data_path, key)
        # load the image using OpenCV
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of
            # raising; fail here with the offending path
            raise IOError("could not read image: %s" % image_path)
        # image_size is (width, height), so the default yields arrays
        # with 480 rows and 640 columns
        image = cv2.resize(image, image_size, interpolation=cv2.INTER_LINEAR)
        # convert the image from BGR to RGB format
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        x.append(image)

        # and its corresponding label (segmentation mask)
        mask_path = os.path.join(label_path, key)
        label = skimage.img_as_float(imread(mask_path))
        # nearest-neighbour keeps label values unmixed when resizing
        label = cv2.resize(label, image_size, interpolation=cv2.INTER_NEAREST)
        y.append(label)

    return np.array(x), np.array(y)

In [9]:
def conv_layer(inputs,
               filters=32,
               kernel_size=3,
               strides=1,
               use_maxpool=True,
               postfix=None,
               activation=None):
    """Helper function to build Conv2D-BN-ReLU layer
        with optional MaxPooling2D.

    Arguments:
        inputs (tensor): Input tensor
        filters (int): Number of Conv2D filters
        kernel_size (int): Conv2D kernel size
        strides (int): Conv2D strides
        use_maxpool (bool): Append a MaxPooling2D layer when True
        postfix (str): Suffix used to build the layer names
            ("conv_", "bn_", "relu_" and "pool" + postfix). When None,
            the layers get Keras' automatically generated names.
        activation: Currently ignored -- ReLU is always applied. Kept in
            the signature for compatibility with existing callers.

    Returns:
        x (tensor): Output tensor
    """

    def _layer_name(prefix):
        # name=None lets Keras pick a unique name; concatenating a str
        # with the default postfix=None would raise TypeError
        return None if postfix is None else prefix + postfix

    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=strides,
               kernel_initializer='he_normal',
               name=_layer_name("conv_"),
               padding='same')(inputs)
    x = BatchNormalization(name=_layer_name("bn_"))(x)
    x = Activation('relu', name=_layer_name('relu_'))(x)
    if use_maxpool:
        x = MaxPooling2D(name=_layer_name('pool'))(x)
    return x

In [10]:
def tconv_layer(inputs,
                filters=32,
                kernel_size=3,
                strides=2,
                postfix=None):
    """Helper function to build Conv2DTranspose-BN-ReLU
        layer

    Arguments:
        inputs (tensor): Input tensor
        filters (int): Number of Conv2DTranspose filters
        kernel_size (int): Conv2DTranspose kernel size
        strides (int): Conv2DTranspose strides (the default of 2
            doubles the spatial size with 'same' padding)
        postfix (str): Suffix used to build the layer names
            ("tconv_", "bn_" and "relu_" + postfix). When None, the
            layers get Keras' automatically generated names.

    Returns:
        x (tensor): Output tensor
    """

    def _layer_name(prefix):
        # name=None lets Keras pick a unique name; concatenating a str
        # with the default postfix=None would raise TypeError
        return None if postfix is None else prefix + postfix

    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        strides=strides,
                        padding='same',
                        kernel_initializer='he_normal',
                        name=_layer_name('tconv_'))(inputs)
    x = BatchNormalization(name=_layer_name("bn_"))(x)
    x = Activation('relu', name=_layer_name('relu_'))(x)
    return x

In [11]:
#close enough backbone
def build_fcn(input_shape, backbone):
    """Assemble a two-headed FCN on top of a features-pyramid backbone.

    Every pyramid level after the first goes through a 256-filter
    conv_layer and is brought to the spatial size of the first level
    (bilinear upsampling, or a plain 3x3 Conv2D when the sizes already
    match). All levels are concatenated and fed to two heads, each made of
    two stride-2 tconv_layer blocks, a 1x1 Conv2DTranspose and a sigmoid:
    one head with a single output channel, one with FEATURE_DIM channels.

    Arguments:
        input_shape (tuple): Shape of the input image tensor
        backbone (Model): A backbone network
            such as ResNetv2 or v1, returning a list of feature maps

    Returns:
        model (Model): Keras model named "fcn" with two outputs,
            the 1-channel map and the FEATURE_DIM-channel map
    """

    inputs = Input(shape=input_shape)
    pyramid = backbone(inputs)

    # the first pyramid level defines the reference resolution
    main_feature = pyramid[0]
    out_features = [main_feature]

    # bring every remaining level to the resolution of main_feature;
    # layer names carry 8, 16, 32, ... for successive levels
    for level, feature in enumerate(pyramid[1:]):
        postfix = "fcn_" + str(8 * 2 ** level)
        feature = conv_layer(feature,
                             filters=256,
                             use_maxpool=False,
                             postfix=postfix)
        postfix = postfix + "_up2d"
        if feature.shape[1] == main_feature.shape[1]:
            feature = Conv2D(256, kernel_size=3, strides=1, padding="same", name=postfix)(feature)
        else:
            scale = (main_feature.shape[1] // feature.shape[1],
                     main_feature.shape[2] // feature.shape[2])
            feature = UpSampling2D(size=scale,
                                   interpolation='bilinear',
                                   name=postfix)(feature)
        out_features.append(feature)

    # concatenate all resized features; the second head starts from an
    # identity copy of the merged tensor
    merged = Concatenate()(out_features)
    instance_input = tf.identity(merged)

    def _head(tensor, names, n_channels):
        # two x2 upsampling blocks, then a 1x1 pixel-wise sigmoid layer
        up_first, up_second, pre_activation, classification = names
        tensor = tconv_layer(tensor, 256, postfix=up_first)
        tensor = tconv_layer(tensor, 256, postfix=up_second)
        tensor = Conv2DTranspose(filters=n_channels,
                                 kernel_size=1,
                                 strides=1,
                                 padding='same',
                                 kernel_initializer='he_normal',
                                 name=pre_activation)(tensor)
        return Activation('sigmoid', name=classification)(tensor)

    # binary segmentation branch
    x = _head(merged,
              ("up_x2", "up_x4", "pre_activation_x", "classification_x"),
              1)
    # instance segmentation branch
    y = _head(instance_input,
              ("up_y2", "up_y4", "pre_activation_y", "classification_y"),
              FEATURE_DIM)

    model = Model(inputs, (x,y), name="fcn")

    return model

In [12]:
def binary_loss(binary_seg_logits, binary_label):
    """Mean sigmoid cross-entropy between logits and binary labels.

    Arguments:
        binary_seg_logits (tensor): Raw (pre-sigmoid) scores
        binary_label (tensor): Target labels, same shape as the logits

    Returns:
        Scalar tensor, the cross-entropy averaged over all elements

    NOTE(review): the argument order is (prediction, target) and the
    prediction is treated as logits. Keras calls loss functions as
    fn(y_true, y_pred), so handing this function directly to
    Model.compile feeds the labels in as logits.
    """
    per_element = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=binary_seg_logits,
        labels=binary_label)
    return tf.reduce_mean(per_element)

In [13]:
def lanenet_loss(correct_label, instance_seg_logits):
    """Discriminative loss on per-pixel embeddings.

    Pixels are grouped by their label value. The loss is the weighted sum
    of three terms:
      * l_var:  pulls every pixel embedding towards the mean of its group
                (only distances above delta_v are penalised),
      * l_dist: pushes the means of different groups apart (only pairs
                closer than 2 * delta_d are penalised),
      * l_reg:  keeps the L1 norm of the means small.
    The terms are weighted by the module-level param_var, param_dist and
    param_reg. All distances are L1 norms.

    Arguments:
        correct_label (tensor): Instance id of every pixel
        instance_seg_logits (tensor): Embeddings with FEATURE_DIM values
            per pixel, one per entry of correct_label

    Returns:
        loss (tensor): Scalar loss

    NOTE(review): both inputs are flattened across the whole batch, so equal
    label values in different images of a batch are treated as one group.
    """
    # one row per pixel
    labels = tf.reshape(correct_label, [-1])
    embeddings = tf.cast(tf.reshape(instance_seg_logits, (-1, FEATURE_DIM)),
                         tf.float32)

    # unique_id maps every pixel to the position of its label in
    # unique_labels; counts is ordered the same way
    unique_labels, unique_id, counts = tf.unique_with_counts(labels)
    counts = tf.cast(counts, tf.float32)
    num_instances = tf.size(unique_labels)

    # Per-group mean embedding. The sum must be indexed by unique_id (not
    # by the raw label value) so that its rows line up with counts and
    # with the tf.gather below, whatever label values are present.
    segmented_sum = tf.math.unsorted_segment_sum(embeddings,
                                                 unique_id,
                                                 num_instances)
    mu = tf.math.divide(segmented_sum, tf.reshape(counts, (-1, 1)))
    mu_expand = tf.gather(mu, unique_id)

    # variance term: hinge on the distance of each pixel to its group mean
    distance = tf.norm(tf.subtract(mu_expand, embeddings), axis=1, ord=1)
    distance = tf.square(tf.maximum(distance - delta_v, 0.))

    l_var = tf.math.unsorted_segment_sum(distance, unique_id, num_instances)
    l_var = tf.math.divide(l_var, counts)
    l_var = tf.math.reduce_sum(l_var)
    l_var = tf.math.divide(l_var, tf.cast(num_instances, tf.float32))

    # distance term: all ordered pairs (i, j) of means, row i * n + j
    # holds mu[i] - mu[j]
    mu_interleaved_rep = tf.tile(mu, [num_instances, 1])
    mu_band_rep = tf.tile(mu, [1, num_instances])
    mu_band_rep = tf.reshape(mu_band_rep,
                             (num_instances * num_instances, FEATURE_DIM))
    mu_diff = tf.math.subtract(mu_band_rep, mu_interleaved_rep)

    # drop the pairs with zero difference (this removes the i == j pairs)
    pair_l1 = tf.math.reduce_sum(tf.math.abs(mu_diff), axis=1)
    bool_mask = tf.math.not_equal(pair_l1, tf.zeros((1,), dtype=tf.float32))
    mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask)

    mu_norm = tf.norm(mu_diff_bool, axis=1, ord=1)
    mu_norm = tf.math.square(tf.maximum(2. * delta_d - mu_norm, 0.))

    # mean over the remaining pairs; with a single group there is no pair
    # left and a plain reduce_mean would return NaN, so use 0 in that case
    num_pairs = tf.cast(tf.size(mu_norm), tf.float32)
    l_dist = tf.math.divide_no_nan(tf.math.reduce_sum(mu_norm), num_pairs)

    # regularisation term
    l_reg = tf.reduce_mean(tf.norm(mu, axis=1, ord=1))

    param_scale = 1.
    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    loss = param_scale * (l_var + l_dist + l_reg)

    return loss

In [14]:
# Image files are named with a zero-padded four digit index:
# 0000.png, 0001.png, ...
image_names = keys = ["%04d.png" % i for i in range(num_images)]
train_data, train_label = data_generation(keys, data_path, label_path)

print("train label shape: ", train_label.shape)

# Placeholder instance labels: random ids in [0, FEATURE_DIM) with the same
# height and width as the loaded images.
# TODO: replace with the real instance ground truth.
image_shape = train_data.shape[1:3]
images = np.random.randint(0,
                           FEATURE_DIM,
                           size=(num_images,) + tuple(image_shape),
                           dtype=np.int32)

# Build the model
input_shape = train_data[0].shape
backbone = build_resnet(input_shape, n_layers=2, version=2, n=6)
model = build_fcn(input_shape, backbone)

# Compile the model
optimizer = Adam(learning_rate=1e-4)
# Keras calls every loss as fn(y_true, y_pred) and the binary head of the
# model already ends with a sigmoid, so its output is a probability. Use the
# probability-based binary cross-entropy for it; binary_loss expects
# (logits, labels) and therefore does not fit this calling convention.
binary_segmentation_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
model.compile(optimizer=optimizer,
              loss=[binary_segmentation_loss, lanenet_loss])

# Train the model
batch_size = 1
epochs = 10
# give the binary labels an explicit channel axis so they have the same
# rank as the single-channel output of the binary head
binary_labels = train_label[..., np.newaxis]
history = model.fit(train_data,
                    (binary_labels, images),
                    batch_size=batch_size,
                    epochs=epochs)

train label shape:  (4, 480, 640)




Epoch 1/10
Epoch 2/10


KeyboardInterrupt: ignored