In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import tensorflow as tf
import datetime
import matplotlib.pyplot as plt
import seaborn as sb
import segmentation_models as sm
import functools as ft
import pandas as pd
import numpy as np
import tensorflow.keras as keras
import time

# One seed value shared by every seeded operation in this notebook.
SEED = 1024

# Point segmentation_models at the tf.keras backend; the value returned by
# sm.framework() is not used here.
sm.set_framework("tf.keras")
sm.framework()

# Seed TensorFlow's global random generator with the same value.
tf.random.set_seed(SEED)


Segmentation Models: using `keras` framework.


In [3]:
def get_image_decode(image, label):
    """Read an image file and its label file from disk and decode both.

    Args:
        image: scalar string tensor holding the path of the input image.
        label: scalar string tensor holding the path of the colour-coded
            label image.

    Returns:
        Tuple ``(image, label)`` of decoded tensors. Both are forced to three
        channels and to a single frame, so they are always rank 3
        (``[height, width, 3]``); the downstream colour matching in
        ``get_mask`` indexes channels 0..2 and therefore relies on this.
    """
    image = tf.io.read_file(image, name="image")
    label = tf.io.read_file(label, name="label")

    # channels=3 drops an alpha channel / expands grayscale instead of letting
    # the file decide; expand_animations=False keeps the result 3-D with a
    # known rank rather than the shapeless tensor decode_image returns by
    # default.
    image = tf.image.decode_image(image, channels=3, expand_animations=False)
    label = tf.image.decode_image(label, channels=3, expand_animations=False)

    return image, label


def decode_crop(image, label):
    """Trim the frame borders and cut image and label into a 4 x 8 tile grid.

    184 rows are removed from the top and bottom and 128 columns from the left
    and right of both tensors. The remainder is then sliced into 32 tiles of
    448 x 448 pixels, walking the grid row by row.

    Args:
        image: decoded image tensor, indexed as [row, column, ...].
        label: decoded label tensor with the same layout as ``image``.

    Returns:
        A ``tf.data.Dataset`` yielding the 32 ``(image_tile, label_tile)``
        pairs in row-major order.
    """
    tile = 448
    n_rows, n_cols = 4, 8
    trim_rows, trim_cols = 368 // 2, 256 // 2

    image = image[trim_rows:-trim_rows, trim_cols:-trim_cols]
    label = label[trim_rows:-trim_rows, trim_cols:-trim_cols]

    image_tiles, label_tiles = [], []
    for row in range(n_rows):
        for col in range(n_cols):
            rows = slice(tile * row, tile * (row + 1))
            cols = slice(tile * col, tile * (col + 1))
            image_tiles.append(image[rows, cols])
            label_tiles.append(label[rows, cols])

    return tf.data.Dataset.from_tensor_slices((image_tiles, label_tiles))


def get_mask(image, label):
    """Turn a colour-coded label image into a stack of per-class masks.

    Each class is identified by one exact colour triple. A pixel whose colour
    matches none of the triples ends up with zeros in every class channel.

    Args:
        image: image tensor; only cast to float32, otherwise untouched.
        label: label tensor indexed as [row, column, channel] with at least
            three channels.

    Returns:
        Tuple ``(image, mask)`` where both are float32 and ``mask`` has the
        class axis last, i.e. [row, column, 8].
    """
    # Channel values per class, in output-channel order.
    class_colors = (
        (0, 0, 0),  # background clutter
        (128, 0, 0),  # building
        (128, 64, 128),  # road
        (0, 128, 0),  # tree
        (128, 128, 0),  # low vegetation
        (64, 0, 128),  # moving car
        (192, 0, 192),  # static car
        (64, 64, 0),  # human
    )

    first, second, third = label[:, :, 0], label[:, :, 1], label[:, :, 2]
    masks = [
        (first == c0) & (second == c1) & (third == c2)
        for c0, c1, c2 in class_colors
    ]

    masks = tf.cast(masks, tf.float32)
    image = tf.cast(image, tf.float32)

    # The stacked masks are class-first; move the class axis to the end so the
    # mask has the same layout as the image.
    return image, tf.transpose(masks, [1, 2, 0])


def create_ds(
    batch_size,
    istrain=True,
    maximage=False,
    data_root="C:/home/dataset/uavid_v1.5_official_release",
):
    """Build the batched input pipeline for the training or validation split.

    Image and label files are discovered with recursive globs under
    ``<data_root>/uavid_train`` or ``<data_root>/uavid_val`` and paired by
    position. Both lists are sorted first, because ``glob`` gives no ordering
    guarantee and an unsorted pairing could silently attach the wrong label
    to an image.

    Args:
        batch_size: number of samples per batch.
        istrain: use the training split (and shuffle the file list each
            epoch) when True, the validation split otherwise.
        maximage: when True keep whole frames; when False every frame is cut
            into tiles by ``decode_crop``.
        data_root: directory containing the ``uavid_train`` and ``uavid_val``
            folders.

    Returns:
        A ``tf.data.Dataset`` of ``(image, mask)`` batches as produced by
        ``get_mask``.

    Raises:
        FileNotFoundError: if no image files are found for the split.
        ValueError: if the number of images and labels differs.
    """
    AUTOTUNE = tf.data.AUTOTUNE
    split = "uavid_train" if istrain else "uavid_val"

    images = sorted(
        glob.glob(f"{data_root}/{split}/**/Images/*.png", recursive=True)
    )
    labels = sorted(
        glob.glob(f"{data_root}/{split}/**/Labels/*.png", recursive=True)
    )

    if not images:
        raise FileNotFoundError(
            f"no images found under {data_root}/{split}/**/Images/*.png"
        )
    if len(images) != len(labels):
        raise ValueError(
            f"found {len(images)} images but {len(labels)} labels in {split}"
        )

    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    ds = ds.cache()
    if istrain:
        # Shuffles file paths (not tiles), reseeded from the notebook SEED.
        ds = ds.shuffle(6400, SEED, reshuffle_each_iteration=True)
    ds = ds.map(get_image_decode, AUTOTUNE)

    if not maximage:
        ds = ds.flat_map(decode_crop)

    ds = ds.map(get_mask, AUTOTUNE)

    # batch and prefetch
    ds = ds.batch(batch_size)
    if istrain:
        ds = ds.prefetch(AUTOTUNE)

    return ds


In [4]:
def create_backbone_efficient():
    """Wrap EfficientNetB0 as a feature extractor with four outputs.

    The classifier head is left out (``include_top=False``). The returned
    model maps the EfficientNet input to the outputs of the four named
    activation layers, in the order those layers appear in the network.

    Returns:
        A ``tf.keras.Model`` named ``"efficientb0_backbone"``.
    """
    tap_names = (
        "block2a_activation",
        "block3a_activation",
        "block5a_activation",
        "block7a_activation",
    )

    efficientnet = keras.applications.EfficientNetB0(include_top=False)

    feature_maps = []
    for layer in efficientnet.layers:
        if layer.name in tap_names:
            feature_maps.append(layer.output)

    return tf.keras.Model(
        inputs=[efficientnet.input],
        outputs=feature_maps,
        name="efficientb0_backbone",
    )


In [5]:
class FPN(tf.keras.layers.Layer):
    """Feature pyramid built top-down over four backbone feature maps.

    Each backbone level is projected to 256 channels by a 1x1 convolution,
    merged with the 2x-upsampled level above it by addition, and then passed
    through two 3x3 ReLU convolutions that output 128 channels.
    """

    def __init__(self, backbone=None, **kwargs):
        """Create the backbone and the pyramid convolutions.

        Args:
            backbone: optional model returning four feature maps ordered from
                highest to lowest resolution. When None, the EfficientNetB0
                extractor from ``create_backbone_efficient`` is built.
            **kwargs: forwarded to ``tf.keras.layers.Layer``. ``name``
                defaults to ``"Feature_Pyramid_Network"`` but may be
                overridden.
        """
        # setdefault lets a caller pass name=... without a duplicate-keyword
        # TypeError.
        kwargs.setdefault("name", "Feature_Pyramid_Network")
        super().__init__(**kwargs)

        # Honour a caller-supplied backbone instead of silently discarding it.
        self.backbone = (
            backbone if backbone is not None else create_backbone_efficient()
        )

        def lateral():
            # 1x1 projection of a backbone map to the shared 256 channels.
            return tf.keras.layers.Conv2D(
                filters=256, kernel_size=(1, 1), padding="same"
            )

        def smooth():
            # 3x3 ReLU convolution applied after the merge.
            return tf.keras.layers.Conv2D(
                filters=128, kernel_size=(3, 3), padding="same", activation="relu"
            )

        # Attribute names and creation order are kept stable so previously
        # saved object-based checkpoints should still match.
        self.conv5_1x1 = lateral()
        self.conv4_1x1 = lateral()
        self.conv3_1x1 = lateral()
        self.conv2_1x1 = lateral()
        self.conv5_3x3_1 = smooth()
        self.conv5_3x3_2 = smooth()
        self.conv4_3x3_1 = smooth()
        self.conv4_3x3_2 = smooth()
        self.conv3_3x3_1 = smooth()
        self.conv3_3x3_2 = smooth()
        self.conv2_3x3_1 = smooth()
        self.conv2_3x3_2 = smooth()
        self.upscale = tf.keras.layers.UpSampling2D(size=(2, 2))

    def call(self, images, training=False):
        """Compute the four pyramid levels for a batch of images.

        Args:
            images: input batch handed straight to the backbone.
            training: accepted for Keras compatibility; the backbone is always
                run with ``training=False``.

        Returns:
            Tuple ``(conv5_p, conv4_p, conv3_p, conv2_p)`` ordered from the
            coarsest to the finest level, each with 128 channels.
        """
        # 112x112, 56x56, 28x28, 14x14 (author's note; presumably for
        # 448x448 input tiles)
        conv2, conv3, conv4, conv5 = self.backbone(images, training=False)

        # Coarsest level: no level above it to merge with.
        conv5_m = self.conv5_1x1(conv5)
        conv5_p = self.conv5_3x3_1(conv5_m)
        conv5_p = self.conv5_3x3_2(conv5_p)

        # Each finer level adds the upsampled merged map from the level above
        # (the *_m tensors, not the smoothed *_p outputs).
        conv4_m = self.upscale(conv5_m) + self.conv4_1x1(conv4)
        conv4_p = self.conv4_3x3_1(conv4_m)
        conv4_p = self.conv4_3x3_2(conv4_p)

        conv3_m = self.upscale(conv4_m) + self.conv3_1x1(conv3)
        conv3_p = self.conv3_3x3_1(conv3_m)
        conv3_p = self.conv3_3x3_2(conv3_p)

        conv2_m = self.upscale(conv3_m) + self.conv2_1x1(conv2)
        conv2_p = self.conv2_3x3_1(conv2_m)
        conv2_p = self.conv2_3x3_2(conv2_p)

        return conv5_p, conv4_p, conv3_p, conv2_p


class FCN(tf.keras.Model):
    """Segmentation head that fuses all FPN levels at the finest resolution.

    The three coarser pyramid outputs are upsampled by 2x, 4x and 8x to the
    size of the finest one, concatenated, reduced by a 3x3 and a 1x1
    convolution to ``n_classes`` channels, and finally enlarged 4x with
    bilinear interpolation.
    """

    def __init__(self, n_classes=8, backbone=None, **kwargs):
        """Create the pyramid and the fusion layers.

        Args:
            n_classes: number of output channels of the final 1x1 convolution.
            backbone: passed through to ``FPN``.
            **kwargs: forwarded to ``tf.keras.Model``.
        """
        super().__init__(name="FCN", **kwargs)
        self.fpn = FPN(backbone)
        self.upscale_2x = tf.keras.layers.UpSampling2D(size=(2, 2))
        self.upscale_4x = tf.keras.layers.UpSampling2D(size=(4, 4))
        self.upscale_8x = tf.keras.layers.UpSampling2D(size=(8, 8))
        self.concat = tf.keras.layers.Concatenate()
        self.conv6 = tf.keras.layers.Conv2D(
            filters=512,
            kernel_size=(3, 3),
            padding="same",
            activation="relu",
        )
        self.conv7 = tf.keras.layers.Conv2D(
            filters=n_classes,
            kernel_size=(1, 1),
            padding="same",
            activation="relu",
        )
        self.upscale_final = tf.keras.layers.UpSampling2D(
            size=(4, 4), interpolation="bilinear"
        )

    def call(self, images, training=False):
        """Return the per-class score map for a batch of images.

        Args:
            images: input batch, forwarded to the FPN.
            training: forwarded to the FPN.

        Returns:
            Tensor with ``n_classes`` channels, 4x the spatial size of the
            finest pyramid level.
        """
        level5, level4, level3, level2 = self.fpn(images, training=training)

        # Bring every level to the resolution of the finest one.
        from_level5 = self.upscale_8x(level5)
        from_level4 = self.upscale_4x(level4)
        from_level3 = self.upscale_2x(level3)

        fused = self.concat([level2, from_level3, from_level4, from_level5])
        fused = self.conv6(fused)
        scores = self.conv7(fused)

        return self.upscale_final(scores)


In [6]:
def load_model(model, path_name):
    """Restore ``model`` from the newest checkpoint found in ``path_name``.

    A throwaway Adam optimizer is attached to the checkpoint object; values in
    the checkpoint that have no counterpart are tolerated through
    ``expect_partial()``.

    Args:
        model: model whose variables should be restored.
        path_name: checkpoint directory.

    Returns:
        True when a checkpoint was found and a restore was requested, None
        when the directory holds no checkpoint.
    """
    checkpoint = tf.train.Checkpoint(model=model, optimizer=keras.optimizers.Adam())
    manager = tf.train.CheckpointManager(checkpoint, path_name, max_to_keep=5)

    latest = manager.latest_checkpoint
    if latest is not None:
        checkpoint.restore(latest).expect_partial()
        return True
    return None


In [7]:
class RescalingUnet(keras.layers.Layer):
    """Scale inputs by 1/255, then standardise them with fixed constants.

    Three mean and three standard-deviation values are applied along the last
    axis via broadcasting.
    NOTE(review): the constants look like the usual ImageNet channel
    statistics - confirm against the Unet encoder's expected preprocessing.
    """

    def __init__(self):
        super().__init__()
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

    def call(self, inputs):
        """Return ``(inputs / 255 - mean) / std`` for the given batch."""
        unit_range = inputs * (1 / 255.0)
        centered = unit_range - self.mean
        return centered / self.std


In [8]:
def combined_model(mode="multi", n_classes=8):
    """Build the two-branch network: Unet and FPN-based FCN fused by a 1x1 conv.

    Both branches see the same input. The Unet branch receives it through
    ``RescalingUnet``, the FCN branch receives it unchanged. Their outputs are
    concatenated on the channel axis and mapped to ``n_classes`` softmax
    channels.

    Args:
        mode: currently unused.
        n_classes: number of classes produced by each branch and by the
            fused output.

    Returns:
        A ``keras.Model`` taking images with three channels and arbitrary
        spatial size.
    """
    unet_branch = sm.Unet(
        backbone_name="efficientnetb0",
        encoder_weights="imagenet",
        encoder_freeze=False,
        classes=n_classes,
        decoder_use_batchnorm=False,
    )
    fpn_branch = FCN(n_classes)

    # Pre-loading branch weights with load_model() (e.g. from
    # "trained_model/fcn8s" or "trained_model/fpn") is currently disabled;
    # both branches start from their constructors' initial weights.

    fuse = keras.layers.Conv2D(n_classes, 1, padding="same", activation="softmax")
    normalize = RescalingUnet()
    stack = keras.layers.Concatenate()
    rgb_input = keras.layers.Input([None, None, 3])

    unet_scores = unet_branch(normalize(rgb_input))
    fpn_scores = fpn_branch(rgb_input)
    stacked_scores = stack([unet_scores, fpn_scores])
    fused_scores = fuse(stacked_scores)

    return keras.Model([rgb_input], [fused_scores])


In [9]:
# Training configuration and shared objects used by the training loop cell.
# The epoch count is said to be derived from the 160 iterations used in the
# paper. NOTE(review): the derivation itself is not shown in this notebook.
n_epoch = 60
n_classes = 8
batch_size = 8
# Training split (shuffled, tiled) and validation split (second argument is
# istrain=False).
trainds = create_ds(batch_size)
testds = create_ds(batch_size, False)

# Built with the function's defaults; n_classes above is not passed in.
model = combined_model()

# The training loop sums the dice loss and the focal loss.
optimizer = keras.optimizers.Adam(1e-4)
focal_loss = sm.losses.CategoricalFocalLoss()
dice_loss = sm.losses.DiceLoss()

# Track model and optimizer together, keep the 5 most recent checkpoints, and
# resume from the newest one in the directory if there is one.
ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
ckptmg = tf.train.CheckpointManager(ckpt, f"trained_model/unetfpnconcat", 5)
ckptmg.restore_or_initialize()

# Separate TensorBoard writers for training and validation, grouped under one
# timestamped run directory.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = f"logs/unetfpnconcat/{current_time}/train"
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_log_dir = f"logs/unetfpnconcat/{current_time}/test"
test_summary_writer = tf.summary.create_file_writer(test_log_dir)


In [11]:
# Re-create both TensorBoard writers under a new timestamped run directory.
# Running this cell replaces train_summary_writer / test_summary_writer, so
# later logging goes to the new directory.
# NOTE(review): the same five statements already end the setup cell; this
# cell is only needed to start a new log run without rebuilding the model.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = f"logs/unetfpnconcat/{current_time}/train"
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_log_dir = f"logs/unetfpnconcat/{current_time}/test"
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [10]:
# Real training
# Each epoch runs one optimisation pass over trainds, then one evaluation pass
# over testds, logs both to TensorBoard and saves a checkpoint.
train_iteration = 0  # global optimisation step, also used as the summary step

ALPHA = 1.0  # weight of the focal loss relative to the dice loss

for epoch in range(n_epoch):
    initial_time = time.time()
    for bs_images, bs_labels in trainds:
        with tf.GradientTape() as t:
            output = model(bs_images, training=True)
            c_loss = dice_loss(bs_labels, output)
            c_loss += ALPHA * focal_loss(bs_labels, output)

        grad = t.gradient(c_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grad, model.trainable_variables))
        train_iteration += 1

        # calculate loss and IoU at iteration
        # this is train
        with train_summary_writer.as_default():
            tf.summary.scalar("loss", c_loss, step=train_iteration)
            tf.summary.scalar(
                "iou", sm.metrics.iou_score(bs_labels, output), step=train_iteration
            )

    # The reported time covers the training pass only, not validation.
    print(f"Epoch: {epoch + 1}, Time Taken: {round(time.time() - initial_time, 3)}s")

    # Validation accumulators start from zero every epoch, so no training loss
    # can leak into the validation average.
    iteration = 0
    sum_iou = 0
    sum_loss = 0

    for bs_images, bs_labels in testds:
        # Weight by the real batch length; the last batch may be shorter than
        # batch_size.
        n_samples = int(bs_images.shape[0])
        output = model(bs_images, training=False)
        sum_loss += (
            dice_loss(bs_labels, output) + ALPHA * focal_loss(bs_labels, output)
        ) * n_samples
        sum_iou += sm.metrics.iou_score(bs_labels, output) * n_samples
        iteration += n_samples

    # calculate validation loss and IoU
    # this is test
    if iteration:  # skip logging instead of dividing by zero on an empty set
        with test_summary_writer.as_default():
            tf.summary.scalar("loss", sum_loss / iteration, step=train_iteration)
            tf.summary.scalar("iou", sum_iou / iteration, step=train_iteration)

    ckptmg.save()


Epoch: 1, Time Taken: 663.382s
Epoch: 2, Time Taken: 664.051s
Epoch: 3, Time Taken: 665.083s
Epoch: 4, Time Taken: 693.484s
Epoch: 5, Time Taken: 723.736s
Epoch: 6, Time Taken: 724.578s
Epoch: 7, Time Taken: 726.217s
Epoch: 8, Time Taken: 726.62s
Epoch: 9, Time Taken: 727.488s
Epoch: 10, Time Taken: 731.218s
Epoch: 11, Time Taken: 729.84s
Epoch: 12, Time Taken: 730.235s
Epoch: 13, Time Taken: 730.341s
Epoch: 14, Time Taken: 729.29s
Epoch: 15, Time Taken: 729.521s
Epoch: 16, Time Taken: 726.705s
Epoch: 17, Time Taken: 726.623s
Epoch: 18, Time Taken: 725.359s
Epoch: 19, Time Taken: 724.372s
Epoch: 20, Time Taken: 720.824s
Epoch: 21, Time Taken: 708.812s
Epoch: 22, Time Taken: 705.552s
Epoch: 23, Time Taken: 704.563s
Epoch: 24, Time Taken: 701.71s
Epoch: 25, Time Taken: 710.162s
Epoch: 26, Time Taken: 717.575s
Epoch: 27, Time Taken: 719.901s
Epoch: 28, Time Taken: 722.488s
Epoch: 29, Time Taken: 723.69s
Epoch: 30, Time Taken: 726.019s
Epoch: 31, Time Taken: 727.48s
Epoch: 32, Time Taken: 

KeyboardInterrupt: 

# CHANGE
2) Changed the learning rate to 1e-4 (0.0001).

# Label classes and the colour triple that marks each one in the label images

1. Background Clutter (0, 0, 0)
2. Building           (128, 0, 0)
3. Road               (128, 64, 128)
4. Tree               (0, 128, 0)
5. Low Vegetation     (128, 128, 0)
6. Moving Car         (64, 0, 128)
7. Static Car         (192, 0, 192)
8. Human              (64, 64, 0)