In [1]:
!pip install scikit-image
!pip install tqdm
!pip install tensorflow
import os
import glob
import time
import numpy as np
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
from skimage.color import rgb2lab, lab2rgb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Download COCO Dataset

In [None]:
# Fetch the COCO 2017 train images, val images and train/val annotations.
# -O stores each archive under a fixed local name; the extraction cell appends
# ".zip" to those same stems. The logged output reports the train archive at ~18G.
!wget http://images.cocodataset.org/zips/train2017.zip -O coco_train2017.zip
!wget http://images.cocodataset.org/zips/val2017.zip -O coco_val2017.zip
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -O coco_ann2017.zip

--2023-04-14 23:49:57--  http://images.cocodataset.org/zips/train2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 54.231.139.33, 54.231.132.121, 3.5.29.200, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|54.231.139.33|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19336861798 (18G) [application/zip]
Saving to: ‘coco_train2017.zip’


2023-04-14 23:56:43 (45.4 MB/s) - ‘coco_train2017.zip’ saved [19336861798/19336861798]

--2023-04-14 23:56:43--  http://images.cocodataset.org/zips/val2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.22.19, 52.217.205.41, 52.217.132.65, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.22.19|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 815585330 (778M) [application/zip]
Saving to: ‘coco_val2017.zip’


2023-04-14 23:57:01 (44.7 MB/s) - ‘coco_val2017.zip’ saved [815585330/815585330]

--2023-04-14 23:57:01--  http://i

In [None]:
# Extract the downloaded archives, then delete the zip files
from zipfile import ZipFile, BadZipFile
def extract_zip_file(extract_path):
    """Extract ``<extract_path>.zip`` into the current directory, then delete it.

    Args:
        extract_path: Path of the archive *without* its ``.zip`` suffix.

    Returns:
        None. A missing or corrupt archive is reported with ``print`` instead
        of raising, and a corrupt archive is left on disk.
    """
    zip_path = f"{extract_path}.zip"

    # Check up front: opening a missing archive raises FileNotFoundError,
    # which would otherwise escape before the "not found" message is printed.
    if not os.path.isfile(zip_path):
        print("Error: %s file not found" % zip_path)
        return

    try:
        with ZipFile(zip_path) as zfile:
            zfile.extractall(".")
    except BadZipFile as e:
        print("Error:", e)
        return

    # Only reached after a successful extraction.
    os.remove(zip_path)

# Archive stems (no ".zip" suffix) for train images, val images and annotations.
extract_train_path = "./coco_train2017"
extract_val_path = "./coco_val2017"
extract_ann_path = "./coco_ann2017"

# Unpack in the same order as before: train, val, annotations.
for archive_stem in (extract_train_path, extract_val_path, extract_ann_path):
    extract_zip_file(archive_stem)

In [None]:
!mkdir data
!cd data && git clone https://github.com/cocodataset/cocoapi
!cd data/cocoapi/PythonAPI && make

Cloning into 'cocoapi'...
remote: Enumerating objects: 975, done.[K
remote: Total 975 (delta 0), reused 0 (delta 0), pack-reused 975[K
Receiving objects: 100% (975/975), 11.72 MiB | 22.65 MiB/s, done.
Resolving deltas: 100% (576/576), done.
python setup.py build_ext --inplace
running build_ext
cythoning pycocotools/_mask.pyx to pycocotools/_mask.c
  tree = Parsing.p_module(s, pxd, full_module_name)
building 'pycocotools._mask' extension
creating build
creating build/common
creating build/temp.linux-x86_64-3.9
creating build/temp.linux-x86_64-3.9/pycocotools
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.9/dist-packages/numpy/core/include -I../common -I/usr/include/python3.9 -c ../common/maskApi.c -o build/temp.linux-x86_64-3.9/../common/mask

In [2]:
import os
import skimage.io as io
import numpy as np
import tensorflow as tf
print('TensorFlow version:', tf.__version__)

TensorFlow version: 2.12.0


In [3]:
import matplotlib.pyplot as plt
import random
!pip install livelossplot --quiet
from livelossplot.tf_keras import PlotLossesCallback

In [None]:
# Root that the annotation file path and image file paths are joined onto.
# NOTE(review): absolute path; assumes the archives were extracted while the
# working directory was /content - confirm for non-Colab runs.
COCO_ROOT = '/content'
# Directory that contains the `cocoapi` checkout.
COCO_API_ROOT = './data/'
import sys
# Put the locally built PythonAPI first on sys.path so the import below finds it.
sys.path.insert(0, os.path.join(COCO_API_ROOT, 'cocoapi/PythonAPI'))
from pycocotools.coco import COCO

In [None]:
class Dataset():
    """Base class that turns a list of items into ``tf.data`` pipelines.

    Subclasses must provide:
      * ``self.img_list`` - the sequence handed to ``from_tensor_slices``.
      * ``self.read_images(item)`` - called through ``tf.py_function``; must
        return a uint8 array whose channels ``[:3]`` are the image and whose
        channel ``[3:4]`` is the mask.
    """

    def crop_images(self, img, inp_size, random_crop=False):
        """Zero-pad ``img`` up to ``inp_size`` if needed, then crop a square.

        Args:
            img: Rank-3 tensor (height, width, channels).
            inp_size: Side length of the square crop.
            random_crop: If True take a random window, otherwise the centre.

        Returns:
            Tensor of spatial size ``inp_size x inp_size`` with all channels.
        """
        shape = tf.shape(img)
        # Pad only at the bottom/right, and only when the image is too small.
        pad = (
            [0, tf.maximum(inp_size - shape[0], 0)],
            [0, tf.maximum(inp_size - shape[1], 0)],
            [0, 0],
        )
        img = tf.pad(img, pad)

        if random_crop:
            img = tf.image.random_crop(img, (inp_size, inp_size, shape[2]))
        else:
            # Centre crop, using the shape after padding.
            shape = tf.shape(img)
            ho = (shape[0] - inp_size) // 2
            wo = (shape[1] - inp_size) // 2
            img = img[ho:ho+inp_size, wo:wo+inp_size, :]

        return img

    def _item_to_images(self, item, inp_size, random_crop):
        """Load one item and split it into a float image and a float mask.

        Shared by ``train_dataset`` and ``val_dataset``, which differ only in
        the ``random_crop`` flag.
        """
        img_combined = tf.py_function(self.read_images, [item], tf.uint8)
        img_combined = self.crop_images(img_combined, inp_size, random_crop)

        # Image channels are scaled to [0, 1]; the mask is cast but not scaled.
        img = tf.cast(img_combined[..., :3], tf.float32) / np.float32(255.)
        mask_class = tf.cast(img_combined[..., 3:4], tf.float32)
        return img, mask_class

    def train_dataset(self, batch_size, epochs, inp_size):
        """Shuffled, randomly cropped, repeated and batched training pipeline.

        Args:
            batch_size: Elements per batch (incomplete batches are dropped).
            epochs: Number of times the mapped dataset is repeated.
            inp_size: Side length of the square crops.

        Returns:
            ``tf.data.Dataset`` yielding ``(images, masks)`` batches.
        """
        dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
        # Shuffling happens on the items, before any image is loaded.
        dataset = dataset.shuffle(buffer_size=len(self.img_list))
        dataset = dataset.map(
            lambda item: self._item_to_images(item, inp_size, True))
        dataset = dataset.repeat(epochs)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        # Prepare the next batch while the current one is being consumed.
        dataset = dataset.prefetch(tf.data.AUTOTUNE)

        return dataset

    def val_dataset(self, batch_size, inp_size):
        """Deterministic, centre-cropped and batched validation pipeline.

        Args:
            batch_size: Elements per batch; ``drop_remainder=True`` discards a
                final incomplete batch.
            inp_size: Side length of the square crops.

        Returns:
            ``tf.data.Dataset`` yielding ``(images, masks)`` batches.
        """
        dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
        dataset = dataset.map(
            lambda item: self._item_to_images(item, inp_size, False))
        dataset = dataset.batch(batch_size, drop_remainder=True)
        dataset = dataset.prefetch(tf.data.AUTOTUNE)

        return dataset

In [None]:
class COCO_Dataset(Dataset):
    """COCO 2017 images that contain the 'person' category, with binary masks."""

    def __init__(self, sublist, percent=1):
        """Load the annotation index and pick the image ids to use.

        Args:
            sublist: Split name inserted into ``instances_<sublist>2017.json``
                (the notebook passes 'train' and 'val').
            percent: Fraction of the matching image ids to keep. The subset is
                drawn with ``random.sample``; no seed is set here.
        """
        ann_file_fpath = os.path.join(COCO_ROOT, 'annotations', 'instances_'+sublist+'2017.json')
        self.coco = COCO(ann_file_fpath)
        self.cat_ids = self.coco.getCatIds(catNms=['person'])
        self.img_list = self.coco.getImgIds(catIds=self.cat_ids)
        self.img_list = random.sample(self.img_list, int(len(self.img_list) * percent))

    def read_images(self, img_id):
        """Read one image and its merged person mask.

        Args:
            img_id: Scalar tensor holding a COCO image id (``.numpy()`` is
                called on it, so it must be an eager tensor).

        Returns:
            uint8 array of shape (height, width, 4): RGB in channels 0-2 and a
            0/1 mask in channel 3.
        """
        img_id = int(img_id.numpy())
        img_data = self.coco.loadImgs(img_id)[0]
        # The last two URL segments are used as the path below COCO_ROOT.
        img_fname = '/'.join(img_data['coco_url'].split('/')[-2:])

        img = io.imread(os.path.join(COCO_ROOT, img_fname))
        if img.ndim == 2:
            # Greyscale: replicate the single plane into three channels.
            img = np.tile(img[..., None], (1, 1, 3))
        elif img.shape[2] > 3:
            # Drop extra channels so that channel 3 is always the mask.
            img = img[..., :3]

        ann_ids = self.coco.getAnnIds(imgIds=img_data['id'], catIds=self.cat_ids, iscrowd=None)
        anns = self.coco.loadAnns(ann_ids)
        mask_class = np.zeros(img.shape[:2], dtype=np.uint8)
        for ann in anns:
            # Bitwise OR takes the union without the wrap-around risk of
            # summing into a uint8 buffer.
            mask_class |= self.coco.annToMask(ann).astype(np.uint8)
        mask_class = (mask_class > 0).astype(np.uint8)

        img_combined = np.concatenate([img, mask_class[..., None]], axis=2)

        return img_combined

# TernausNet Model

In [4]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, ReLU, UpSampling2D, concatenate

def TernausNet(image_size):
    """Build a U-Net style segmentation model on an ImageNet-initialised VGG16.

    Args:
        image_size: Side length of the square RGB input; the model takes
            tensors of shape ``(image_size, image_size, 3)``.

    Returns:
        ``tf.keras.Model`` mapping the input to a single-channel sigmoid map.
    """

    def decoder_block(conv_filters, up_filters, strides):
        """3x3 conv + ReLU, then a 4x4 transposed conv with `strides` + ReLU."""
        return tf.keras.Sequential([
            Conv2D(conv_filters, kernel_size=3, padding='same', activation='relu'),
            Conv2DTranspose(up_filters, kernel_size=4, strides=strides, padding='same'),
            ReLU(),
        ])

    x = tf.keras.layers.Input((image_size, image_size, 3))
    num_filters = 32

    # Define VGG16 model
    vgg16 = tf.keras.applications.VGG16(include_top=False, weights='imagenet',
                                        input_shape=(image_size, image_size, 3))
    vgg_encoder = vgg16.layers

    # Encoder stages are slices of `vgg16.layers`. Index 3 is never used, so
    # conv1 stays at input resolution (the recorded summary shows 224x224x64
    # for it) - presumably that index is VGG16's first pooling layer.
    conv1 = tf.keras.Sequential(vgg_encoder[0:3])(x)
    conv2 = tf.keras.Sequential(vgg_encoder[4:7])(conv1)
    conv3 = tf.keras.Sequential(vgg_encoder[7:11])(conv2)
    conv4 = tf.keras.Sequential(vgg_encoder[11:15])(conv3)
    conv5 = tf.keras.Sequential(vgg_encoder[15:19])(conv4)

    # The centre block uses stride 1 so it can be concatenated with conv5.
    center = decoder_block(num_filters * 8 * 2, num_filters * 8, strides=1)(conv5)

    # Each decoder stage upsamples by 2 and is fed the previous stage
    # concatenated with the encoder stage of the same resolution.
    dec5 = decoder_block(num_filters * 8 * 2, num_filters * 8, strides=2)(
        tf.concat([center, conv5], axis=-1))
    dec4 = decoder_block(num_filters * 8 * 2, num_filters * 8, strides=2)(
        tf.concat([dec5, conv4], axis=-1))
    dec3 = decoder_block(num_filters * 4 * 2, num_filters * 2, strides=2)(
        tf.concat([dec4, conv3], axis=-1))
    dec2 = decoder_block(num_filters * 2 * 2, num_filters, strides=2)(
        tf.concat([dec3, conv2], axis=-1))

    dec1 = Conv2D(num_filters, kernel_size=3, padding='same',
                  activation='relu')(tf.concat([dec2, conv1], axis=-1))

    final = tf.keras.layers.Conv2D(1, (3, 3), padding='same',
                                   activation='sigmoid')(dec1)

    return tf.keras.Model(inputs=x, outputs=final)

In [5]:
# Build a throwaway 224x224 TernausNet and print its layer table as a shape check.
TernausNet(image_size=224).summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 sequential (Sequential)        (None, 224, 224, 64  38720       ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 sequential_1 (Sequential)      (None, 112, 112, 12  2

# DeepLabV3Plus Model

#### The model code was adapted from this Keras documentation page: https://keras.io/examples/vision/deeplabv3_plus/

In [6]:
# DeepLabV3 code citation:
# Keras. "DeepLabv3+ semantic segmentation." Keras Documentation, https://keras.io/examples/vision/deeplabv3_plus/. Accessed 16 April 2023.

import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def convolution_block(
    block_input,
    num_filters=256,
    kernel_size=3,
    dilation_rate=1,
    use_bias=False,
):
    """Apply Conv2D -> BatchNormalization -> ReLU to ``block_input``.

    The convolution uses 'same' padding and He-normal kernel initialisation;
    the remaining arguments are forwarded to ``layers.Conv2D`` unchanged.
    """
    conv_layer = layers.Conv2D(
        num_filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        padding="same",
        use_bias=use_bias,
        kernel_initializer=keras.initializers.HeNormal(),
    )
    features = conv_layer(block_input)
    features = layers.BatchNormalization()(features)
    return tf.nn.relu(features)


def DilatedSpatialPyramidPooling(dspp_input):
    """Fuse a global-pooling branch with 1x1 and dilated (6/12/18) conv branches.

    All five branches are concatenated on the channel axis and projected with
    a final 1x1 ``convolution_block``.
    """
    height, width = dspp_input.shape[-3], dspp_input.shape[-2]

    # Image-level branch: pool over the whole map, project, then upsample back.
    pooled = layers.AveragePooling2D(pool_size=(height, width))(dspp_input)
    pooled = convolution_block(pooled, kernel_size=1, use_bias=True)
    out_pool = layers.UpSampling2D(
        size=(height // pooled.shape[1], width // pooled.shape[2]),
        interpolation="bilinear",
    )(pooled)

    # Branches are created in the same order as before: 1x1, then rates 6, 12, 18.
    branches = [out_pool, convolution_block(dspp_input, kernel_size=1)]
    for rate in (6, 12, 18):
        branches.append(convolution_block(dspp_input, dilation_rate=rate))

    merged = layers.Concatenate(axis=-1)(branches)
    return convolution_block(merged, kernel_size=1)

def DeeplabV3Plus(image_size):
    """Build a DeepLabV3+ style model on an ImageNet-initialised ResNet50.

    Takes ``(image_size, image_size, 3)`` inputs and returns a ``keras.Model``
    ending in a single-channel 1x1 sigmoid convolution.
    """
    model_input = keras.Input(shape=(image_size, image_size, 3))
    backbone = keras.applications.ResNet50(
        weights="imagenet", include_top=False, input_tensor=model_input
    )

    # Deep branch: pyramid pooling on a late feature map, then upsample to
    # a quarter of the input size.
    deep = backbone.get_layer("conv4_block6_2_relu").output
    deep = DilatedSpatialPyramidPooling(deep)
    quarter = image_size // 4
    deep_up = layers.UpSampling2D(
        size=(quarter // deep.shape[1], quarter // deep.shape[2]),
        interpolation="bilinear",
    )(deep)

    # Shallow branch: early feature map reduced to 48 channels.
    shallow = backbone.get_layer("conv2_block3_2_relu").output
    shallow = convolution_block(shallow, num_filters=48, kernel_size=1)

    fused = layers.Concatenate(axis=-1)([deep_up, shallow])
    fused = convolution_block(fused)
    fused = convolution_block(fused)
    fused = layers.UpSampling2D(
        size=(image_size // fused.shape[1], image_size // fused.shape[2]),
        interpolation="bilinear",
    )(fused)

    head = layers.Conv2D(1, kernel_size=(1, 1), padding="same",
                         activation='sigmoid')
    return keras.Model(inputs=model_input, outputs=head(fused))


# Instantiate DeepLabV3+ at 224x224 and print its layer table.
# `model` is a notebook-level name that later training cells rebind.
model = DeeplabV3Plus(image_size=224)
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_3[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )               

# InceptionV3 Model

In [None]:
# Define InceptionV3 model
def InceptionV3(image_size=225):
    """Build a segmentation head on an ImageNet-initialised InceptionV3.

    The backbone output is reduced to one channel, upsampled bilinearly by the
    integer factor ``image_size // feature_size`` and passed through a 3x3
    sigmoid convolution. Because the factor uses floor division, the output
    only matches ``image_size`` when it is an exact multiple of the backbone's
    output size.
    """
    model_input = tf.keras.Input(shape=(image_size, image_size, 3))
    backbone = tf.keras.applications.InceptionV3(
        weights="imagenet", include_top=False, input_tensor=model_input
    )

    # Collapse the backbone features to a single coarse channel.
    coarse = tf.keras.layers.Conv2D(
        1, (3, 3), padding='same', activation='relu')(backbone.output)

    scale = (image_size // coarse.shape[1], image_size // coarse.shape[2])
    upsampled = tf.keras.layers.UpSampling2D(
        size=scale, interpolation="bilinear")(coarse)

    model_output = tf.keras.layers.Conv2D(
        1, (3, 3), padding='same', activation='sigmoid')(upsampled)
    return tf.keras.Model(inputs=model_input, outputs=model_output)

# Instantiate the InceptionV3-based model at 225x225 and print its layer table.
# This rebinds the notebook-level `model` name.
model = InceptionV3(image_size=225)
model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_11 (InputLayer)          [(None, 225, 225, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_950 (Conv2D)            (None, 112, 112, 32  864         ['input_11[0][0]']               
                                )                                                                 
                                                                                                  
 batch_normalization_940 (Batch  (None, 112, 112, 32  96         ['conv2d_950[0][0]']             
 Normalization)                 )                                                           

In [None]:
# Define Dataset
# One COCO_Dataset per split; the second argument is `percent`, so 1.0 keeps
# every image id returned for the 'person' category.
COCO_dataset_train = COCO_Dataset('train', 1.0)
COCO_dataset_val = COCO_Dataset('val', 1.0)


In [None]:
# Shared hyper-parameters for the training cells below.
BATCH_SIZE = 40
IMAGE_SIZE = 224  # side length of the square crops produced by both pipelines
EPOCHS = 40       # also the number of times the training pipeline is repeated

# train_ds: shuffled, randomly cropped and repeated EPOCHS times.
train_ds = COCO_dataset_train.train_dataset(batch_size=BATCH_SIZE, epochs=EPOCHS, 
                                            inp_size=IMAGE_SIZE)
# val_ds: centre-cropped, single pass.
val_ds = COCO_dataset_val.val_dataset(batch_size=BATCH_SIZE, 
                                      inp_size=IMAGE_SIZE)

In [None]:
from pathlib import Path

In [None]:
# Checkpoint location for this model.
path = Path("model_1")
path.mkdir(exist_ok=True, parents=True) # relative path: created under the current working directory
assert path.exists()
cpt_filename = "best1.hdf5"
cpt_path =str(path / cpt_filename)

# TernausNet Model Training
model = TernausNet(image_size=IMAGE_SIZE)
# Only the weights with the highest validation accuracy so far are written.
checkpoint = tf.keras.callbacks.ModelCheckpoint(cpt_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
optimizer = tf.keras.optimizers.Adam(0.0001)

model.compile(loss='binary_crossentropy', metrics=['accuracy'], 
              optimizer=optimizer)

# steps_per_epoch=50 means each Keras "epoch" consumes 50 batches of train_ds,
# not one full pass over the training images.
hist = model.fit(
    train_ds, 
    epochs=EPOCHS,
    steps_per_epoch=50,
    validation_data=val_ds,
    callbacks=[PlotLossesCallback(), checkpoint])

In [None]:
# Checkpoint location for this model.
path = Path("model_2")
path.mkdir(exist_ok=True, parents=True) # relative path: created under the current working directory
assert path.exists()
cpt_filename = "best2.hdf5"
cpt_path =str(path / cpt_filename)

# DeeplabV3Plus Model Training
model = DeeplabV3Plus(image_size=224)
# Only the weights with the highest validation accuracy so far are written.
checkpoint = tf.keras.callbacks.ModelCheckpoint(cpt_path, monitor='val_accuracy', 
                                                verbose=1, save_best_only=True, 
                                                mode='max')
# Note: this cell uses learning rate 0.001, the TernausNet cell uses 0.0001.
optimizer = tf.keras.optimizers.Adam(0.001)

model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer=optimizer)

# steps_per_epoch=50 means each Keras "epoch" consumes 50 batches of train_ds,
# not one full pass over the training images.
hist = model.fit(
    train_ds, 
    epochs=EPOCHS,
    steps_per_epoch=50,
    validation_data=val_ds,
    callbacks=[PlotLossesCallback(), checkpoint])

In [None]:
# Checkpoint location for this model.
path = Path("model_3")
path.mkdir(exist_ok=True, parents=True)  # relative path: created under the current working directory
assert path.exists()
cpt_filename = "best3.hdf5"
cpt_path = str(path / cpt_filename)

# InceptionV3 Model Training
# This model is built for 225x225 inputs, whereas train_ds/val_ds yield
# IMAGE_SIZE (224) crops. Build dedicated pipelines at the model's own size so
# the input and mask shapes match what `fit` expects.
INCEPTION_IMAGE_SIZE = 225
inception_train_ds = COCO_dataset_train.train_dataset(
    batch_size=BATCH_SIZE, epochs=EPOCHS, inp_size=INCEPTION_IMAGE_SIZE)
inception_val_ds = COCO_dataset_val.val_dataset(
    batch_size=BATCH_SIZE, inp_size=INCEPTION_IMAGE_SIZE)

model = InceptionV3(image_size=INCEPTION_IMAGE_SIZE)
# Only the weights with the highest validation accuracy so far are written.
checkpoint = tf.keras.callbacks.ModelCheckpoint(cpt_path, monitor='val_accuracy',
                                                verbose=1, save_best_only=True,
                                                mode='max')
optimizer = tf.keras.optimizers.Adam(0.0001)

model.compile(loss='binary_crossentropy', metrics=['accuracy'],
              optimizer=optimizer)

# EPOCHS must equal the repeat count used for the pipeline above; with
# steps_per_epoch=50 each Keras "epoch" consumes 50 batches.
hist = model.fit(
    inception_train_ds,
    epochs=EPOCHS,
    steps_per_epoch=50,
    validation_data=inception_val_ds,
    callbacks=[PlotLossesCallback(), checkpoint])

In [None]:
import matplotlib.pyplot as plt
import random
##########  Model Results ##########

def draw_sub_image(i, sample, mask, threshold=0.2):
    """Draw ``sample`` blended with a binarised mask in cell ``i`` of a 4x4 grid.

    Args:
        i: Zero-based grid position. Even positions are titled "Real", odd
            positions "Predicted".
        sample: Image array that can be blended with an RGB colour map.
        mask: Array whose channel 0 holds the mask values.
        threshold: Values strictly above this become foreground. Defaults to
            the 0.2 that was previously hard-coded.
    """
    ax = plt.subplot(4, 4, i + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)

    binary_mask = (mask[..., 0] > threshold).astype(np.float32)
    # Map 0/1 through viridis and drop the alpha channel before blending.
    mask_clr = plt.get_cmap('viridis')(binary_mask)[..., :3]
    ax.imshow(sample*0.5 + mask_clr*0.5)

    ax.set_title("Predicted" if i % 2 else "Real", color="b")

plt.figure(figsize=(10,10))

# The 4x4 grid holds eight (real, predicted) pairs.
N_PREVIEW_PAIRS = 8

# Draw the preview batch at the resolution the current `model` expects:
# `model` is whichever network was built last, and the InceptionV3 variant
# takes 225x225 inputs while `train_ds` yields IMAGE_SIZE crops.
preview_size = model.input_shape[1]
preview_ds = COCO_dataset_train.train_dataset(
    batch_size=N_PREVIEW_PAIRS, epochs=1, inp_size=preview_size)
imgs, masks = next(preview_ds.as_numpy_iterator())

# One batched forward pass instead of one `predict` call per sample.
pred_masks = model.predict(imgs)

for i, (sample, mask, pred_mask) in enumerate(zip(imgs, masks, pred_masks)):
    draw_sub_image(2*i, sample, mask)
    draw_sub_image(2*i + 1, sample, pred_mask)