In [1]:
# Create Colab Notebook folder
from google.colab import drive
drive.mount('/gdrive')
!ln -s "/gdrive/My Drive/Colab Notebooks" "/content/Colab Notebooks"

# Put Colab in the context of the project
import os
# os.chdir allows you to change directories, like cd in the Terminal
os.chdir('/content/Colab Notebooks/AerialImageDataset')

Mounted at /gdrive


In [2]:
%cd /content

/content


In [3]:
import os
import glob
import random
# from osgeo import gdal
# import rasterio
import shutil
from types import MethodType

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate,ZeroPadding2D , UpSampling2D,Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Input


from PIL import Image

In [4]:
# Check if TensorFlow is using GPU
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for device in physical_devices:
        print(f"GPU Device Name: {device.name}")
else:
    print("No GPU devices found. Make sure TensorFlow GPU is properly installed.")

# Check TensorFlow version
print("TensorFlow Version:", tf.__version__)

GPU Device Name: /physical_device:GPU:0
TensorFlow Version: 2.12.0


In [5]:
os.environ["PYTHONHASHSEED"] = str(23)
np.random.seed(23)
tf.random.set_seed(23)

In [6]:
pwd

'/content'

In [7]:
!mkdir /content/aerialimage

In [8]:
!ls /content

 aerialimage  'Colab Notebooks'   sample_data


In [9]:
# create a folders to move train and validation files

!mkdir /content/aerialimage/train_input
!mkdir /content/aerialimage/train_target

!mkdir /content/aerialimage/val_input
!mkdir /content/aerialimage/val_target

In [10]:
# create a folders to move train and validation files

!mkdir /content/aerialimage/train_input/input_data
!mkdir /content/aerialimage/train_target/target_data

!mkdir /content/aerialimage/val_input/input_data
!mkdir /content/aerialimage/val_target/target_data

In [11]:
# move input images to the folder

org_train_input = '/content/Colab Notebooks/AerialImageDataset/train/images'
new_train_input = '/content/aerialimage/train_input/input_data'
new_val_input = '/content/aerialimage/val_input/input_data'

input_img_paths = []

for dirname, _, filenames in os.walk(org_train_input):
    for filename in filenames:
        input_img_paths.append(os.path.join(dirname, filename))

input_img_paths = sorted(input_img_paths)

random.Random(23).shuffle(input_img_paths)
    
input_img_paths[0:2]


['/content/Colab Notebooks/AerialImageDataset/train/images/chicago5.tif',
 '/content/Colab Notebooks/AerialImageDataset/train/images/kitsap13.tif']

In [12]:
val_num = 40 ## 20% of train dataset 

for file in input_img_paths[:val_num]:
    shutil.copy(file, new_val_input)
    
for file in input_img_paths[val_num:]:
    shutil.copy(file, new_train_input)

In [13]:
# move target images to the folder

org_train_target = '/content/Colab Notebooks/AerialImageDataset/train/gt'
new_train_target = '/content/aerialimage/train_target/target_data'
new_val_target = '/content/aerialimage/val_target/target_data'

target_img_paths = []

for dirname, _, filenames in os.walk(org_train_target):
    for filename in filenames:
        target_img_paths.append(os.path.join(dirname, filename))

target_img_paths = sorted(target_img_paths)

random.Random(23).shuffle(target_img_paths)

target_img_paths[0:2]

['/content/Colab Notebooks/AerialImageDataset/train/gt/chicago5.tif',
 '/content/Colab Notebooks/AerialImageDataset/train/gt/kitsap13.tif']

In [14]:
# copy 40 target images to validation folder

for file in target_img_paths[:val_num]:
    shutil.copy(file, new_val_target)
    
for file in target_img_paths[val_num:]:
    shutil.copy(file, new_train_target)

In [15]:
# create an image generator / data augmentation pipeline 
# augment data in train dataset only

BATCH_SIZE = 5
TARGET_SIZE = (256,256)
seed = 23
# patch_size = (500, 500)  # Specify the desired patch size
# overlap = 250  # Specify the desired overlap size, 50% for this case

train_datagen = ImageDataGenerator(rescale=1./255,
                                   samplewise_std_normalization=False, 
                                   horizontal_flip = True, 
                                   vertical_flip = False, # 1st run : False, 2nd run: True
                                   height_shift_range = 0.1, 
                                   width_shift_range = 0.1, 
                                   rotation_range = 3, # 1st run : 3, 2nd run:10
                                   shear_range = 0.01, # 1st run : 0.01, 2nd run: 0.1
                                   fill_mode = 'nearest',
                                   zoom_range = 0.05, #1st run 0.05, 2nd run: 0.1
                                   zca_whitening = True,
                                   zca_epsilon=1e-5, 
#                                    preprocessing_function=patch_image,
                                   )


target_datagen = ImageDataGenerator(rescale=1./255,
                                   samplewise_std_normalization=False, 
                                   horizontal_flip = True, 
                                   vertical_flip = False, # 1st run : False, 2nd run: True
                                   height_shift_range = 0.1, 
                                   width_shift_range = 0.1, 
                                   rotation_range = 3, # 1st run : 3, 2nd run:10
                                   shear_range = 0.01, # 1st run : 0.01, 2nd run: 0.1
                                   fill_mode = 'nearest',
                                   zoom_range = 0.05, #1st run 0.05, 2nd run: 0.1
                                   zca_whitening = True,
                                   zca_epsilon=1e-5, 
#                                    preprocessing_function=patch_image
                                   )

val_input_datagen = ImageDataGenerator(rescale=1./255)

val_target_datagen = ImageDataGenerator(rescale=1./255)



In [16]:
# use data augmentation pipeline and start loading train data from folders
# TARGET_SIZE = (512,512)

train_generator_input = train_datagen.flow_from_directory(
    '/content/aerialimage/train_input',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    seed = seed

)

train_generator_output = target_datagen.flow_from_directory(
    '/content/aerialimage/train_target',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    color_mode="grayscale",
    seed = seed
)

train_generator = zip(train_generator_input, train_generator_output)

Found 140 images belonging to 1 classes.
Found 140 images belonging to 1 classes.


In [17]:
# use data augmentation pipeline and start loading validation data from folders

val_generator_input = val_input_datagen.flow_from_directory(
    '/content/aerialimage/val_input',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    seed = seed

)

val_generator_output = val_target_datagen.flow_from_directory(
    '/content/aerialimage/val_target',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    color_mode="grayscale",
    seed = seed
)

val_generator = zip(val_generator_input, val_generator_output)

Found 40 images belonging to 1 classes.
Found 40 images belonging to 1 classes.


## Image Patching

In [18]:
def create_patches(image, patch_size, overlap): #image is image array with dimention, e.g, (5000,5000,3). patch_size is 2d. overlap is a number
    height, width = image.shape[:2]    
    patch_height, patch_width = patch_size
    
    stride_height = patch_height - overlap
    stride_width = patch_width - overlap
    
    patches = []
    
    for y in range(0, height-patch_height+1, stride_height):
        for x in range(0, width-patch_width+1, stride_width):
            patch = image[y:y+patch_height, x:x+patch_width]
            patches.append(patch)
    
    return patches   #return all patches

In [19]:
!mkdir /content/aerialimage/train_patches
!mkdir /content/aerialimage/train_targetpatches
!mkdir /content/aerialimage/test_patches
!mkdir /content/aerialimage/val_patches
!mkdir /content/aerialimage/val_targetpatches

In [20]:
# shutil.rmtree('/content/aerialimage/train_patches/gt_patches')

In [21]:
!mkdir /content/aerialimage/train_patches/images_patches
!mkdir /content/aerialimage/train_targetpatches/gt_patches
!mkdir /content/aerialimage/test_patches/images_patches
!mkdir /content/aerialimage/val_patches/images_patches
!mkdir /content/aerialimage/val_targetpatches/gt_patches

In [22]:
### Training patches

In [23]:
import os
import cv2

# Specify the absolute path to the "aerialimage" folder
absolute_path = '/content/aerialimage'

# Construct the absolute path to the "train/images" directory for reading
train_images_dir = new_train_input

# Retrieve a list of files in the "train/images" directory
files = os.listdir(train_images_dir)

# Filter files based on criteria (e.g., file extension)
image_files = [file for file in sorted(files) if file.endswith('.tif')]

# Construct the relative path to the "train_patches" directory for writing
train_images_patches_dir = os.path.join(absolute_path, 'train_patches/images_patches')
train_gt_patches_dir = os.path.join(absolute_path, 'train_targetpatches/gt_patches')

# Construct the relative path to the "test_patches" directory for writing
test_images_patches_dir = os.path.join(absolute_path, 'test_patches/images_patches')

patch_size = (500, 500)  # Specify the desired patch size
overlap = 250  # Specify the desired overlap size, 50% for this case


for image_file in image_files:
    image_path = os.path.join(train_images_dir, image_file)
    image = cv2.imread(image_path)   # Read image
    patches = create_patches(image, patch_size, overlap)

    for i, patch in enumerate(patches):
        patch_name = f'{image_file[:-4]}_patch_{i+1}.tif'
        cv2.imwrite(os.path.join(train_images_patches_dir, patch_name), patch)


In [24]:
len(os.listdir(train_images_patches_dir))

50540

In [25]:
import os
import cv2

train_gt_patches_dir = os.path.join(absolute_path, 'train_targetpatches/gt_patches')

# Construct the absolute path to the "train/images" directory for reading
train_gt_dir = new_train_target

# Retrieve a list of files in the "train/images" directory
files = os.listdir(train_gt_dir)

# Filter files based on criteria (e.g., file extension)
gt_files = [file for file in sorted(files) if file.endswith('.tif')]

patch_size = (500, 500)  # Specify the desired patch size
overlap = 250  # Specify the desired overlap size, 50% for this case


for image_file in gt_files:
    image_path = os.path.join(train_gt_dir, image_file)
    image = cv2.imread(image_path)   # Read image
    patches = create_patches(image, patch_size, overlap)

    for i, patch in enumerate(patches):
        patch_name = f'{image_file[:-4]}_patch_{i+1}.tif'
        cv2.imwrite(os.path.join(train_gt_patches_dir, patch_name), patch)

In [26]:
len(os.listdir(train_gt_patches_dir))

50540

In [165]:
### Validation Patches

In [27]:
import os
import cv2


# Construct the absolute path to the "train/images" directory for reading
val_images_dir = new_val_input

# Retrieve a list of files in the "train/images" directory
files = os.listdir(val_images_dir)

# Filter files based on criteria (e.g., file extension)
image_files = [file for file in sorted(files) if file.endswith('.tif')]

# Construct the relative path to the "train_patches" directory for writing
val_images_patches_dir = os.path.join(absolute_path, 'val_patches/images_patches')
val_gt_patches_dir = os.path.join(absolute_path, 'val_targetpatches/gt_patches')

patch_size = (500, 500)  # Specify the desired patch size
overlap = 250  # Specify the desired overlap size, 50% for this case


for image_file in image_files:
    image_path = os.path.join(val_images_dir, image_file)
    image = cv2.imread(image_path)   # Read image
    patches = create_patches(image, patch_size, overlap)

    for i, patch in enumerate(patches):
        patch_name = f'{image_file[:-4]}_patch_{i+1}.tif'
        cv2.imwrite(os.path.join(val_images_patches_dir, patch_name), patch)

In [28]:
len(os.listdir(val_images_patches_dir))

14440

In [29]:
import os
import cv2



# Construct the absolute path to the "train/images" directory for reading
val_gt_dir = new_val_target

# Retrieve a list of files in the "train/images" directory
files = os.listdir(val_gt_dir)

# Filter files based on criteria (e.g., file extension)
gt_files = [file for file in sorted(files) if file.endswith('.tif')]

patch_size = (500, 500)  # Specify the desired patch size
overlap = 250  # Specify the desired overlap size, 50% for this case


for image_file in gt_files:
    image_path = os.path.join(val_gt_dir, image_file)
    image = cv2.imread(image_path)   # Read image
    patches = create_patches(image, patch_size, overlap)

    for i, patch in enumerate(patches):
        patch_name = f'{image_file[:-4]}_patch_{i+1}.tif'
        cv2.imwrite(os.path.join(val_gt_patches_dir, patch_name), patch)

In [30]:
len(os.listdir(val_gt_patches_dir))

14440

In [73]:
# create an image generator / data augmentation pipeline 
# augment data in train dataset only

BATCH_SIZE = 5
TARGET_SIZE = (512,512)
seed = 23
# patch_size = (500, 500)  # Specify the desired patch size
# overlap = 250  # Specify the desired overlap size, 50% for this case

train_patch_datagen = ImageDataGenerator(rescale=1./255,
                                   samplewise_std_normalization=False, 
                                   horizontal_flip = True, 
                                   vertical_flip = False, # 1st run : False, 2nd run: True
                                   height_shift_range = 0.1, 
                                   width_shift_range = 0.1, 
                                   rotation_range = 3, # 1st run : 3, 2nd run:10
                                   shear_range = 0.01, # 1st run : 0.01, 2nd run: 0.1
                                   fill_mode = 'nearest',
                                   zoom_range = 0.05, #1st run 0.05, 2nd run: 0.1
                                   zca_whitening = True,
                                   zca_epsilon=1e-5, 
                                   )


target_patch_datagen = ImageDataGenerator(rescale=1./255,
                                   samplewise_std_normalization=False, 
                                   horizontal_flip = True, 
                                   vertical_flip = False, # 1st run : False, 2nd run: True
                                   height_shift_range = 0.1, 
                                   width_shift_range = 0.1, 
                                   rotation_range = 3, # 1st run : 3, 2nd run:10
                                   shear_range = 0.01, # 1st run : 0.01, 2nd run: 0.1
                                   fill_mode = 'nearest',
                                   zoom_range = 0.05, #1st run 0.05, 2nd run: 0.1
                                   zca_whitening = True,
                                   zca_epsilon=1e-5, 
                                   )

val_patch_input_datagen = ImageDataGenerator(rescale=1./255)

val_patch_target_datagen = ImageDataGenerator(rescale=1./255)



In [74]:
# use data augmentation pipeline and start loading train data from folders
TARGET_SIZE = (512,512)

train_generator_input_patch = train_patch_datagen.flow_from_directory(
    '/content/aerialimage/train_patches/',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    seed = seed

)

train_generator_output_patch = target_patch_datagen.flow_from_directory(
    '/content/aerialimage/train_targetpatches/',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    color_mode="grayscale",
    seed = seed
)

train_patch_generator = zip(train_generator_input_patch, train_generator_output_patch)

Found 50540 images belonging to 1 classes.
Found 50540 images belonging to 1 classes.


In [75]:
# use data augmentation pipeline and start loading validation data from folders

val_generator_input_patch = val_patch_input_datagen.flow_from_directory(
    '/content/aerialimage/val_patches/',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    seed = seed

)

val_generator_output_patch = val_patch_target_datagen.flow_from_directory(
    '/content/aerialimage/val_targetpatches/',
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    color_mode="grayscale",
    seed = seed
)

val_patch_generator = zip(val_generator_input_patch, val_generator_output_patch)

Found 14440 images belonging to 1 classes.
Found 14440 images belonging to 1 classes.


In [34]:
!mkdir /content/model_checkpoint
!mkdir /content/model_log

In [35]:
# 1st run
model_file = '/content/model_checkpoint/model_checkpoint_patch.h5'
log_file =  '/content/model_log/model_log_patch.csv'

## Unet

In [100]:
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras.models import Model

def conv_block(input, num_filters):
    x = Conv2D(num_filters, 3, padding="same")(input)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(num_filters, 3, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    return x

def encoder_block(input, num_filters):
    x = conv_block(input, num_filters)
    p = MaxPool2D((2, 2))(x)
    return x, p

def decoder_block(input, skip_features, num_filters):
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
    x = Concatenate()([x, skip_features])
    x = conv_block(x, num_filters)
    return x

def build_unet(input_shape):
    inputs = Input(input_shape)

    s1, p1 = encoder_block(inputs, 64)
    s2, p2 = encoder_block(p1, 128)
    s3, p3 = encoder_block(p2, 256)
    s4, p4 = encoder_block(p3, 512)

    b1 = conv_block(p4, 1024)

    d1 = decoder_block(b1, s4, 512)
    d2 = decoder_block(d1, s3, 256)
    d3 = decoder_block(d2, s2, 128)
    d4 = decoder_block(d3, s1, 64)

    outputs = Conv2D(1, 1, padding="same", activation="sigmoid")(d4)

    model = Model(inputs, outputs, name="U-Net")
    return model


input_shape = (512, 512, 3)
model = build_unet(input_shape)
model.summary()

Model: "U-Net"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_15 (InputLayer)          [(None, 512, 512, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_276 (Conv2D)            (None, 512, 512, 64  1792        ['input_15[0][0]']               
                                )                                                                 
                                                                                                  
 batch_normalization_300 (Batch  (None, 512, 512, 64  256        ['conv2d_276[0][0]']             
 Normalization)                 )                                                             

## u2net

In [76]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPool2D, UpSampling2D, Concatenate, Add

def conv_block(inputs, out_ch, rate=1):
    x = Conv2D(out_ch, 3, padding="same", dilation_rate=1)(inputs)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x

def RSU_L(inputs, out_ch, int_ch, num_layers, rate=2):
    """ Initial Conv """
    x = conv_block(inputs, out_ch)
    init_feats = x

    """ Encoder """
    skip = []
    x = conv_block(x, int_ch)
    skip.append(x)

    for i in range(num_layers-2):
        x = MaxPool2D((2, 2))(x)
        x = conv_block(x, int_ch)
        skip.append(x)

    """ Bridge """
    x = conv_block(x, int_ch, rate=rate)

    """ Decoder """
    skip.reverse()

    x = Concatenate()([x, skip[0]])
    x = conv_block(x, int_ch)

    for i in range(num_layers-3):
        x = UpSampling2D(size=(2, 2), interpolation="bilinear")(x)
        x = Concatenate()([x, skip[i+1]])
        x = conv_block(x, int_ch)

    x = UpSampling2D(size=(2, 2), interpolation="bilinear")(x)
    x = Concatenate()([x, skip[-1]])
    x = conv_block(x, out_ch)

    """ Add """
    x = Add()([x, init_feats])
    return x

def RSU_4F(inputs, out_ch, int_ch):
    """ Initial Conv """
    x0 = conv_block(inputs, out_ch, rate=1)

    """ Encoder """
    x1 = conv_block(x0, int_ch, rate=1)
    x2 = conv_block(x1, int_ch, rate=2)
    x3 = conv_block(x2, int_ch, rate=4)

    """ Bridge """
    x4 = conv_block(x3, int_ch, rate=8)

    """ Decoder """
    x = Concatenate()([x4, x3])
    x = conv_block(x, int_ch, rate=4)

    x = Concatenate()([x, x2])
    x = conv_block(x, int_ch, rate=2)

    x = Concatenate()([x, x1])
    x = conv_block(x, out_ch, rate=1)

    """ Addition """
    x = Add()([x, x0])
    return x

def u2net(input_shape, out_ch, int_ch, num_classes=1):
    """ Input Layer """
    inputs = Input(input_shape)
    s0 = inputs

    """ Encoder """
    s1 = RSU_L(s0, out_ch[0], int_ch[0], 7)
    p1 = MaxPool2D((2, 2))(s1)

    s2 = RSU_L(p1, out_ch[1], int_ch[1], 6)
    p2 = MaxPool2D((2, 2))(s2)

    s3 = RSU_L(p2, out_ch[2], int_ch[2], 5)
    p3 = MaxPool2D((2, 2))(s3)

    s4 = RSU_L(p3, out_ch[3], int_ch[3], 4)
    p4 = MaxPool2D((2, 2))(s4)

    s5 = RSU_4F(p4, out_ch[4], int_ch[4])
    p5 = MaxPool2D((2, 2))(s5)

    """ Bridge """
    b1 = RSU_4F(p5, out_ch[5], int_ch[5])
    b2 = UpSampling2D(size=(2, 2), interpolation="bilinear")(b1)

    """ Decoder """
    d1 = Concatenate()([b2, s5])
    d1 = RSU_4F(d1, out_ch[6], int_ch[6])
    u1 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d1)

    d2 = Concatenate()([u1, s4])
    d2 = RSU_L(d2, out_ch[7], int_ch[7], 4)
    u2 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d2)

    d3 = Concatenate()([u2, s3])
    d3 = RSU_L(d3, out_ch[8], int_ch[8], 5)
    u3 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d3)

    d4 = Concatenate()([u3, s2])
    d4 = RSU_L(d4, out_ch[9], int_ch[9], 6)
    u4 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d4)

    d5 = Concatenate()([u4, s1])
    d5 = RSU_L(d5, out_ch[10], int_ch[10], 7)

    """ Side Outputs """
    y1 = Conv2D(num_classes, 3, padding="same")(d5)

    y2 = Conv2D(num_classes, 3, padding="same")(d4)
    y2 = UpSampling2D(size=(2, 2), interpolation="bilinear")(y2)

    y3 = Conv2D(num_classes, 3, padding="same")(d3)
    y3 = UpSampling2D(size=(4, 4), interpolation="bilinear")(y3)

    y4 = Conv2D(num_classes, 3, padding="same")(d2)
    y4 = UpSampling2D(size=(8, 8), interpolation="bilinear")(y4)

    y5 = Conv2D(num_classes, 3, padding="same")(d1)
    y5 = UpSampling2D(size=(16, 16), interpolation="bilinear")(y5)

    y6 = Conv2D(num_classes, 3, padding="same")(b1)
    y6 = UpSampling2D(size=(32, 32), interpolation="bilinear")(y6)

    y0 = Concatenate()([y1, y2, y3, y4, y5, y6])
    y0 = Conv2D(num_classes, 3, padding="same")(y0)

    y0 = Activation("sigmoid")(y0)
    y1 = Activation("sigmoid")(y1)
    y2 = Activation("sigmoid")(y2)
    y3 = Activation("sigmoid")(y3)
    y4 = Activation("sigmoid")(y4)
    y5 = Activation("sigmoid")(y5)
    y6 = Activation("sigmoid")(y6)

    model = tf.keras.models.Model(inputs, outputs=[y0, y1, y2, y3, y4, y5, y6])
    return model

def build_u2net(input_shape, num_classes=1):
    out_ch = [64, 128, 256, 512, 512, 512, 512, 256, 128, 64, 64]
    int_ch = [32, 32, 64, 128, 256, 256, 256, 128, 64, 32, 16]
    model = u2net(input_shape, out_ch, int_ch, num_classes=num_classes)
    return model

def build_u2net_lite(input_shape, num_classes=1):
    out_ch = [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]
    int_ch = [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]
    model = u2net(input_shape, out_ch, int_ch, num_classes=num_classes)
    return model


model = build_u2net_lite((512, 512, 3))
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_11 (InputLayer)          [(None, 512, 512, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_130 (Conv2D)            (None, 512, 512, 64  1792        ['input_11[0][0]']               
                                )                                                                 
                                                                                                  
 batch_normalization_164 (Batch  (None, 512, 512, 64  256        ['conv2d_130[0][0]']             
 Normalization)                 )                                                           

## UnetR

In [None]:
## sample code to import module
from google.colab import drive
drive.mount('/content/drive')

import sys
sys.path.append('/content/drive/MyDrive/folder_name')

import my_module

In [None]:
def mlp(x, hidden_units, dropout_rate):
    if not isinstance(hidden_units, list): hidden_units = [hidden_units]
    for units in hidden_units:
        x = Dense(units, activation=tf.nn.gelu)(x)
        x = Dropout(dropout_rate)(x)
    return x

def transformer_encoder(x, cf):
    skip_1 = x
    x = L.LayerNormalization()(x)
    x = L.MultiHeadAttention(
        num_heads=cf["num_heads"], key_dim=cf["hidden_dim"]
    )(x, x)
    x = L.Add()([x, skip_1])

    skip_2 = x
    x = L.LayerNormalization()(x)
    x = mlp(x, cf)
    x = L.Add()([x, skip_2])

    return x

def conv_block(x, num_filters, kernel_size=3):
    x = L.Conv2D(num_filters, kernel_size=kernel_size, padding="same")(x)
    x = L.BatchNormalization()(x)
    x = L.ReLU()(x)
    return x

def deconv_block(x, num_filters):
    x = L.Conv2DTranspose(num_filters, kernel_size=2, padding="same", strides=2)(x)
    return x

def build_unetr_2d(cf):
    """ Inputs """
    input_shape = (cf["num_patches"], cf["patch_size"]*cf["patch_size"]*cf["num_channels"])
    inputs = L.Input(input_shape) ## (None, 256, 768)

    """ Patch + Position Embeddings """
    patch_embed = L.Dense(cf["hidden_dim"])(inputs) ## (None, 256, 768)

    positions = tf.range(start=0, limit=cf["num_patches"], delta=1) ## (256,)
    pos_embed = L.Embedding(input_dim=cf["num_patches"], output_dim=cf["hidden_dim"])(positions) ## (256, 768)
    x = patch_embed + pos_embed ## (None, 256, 768)

    """ Transformer Encoder """
    skip_connection_index = [3, 6, 9, 12]
    skip_connections = []

    for i in range(1, cf["num_layers"]+1, 1):
        x = transformer_encoder(x, cf)

        if i in skip_connection_index:
            skip_connections.append(x)

    """ CNN Decoder """
    z3, z6, z9, z12 = skip_connections

    ## Reshaping
    z0 = L.Reshape((cf["image_size"], cf["image_size"], cf["num_channels"]))(inputs)
    z3 = L.Reshape((cf["patch_size"], cf["patch_size"], cf["hidden_dim"]))(z3)
    z6 = L.Reshape((cf["patch_size"], cf["patch_size"], cf["hidden_dim"]))(z6)
    z9 = L.Reshape((cf["patch_size"], cf["patch_size"], cf["hidden_dim"]))(z9)
    z12 = L.Reshape((cf["patch_size"], cf["patch_size"], cf["hidden_dim"]))(z12)

    ## Decoder 1
    x = deconv_block(z12, 512)

    s = deconv_block(z9, 512)
    s = conv_block(s, 512)
    x = L.Concatenate()([x, s])

    x = conv_block(x, 512)
    x = conv_block(x, 512)

    ## Decoder 2
    x = deconv_block(x, 256)

    s = deconv_block(z6, 256)
    s = conv_block(s, 256)
    s = deconv_block(s, 256)
    s = conv_block(s, 256)

    x = L.Concatenate()([x, s])
    x = conv_block(x, 256)
    x = conv_block(x, 256)

    ## Decoder 3
    x = deconv_block(x, 128)

    s = deconv_block(z3, 128)
    s = conv_block(s, 128)
    s = deconv_block(s, 128)
    s = conv_block(s, 128)
    s = deconv_block(s, 128)
    s = conv_block(s, 128)

    x = L.Concatenate()([x, s])
    x = conv_block(x, 128)
    x = conv_block(x, 128)

    ## Decoder 4
    x = deconv_block(x, 64)

    s = conv_block(z0, 64)
    s = conv_block(s, 64)

    x = L.Concatenate()([x, s])
    x = conv_block(x, 64)
    x = conv_block(x, 64)

    """ Output """
    outputs = L.Conv2D(1, kernel_size=1, padding="same", activation="sigmoid")(x)

    return Model(inputs, outputs, name="UNETR_2D")

if __name__ == "__main__":
    config = {}
    config["image_size"] = 256
    config["num_layers"] = 12
    config["hidden_dim"] = 768
    config["mlp_dim"] = 3072
    config["num_heads"] = 12
    config["dropout_rate"] = 0.1
    config["num_patches"] = 256
    config["patch_size"] = 16
    config["num_channels"] = 3

    model = build_unetr_2d(config)
    model.summary()

In [68]:
from keras.layers import (Layer, BatchNormalization, LayerNormalization, Conv2D, Conv2DTranspose, Embedding, 
    Activation, Dense, Dropout, MultiHeadAttention, add, Input, concatenate, GlobalAveragePooling1D)
from keras.models import Model

def mlp(x, hidden_units, dropout_rate):
    if not isinstance(hidden_units, list): hidden_units = [hidden_units]
    for units in hidden_units:
        x = Dense(units, activation=tf.nn.gelu)(x)
        x = Dropout(dropout_rate)(x)
    return x

class Patches(Layer):
    '''
    [B, H, W, C] 
    -> [B, H/patch_size, W/patch_size, C*(patch_size^2)] 
    -> [B, H*W/(patch_size^2), C*(patch_size^2)]
    '''
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = 5
        patches = tf.image.extract_patches(
            images = images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID',
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])

        ##
        # height, width = images.shape[:2]    
        # patch_height, patch_width = patch_size
        
        # stride_height = patch_height - overlap
        # stride_width = patch_width - overlap
        
        # patches = []
        
        # for y in range(0, height-patch_height+1, stride_height):
        #     for x in range(0, width-patch_width+1, stride_width):
        #         patch = image[y:y+patch_height, x:x+patch_width]
        #         patches.append(patch)
    


        return patches

class PatchEncoder(Layer):
    '''
    Project the patches to projection_dim and introduce learnable positional embedding for patches
    '''
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = Dense(units=projection_dim)
        self.position_embeding = Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )
    
    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embeding(positions)
        return encoded

def normalization(input_tensor, normalization, name=None):

    if normalization=='batch':
        return(BatchNormalization(name=None if name is None else name + '_batchnorm')(input_tensor))
    elif normalization=='layer':
        return(LayerNormalization(epsilon=1e-6, name=None if name is None else name + '_layernorm')(input_tensor))
    elif normalization=='group':
        return(tfa.layers.GroupNormalization(groups=8, name=None if name is None else name + '_groupnorm')(input_tensor))
    elif normalization == None:
        return input_tensor
    else:
        raise ValueError('Invalid normalization')

def conv_norm_act(input_tensor, filters, kernel_size , norm_type='batch', act_type='relu', dilation=1):
    '''
    Conv2d + Normalization(norm_type:str) + Activation(act_type:str)
    '''
    output_tensor = Conv2D(filters, kernel_size, padding='same', dilation_rate=(dilation, dilation), use_bias=False if norm_type is not None else True, kernel_initializer='he_normal')(input_tensor)
    output_tensor = normalization(output_tensor, normalization=norm_type)
    if act_type is not None: output_tensor = Activation(act_type)(output_tensor)

    return output_tensor

def conv2d_block(input_tensor, filters, kernel_size, 
                norm_type, use_residual, act_type='relu',
                double_features = False, dilation=[1, 1], name=None):

    x = Conv2D(filters, kernel_size, padding='same', dilation_rate=dilation[0], use_bias=False, kernel_initializer='he_normal', name=None if name is None else name + '_conv2d_0')(input_tensor)
    x = normalization(x, norm_type, name=None if name is None else name + '_0')
    x = Activation(act_type, name=None if name is None else name + act_type + '_0')(x)

    if double_features:
        filters *= 2

    x = Conv2D(filters, kernel_size, padding='same', dilation_rate=dilation[1], use_bias=False, kernel_initializer='he_normal', name=None if name is None else name + '_conv2d_1')(x)
    x = normalization(x, norm_type, name=None if name is None else name + '_1')

    if use_residual:
        if K.int_shape(input_tensor)[-1] != K.int_shape(x)[-1]:
            shortcut = Conv2D(filters, kernel_size=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=None if name is None else name + '_shortcut_conv2d')(input_tensor)
            shortcut = normalization(shortcut, norm_type, name=None if name is None else name + '_shortcut')
            x = add([x, shortcut])
        else:
            x = add([x, input_tensor])

    x = Activation(act_type, name=None if name is None else name + act_type + '_0')(x)

    return x

def deconv_conv_block(x,
                      filters_list: list,
                      kernel_size,
                      norm_type,
                      act_type,
                      ):
    '''
    Corresponding to the blue block in the UNETR architecture diagram
    '''
    for filts in filters_list:
        x = Conv2DTranspose(filts, 2, (2, 2), kernel_initializer='he_normal')(x)
        x = conv_norm_act(x, filts, kernel_size, norm_type, act_type)
    return x

def conv_deconv_block(x,
                      filters,
                      kernel_size,
                      norm_type,
                      use_residual,
                      act_type,
                      ):
    '''
    Corresponding to the yellow + green block in the UNETR architecture diagram
    '''
    x = conv2d_block(x, filters, kernel_size, norm_type, use_residual, act_type)
    x = Conv2DTranspose(filters // 2, 2, (2, 2), kernel_initializer='he_normal')(x)
    return x


def create_vit(x,
               patch_size,
               num_patches,
               projection_dim,
               num_heads,
               transformer_units,
               transformer_layers,
               dropout_rate,
               extract_layers,
               ):
    skip_connections = []

    # Create patches.
    patches = Patches(patch_size)(x)
    # Encode patches.
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for layer in range(transformer_layers):
        # Layer normalization 1.
        x1 = LayerNormalization(epsilon=1e-6)(encoded_patches)
        # Create a multi-head attention layer.
        attention_output = MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim//num_heads, dropout=dropout_rate
        )(x1, x1)
        # Skip connection 1.
        x2 = add([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=dropout_rate)
        # Skip connection 2.
        encoded_patches = add([x3, x2])
        if layer + 1 in extract_layers:
            skip_connections.append(encoded_patches)

    return skip_connections


def build_model(# ↓ Base arguments
                input_shape = (512, 512, 3),
                class_nums = 2,
                # ↓ ViT arguments
                patch_size = 32,
                projection_dim = 768,
                num_heads = 12,
                transformer_units = [2048, 768],
                transformer_layers = 12,
                extract_layers = [3, 6, 9, 12],
                dropout_rate = 0.1,
                # ↓ Conv arguments
                kernel_size = 3,
                conv_norm = 'batch',
                conv_act = 'relu',
                use_residual = False,
                # ↓ Other arguments
                show_summary = True,
                output_act = 'auto',
                ):
    '''
    input_shape: tuple, (height, width, channel) note that this network is used for 2D image segmentation tasks
    class_nums: int, output channels
    patch_size: int, image partition size
    projection_dim: int, pojection dimensions in ViT
    num_heads: int, nums of heads for MultiHeadAttention
    transformer_units: list, the number of hidden units of the MLP module in ViT, note that it is in the form of a list, should be [hidden units, projection_dims]
    transformer_layers: int, transformer stacking nums
    extract_layers: list, Determine which layers in ViT should be added to the "skip connection", the default is [3, 6, 9, 12]
    dropout_rate: float, dropout ratio for the ViT part
    kernel_size: int, kernel size for convolution block
    conv_norm: str, The normalization method of the convolutional layer, 'batch' or 'layer' or 'group'
    conv_act: str, activation function for convilution layer
    use_residual: bool, whether the convolution module uses residual connections
    show_summary: bool, whether to show the model overview
    output_act: str, The activation function of the output layer will be determined according to class_nums when 'auto'
      (i.e. 'sigmoid' for binary segmentation task, 'softmax' for multi-class segmentation task)
    '''
    
    z4_de_filts = 512
    z3_de_filts_list = [512]
    z2_de_filts_list = [512, 256]
    z1_de_filts_list = [512, 256, 128]
    z34_conv_filts = 512
    z23_conv_filts = 256
    z12_conv_filts = 128
    z01_conv_filts = 64
    if output_act == 'auto': output_act = 'sigmoid' if class_nums == 1 else 'softmax'

    assert input_shape[0] == input_shape[1] and input_shape[0] // patch_size
    num_patches = (input_shape[0] * input_shape[1]) // (patch_size ** 2)

    inputs = Input(input_shape)
    z0 = inputs

    z1, z2, z3, z4 = create_vit(z0, 
                                patch_size,
                                num_patches,
                                projection_dim,
                                num_heads,
                                transformer_units,
                                transformer_layers,
                                dropout_rate,
                                extract_layers)
    
    z1 = tf.reshape(z1, (-1, input_shape[0] // patch_size, input_shape[1] // patch_size, projection_dim)) # [B, H/16, W/16, projection_dim]
    z2 = tf.reshape(z2, (-1, input_shape[0] // patch_size, input_shape[1] // patch_size, projection_dim))
    z3 = tf.reshape(z3, (-1, input_shape[0] // patch_size, input_shape[1] // patch_size, projection_dim))
    z4 = tf.reshape(z4, (-1, input_shape[0] // patch_size, input_shape[1] // patch_size, projection_dim))

    z4 = Conv2DTranspose(z4_de_filts, 2, (2, 2), kernel_initializer='he_normal')(z4)
    z3 = deconv_conv_block(z3, z3_de_filts_list, kernel_size, conv_norm, conv_act)
    z3 = concatenate([z3, z4])
    z3 = conv_deconv_block(z3, z34_conv_filts, kernel_size, conv_norm, use_residual, conv_act)
    z2 = deconv_conv_block(z2, z2_de_filts_list, kernel_size, conv_norm, conv_act)
    z2 = concatenate([z2, z3])
    z2 = conv_deconv_block(z2, z23_conv_filts, kernel_size, conv_norm, use_residual, conv_act)
    z1 = deconv_conv_block(z1, z1_de_filts_list, kernel_size, conv_norm, conv_act)
    z1 = concatenate([z1, z2])
    z1 = conv_deconv_block(z1, z12_conv_filts, kernel_size, conv_norm, use_residual, conv_act)
    z0 = conv2d_block(z0, z01_conv_filts, kernel_size, conv_norm, use_residual, conv_act)
    z0 = concatenate([z0, z1])
    z0 = conv2d_block(z0, z01_conv_filts, kernel_size, conv_norm, use_residual, conv_act)

    outputs = Conv2D(class_nums, 1, activation=output_act)(z0)

    model = Model(inputs=inputs, outputs=outputs)

    if show_summary: model.summary()

    return model


# x = np.random.uniform(size=(1, 512, 512, 3))
model = build_model(# ↓ Base arguments
          input_shape = (512, 512, 3),
          class_nums = 1,
          # ↓ ViT arguments
          patch_size = 16,
          projection_dim = 768,
          num_heads = 12,
          transformer_units = [2048, 768],
          transformer_layers = 12,
          extract_layers = [3, 6, 9, 12],
          dropout_rate = 0.1,
          # ↓ Conv arguments
          kernel_size = 3,
          conv_norm = 'batch',
          conv_act = 'relu',
          use_residual = False,
          # ↓ Other arguments
          show_summary = True,
          output_act = 'auto',)
# y = model(x)
# print(x.shape, y.shape)
model.summary()

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_10 (InputLayer)          [(None, 512, 512, 3  0           []                               
                                )]                                                                
                                                                                                  
 patches_8 (Patches)            (5, None, 768)       0           ['input_10[0][0]']               
                                                                                                  
 patch_encoder_7 (PatchEncoder)  (5, 1024, 768)      1377024     ['patches_8[0][0]']              
                                                                                                  
 layer_normalization_242 (Layer  (5, 1024, 768)      1536        ['patch_encoder_7[0][0]']  

## Vgg19 Unet

In [90]:
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG19

def conv_block(input, num_filters):
    x = Conv2D(num_filters, 3, padding="same")(input)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(num_filters, 3, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    return x

def decoder_block(input, skip_features, num_filters):
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
    x = Concatenate()([x, skip_features])
    x = conv_block(x, num_filters)
    return x

def build_vgg19_unet(input_shape):
    """ Input """
    inputs = Input(input_shape)

    """ Pre-trained VGG19 Model """
    vgg19 = VGG19(include_top=False, weights="imagenet", input_tensor=inputs)

    """ Encoder """
    s1 = vgg19.get_layer("block1_conv2").output         ## (512 x 512)
    s2 = vgg19.get_layer("block2_conv2").output         ## (256 x 256)
    s3 = vgg19.get_layer("block3_conv4").output         ## (128 x 128)
    s4 = vgg19.get_layer("block4_conv4").output         ## (64 x 64)

    """ Bridge """
    b1 = vgg19.get_layer("block5_conv4").output         ## (32 x 32)

    """ Decoder """
    d1 = decoder_block(b1, s4, 512)                     ## (64 x 64)
    d2 = decoder_block(d1, s3, 256)                     ## (128 x 128)
    d3 = decoder_block(d2, s2, 128)                     ## (256 x 256)
    d4 = decoder_block(d3, s1, 64)                      ## (512 x 512)

    """ Output """
    outputs = Conv2D(1, 1, padding="same", activation="sigmoid")(d4)

    model = Model(inputs, outputs, name="VGG19_U-Net")
    return model


input_shape = (512, 512, 3)
model = build_vgg19_unet(input_shape)
model.summary()

Model: "VGG19_U-Net"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_14 (InputLayer)          [(None, 512, 512, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 512, 512, 64  1792        ['input_14[0][0]']               
                                )                                                                 
                                                                                                  
 block1_conv2 (Conv2D)          (None, 512, 512, 64  36928       ['block1_conv1[0][0]']           
                                )                                                       

## Model Training & Compile

In [92]:
def loss(y_true, y_pred):
    def dice_loss(y_true, y_pred):
        y_pred = tf.math.sigmoid(y_pred)
        numerator = 2 * tf.reduce_sum(y_true * y_pred)
        denominator = tf.reduce_sum(y_true + y_pred)

        return 1 - numerator / denominator

    y_true = tf.cast(y_true, tf.float32)
    o = tf.nn.sigmoid_cross_entropy_with_logits(y_true, y_pred) + dice_loss(y_true, y_pred)
    return tf.reduce_mean(o)

In [96]:
def jaccard_similarity_loss(y_true, y_pred):
    intersection = tf.reduce_sum(y_true * y_pred, axis=[1, 2])
    union = tf.reduce_sum(y_true + y_pred, axis=[1, 2]) - intersection
    jaccard = intersection / (union + tf.keras.backend.epsilon())
    loss = 1 - jaccard
    return loss

In [97]:
# configure the optimizer, loss and metrics for training

# metrics : iou, accuracy
model.compile(
              # tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,name='Adam'),
              # optimizer='adam',
              optimizer = tf.keras.optimizers.experimental.SGD(0.01, momentum=0.9, weight_decay=0.0001),
              # loss=loss,
              loss = jaccard_similarity_loss,
#               loss = dice_loss,
#               loss='binary_crossentropy',
              metrics=[tf.keras.metrics.IoU(num_classes=2, target_class_ids=[0]), 'accuracy', tf.keras.metrics.MeanIoU(num_classes=2)]) # 1 is the target 

In [98]:
callbacks = [ModelCheckpoint(model_file, verbose=1, save_best_only = True),
             ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
             CSVLogger(log_file),
             EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False)
             ]

In [99]:
# 1st run
history = model.fit(train_patch_generator,
                    # train_generator, 
                    epochs=50, 
                    callbacks=callbacks,
                    validation_data=val_patch_generator,
                    steps_per_epoch = 2*140/BATCH_SIZE,
                    validation_steps = 40/BATCH_SIZE)

Epoch 1/50
Epoch 1: val_loss improved from inf to 0.85132, saving model to /content/model_checkpoint/model_checkpoint_patch.h5
Epoch 2/50
Epoch 2: val_loss did not improve from 0.85132
Epoch 3/50
Epoch 3: val_loss improved from 0.85132 to 0.83732, saving model to /content/model_checkpoint/model_checkpoint_patch.h5
Epoch 4/50
Epoch 4: val_loss did not improve from 0.83732
Epoch 5/50
Epoch 5: val_loss improved from 0.83732 to 0.66392, saving model to /content/model_checkpoint/model_checkpoint_patch.h5
Epoch 6/50
Epoch 6: val_loss did not improve from 0.66392
Epoch 7/50
Epoch 7: val_loss did not improve from 0.66392
Epoch 8/50
Epoch 8: val_loss did not improve from 0.66392
Epoch 9/50
Epoch 9: val_loss did not improve from 0.66392
Epoch 10/50
Epoch 10: val_loss did not improve from 0.66392
Epoch 11/50
Epoch 11: val_loss improved from 0.66392 to 0.59998, saving model to /content/model_checkpoint/model_checkpoint_patch.h5
Epoch 12/50
Epoch 12: val_loss did not improve from 0.59998
Epoch 13/5

In [None]:
model_loaded = tf.keras.models.load_model(model_file)
# model_loaded.summary()

In [101]:
def plot_metrics(metric_name, title, ylim=5):
#   '''plots a given metric from the model history'''
    plt.title(title)
    plt.ylim(0,ylim)
    plt.plot(model_history.history[metric_name],color='blue',label=metric_name)
    plt.plot(model_history.history['val_' + metric_name],color='green',label='val_' + metric_name)

In [None]:
history.history.keys()

In [None]:
loss = history.history['loss']
val_loss = history.history['val_loss']

fig, axes = plt.subplots(1, 2)
axes[0].plot(x, y1)
axes[0].set_title('Plot 1')

In [None]:
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure()
plt.plot(history.epoch, loss, 'r', label='Training loss')
plt.plot(history.epoch, val_loss, 'b', label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.legend()
plt.show()

In [None]:
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

plt.figure()
plt.plot(history.epoch, accuracy, 'r', label='Training Accuracy')
plt.plot(history.epoch, val_accuracy, 'b', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy Value')
plt.legend()
plt.show()

In [None]:
miou = history.history['mean_io_u_2']
val_miou = history.history['val_mean_io_u_2']

plt.figure()
plt.plot(history.epoch, miou, 'r', label='Training miou')
plt.plot(history.epoch, val_miou, 'b', label='Validation miou')
plt.title('Training and Validation miou')
plt.xlabel('Epoch')
plt.ylabel('miou Value')
plt.legend()
plt.show()

## Model Prediction

In [None]:
!mkdir kaggle/working/aerialimage/test_input/input_data

In [None]:
# move input images to the folder

org_test_input = '/kaggle/input/aerialimagedataset/AerialImageDataset/test/images/'
new_test_input = '/kaggle/working/aerialimage/test_input/input_data'


input_img_paths = []

for dirname, _, filenames in os.walk(org_test_input):
    for filename in filenames:
        input_img_paths.append(os.path.join(dirname, filename))

input_img_paths = sorted(input_img_paths)

random.Random(23).shuffle(input_img_paths)
    
input_img_paths[0:2]

In [None]:
# copy 40 input images to test folder

for file in input_img_paths:
    shutil.copy(file, new_test_input)

In [None]:
test_input_datagen = ImageDataGenerator(rescale=1./255)
test_target_datagen = ImageDataGenerator(rescale=1./255)

In [None]:

# # Prediction Utilities

# def create_mask(pred_mask):
# #   '''
# #   Creates the segmentation mask by getting the channel with the highest probability. Remember that we
# #   have 3 channels in the output of the UNet. For each pixel, the predicition will be the channel with the
# #   highest probability.
# #   '''
#     pred_mask = tf.argmax(pred_mask, axis=-1)
#     pred_mask = pred_mask[..., tf.newaxis]
#     return pred_mask[0].numpy()

# def make_predictions(image, num=1):
# #   '''
# #   Feeds an image to a model and returns the predicted mask.
# #   '''

#     pred_mask = model.predict(image)
#     pred_mask = create_mask(pred_mask)

#     return pred_mask

In [None]:
# load the filename in the directory

def read_file(directory):
    blind_test = []
    for dirname, _, filenames in os.walk(directory):
        for filename in filenames:
            blind_test.append(os.path.join(dirname, filename))
    blind_test = sorted(blind_test)
    return blind_test

# build the dataset using list of filenames and target image size
def build_dataset(dir_list, img_size):
    num_imgs = len(dir_list)
    test_imgs = np.zeros((num_imgs,) + img_size + (3,), dtype="float32")

    for i in range(len(dir_list)):
        test_imgs[i] = img_to_array(load_img(dir_list[i], target_size=img_size))
    return test_imgs
 
img_size = (512,512)
directory_input = '/kaggle/working/aerialimage/val_input/input_data'
directory_target = '/kaggle/working/aerialimage/val_target/target_data'


val_input_dir = read_file(directory_input)
val_target_dir = read_file(directory_target)


val_input = build_dataset(val_input_dir, img_size)
val_target = build_dataset(val_target_dir, img_size)


print("Shape of val input data:", val_input.shape)
print("Shape of val input target:", val_target.shape)


In [None]:
def display_data(test_imgs , target_imgs, num1, num2, model_load):
    plt.figure(figsize=(100,100))  
    s = num1
    i = num2

    count = 1

    for k in range(s,i,1):
        input_image = test_imgs[k]
        target_image = target_imgs[k]

        prediction = model_load.predict(np.expand_dims(input_image, 0))[0]
        pred_threshold = prediction > 0.5
        pred_threshold = pred_threshold * 255

        plt.subplot(6,6,3*count-2)
        plt.axis("off")
        plt.imshow(array_to_img(input_image))

        plt.subplot(6,6,3*count-1)
        plt.axis("off")
        plt.imshow(array_to_img(target_image), cmap="gray") 
        plt.tight_layout()


        plt.subplot(6,6,3*count)
        plt.axis("off")
        plt.imshow(pred_threshold, cmap="gray")

        count += 1

    plt.show()
    plt.tight_layout()


In [None]:
def display(display_list,titles=[], display_string=None):
#   '''displays a list of images/masks'''

  plt.figure(figsize=(15, 15))

    for i in range(len(display_list)):
        plt.subplot(1, len(display_list), i+1)
        plt.title(titles[i])
        plt.xticks([])
        plt.yticks([])
        if display_string and i == 1:
              plt.xlabel(display_string, fontsize=12)
        img_arr = tf.keras.preprocessing.image.array_to_img(display_list[i])
        plt.imshow(img_arr)

    plt.show()


In [None]:
def show_predictions(dataset=None, num=1):
    if dataset:
        for imagein dataset.take(num):
            pred_mask = model.predict(image)
            display([image[0], create_mask(pred_mask)])