# Load Packages

In [None]:
import os
import glob
import numpy as np
import datetime
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from tqdm import notebook, tnrange
import matplotlib.pyplot as plt

In [None]:
import keras
import keras_unet_collection
from keras_unet_collection import models, utils

In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from keras.losses import *
from keras_unet_collection.losses import iou_seg
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import array_to_img, img_to_array, load_img
from keras.models import load_model

# Dataset

In [None]:
# Root folder of the dataset; the file-listing cells below glob its
# Train/ and Test/ sub-folders (each with images/ and labels/).
dataset_dir = "Amsterdam Training Data"

List of Train Data names

In [None]:
# Base names (text before the first ".") of every training image and label
# .tif file, sorted so both lists are in a deterministic order.
img_id = sorted(
    os.path.basename(path).split(".")[0]
    for path in glob.glob(os.path.join(dataset_dir, "Train", "images", "*.tif"))
)

label_id = sorted(
    os.path.basename(path).split(".")[0]
    for path in glob.glob(os.path.join(dataset_dir, "Train", "labels", "*.tif"))
)

List of test data names

In [None]:
# Base names (text before the first ".") of every test image .tif file, sorted.
test_img_id = sorted(
    os.path.basename(path).split(".")[0]
    for path in glob.glob(os.path.join(dataset_dir, "Test", "images", "*.tif"))
)

# Same for the test labels. The original call had a stray second comma after
# dataset_dir, which is a SyntaxError.
test_label_id = sorted(
    os.path.basename(path).split(".")[0]
    for path in glob.glob(os.path.join(dataset_dir, "Test", "labels", "*.tif"))
)

In [None]:
# Width and height used to allocate the image/mask arrays below.
# The loading cells call load_img without target_size, so the .tif files
# must already have exactly this size.
im_width = 512
im_height = 512

In [None]:
# Pre-allocated float32 buffers with one (im_height, im_width, 1) slot per
# sample: X / y for the training ids, X_test / y_test for the test ids.
X, y = (
    np.zeros((len(img_id), im_height, im_width, 1), dtype=np.float32)
    for _ in range(2)
)

X_test, y_test = (
    np.zeros((len(test_img_id), im_height, im_width, 1), dtype=np.float32)
    for _ in range(2)
)

Train Data

In [None]:
# Load every training image/mask pair into the pre-allocated X / y arrays.
# Files are read from the same Train/ folders that img_id was globbed from
# (the original read from dataset_dir/images, which the listing cell never used).
train_img_dir = os.path.join(dataset_dir, "Train", "images")
train_label_dir = os.path.join(dataset_dir, "Train", "labels")

for n, id_ in notebook.tqdm(enumerate(img_id), total=len(img_id)):
    # color_mode="grayscale" replaces the deprecated grayscale=True flag.
    x_img = img_to_array(
        load_img(os.path.join(train_img_dir, id_ + ".tif"), color_mode="grayscale")
    )
    mask = img_to_array(
        load_img(os.path.join(train_label_dir, id_ + ".tif"), color_mode="grayscale")
    )

    # Min-max normalization to [0, 1]. A constant image has zero range and
    # would divide by zero, so it is stored as all zeros instead.
    value_range = x_img.max() - x_img.min()
    if value_range > 0:
        X[n] = (x_img - x_img.min()) / value_range
    else:
        X[n] = 0.0

    # Masks are stored exactly as loaded (no rescaling).
    y[n] = mask

Test Data

In [None]:
# Load every test image/mask pair into the pre-allocated X_test / y_test arrays.
# Files are read from the same Test/ folders that test_img_id was globbed from
# (the original read from dataset_dir/images, the folder the train loop also used).
test_img_dir = os.path.join(dataset_dir, "Test", "images")
test_label_dir = os.path.join(dataset_dir, "Test", "labels")

for n, id_ in notebook.tqdm(enumerate(test_img_id), total=len(test_img_id)):
    # color_mode="grayscale" replaces the deprecated grayscale=True flag.
    x_img = img_to_array(
        load_img(os.path.join(test_img_dir, id_ + ".tif"), color_mode="grayscale")
    )
    mask = img_to_array(
        load_img(os.path.join(test_label_dir, id_ + ".tif"), color_mode="grayscale")
    )

    # Min-max normalization to [0, 1]. A constant image has zero range and
    # would divide by zero, so it is stored as all zeros instead.
    value_range = x_img.max() - x_img.min()
    if value_range > 0:
        X_test[n] = (x_img - x_img.min()) / value_range
    else:
        X_test[n] = 0.0

    # Masks are stored exactly as loaded (no rescaling).
    y_test[n] = mask

Split validation data

In [None]:
# Hold out 15 % of the loaded training samples for validation.
# The fixed random_state makes the shuffled split reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.15,
    shuffle=True,
    random_state=8,
)

In [None]:
# Shapes of the train / validation / test image arrays, one per line.
print(np.shape(X_train), np.shape(X_valid), np.shape(X_test), sep="\n")

# Data Augmentation

In [None]:
# Geometric augmentation applied to the images.
img_data_gen_args = {
    "rotation_range": 60,
    "width_shift_range": 0.3,
    "height_shift_range": 0.3,
    "shear_range": 0.5,
    "horizontal_flip": True,
    "zoom_range": 0.2,
    "fill_mode": "reflect",
}

# Masks get the identical geometric settings plus a preprocessing step that
# maps every value > 0 to 1 and everything else to 0 (binarizes the mask again).
mask_data_gen_args = dict(
    img_data_gen_args,
    preprocessing_function=lambda x: (x > 0).astype(x.dtype),
)

# Batch size used by both generators: 4 for U-Net3+, 8 for Attention U-Net.
# batch_size = 8
batch_size = 4
# Both flows share this seed so each image batch and mask batch receive the
# same shuffling and the same random transforms.
seed = 24

image_data_generator = ImageDataGenerator(**img_data_gen_args)
mask_data_generator = ImageDataGenerator(**mask_data_gen_args)

image_generator = image_data_generator.flow(X_train, batch_size=batch_size, seed=seed)
mask_generator = mask_data_generator.flow(y_train, batch_size=batch_size, seed=seed)

# Validation data is not augmented; the matching validation generators stay disabled:
# valid_img_generator = image_data_generator.flow(X_valid, batch_size=batch_size)
# valid_mask_generator = mask_data_generator.flow(y_valid, batch_size=batch_size)


def my_image_mask_generator(image_generator, mask_generator):
    """Yield ``(image_batch, mask_batch)`` tuples from two iterables in lockstep.

    Both inputs are advanced together with ``zip``, so iteration ends as soon
    as either of them is exhausted.
    """
    yield from zip(image_generator, mask_generator)

In [None]:
# Paired (image batch, mask batch) generator that is passed to fit() below.
train = my_image_mask_generator(image_generator, mask_generator)

# Validation uses the plain X_valid / y_valid arrays, so no paired validation generator is built:
# validation = my_image_mask_generator(valid_img_generator, valid_mask_generator)

# Plot Samples

In [None]:
# visualize samples and their predicted label
def plot_sample(X, y, preds, binary_preds, ix=None):
    """Plot one sample next to its ground truth and its predictions.

    Draws four panels: the input image, the ground-truth mask, the raw
    prediction and the thresholded (binary) prediction. When the ground
    truth contains any positive pixel, its 0.5 contour is overlaid on the
    image and on the raw prediction.

    Args:
        X: Image array indexed as ``X[ix, ..., 0]``.
        y: Ground-truth masks, indexed by sample.
        preds: Raw model predictions, indexed by sample.
        binary_preds: Thresholded predictions, indexed by sample.
        ix: Index of the sample to plot. A random valid index is drawn
            when it is ``None``.
    """
    if ix is None:
        # randrange excludes len(X). The previous randint(0, len(X)) is
        # inclusive and could return len(X), raising IndexError below.
        ix = random.randrange(len(X))

    has_mask = y[ix].max() > 0

    fig, ax = plt.subplots(1, 4, figsize=(20, 10))
    ax[0].imshow(X[ix, ..., 0], cmap='seismic')
    if has_mask:
        ax[0].contour(y[ix].squeeze(), colors='k', levels=[0.5])
    ax[0].set_title('Images')

    ax[1].imshow(y[ix].squeeze(), cmap='gray')
    ax[1].set_title('Ground Truth Building footprint')

    ax[2].imshow(preds[ix].squeeze(), cmap='gray', vmin=0, vmax=1)
    if has_mask:
        ax[2].contour(y[ix].squeeze(), colors='k', levels=[0.5])
    ax[2].set_title('Predicted Building Footprint')

    # The binary panel is shown without the ground-truth contour.
    ax[3].imshow(binary_preds[ix].squeeze(), cmap='gray', vmin=0, vmax=1)
    ax[3].set_title('Binary Predicted Building Footprint')


# Load Models

The top-performing models trained on Miami-Dade data were selected for fine-tuning using transfer learning techniques with Amsterdam data.

## U-Net3+

In [None]:
# Saved U-Net3+ model that is fine-tuned below.
model_unet3p_path = os.path.join(
    "Model Output",
    "U-Net3+",
    "unet3p_model_1.h5",
)

# Deserialize on the CPU. compile=False skips restoring the saved
# loss/optimizer; the model is compiled again in the next cell.
with tf.device("/CPU:0"):
    unet3p_model = load_model(model_unet3p_path, compile=False)

In [None]:
# unet3+ loss/compile
def hybrid_loss(y_true, y_pred):
    """Return the focal Tversky loss plus the IoU loss for one model output.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor.

    Returns:
        The sum of ``focal_tversky`` (alpha=0.5, gamma=4/3) and ``iou_seg``.
    """
    # Imported here because the notebook's import cells bring in `models`,
    # `utils` and `iou_seg` from keras_unet_collection but never bind the
    # name `losses`, so the bare `losses.` references raised NameError.
    from keras_unet_collection import losses

    loss_focal = losses.focal_tversky(y_true, y_pred, alpha=0.5, gamma=4/3)
    loss_iou = losses.iou_seg(y_true, y_pred)

    # The MS-SSIM term is deliberately left out of the sum:
    # loss_ssim = losses.ms_ssim(y_true, y_pred, max_val=1.0, filter_size=4)

    return loss_focal + loss_iou

# Five loss entries with weights 0.25 x 4 and 1.0.
# NOTE(review): presumably four deep-supervision outputs plus the final
# output - confirm against the saved model's outputs.
unet3p_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=[hybrid_loss] * 5,
    loss_weights=[0.25] * 4 + [1.0],
)

In [None]:
# Output folder for the fine-tuned model, its CSV history and TensorBoard logs.
base_dir = os.path.join("Model Output", "TransferLearning")
os.makedirs(base_dir, exist_ok=True)
# One TensorBoard run directory per execution, named by timestamp.
logdir = os.path.join(base_dir, "logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

with tf.device('/CPU:0'):
    callbacks = [
        # Appends one ';'-separated row per epoch.
        tf.keras.callbacks.CSVLogger(os.path.join(base_dir, "unet3p_amst.csv"), append=True, separator=';'),
        ModelCheckpoint(
            filepath=os.path.join(base_dir, "unet3p_amst.h5"),
            # The model is compiled without metrics, so 'val_accuracy' is never
            # logged and save_best_only would skip every save. 'val_loss' is
            # always available when validation data is passed to fit().
            monitor='val_loss',
            verbose=1,
            save_best_only=True
        ),
        TensorBoard(log_dir=logdir, histogram_freq=1)
    ]


In [None]:
# Values used to derive steps_per_epoch for the U-Net3+ run.
batch_size = 4
total_training_sample = X_train.shape[0]

In [None]:
with tf.device('/GPU:0'):
    # `train` is a generator that already yields batches, so fit() must not be
    # given batch_size (see the Keras fit() documentation). The array-based
    # validation data still needs a batch size, hence validation_batch_size.
    history_u3 = unet3p_model.fit(
        train,
        # The generator never ends, so the epoch length is set explicitly.
        steps_per_epoch=round(total_training_sample / batch_size),
        epochs=75,
        validation_data=(X_valid, y_valid),
        validation_batch_size=batch_size,
        callbacks=callbacks,
    )

### Results

In [None]:
# Evaluate on the train, validation and test sets (in that order)
unet3p_model.evaluate(X_train, y_train, verbose=1, batch_size=4)
unet3p_model.evaluate(X_valid, y_valid, verbose=1, batch_size=4)
unet3p_model.evaluate(X_test, y_test, verbose=1, batch_size=4)

In [None]:
# Run inference on all three splits. The next cell indexes each result
# with [-1], i.e. keeps only the last entry of what predict() returns.
preds_train_unet3p = unet3p_model.predict(
    X_train, verbose=1, batch_size=4
)
preds_val_unet3p = unet3p_model.predict(
    X_valid, verbose=1, batch_size=4
)
# The test split is predicted with a smaller batch size (2).
preds_test_unet3p = unet3p_model.predict(
    X_test, verbose=1, batch_size=2
)

In [None]:
# Keep only the last entry of each prediction result.
preds_train_unet3p_1, preds_val_unet3p_1, preds_test_unet3p_1 = (
    outputs[-1]
    for outputs in (preds_train_unet3p, preds_val_unet3p, preds_test_unet3p)
)

In [None]:
# Threshold the predictions at 0.5 to get uint8 masks of 0s and 1s.
preds_train_t_u3 = np.greater(preds_train_unet3p_1, 0.5).astype(np.uint8)
preds_val_t_u3 = np.greater(preds_val_unet3p_1, 0.5).astype(np.uint8)
preds_test_t_u3 = np.greater(preds_test_unet3p_1, 0.5).astype(np.uint8)

In [None]:
# Plot a random test sample (ix=None lets plot_sample pick the index)
plot_sample(X_test, y_test, preds_test_unet3p_1, preds_test_t_u3, ix=None)

In [None]:
%load_ext tensorboard
%tensorboard --logdir=logdir

#### Evaluation Metrics

In [None]:
# Pixel-wise IoU over the whole test set; any non-zero value counts as foreground.
intersection = np.logical_and(y_test, preds_test_t_u3)
union = np.logical_or(y_test, preds_test_t_u3)
union_pixels = np.sum(union)
# An empty union (no foreground in either array) would be 0/0, so NaN is
# reported explicitly instead of triggering a runtime warning.
iou_score = np.sum(intersection) / union_pixels if union_pixels else float("nan")
print("IoU score is: ", iou_score)

In [None]:
# Flatten to 1-D label vectors for the scikit-learn metrics.
# The ground truth is binarized with the same "> 0" rule the mask augmentation
# uses, so both vectors share the labels {0, 1} even if the stored masks use a
# different foreground value. Masks that are already 0/1 are unchanged.
y_true = (y_test > 0).astype(np.uint8).flatten()
y_pred_u3 = preds_test_t_u3.flatten()

In [None]:
# Pixel-wise classification scores of the U-Net3+ test predictions.
accuracy, precision, recall, f1 = (
    scorer(y_true, y_pred_u3)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# accuracy = (tp + tn) / (p + n)
print('Accuracy: %f' % accuracy)
# precision = tp / (tp + fp)
print('Precision: %f' % precision)
# recall = tp / (tp + fn)
print('Recall: %f' % recall)
# f1 = 2 tp / (2 tp + fp + fn)
print('F1 score: %f' % f1)

## Attention U-Net

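Use `batch_size = 8` for this model: change it in [Data Augmentation](#scrollTo=hoH_AsWezXxy&line=3&uniqifier=1), then re-run that cell and the cell that builds `train`, so the generators yield batches of 8 before training starts.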

In [None]:
# Saved Attention U-Net model that is fine-tuned below.
model_att_path = os.path.join(
    "Model Output",
    "Attention U-Net",
    "attention_model_1.h5",
)

# Deserialize on the CPU. load_model is called with its defaults here
# (no compile=False, unlike the U-Net3+ cell).
with tf.device("/CPU:0"):
    attention_model = load_model(model_att_path)

In [None]:
# Output folder shared with the U-Net3+ run; created if it does not exist.
base_dir = os.path.join("Model Output", "TransferLearning")
os.makedirs(base_dir, exist_ok=True)

# Fresh TensorBoard run directory named by the current timestamp.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join(base_dir, "logs", run_stamp)

with tf.device("/CPU:0"):
    callbacks = [
        # Per-epoch history appended as ';'-separated rows.
        tf.keras.callbacks.CSVLogger(
            os.path.join(base_dir, "attention_amst.csv"),
            append=True,
            separator=";",
        ),
        # NOTE(review): 'val_accuracy' is only logged if the loaded model was
        # compiled with an accuracy metric - confirm, otherwise nothing is saved.
        ModelCheckpoint(
            filepath=os.path.join(base_dir, "attention_amst.h5"),
            monitor="val_accuracy",
            save_best_only=True,
            verbose=1,
        ),
        TensorBoard(log_dir=logdir, histogram_freq=1),
    ]


In [None]:
# Values used to derive steps_per_epoch for the Attention U-Net run.
batch_size = 8
total_training_sample = X_train.shape[0]

In [None]:
with tf.device('/GPU:0'):
    # `train` is a generator that already yields batches, so fit() must not be
    # given batch_size (see the Keras fit() documentation); the training batch
    # size is whatever the generators were built with. The array-based
    # validation data still needs a batch size, hence validation_batch_size.
    history_att = attention_model.fit(
        train,
        # The generator never ends, so the epoch length is set explicitly.
        steps_per_epoch=round(total_training_sample / batch_size),
        epochs=75,
        validation_data=(X_valid, y_valid),
        validation_batch_size=batch_size,
        callbacks=callbacks,
    )

### Results

In [None]:
# Evaluate on the train, validation and test sets (in that order)
attention_model.evaluate(X_train, y_train, verbose=1, batch_size=8)
attention_model.evaluate(X_valid, y_valid, verbose=1, batch_size=8)
attention_model.evaluate(X_test, y_test, verbose=1, batch_size=8)

In [None]:
# Run inference with the Attention U-Net on all three splits.
preds_train_att = attention_model.predict(
    X_train, verbose=1, batch_size=8
)
preds_val_att = attention_model.predict(
    X_valid, verbose=1, batch_size=8
)
preds_test_att = attention_model.predict(
    X_test, verbose=1, batch_size=8
)

In [None]:
# Threshold the predictions at 0.5 to get uint8 masks of 0s and 1s.
preds_train_t_att = np.greater(preds_train_att, 0.5).astype(np.uint8)
preds_val_t_att = np.greater(preds_val_att, 0.5).astype(np.uint8)
preds_test_t_att = np.greater(preds_test_att, 0.5).astype(np.uint8)

In [None]:
# Plot a random test sample (ix=None lets plot_sample pick the index)
plot_sample(X_test, y_test, preds_test_att, preds_test_t_att, ix=None)

In [None]:
%load_ext tensorboard
%tensorboard --logdir=logdir

#### Evaluation Metrics

In [None]:
# Pixel-wise IoU over the whole test set; any non-zero value counts as foreground.
intersection = np.logical_and(y_test, preds_test_t_att)
union = np.logical_or(y_test, preds_test_t_att)
union_pixels = np.sum(union)
# An empty union (no foreground in either array) would be 0/0, so NaN is
# reported explicitly instead of triggering a runtime warning.
iou_score = np.sum(intersection) / union_pixels if union_pixels else float("nan")
print("IoU score is: ", iou_score)

In [None]:
# Flatten to 1-D label vectors for the scikit-learn metrics.
# The ground truth is binarized with the same "> 0" rule the mask augmentation
# uses, so both vectors share the labels {0, 1} even if the stored masks use a
# different foreground value. Masks that are already 0/1 are unchanged.
y_true = (y_test > 0).astype(np.uint8).flatten()
y_pred_att = preds_test_t_att.flatten()

In [None]:
# Pixel-wise classification scores of the Attention U-Net test predictions.
accuracy, precision, recall, f1 = (
    scorer(y_true, y_pred_att)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# accuracy = (tp + tn) / (p + n)
print('Accuracy: %f' % accuracy)
# precision = tp / (tp + fp)
print('Precision: %f' % precision)
# recall = tp / (tp + fn)
print('Recall: %f' % recall)
# f1 = 2 tp / (2 tp + fp + fn)
print('F1 score: %f' % f1)