This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**

This notebook was generated for TensorFlow 2.6.

# Advanced deep learning for computer vision

## Three essential computer vision tasks

## An image segmentation example

In [None]:
!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz # download the archive containing the input images
!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz # download the archive containing the annotations (segmentation masks)
!tar -xf images.tar.gz # unpack the image archive into the working directory
!tar -xf annotations.tar.gz # unpack the annotation archive into the working directory

In [None]:
import os # importing os module to interact with the file system 

input_dir = "images/"  # directory holding the input photos (.jpg)
target_dir = "annotations/trimaps/"  # directory holding the segmentation masks (.png)

# Both lists are sorted so that the i-th image lines up with the i-th mask.
input_img_paths = sorted(
    os.path.join(input_dir, fname)
    for fname in os.listdir(input_dir)
    if fname.endswith(".jpg"))
target_paths = sorted(
    os.path.join(target_dir, fname)
    for fname in os.listdir(target_dir)
    # Dot-prefixed .png files are excluded so they cannot shift the pairing.
    if fname.endswith(".png") and not fname.startswith("."))

# Images and masks are matched purely by position in these lists, so a count
# mismatch would silently pair every later image with the wrong mask.
assert len(input_img_paths) == len(target_paths), (
    f"Found {len(input_img_paths)} images but {len(target_paths)} masks; "
    "each image needs exactly one mask")

In [None]:
import matplotlib.pyplot as plt # importing matplotlib to plot the images
from tensorflow.keras.utils import load_img, img_to_array # importing load_img and img_to_array from tensorflow.keras.utils

# Preview the input image at index 9, with the axis ticks hidden.
preview_image = load_img(input_img_paths[9])
plt.axis("off")
plt.imshow(preview_image)

In [None]:
def display_target(target_array):
    """Plot a segmentation mask as an image.

    The first channel of `target_array` is cast to uint8, shifted down by one
    and multiplied by 127, so that label values 1, 2 and 3 are drawn as
    0, 127 and 254 respectively. The axes are hidden.
    """
    labels = target_array[:, :, 0].astype("uint8")
    # Spread the shifted labels over the displayable intensity range.
    stretched = (labels - 1) * 127
    plt.axis("off")
    plt.imshow(stretched)

# Load the mask at index 9 as a single-channel image, then plot it.
target_preview = load_img(target_paths[9], color_mode="grayscale")
img = img_to_array(target_preview)
display_target(img)

In [None]:
import numpy as np # importing numpy module
import random # importing random module

img_size = (200, 200)  # every image and mask is resized to this size on load
num_imgs = len(input_img_paths)

# Restore the canonical sorted order before shuffling. Without this, re-running
# the cell would shuffle the already shuffled lists and change which samples
# end up in the validation split.
input_img_paths.sort()
target_paths.sort()

# Two generators seeded identically apply the same permutation to both
# equally long lists, so image i still corresponds to mask i afterwards.
random.Random(1337).shuffle(input_img_paths)
random.Random(1337).shuffle(target_paths)

def path_to_input_image(path):
    """Load the image at `path`, resized to `img_size`, and return it as an array."""
    resized = load_img(path, target_size=img_size)
    return img_to_array(resized)

def path_to_target(path):
    """Load the mask at `path` as a uint8 label array.

    The mask is read in grayscale at `img_size`, cast to uint8, and shifted
    down by one so that its values start at 0.
    """
    mask_img = load_img(path, target_size=img_size, color_mode="grayscale")
    labels = img_to_array(mask_img).astype("uint8")
    return labels - 1

# Preallocate the full dataset: 3-channel float32 inputs and 1-channel uint8 masks.
input_imgs = np.zeros((num_imgs, *img_size, 3), dtype="float32")
targets = np.zeros((num_imgs, *img_size, 1), dtype="uint8")

# Fill both arrays sample by sample; index i pairs an image with its mask.
for i in range(num_imgs):
    img_path, mask_path = input_img_paths[i], target_paths[i]
    input_imgs[i] = path_to_input_image(img_path)
    targets[i] = path_to_target(mask_path)

# Hold out the last `num_val_samples` samples for validation; train on the rest.
num_val_samples = 1000
train_input_imgs, val_input_imgs = (
    input_imgs[:-num_val_samples], input_imgs[-num_val_samples:])
train_targets, val_targets = (
    targets[:-num_val_samples], targets[-num_val_samples:])

In [None]:
from tensorflow import keras # importing keras from tensorflow
from tensorflow.keras import layers # importing layers from tensorflow.keras

def get_model(img_size, num_classes):
    """Build a convolutional encoder/decoder for per-pixel classification.

    Args:
        img_size: Tuple giving the spatial size of the input; a 3-channel
            axis is appended to form the input shape.
        num_classes: Number of output channels of the final softmax layer.

    Returns:
        An uncompiled `keras.Model` mapping an image batch to per-pixel
        class probabilities.
    """
    conv_kwargs = {"activation": "relu", "padding": "same"}

    inputs = keras.Input(shape=img_size + (3,))
    x = layers.Rescaling(1.0 / 255)(inputs)

    # Encoder: each stage halves the resolution with a stride-2 convolution,
    # then refines the features with a second convolution.
    for filters in (64, 128, 256):
        x = layers.Conv2D(filters, 3, strides=2, **conv_kwargs)(x)
        x = layers.Conv2D(filters, 3, **conv_kwargs)(x)

    # Decoder: mirror of the encoder, using transposed convolutions; the
    # stride-2 one in each stage doubles the resolution again.
    for filters in (256, 128, 64):
        x = layers.Conv2DTranspose(filters, 3, **conv_kwargs)(x)
        x = layers.Conv2DTranspose(filters, 3, strides=2, **conv_kwargs)(x)

    # One softmax distribution over the classes for every pixel.
    outputs = layers.Conv2D(
        num_classes, 3, activation="softmax", padding="same")(x)

    return keras.Model(inputs, outputs)

# Three output classes, one per label value (0, 1, 2) produced by path_to_target.
# NOTE: these are the trimap labels (foreground, background, contour), not
# "cat, dog and background".
model = get_model(num_classes=3, img_size=img_size)
model.summary()

In [None]:
# The targets are integer label maps, hence the sparse variant of the loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop")

# With save_best_only=True the checkpoint file is only overwritten when the
# monitored metric improves.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="oxford_segmentation.keras", save_best_only=True),
]

history = model.fit(
    train_input_imgs,
    train_targets,
    validation_data=(val_input_imgs, val_targets),
    batch_size=64,
    epochs=50,
    callbacks=callbacks,
)

In [None]:
# Per-epoch loss curves recorded by fit(); epochs are numbered from 1.
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(loss) + 1)

fig, ax = plt.subplots()
ax.plot(epochs, loss, "bo", label="Training loss")  # blue dots
ax.plot(epochs, val_loss, "b", label="Validation loss")  # solid blue line
ax.set_title("Training and validation loss")
ax.legend()

In [None]:
from tensorflow.keras.utils import array_to_img # importing array_to_img from tensorflow.keras.utils

# Reload the checkpoint written by the ModelCheckpoint callback during training.
model = keras.models.load_model("oxford_segmentation.keras")

# Pick one validation image and show it with the axis ticks hidden.
i = 4
test_image = val_input_imgs[i]
plt.axis("off")
plt.imshow(array_to_img(test_image))

# predict() works on batches: add a leading batch axis, then take the single
# result back out.
test_batch = test_image[np.newaxis, ...]
mask = model.predict(test_batch)[0]

def display_mask(pred):
    """Plot the most likely class per pixel of a prediction.

    `pred` holds per-class scores along its last axis. The winning class
    index at each position is multiplied by 127 and shown as an image with
    the axes hidden.
    """
    class_ids = np.argmax(pred, axis=-1)
    plt.axis("off")
    plt.imshow(class_ids * 127)

# Start a new figure before drawing the mask. The test image was drawn on the
# current axes earlier in this same cell, and a second imshow() on those axes
# would paint the mask over it, leaving the test image invisible.
plt.figure()
display_mask(mask)