# Deep learning: An introduction through image processing
## Pictures classification

The goal is to create a neural network capable of classifying pictures of dogs and cats.

## Setup


In [None]:
# Download a zip file with the cats and dogs images
# The structure of the folders in the zip file is:
# - train
#   - cats
#     - cat.0.jpg
#     - cat.1.jpg
#     - ...
#   - dogs
#     - dog.0.jpg
#     - dog.1.jpg
#     - ...
# - validation
#   - cats
#     - cat.2000.jpg
#     - cat.2001.jpg
#     - ...
#   - dogs
#     - dog.2000.jpg
#     - dog.2001.jpg
#     - ...
# The archive is written to /tmp/cats_and_dogs_filtered.zip (-O option).
# NOTE(review): --no-check-certificate disables TLS certificate verification;
# confirm this is still required for this host.
!wget --no-check-certificate https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip -O /tmp/cats_and_dogs_filtered.zip

import os
import zipfile

# Unzip the zip file to the /tmp location.
# The context manager guarantees the archive is closed even if the
# extraction raises part-way through.
local_zip = '/tmp/cats_and_dogs_filtered.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')


# Root folder of the dataset (the archive is expected to unpack to this path).
base_dir = '/tmp/cats_and_dogs_filtered'

# The two top-level splits of the dataset.
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation')
)

# Inside each split, one sub-folder per class: cats first, then dogs.
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, label) for label in ('cats', 'dogs')
)
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, label) for label in ('cats', 'dogs')
)

# Names (not full paths) of the entries found in each training class folder.
train_cat_fnames, train_dog_fnames = map(
    os.listdir, (train_cats_dir, train_dogs_dir)
)

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import cv2

Now let's visualize some of our data thanks to the Matplotlib library.

In [None]:
# We take one of the pictures in the training set, among the cat pictures
filename = os.path.join(train_cats_dir, 'cat.10.jpg')
# Load the image in memory. plt.imread is used rather than
# matplotlib.image.imread because a bare `import matplotlib` does not by
# itself import the matplotlib.image submodule.
image = plt.imread(filename)
# Print the image shape
print(image.shape)
# Display the image
plt.imshow(image)
plt.show()

We define some helper objects for the training of the model.

In [None]:
# The generator rescales the pixel values of the input images from
# [0, 255] to [0, 1].
data_generator = ImageDataGenerator(rescale=1.0/255.0)

# Create the iterators that automatically load the images from the training
# and validation folders and pass them to the model during the training,
# in batches of 32 pictures resized to 128x128 with one-hot class labels.
train_iterator = data_generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
)
test_iterator = data_generator.flow_from_directory(
    validation_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
)

# Step 1: Define the simplest model

As a first step, we're going to try to use the simplest possible neural network to solve this problem. Let's see what accuracy we can reach on this task of classifying cat and dog pictures.

In [None]:

# Simplest classifier: the 128x128x3 picture is flattened and fed straight
# into a 2-unit softmax layer (one unit per class).
model = Sequential([
    Flatten(input_shape=(128, 128, 3)),
    Dense(2, activation='softmax'),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

model.summary()

# Train for 10 epochs; each epoch goes through every batch of the training
# iterator, then evaluates on every batch of the validation iterator.
history = model.fit(
    train_iterator,
    epochs=10,
    steps_per_epoch=len(train_iterator),
    validation_data=test_iterator,
    validation_steps=len(test_iterator),
)

# Step 2: Define a more complex densely-connected model

As a second step, try a more complex neural network. What is the best accuracy that you can achieve?

In [None]:

# Deeper fully-connected classifier: three hidden ReLU layers of decreasing
# width (512, 256, 128) sit between the flattened picture and the 2-unit
# softmax output.
model = Sequential([
    Flatten(input_shape=(128, 128, 3)),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(2, activation='softmax'),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

model.summary()

# Same training setup as before: 10 epochs over the full iterators.
history = model.fit(
    train_iterator,
    epochs=10,
    steps_per_epoch=len(train_iterator),
    validation_data=test_iterator,
    validation_steps=len(test_iterator),
)

# First convolutional network

The structure of the network we're going to create is:

(128,128,3) --- convolution ---> (128,128,32) --- pooling ---> (64,64,32) --- dense ---> 32 ---> 2

In [None]:
# Still a sequential model, this time described as a list of layers:
#  - a convolutional layer with 32 filters of size 3x3 (over the 3 colour
#    channels); padding='same' keeps the 128x128 spatial size,
#  - a 2x2 max-pooling that reduces the size of the layer,
#  - a flatten step followed by a 32-unit dense layer and the 2-unit
#    softmax output.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(128, 128, 3)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)
model.summary()

# Train for 10 epochs over the full training and validation iterators.
history = model.fit(
    train_iterator,
    epochs=10,
    steps_per_epoch=len(train_iterator),
    validation_data=test_iterator,
    validation_steps=len(test_iterator),
)

# Step 3: Visualize the training

Define a **visualize_training** function to plot the evolution of the losses and the accuracies during the training.

In [None]:
def visualize_training(history):
    """Plot the accuracy and loss curves recorded during a training run.

    One figure is shown per metric ('accuracy', then 'loss'), with the
    epoch on the x axis.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names to per-epoch values, e.g. the return value of
            ``model.fit``. It must contain 'accuracy' and 'loss'; the
            'val_accuracy' and 'val_loss' curves are drawn only when
            present, so a run without validation data can be plotted too.

    Raises:
        KeyError: If 'accuracy' or 'loss' is missing from the history.
    """
    curves = history.history
    # (metric name, legend position) for each of the two figures.
    for metric, legend_loc in (('accuracy', 'lower right'), ('loss', 'upper left')):
        labels = ['training']
        plt.plot(curves[metric])

        validation_key = f'val_{metric}'
        if validation_key in curves:
            plt.plot(curves[validation_key])
            labels.append('validation')

        plt.title(f'model {metric}')
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(labels, loc=legend_loc)
        plt.show()


visualize_training(history)



## Visualize the output of each layer

One good thing about convolutional layers is that they keep the image structure, so you can visualize the output of the layers.

In [None]:
# Display the layers of our model: the bare expression below is the last
# statement of the cell, so the notebook shows its value as the cell output.
model.layers

In [None]:
# Create a submodel from the model we've just trained, keeping only its first
# layer (the convolutional one)
submodel = Model(model.layers[0].input, model.layers[0].output)
# NOTE(review): compile() configures a model for training; predict() alone
# probably does not require it - confirm before removing this call.
submodel.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Let's display a quick summary of the model
submodel.summary()

# Open a picture from the validation dataset
filename = os.path.join(validation_dogs_dir, 'dog.2001.jpg')
# Load the image from the file in memory. cv2.imread returns None instead of
# raising when the file cannot be read, so fail here with a clear message
# rather than with an obscure error inside cvtColor.
image = cv2.imread(filename)
if image is None:
    raise FileNotFoundError(f"Could not read image file: {filename}")
# Same preprocessing as the iterators: RGB channel order (OpenCV loads BGR),
# 128x128 pixels, values scaled to [0, 1]
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (128, 128))
image = image / 255
# Display the image
plt.imshow(image)
# Feed the image into the submodel, as a batch containing a single picture
filtered_images = submodel.predict(image.reshape((1, 128, 128, 3)))
# Display the shape of the array output by the submodel
print(filtered_images.shape)
# Then finally plot the output of the submodel: one grayscale picture per
# filter, the 32 filters laid out row by row on an 8x4 grid
fig, axs = plt.subplots(8, 4, figsize=(30, 30))
for k, ax in enumerate(axs.flat):
    ax.imshow(filtered_images[0, :, :, k], cmap='gray', vmin=0, vmax=1)

# Step 4: Data augmentation

Use the data augmentation capabilities available in Keras to improve your network accuracy. See the [Keras documentation](https://keras.io/api/preprocessing/image/) to see how to use the ImageDataGenerator from a directory.

In [None]:

# Data augmentation is applied to the training pictures only: random
# rotations, zooms, shears, shifts and horizontal flips, on top of the
# rescaling of the pixel values to [0, 1].
augmented_data_generator = ImageDataGenerator(rescale=1.0/255.0,
                                              rotation_range=30,
                                              zoom_range=0.2,
                                              shear_range=0.2,
                                              width_shift_range=0.1,
                                              height_shift_range=0.1,
                                              horizontal_flip=True)
# The validation pictures are only rescaled. Augmenting them as well would
# make the validation metrics noisy and not comparable with the previous steps.
data_generator = ImageDataGenerator(rescale=1.0/255.0)
# Create the iterators that automatically load the images from the training
# and validation folders and pass them to the model during the training
train_iterator = augmented_data_generator.flow_from_directory(train_dir, class_mode='categorical', batch_size=128, target_size=(128, 128))
test_iterator = data_generator.flow_from_directory(validation_dir, class_mode='categorical', batch_size=128, target_size=(128, 128))

# Same architecture as the first convolutional network, trained from scratch
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(128, 128, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
model.summary()

# batch_size is not passed to fit(): the iterators already produce batches
# (of 128 pictures here), and the Keras documentation says not to specify it
# for generator-like inputs.
history = model.fit(train_iterator, steps_per_epoch=len(train_iterator), validation_data=test_iterator, validation_steps=len(test_iterator), epochs=20)

In [None]:
# Plot the accuracy and loss curves of the training run stored in `history`
visualize_training(history)

# Step 5: Experiment with deeper neural network designs

Try to stack convolutional layers, and add layers in the densely connected part of the network.

Plot the output of your different filters.


In [None]:


# Data augmentation is applied to the training pictures only: random
# rotations, zooms, shears, shifts and horizontal flips, on top of the
# rescaling of the pixel values to [0, 1].
augmented_data_generator = ImageDataGenerator(rescale=1.0/255.0,
                                              rotation_range=30,
                                              zoom_range=0.2,
                                              shear_range=0.2,
                                              width_shift_range=0.1,
                                              height_shift_range=0.1,
                                              horizontal_flip=True)
# The validation pictures are only rescaled, so that the validation metrics
# are measured on unmodified images.
data_generator = ImageDataGenerator(rescale=1.0/255.0)
# Create the iterators that automatically load the images from the training
# and validation folders and pass them to the model during the training
train_iterator = augmented_data_generator.flow_from_directory(train_dir, class_mode='categorical', batch_size=64, target_size=(128, 128))
test_iterator = data_generator.flow_from_directory(validation_dir, class_mode='categorical', batch_size=64, target_size=(128, 128))

# Three convolution + pooling stages with a growing number of filters
# (32, 64, 128); each 2x2 pooling halves the spatial size.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(128, 128, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
# Densely connected part: two hidden layers of 256 units, then the 2-unit
# softmax output
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(2, activation='softmax'))


model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
model.summary()

history = model.fit(train_iterator, steps_per_epoch=len(train_iterator), validation_data=test_iterator, validation_steps=len(test_iterator), epochs=10)

# Keep a second reference to this model under a dedicated name, since
# `model` is reassigned further down the notebook
complex_model = model

In [None]:
# Two generators: the training one rescales and randomly transforms the
# pictures, the validation one only rescales them.
augmented_data_generator = ImageDataGenerator(
    rescale=1.0/255.0,
    horizontal_flip=True,
    rotation_range=30,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
data_generator = ImageDataGenerator(rescale=1.0/255.0)

# Iterators that automatically load the images from the training and
# validation folders and pass them to the model during the training.
train_iterator = augmented_data_generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
)
test_iterator = data_generator.flow_from_directory(
    validation_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
)

# No new model is built in this cell: fit() is called again on the existing
# `model` object, for 10 more epochs.
history = model.fit(
    train_iterator,
    epochs=10,
    steps_per_epoch=len(train_iterator),
    validation_data=test_iterator,
    validation_steps=len(test_iterator),
)

# Transfer learning

Use the well-known VGG network to try to improve the accuracy of your network.


In [None]:
# Input tensor with the shape of our pictures (128x128, 3 channels)
input_tensor = Input(shape=(128, 128, 3))
# Load the VGG16 model with its ImageNet weights, without its top part,
# plugged on our own input tensor
vgg_model = VGG16(include_top=False, weights='imagenet', input_tensor=input_tensor)

# Display summary of the VGG model
vgg_model.summary()

# Freeze every VGG layer, so that training the complete model does not try
# to adjust the parameters of those first layers
for vgg_layer in vgg_model.layers:
    vgg_layer.trainable = False

In [None]:
# Output tensor of the last layer of the VGG model
vgg_output = vgg_model.layers[-1].output
# Classification head stacked on top of VGG: flatten, one hidden layer of
# 256 units, a dropout of 0.5, then the 2-unit softmax output
flattened = Flatten()(vgg_output)
hidden = Dense(256, activation='relu')(flattened)
hidden = Dropout(0.5)(hidden)
output = Dense(2, activation='softmax')(hidden)
# The complete model goes from the VGG input tensor to the softmax output
model = Model(input_tensor, output)

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)
model.summary()

# Train for 10 epochs over the full training and validation iterators
history = model.fit(
    train_iterator,
    epochs=10,
    steps_per_epoch=len(train_iterator),
    validation_data=test_iterator,
    validation_steps=len(test_iterator),
)

# Step 6: Visualize the output of the different VGG layers

Use the same process we used before to display the output of the filters in the VGG layers.

In [None]:
# Submodel going from the model input up to the output of the layer at
# index 4 of model.layers
submodel = Model(model.layers[0].input, model.layers[4].output)
# NOTE(review): compile() configures a model for training; predict() alone
# probably does not require it - confirm before removing this call.
submodel.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Let's display a quick summary of the model
submodel.summary()

# Open a picture from the validation dataset
filename = os.path.join(validation_dogs_dir, 'dog.2001.jpg')
# Load the image from the file in memory. cv2.imread returns None instead of
# raising when the file cannot be read, so fail here with a clear message
# rather than with an obscure error inside cvtColor.
image = cv2.imread(filename)
if image is None:
    raise FileNotFoundError(f"Could not read image file: {filename}")
# Same preprocessing as the iterators: RGB channel order (OpenCV loads BGR),
# 128x128 pixels, values scaled to [0, 1]
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (128, 128))
image = image / 255
# Display the image
plt.imshow(image)
# Feed the image into the submodel, as a batch containing a single picture
filtered_images = submodel.predict(image.reshape((1, 128, 128, 3)))
# Display the shape of the array output by the submodel
print(filtered_images.shape)
# Then finally plot the output of the submodel: one grayscale picture per
# filter for the first 32 filters, laid out row by row on an 8x4 grid
fig, axs = plt.subplots(8, 4, figsize=(30, 30))
for k, ax in enumerate(axs.flat):
    ax.imshow(filtered_images[0, :, :, k], cmap='gray', vmin=0, vmax=1)


# Step 7: Try to trick your neural network


We create and train a model on a smaller input, only 64 by 64 pixels, for memory consumption reasons.

In [None]:
# Data augmentation is applied to the training pictures only; the validation
# pictures are only rescaled, so that the validation metrics are measured on
# unmodified images.
augmented_data_generator = ImageDataGenerator(rescale=1.0/255.0, horizontal_flip=True, rotation_range=20, zoom_range=0.2, shear_range=0.2, width_shift_range=0.2, height_shift_range=0.2)
data_generator = ImageDataGenerator(rescale=1.0/255.0)
# Both iterators resize the pictures to 64x64
train_iterator = augmented_data_generator.flow_from_directory(train_dir, class_mode='categorical', batch_size=32, target_size=(64, 64))
test_iterator = data_generator.flow_from_directory(validation_dir, class_mode='categorical', batch_size=32, target_size=(64, 64))

# Three convolution + pooling stages (32, 64, 128 filters), then two hidden
# dense layers of 256 units and the 2-unit softmax output
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(64, 64, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
model.summary()

# batch_size is not passed to fit(): the iterators already produce batches
# of 32 pictures, and the Keras documentation says not to specify it for
# generator-like inputs.
history = model.fit(train_iterator, steps_per_epoch=len(train_iterator), validation_data=test_iterator, validation_steps=len(test_iterator), epochs=10)

We're going to see if, by slightly modifying the input images, we can confuse the neural network and make it mistake a dog for a cat.

First let's find an image that is correctly predicted by the neural network.

In [None]:
# Mapping from class name to index in the prediction vector
print(train_iterator.class_indices)

# Take one picture from the training dog folder (the entry at index 2 of
# the directory listing)
filename = os.path.join(train_dogs_dir, train_dog_fnames[2])
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside cvtColor.
image = cv2.imread(filename)
if image is None:
    raise FileNotFoundError(f"Could not read image file: {filename}")
# Same preprocessing as the iterators: RGB channel order (OpenCV loads BGR),
# 64x64 pixels, values scaled to [0, 1]
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (64, 64))
image = image / 255

plt.imshow(image)

# Predict on a batch containing this single picture
prediction = model.predict(image.reshape((1, 64, 64, 3)))

print(prediction)

Then, for each pixel value, we check whether increasing it makes the prediction better or worse. If it makes it better, we record a positive value for the derivative; if it makes it worse, we record a negative value.

In [None]:
# Initial prediction of the model for the unmodified image: output at index 1
# (presumably the 'dogs' class - check the class_indices printed earlier)
initial_prediction = model.predict(image.reshape((1, 64, 64, 3)))[0][1]
# By how much we will increase a pixel value to detect a difference in the
# prediction of the model
delta = 0.001
# The derivative for each pixel value, same shape as the original image
derivative = np.zeros((64, 64, 3))

# Image number `index` of a batch has its pixel at row index // 64 and
# column index % 64 modified, so one batch covers every pixel of one color.
pixel_indices = np.arange(64 * 64)
pixel_rows = pixel_indices // 64
pixel_cols = pixel_indices % 64

# We loop on each color
for k in range(3):
    print(f"Generating differential images for color {k}...")
    # 64 * 64 copies of the image (the assignment broadcasts `image` over
    # the first axis), each with a single value of color k increased by delta
    image_modified = np.zeros((64 * 64, 64, 64, 3))
    image_modified[:] = image
    image_modified[pixel_indices, pixel_rows, pixel_cols, k] += delta
    print("Evaluating differential images...")
    print(f"Computing predicitions for slice {k}...")
    predictions = model.predict(image_modified)

    # Finite difference: change of the prediction divided by the change of
    # the input value
    derivatives = (predictions - initial_prediction) / delta

    print("Calculating derivative image...")
    # Row-major reshape puts the result of image number 64 * i + j at
    # position (i, j); only the output at index 1 is kept
    derivative[:, :, k] = derivatives[:, 1].reshape(64, 64)
    # Free memory
    image_modified = None

As the derivative has the same shape as the original image, we can display it as an image.

In [None]:
# Values in derivative are small, so we need to amplify the signal
# NOTE(review): the factor 150 looks empirical. derivative can hold negative
# values, which imshow presumably clips for float RGB data - confirm.
plt.imshow(derivative*150)

Then we use the derivative to compute a tricked image, and observe how the prediction evolves.

In [None]:
# Show the class-name -> index mapping again, to read the prediction below
print(train_iterator.class_indices)
# Build the tricked image by moving every pixel value against the derivative
# (scaled by 2), then clamp the result to [0, 1]
tricked_image = np.clip(image - 2 * derivative, 0, 1)
# Prediction of the model on a batch containing only the tricked image
tricked_batch = tricked_image.reshape((1, 64, 64, 3))
print(model.predict(tricked_batch))
# Display the tricked image
plt.imshow(tricked_image)