In [1]:
import os
from os import listdir, path
from zipfile import ZipFile
import random
import numpy as np
import keras
from keras.preprocessing.image import img_to_array, load_img
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage.io import imread
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

### Unzip the data on disk

In [2]:
# Extract each class archive only when its target folder is missing, so
# re-running this cell is a cheap no-op.
for animal in ('cat', 'dog'):
    if not path.exists('data/%s/' % animal):
        print('Extracting %s image files...' % animal)
        # The context manager closes the archive handle even if extraction fails.
        with ZipFile('data/%s.zip' % animal) as zf:
            zf.extractall('data/')

### Display utility functions

In [3]:
def show(image):
    """Display a single image with its shape as the title and the axes hidden.

    The array is cast to uint8 and its singleton dimensions are squeezed out
    before being handed to imshow (with the "gray" colormap).
    """
    pixels = np.squeeze(image.astype("uint8"))
    title = "image shape: " + str(image.shape)
    plt.imshow(pixels, cmap="gray")
    plt.title(title, fontsize=14)
    plt.axis('off')
    
def show_multiple(images, figsize):
    """Display several images side by side, one subplot per image.

    Args:
        images: sequence of image arrays; each is cast to uint8 and squeezed
            before display.
        figsize: (width, height) of the whole figure, forwarded to plt.subplots.
    """
    fig, axes = plt.subplots(ncols=len(images), figsize=figsize)
    # plt.subplots returns a bare Axes (not an array) when ncols == 1;
    # atleast_1d makes the single-image case iterable as well.
    for ax, image in zip(np.atleast_1d(axes), images):
        ax.imshow(np.squeeze(image.astype("uint8")), cmap="gray")
        ax.set_title("image shape: " + str(image.shape), fontsize=14)
        # Hide the axes of every subplot; plt.axis('off') would only affect
        # the most recently created one.
        ax.axis('off')
    plt.tight_layout()

# I - Introduction to Tensorflow and convolution filters

<img src="../images/standard_vs_depthwise_conv.png" style="width: 850px;"/>

**- Left: standard convolution, the whole kernel is parsing the input tensor for each output channel dimension**

**- Right: depthwise convolution, each slice of the kernel is parsing its own input channel. The result is constructed afterward by concatenating the feature maps. That is particularly useful to retrieve a valid RGB image**

#### Sample image example

In [4]:
sample_image = imread("data/panda.jpg")

In [5]:
show(sample_image)

### I - A) Simple box blur kernel

#### Input placeholders

- The placeholder is a variable that doesn't have a value yet in the symbolic graph. The value will be fed when running the session by passing the `feed_dict` argument
- If the placeholder is a k-dimensional tensor, we need to specify its shape. 
- It is possible to leave the shape variable by putting `None` values in the shape

#### 2d convolution with tensorflow:
- https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d
- https://www.tensorflow.org/api_docs/python/tf/nn/conv2d

In [6]:
def conv_2d(x, k, strides, padding, conv_type):
    """Apply a 2-D convolution of the requested flavour.

    Args:
        x: input tensor, forwarded as the first argument of the TensorFlow op.
        k: kernel tensor, forwarded as the second argument.
        strides: strides forwarded to the TensorFlow op.
        padding: padding mode forwarded to the TensorFlow op.
        conv_type: 'depthwise' (tf.nn.depthwise_conv2d) or
            'standard' (tf.nn.conv2d).

    Returns:
        The output tensor of the selected convolution op.

    Raises:
        ValueError: if conv_type is neither 'depthwise' nor 'standard'.
    """
    if conv_type == 'depthwise':
        return tf.nn.depthwise_conv2d(
            x, k, strides=strides, padding=padding
        )
    if conv_type == 'standard':
        return tf.nn.conv2d(
            x, k, strides=strides, padding=padding
        )
    # Fail loudly: silently returning None here only surfaces later as an
    # obscure error inside sess.run.
    raise ValueError(
        "Unknown conv_type %r: expected 'depthwise' or 'standard'" % (conv_type,)
    )

In [7]:
def visualize_kernel(kernel):
    """Print a 3-D kernel with its last axis moved to the front.

    Putting the channel axis first makes print() show one 2-D block per
    channel, so the spatial layout of the kernel is easy to read.
    """
    channels_first = np.transpose(kernel, axes=(2, 0, 1))
    print(channels_first)

In [8]:
# 5x5 box-blur kernel with one slice per colour channel: start from all ones,
# then divide each channel slice by the sum of its 5x5 window.
kernel_data = np.ones((5, 5, 3), dtype=np.float32)
kernel_data = kernel_data / kernel_data.sum(axis=(0, 1))
visualize_kernel(kernel_data)

In [9]:
# Symbolic inputs: the image batch leaves batch/height/width unspecified and has
# 3 channels; the kernel placeholder is fixed to shape (5, 5, 3, 1).
image = tf.placeholder(tf.float32, shape=(None, None, None, 3)) # [batch, height, width, channels]
kernel = tf.placeholder(tf.float32, shape=(5, 5, 3, 1)) # [filter_height, filter_width, in_channels, out_channels]

# Depthwise convolution with stride 1 in every dimension and 'SAME' padding.
output_image = conv_2d(image, kernel, strides=(1, 1, 1, 1), padding='SAME', conv_type='depthwise')

with tf.Session() as sess:
    # Add a leading axis to the image and a trailing axis to the kernel so
    # that both match the 4-D placeholder shapes declared above.
    image_batch_expanded = np.expand_dims(sample_image, axis=0)
    kernel_data_expanded = np.expand_dims(kernel_data, axis=-1)
    print('Kernel shape: %s' % str(kernel_data_expanded.shape))
    feed_dict = {image: image_batch_expanded, kernel: kernel_data_expanded}
    # Evaluate the graph with the concrete image and kernel values.
    feature_map = sess.run(output_image, feed_dict=feed_dict)
    show(feature_map)

**Questions**
- Explain what happened here: what transformation has been applied to the image?

### I - B) Identity kernel

In [10]:
# 3x3x3 kernel that is zero everywhere except for a 1 at spatial position
# (1, 1), the centre of the 3x3 window, in every channel.
kernel_data = np.zeros(shape=(3, 3, 3)).astype(np.float32)
kernel_data[1, 1, :] = 1
visualize_kernel(kernel_data)

In [11]:
# Placeholders for a 3-channel image batch and a (3, 3, 3, 1) kernel.
image = tf.placeholder(tf.float32, shape=(None, None, None, 3))
kernel = tf.placeholder(tf.float32, shape=(3, 3, 3, 1))

# Three variants of the same depthwise convolution, to compare their outputs:
# 'SAME' padding with stride 1, 'VALID' padding with stride 1, and 'SAME'
# padding with a stride of 10 along the two middle dimensions.
output_same_padding = conv_2d(image, kernel, strides=(1, 1, 1, 1), 
                              padding='SAME', conv_type='depthwise')
output_valid_padding = conv_2d(image, kernel, strides=(1, 1, 1, 1), 
                               padding='VALID', conv_type='depthwise')
output_larger_strides = conv_2d(image, kernel, strides=(1, 10, 10, 1), 
                                padding='SAME', conv_type='depthwise')

with tf.Session() as sess:
    # Expand image and kernel to the 4-D shapes the placeholders expect.
    image_batch_expanded = np.expand_dims(sample_image, axis=0)
    kernel_data_expanded = np.expand_dims(kernel_data, axis=-1)
    feed_dict = {image: image_batch_expanded, kernel: kernel_data_expanded}
    # All three outputs are evaluated in a single run with the same feed.
    feature_map_same_padding, feature_map_valid_padding, feature_map_larger_strides = sess.run(
            [output_same_padding, output_valid_padding, output_larger_strides], 
            feed_dict=feed_dict
    )
    # The subplot titles show each output's shape.
    show_multiple([
        feature_map_same_padding, 
        feature_map_valid_padding, 
        feature_map_larger_strides
    ], figsize=(16, 12))

**Questions**
- Try to modify the strides and the type of padding. What are the effects on the final output?

### I - C) Line detection kernel on greyscale transformed image

In [12]:
# Greyscale version of the sample image: sum along axis 2 and divide by 3,
# then add a trailing axis of size 1 back with expand_dims.
grey_sample_image = np.expand_dims(sample_image.sum(axis=2) / 3., axis=-1)
show(grey_sample_image)

**Exercise**

Try to implement a kernel that does line or edge detection:
- https://en.wikipedia.org/wiki/Kernel_(image_processing)
- https://en.wikipedia.org/wiki/Sobel_operator

In [13]:
# Implement a 3x3 edge detection kernel
# (exercise scaffold: the all-zero values below are placeholders to replace)
line_detection_kernel = np.asarray(
    [
        # TODO:
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]
    ]
)

# Add a trailing axis so the kernel is 3-D, which visualize_kernel requires
# for its (2, 0, 1) transpose.
kernel_data = np.expand_dims(line_detection_kernel, axis=-1)
visualize_kernel(kernel_data)

In [14]:
# Single-channel image batch and a (3, 3, 1, 1) kernel placeholder.
image = tf.placeholder(tf.float32, shape=(None, None, None, 1))
kernel = tf.placeholder(tf.float32, shape=(3, 3, 1, 1))

output_line_detection = conv_2d(image, kernel, strides=(1, 1, 1, 1),
                                padding='SAME', conv_type='standard')

# Build the 4-D kernel directly from the 3x3 definition. Expanding
# `kernel_data` in place would add one more axis on every execution, so
# re-running this cell would produce a 5-D array that no longer fits the
# placeholder.
kernel_data = line_detection_kernel[:, :, np.newaxis, np.newaxis]

with tf.Session() as sess:
    # Add the leading batch axis expected by the image placeholder.
    image_batch = np.expand_dims(grey_sample_image, axis=0)
    feed_dict = {image: image_batch, kernel: kernel_data}
    feature_map = sess.run(output_line_detection, feed_dict=feed_dict)
    show(feature_map)

### I - D) Max and average pooling

**Exercise**

Now define a Max Pooling and an Average Pooling operations on our image.<br/>
Then apply it using a tf.Session
- https://www.tensorflow.org/api_docs/python/tf/nn/max_pool
- https://www.tensorflow.org/api_docs/python/tf/nn/avg_pool

Again, try varying the `ksize` and `strides` parameters and observe the effect on the output

In [15]:
image = tf.placeholder(tf.float32, [None, None, None, 3])
# TODO:
output_max_pool = None
output_avg_pool = None

with tf.Session() as sess:
    # Only the image placeholder is fed here: the pooling exercise defines no
    # kernel, and feeding the `kernel` placeholder left over from the previous
    # section made this cell depend on that section's state.
    feed_dict = {image: [sample_image]}
    # TODO:
    feature_map_max_pool, feature_map_avg_pool = None, None
    # TODO:
    show_multiple([sample_image, sample_image], figsize=(8, 6))

# II - Training a ConvNet with Keras

### Utility functions

In [16]:
def get_splitted_data_with_size(image_size, sample_size, test_ratio, classes, seed):
    """Load a class-balanced image sample from disk and split it into train/test.

    Images are read from ``data/<class name>/`` and the label of an image is
    the index of its class in ``classes``.

    Args:
        image_size: target size of the loaded images; only the first two
            entries are used for resizing, so a trailing channel count is
            accepted.
        sample_size: total number of images to load, split evenly across
            classes (integer division).
        test_ratio: forwarded to train_test_split as test_size.
        classes: list of class (sub-folder) names.
        seed: seeds both the file subsampling and the train/test split, so
            the same seed always yields the same data.

    Returns:
        The output of train_test_split on the image array and the label array.
    """
    # A dedicated generator keeps the subsampling reproducible without
    # touching the global `random` state.
    rng = random.Random(seed)
    per_class = sample_size // len(classes)
    target_size = None if image_size is None else tuple(image_size[:2])

    X, Y = [], []
    for label, animal in enumerate(classes):
        class_dir = path.join('data', animal)
        # listdir order is arbitrary; sort first so the seeded shuffle is
        # deterministic across machines and runs.
        files = sorted(listdir(class_dir))
        rng.shuffle(files)
        for file in files[:per_class]:
            img = load_img(path.join(class_dir, file), target_size=target_size)
            X.append(img_to_array(img))
            Y.append(label)
    return train_test_split(np.asarray(X), np.asarray(Y), test_size=test_ratio, random_state=seed)

In [17]:
def plot_model_history(model_history):
    """Plot training (and, when available, validation) accuracy and loss per epoch.

    Args:
        model_history: object exposing a ``history`` dict that maps metric
            names to per-epoch value lists, as returned by ``model.fit``.
    """
    history = model_history.history
    # Depending on the Keras version, accuracy is logged as 'acc' or 'accuracy'.
    acc_key = 'accuracy' if 'accuracy' in history and 'acc' not in history else 'acc'

    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    for ax, metric, name in zip(axs, [acc_key, 'loss'], ['Accuracy', 'Loss']):
        values = history[metric]
        ax.plot(range(1, len(values) + 1), values, label='train')
        # Validation curves only exist when validation data was given to fit.
        val_values = history.get('val_' + metric)
        if val_values is not None:
            ax.plot(range(1, len(val_values) + 1), val_values, label='val')
        ax.set_title('Model ' + name)
        ax.set_ylabel(name)
        ax.set_xlabel('Epoch')
        ax.legend(loc='best')
    plt.show()

In [18]:
def scale_data(X_tr, X_val, return_scaler=False):
    """Min-max scale the training and validation arrays with one shared scaler.

    Both arrays are flattened to a single column, so one global minimum and
    maximum, learnt from the training data only, is applied to every value.
    The scaled arrays get their original shapes back.

    Returns:
        (X_tr_scaled, X_val_scaled), followed by the fitted scaler when
        return_scaler is true.
    """
    min_max_scaler = MinMaxScaler()

    # Fit on the training values, then reuse the fitted scaler on validation.
    train_column = np.ravel(X_tr).reshape(-1, 1)
    X_tr_scaled = min_max_scaler.fit_transform(train_column).reshape(X_tr.shape)

    val_column = np.ravel(X_val).reshape(-1, 1)
    X_val_scaled = min_max_scaler.transform(val_column).reshape(X_val.shape)

    if return_scaler:
        return X_tr_scaled, X_val_scaled, min_max_scaler
    return X_tr_scaled, X_val_scaled
    
def apply_scaling(X, scaler):
    """Apply an already fitted scaler to X and return an array shaped like X.

    X is flattened to a single column before calling ``scaler.transform``;
    the result is reshaped back to ``X.shape``.
    """
    original_shape = X.shape
    column = np.ravel(X).reshape(-1, 1)
    return scaler.transform(column).reshape(original_shape)

### II - A) Load, resize and scale the data

It is advised to fix a relatively small image_size, for instance (32, 32, 3), to avoid suffering from slow calculation

In [19]:
image_size = (32, 32, 3)  # passed to load_img as the target size
sample_size = 10000  # total number of images, divided evenly between the classes

classes = ['cat', 'dog']  # sub-folders of data/; the list index is the label
# 25% of the loaded images are held out as the validation set.
X_tr, X_val, Y_tr, Y_val = get_splitted_data_with_size(
    image_size=image_size, sample_size=sample_size, test_ratio=0.25, classes=classes, seed=42
)

In [20]:
X_tr.shape, X_val.shape, Y_tr.shape, Y_val.shape

In [21]:
# Show one randomly chosen training image together with its class name.
# The index is drawn from NumPy's global random state, so the image shown
# can differ between runs.
i = np.random.choice(len(X_tr))
show(X_tr[i])
print('True label: {0}'.format(classes[Y_tr[i]]))

In [22]:
# The scaler is fitted on the training split only and then applied to the
# validation split; it is kept so it can be reused with apply_scaling.
X_tr_scaled, X_val_scaled, scaler = scale_data(X_tr, X_val, return_scaler=True)

### II - B) Design and train a ConvNet from scratch

**Exercise**

- Implement a Convolutional Network using the Keras Sequential API
- Typically, you would use a series of convolutional blocks:

```python
model.add(Conv2D(output_filter, (kernel_height, kernel_width), padding=padding, input_shape=(input_height, input_width, input_filter)))
model.add(Activation(activation))
model.add(BatchNormalization())
```
- Usually, the output_filter size grows across the network
- End the network with a `Flatten` layer followed by a final `Dense` layer
- Be careful with the shapes across the network, the activation functions used, the optimizer, and the loss function
- Don't forget to use Dropout layers to avoid overfitting issues

In [23]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam

"""
Return a compiled Keras model
"""
def design_and_compile_model():
    """Build and compile the ConvNet for the cat/dog task (exercise scaffold).

    As written, the function returns ``None`` before reaching the compile
    step; the cells that call it test the result for truthiness and skip
    their work when no model is returned.
    """
    model = Sequential()
    # TODO:

    # TODO:
    # NOTE: this early return makes the compile call below unreachable until
    # it is replaced by the real implementation.
    return None

    # Compiling the model adds a loss function, optimiser and metrics to track during training
    model.compile(
        optimizer=None,
        loss=None,
        metrics=None
    )
    
    # TODO:

In [24]:
# Build the model once and reuse it: the one-line conditional constructed (and
# compiled) the whole network twice just to print its summary.
model_preview = design_and_compile_model()
if model_preview:
    model_preview.summary()

In [25]:
batch_size = 128
num_epochs = 20  # The number of epochs (full passes through the data) to train for

model = design_and_compile_model()

# The fit function allows you to fit the compiled model to some training data
# Training is skipped while design_and_compile_model still returns None.
if model:
    # Train on the scaled training set and evaluate on the scaled validation
    # set; the returned history is kept for plotting.
    model_history = model.fit(
        x=X_tr_scaled, 
        y=Y_tr, 
        batch_size=batch_size, 
        epochs=num_epochs,
        verbose=1,
        validation_data=(X_val_scaled, Y_val)
    )
    print('Training complete')
else:
    # No model yet: leave a falsy marker so the plotting cell is a no-op.
    model_history = None

In [26]:
# Only plot when training actually produced a history object.
if model_history:
    plot_model_history(model_history)

### II - C) Improve it using data augmentation

**Exercise**

- Try to improve the effectiveness of your network using some Data Augmentation
- Basically, it consists of building an `ImageDataGenerator` fitted on your training dataset
- Then you will be able to generate new consistent samples, and refit your model using the `fit_generator` Keras method

In [27]:
from keras.preprocessing.image import ImageDataGenerator

# Instantiate a ImageDataGenerator object with the right parameters and then fit it on your training dataset
# TODO:

In [28]:
# Start from a freshly built model so this run does not continue from the
# weights trained in the previous section.
model = design_and_compile_model()
# Fit your model with model.fit_generator() and feed it with data_generator.flow()
# Store the result in `model_history`: the plotting cell that follows reads
# that variable.
# TODO:

In [29]:
# Plot the training curves, if any. `model_history` is whatever was last
# assigned to that name.
if model_history:
    plot_model_history(model_history)

# III - Transfer learning

Objectives:
- Classify an image by loading a pre-trained ResNet50 model using Keras Zoo
    - No training required
    - Decode an ImageNet prediction
- Build a headless model and compute representations of images 
    - Retrain a model from representations of images for your own classification task: here cat vs dog dataset

In [30]:
# Sample images used throughout this section, and the size images are
# resized to before being passed to ResNet50.
cat_sample_path = "data/cat/cat_1.jpg"
dog_sample_path = "data/dog/dog_1.jpg"
resnet_input_size = (224, 224)

In [31]:
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.models import Model
from skimage.transform import resize

# Load ResNet50 with the 'imagenet' weights and include_top=True, then print
# its layer-by-layer summary.
model_ResNet50 = ResNet50(include_top=True, weights='imagenet')
model_ResNet50.summary()

Note that there are way more trainable parameters than before

### III - A) Classify an image using pre-trained weights

**Exercise**
- Open an image, preprocess it and build a batch of 1 image
- Use the model to classify this image
- Decode the predictions using `decode_predictions` from Keras

Notes:
- You may use `preprocess_input` for preprocessing the image. 
- Test your code with `"data/cat/cat_1.jpg"` 
- ResNet was trained on images of size (224, 224) with pixel intensities in the range `[0, 255]`.
    - [skimage.transform.resize](http://scikit-image.org/docs/stable/api/skimage.transform.html#skimage.transform.resize) has a `preserve_range` keyword useful in that matter 

In [32]:
# Load the sample cat image and resize it to the ResNet input size while
# keeping the original value range (preserve_range=True).
img = imread(cat_sample_path)
img_resized = resize(img, resnet_input_size, mode='reflect', preserve_range=True)
show(img_resized)

# Use preprocess_input() to apply the same preprocessing as ResNet, 
# get the prediction from the loaded model, and then decode the predictions

# TODO:
# NOTE(review): the loop below unpacks (id, name, score) triples, so
# `decoded_predictions` presumably has to hold the predictions of a single
# image rather than one list per image of the batch. Confirm against the
# decode_predictions documentation.
decoded_predictions = None

if decoded_predictions:
    for _, name, score in decoded_predictions:
        print(name, score)

### III - B) Build a headless model and compute representations of images

- Model has $177$ layers
- See where we should stop to get the extracted features, and start building a new classification model from there

In [33]:
print(len(model_ResNet50.layers))

- Let's remove the last dense classification layer, which is specific 
to the ImageNet classes, and use the previous layers (after flattening) as a feature extractor
- Use ResNet input layer and last layer of extracted features to build a feature extractor model
    - Use Keras functional API

In [34]:
# Create a truncated Model using ResNet50.input and the before last layer

# TODO:
# While this stays None, the cells below that are guarded by
# `if feat_extractor_model:` are skipped.
feat_extractor_model = None

When using this model we need to be careful to apply the same image processing as was used during the training, otherwise the marginal distribution of the input pixels might not be on the right scale:

In [35]:
def preprocess_resnet(x, size):
    """Resize one image and turn it into a preprocessed batch of size 1.

    Args:
        x: a single image, either (height, width, channels) or a 2-D
            greyscale (height, width) array.
        size: (height, width) the image is resized to.

    Returns:
        The result of ``preprocess_input`` applied to an array of shape
        (1, height, width, channels).
    """
    x = resize(x, size, mode='reflect', preserve_range=True)
    if x.ndim == 2:
        # Greyscale input: replicate the single channel three times. The
        # previous code added two leading axes in this case and produced a
        # malformed (1, 1, height, width) batch.
        x = np.stack([x] * 3, axis=-1)
    # Add the batch axis exactly once.
    x = np.expand_dims(x, axis=0)
    return preprocess_input(x)

This model extracts high level concepts from any image that has been preprocessed like the images ResNet trained on.
The model transforms a preprocessed (224, 224) RGB image into a long vector of activations.
Each activation refers to some concept statistically connected to a bunch of different classes.

In [36]:
# Load and preprocess the sample cat image, then (once the feature extractor
# is defined) compute its representation.
cat_img = imread(cat_sample_path)
cat_img_processed = preprocess_resnet(cat_img, resnet_input_size)
if feat_extractor_model:
    cat_representation = feat_extractor_model.predict(cat_img_processed)
    # The "(%d, %d)" format requires the representation to have exactly two dimensions.
    print("Cat deep representation shape: (%d, %d)" % cat_representation.shape)
    # Prints every activation on its own line.
    for activation in np.ravel(cat_representation):
        print(activation)

**Questions**
- What is the number of $0$s in the cat representation vector ?
- Can you find any negative values?
- Why are there $0$ values ? What does it mean ?
- Extract ResNet representations of other dogs and cats. Are the zeros at the same places in vector ?
    - Explain why or give an intuition of it

In [37]:
if feat_extractor_model:
    # np.where(...)[1] holds the positions along axis 1 where the activation
    # is exactly 0; the histogram shows how those positions are distributed.
    plt.hist(np.where(cat_representation == 0)[1])
    plt.title("cat zeros positions")
    plt.show()

    # Same procedure for the sample dog image, to compare with the cat.
    dog_img = imread(dog_sample_path)
    dog_img_processed = preprocess_resnet(dog_img, resnet_input_size)
    dog_representation = feat_extractor_model.predict(dog_img_processed)

    plt.hist(np.where(dog_representation == 0)[1])
    plt.title("dog zeros positions")
    plt.show()

### III - C) Retrain a model from computed representations of images

For this session we are going to use the dataset of the dogs-vs-cats we already used in part $2$.

In [38]:
# Reload a smaller sample (2000 images) at 224x224.
# Note: this rebinds X_tr, X_val, Y_tr and Y_val, replacing the 32x32 arrays
# loaded in part II.
classes = ['cat', 'dog']
X_tr, X_val, Y_tr, Y_val = get_splitted_data_with_size(
    image_size=(224, 224, 3), sample_size=2000, test_ratio=0.25, classes=classes, seed=42
)

**Questions**
- Inference takes a long time, even for only $2000$ images
    - Explain why it would be much faster using a GPU

In [39]:
if feat_extractor_model:
    # preprocess_input can overwrite a float array in place. Feeding copies
    # keeps X_tr / X_val untouched, so re-running this cell does not apply
    # the preprocessing a second time to already preprocessed data.
    X_extracted_tr = feat_extractor_model.predict(preprocess_input(X_tr.copy()), verbose=1)
    X_extracted_val = feat_extractor_model.predict(preprocess_input(X_val.copy()), verbose=1)
    print('Done extracting resnet50 features..')

- Define a classification model fed with the newly created X and Y
    - Remember that X is now a set of ResNet representations of the images
- Use either the functional or the sequential Keras API
- Display training and validation accuracies

In [40]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

# TODO:
transfer_model = None

**Questions**
- How high in validation accuracy did you get ? 
- Compare to your previous classification model in part 2. Does it perform worse ? Better ? Why ?
- Did you observe overfitting during training ? Why ?
    - If yes, what did you do to avoid it ?