In [1]:
# Goal: classify images of cats vs. dogs, starting from a pretrained VGG16 network

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import itertools
import os
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
%matplotlib inline

In [3]:
# Dataset folders. All three paths are derived from a single base directory so
# the dataset can be relocated by editing one line, and none of them carries a
# trailing separator (previously only train_path did).
DATA_DIR = os.path.join("data", "dogs-vs-cats")
train_path = os.path.join(DATA_DIR, "train")
valid_path = os.path.join(DATA_DIR, "valid")
test_path = os.path.join(DATA_DIR, "test")

In [4]:
# 1. Preprocessing
#
# Wrap each image folder in a Keras directory iterator. Per the original note the
# source images do not all have the same size, so every image is resized to
# 224x224 (the input shape listed in the VGG16 summary) and passed through
# VGG16's own `preprocess_input`, i.e. the same preprocessing VGG16 itself uses.
# According to the Keras documentation that function converts RGB to BGR and
# zero-centers each colour channel with respect to the ImageNet dataset, without
# scaling -- which is why preprocessed images look odd to the human eye.
IMAGE_SIZE = (224, 224)
CLASS_NAMES = ["cat", "dog"]
BATCH_SIZE = 10


def make_batches(directory, shuffle=True):
    """Return a directory iterator over the cat/dog images under `directory`.

    Args:
        directory: Folder expected to contain one sub-folder per entry of
            CLASS_NAMES.
        shuffle: Whether the iterator shuffles the images. Pass False when the
            image order must line up with the iterator's `classes` attribute
            (e.g. when comparing predictions against the true labels).

    Returns:
        The iterator produced by `ImageDataGenerator.flow_from_directory`.
    """
    generator = ImageDataGenerator(
        preprocessing_function=tf.keras.applications.vgg16.preprocess_input
    )
    return generator.flow_from_directory(
        directory=directory,
        target_size=IMAGE_SIZE,
        classes=CLASS_NAMES,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
    )


train_batches = make_batches(train_path)
valid_batches = make_batches(valid_path)
# Keep the test images in directory order so that predictions can later be
# compared against `test_batches.classes`.
test_batches = make_batches(test_path, shuffle=False)

# Sanity-check the size of each split.
assert train_batches.n == 1000, f"expected 1000 training images, found {train_batches.n}"
assert valid_batches.n == 200, f"expected 200 validation images, found {valid_batches.n}"
assert test_batches.n == 100, f"expected 100 test images, found {test_batches.n}"

Found 1000 images belonging to 2 classes.
Found 200 images belonging to 2 classes.
Found 100 images belonging to 2 classes.


In [5]:
## Download the model vgg16 (The first time takes time :)
vgg16_model = tf.keras.applications.vgg16.VGG16()
vgg16_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     14758

In [6]:
def count_params(model):
    """Count the scalar parameters of `model`, split by trainability.

    Args:
        model: Object exposing `trainable_weights` and `non_trainable_weights`,
            each an iterable of variables whose `get_shape().as_list()` returns
            the variable's dimensions.

    Returns:
        dict with the keys "non_trainable_params" and "trainable_params", each
        a plain Python int (0 when the corresponding weight list is empty).
    """

    def _total_size(weights):
        # Cast every per-variable size to int and add them with the builtin
        # sum: the total is then always an exact Python int, whereas np.sum
        # over an empty list would give the float 0.0.
        return sum(int(np.prod(w.get_shape().as_list())) for w in weights)

    return {
        "non_trainable_params": _total_size(model.non_trainable_weights),
        "trainable_params": _total_size(model.trainable_weights),
    }

In [8]:
# Sanity check on the freshly loaded VGG16: no weight is frozen and the number
# of trainable parameters matches the expected total.
# NOTE(review): a later cell sets trainable=False on these same layer objects,
# so re-running this cell after that one is expected to fail -- confirm and keep
# the cells in top-to-bottom order.
params = count_params(vgg16_model)
assert (params["non_trainable_params"], params["trainable_params"]) == (0, 138357544)

In [13]:
# Build our own model: reuse every VGG16 layer except its final output layer,
# which is deliberately left out.
model = Sequential()
for layer in vgg16_model.layers[:-1]:
    model.add(layer)

# Freeze everything added so far -- these layers must not be re-trained.
# NOTE: the layer objects are the very ones held by `vgg16_model`, so this also
# marks them as non-trainable there.
for layer in model.layers:
    layer.trainable = False

# Finally add the only part we actually train: a 2-unit softmax output layer
# that separates cats from dogs.
model.add(Dense(units=2, activation="softmax"))

# The trainable parameters must be exactly those of the new last layer. The
# other cells assert their invariants in code, so do the same here instead of
# relying on reading the summary below.
head_param_count = model.layers[-1].count_params()
trainable_param_count = count_params(model)["trainable_params"]
assert trainable_param_count == head_param_count, (
    f"expected only the output layer to be trainable ({head_param_count} params), "
    f"found {trainable_param_count} trainable params"
)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)      

In [14]:
# Train. Only the newly added output layer is trainable; every layer taken from
# VGG16 was frozen when the model was built.
model.compile(optimizer=Adam(learning_rate=0.0001), loss="categorical_crossentropy", metrics=["accuracy"])

# Keep the History object that fit() returns rather than reaching for the
# implicit `model.history` attribute afterwards.
history = model.fit(x=train_batches, validation_data=valid_batches, epochs=5, verbose=2)

# Note: this checks the *training* accuracy of the last epoch, not the
# validation accuracy.
final_train_accuracy = history.history["accuracy"][-1]
assert final_train_accuracy > 0.95, f"final training accuracy too low: {final_train_accuracy:.4f}"

Epoch 1/5
100/100 - 166s - loss: 0.3343 - accuracy: 0.8650 - val_loss: 0.1161 - val_accuracy: 0.9600
Epoch 2/5
100/100 - 168s - loss: 0.0947 - accuracy: 0.9680 - val_loss: 0.0848 - val_accuracy: 0.9550
Epoch 3/5
100/100 - 167s - loss: 0.0609 - accuracy: 0.9770 - val_loss: 0.0768 - val_accuracy: 0.9700
Epoch 4/5
100/100 - 167s - loss: 0.0428 - accuracy: 0.9840 - val_loss: 0.0732 - val_accuracy: 0.9800
Epoch 5/5
100/100 - 167s - loss: 0.0318 - accuracy: 0.9900 - val_loss: 0.0693 - val_accuracy: 0.9800


In [15]:
# Predict on the test set and compare against the true labels.
predictions = model.predict(x=test_batches, verbose=0)
predicted_labels = np.argmax(predictions, axis=-1)

# `test_batches.classes` lists the true labels in directory order; comparing it
# with the predictions relies on the test iterator having been created with
# shuffle=False.
cm = confusion_matrix(y_true=test_batches.classes, y_pred=predicted_labels)
print(cm)
print(test_batches.class_indices)

# How to read the matrix (scikit-learn convention): cm[i, j] is the number of
# images whose true class is i and whose predicted class is j, with the class
# indices given by `class_indices` above. Rows are true classes, columns are
# predicted classes, so the main diagonal holds the CORRECT predictions and
# every off-diagonal cell is a misclassification.
test_accuracy = np.trace(cm) / cm.sum()
print(f"test accuracy: {test_accuracy:.2%}")
# NOTE(review): if the test accuracy is far below the validation accuracy seen
# during training, check the contents of the test folders (and that the cells
# were run in order) before trusting either number.

[[49  1]
 [47  3]]
{'cat': 0, 'dog': 1}
