# This notebook presents a baseline cnn model trained with and without data augmentation to see the differences



[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Jeremynadal33/classify_stanford_dogs/blob/master/final_model.ipynb)

The aim of this notebook is to:
* Present the best model
* Tune its hyper-parameters
* Save the model
* Write a function that predicts the breed of a dog given its photo


The hyper-parameter search is done using scikit-optimize (skopt).


In [1]:
import tensorflow as tf
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img, load_img
from tensorflow.keras import backend as K
import os 
import shutil
import matplotlib.pyplot as plt 
from matplotlib import image
import numpy as np
from sklearn.model_selection import train_test_split

print(tf.__version__)

2.4.1


In [2]:
if 'google.colab' in str(get_ipython()):
  print('Running on CoLab')
  from google.colab import drive
  drive.mount('/content/gdrive',force_remount=True)
  !ls /content/gdrive/My\ Drive/Formation-OC/P5-Stackoverflow
  root_dir = '/content/gdrive/My Drive/Formation-OC/P6-Images/'
  input_dir = root_dir + 'inputs/'
  png_dir = root_dir + 'pngs/'

  baseline_species = os.listdir(input_dir+'baseline_inputs/train')

  model_dir = root_dir +'models/'

  #my script
  !ls gdrive/MyDrive/Formation-OC/P6-Images/
else:
  print('Not running on CoLab')
  #my script
  root_dir = '/Users/jeremynadal/Documents/Formation OC IML/P6/'
  input_dir = root_dir + 'inputs/'
  png_dir = root_dir + 'pngs/'
  model_dir = root_dir +'models/'

Running on CoLab
Mounted at /content/gdrive
function.py  inputs  P5_presentation.pptx  pngs
inputs	models	P6-presentation.pptx  pngs


In [3]:
np.random.seed(42) # Fix NumPy's global seed so that NumPy-based randomness is reproducible between runs
baseline_dir = input_dir+'baseline_inputs/' # Folder that contains the image sub-folders read by the generators below

## Scikit optimize is not natively installed :

In [4]:
pip install h5py scikit-optimize


Collecting scikit-optimize
[?25l  Downloading https://files.pythonhosted.org/packages/8b/03/be33e89f55866065a02e515c5b319304a801a9f1027a9b311a9b1d1f8dc7/scikit_optimize-0.8.1-py2.py3-none-any.whl (101kB)
[K     |████████████████████████████████| 102kB 8.6MB/s 
Collecting pyaml>=16.9
  Downloading https://files.pythonhosted.org/packages/15/c4/1310a054d33abc318426a956e7d6df0df76a6ddfa9c66f6310274fb75d42/pyaml-20.4.0-py2.py3-none-any.whl
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-20.4.0 scikit-optimize-0.8.1


In [5]:
pip install git+git://github.com/Hvass-Labs/scikit-optimize.git@dd7433da068b5a2509ef4ea4e5195458393e6555

Collecting git+git://github.com/Hvass-Labs/scikit-optimize.git@dd7433da068b5a2509ef4ea4e5195458393e6555
  Cloning git://github.com/Hvass-Labs/scikit-optimize.git (to revision dd7433da068b5a2509ef4ea4e5195458393e6555) to /tmp/pip-req-build-wjk3attq
  Running command git clone -q git://github.com/Hvass-Labs/scikit-optimize.git /tmp/pip-req-build-wjk3attq
  Running command git checkout -q dd7433da068b5a2509ef4ea4e5195458393e6555
Building wheels for collected packages: scikit-optimize
  Building wheel for scikit-optimize (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-optimize: filename=scikit_optimize-0.5-py2.py3-none-any.whl size=77763 sha256=ea5f3441056b1bf6efde6efdc35590295c04f51e691b6f6bf20515a910e00a3f
  Stored in directory: /tmp/pip-ephem-wheel-cache-jpp6rnxu/wheels/e2/8b/7e/2956a87f1fb737a8c3f2ed18788b5e0ecfe4718141988c838f
Successfully built scikit-optimize
Installing collected packages: scikit-optimize
  Found existing installation: scikit-optimize 0.8.1
    Uninstalli

In [6]:
# Imports for skopt
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.plots import plot_objective
from skopt.utils import use_named_args



# Prepare everything for the training
## Build a function to create a model

In [7]:
def create_cnn(input_shape, output_length,
               nb_cnn=3, nb_filters = 64, activation_cnn = 'relu', 
               model_transfert = None, fine_tune = False, 
               nb_FC_layer = 3, nb_FC_neurons = 512, reducing = False, activation_FC = 'relu',
               dropout = False,
               name = 'my_cnn_model'
               ):
  '''Build a Sequential image classifier: input -> 1/255 rescaling -> feature extractor -> FC head -> softmax.

  The feature extractor is either a stack of Conv2D/MaxPooling2D blocks (when
  model_transfert is None) or the given pre-built model (transfer learning).

  input_shape (tuple): shape of one input image; the last dimension must be 3 (rgb).
  output_length (int): number of units of the softmax output layer.
  nb_cnn (int, optional): number of Conv2D + MaxPooling2D blocks. Ignored if model_transfert is given.
  nb_filters (int, optional): number of filters of every Conv2D layer.
  activation_cnn (str, optional): activation of the Conv2D layers.
  model_transfert (optional): model added as feature extractor instead of the Conv2D blocks.
  fine_tune (bool, optional): if False, model_transfert.trainable is set to False (frozen base).
  nb_FC_layer (int, optional): number of fully connected layers before the output layer.
  nb_FC_neurons (int, optional): units of the first FC layer (of every FC layer if reducing is False).
  reducing (bool, optional): if True, the number of units is halved at each FC layer;
      nb_FC_neurons must then be a multiple of 2**nb_FC_layer.
  dropout (float or False, optional): dropout rate applied after each FC layer; False or 0 disables it.
  name (str, optional): name of the returned model.

  Returns the (uncompiled) tf.keras Sequential model.
  Raises AssertionError if the input is not rgb or if the reducing constraint is violated.'''

  assert input_shape[-1] == 3, 'For the moment only models with rgb input is dealt'
  #for shape in input_shape[:-1] : assert shape % 2**nb_cnn ==  0 , 'Each dimension of input must be a multiple of 2**nb_cnn'
  if reducing : assert nb_FC_neurons % 2**nb_FC_layer == 0 , 'If reducing, nb_FC_neurons must be multiple of 2**nb_FC_layer '

  model = tf.keras.models.Sequential(name=name)
  model.add(tf.keras.layers.InputLayer(input_shape=input_shape, name = 'Input_layer'))
  # Pixel values are rescaled inside the model, so the data generators must not rescale them again.
  model.add(tf.keras.layers.experimental.preprocessing.Rescaling(1./255,name='Rescaling_layer'))

  if model_transfert is None:
    for cnn in range(nb_cnn):
      model.add(tf.keras.layers.Conv2D( filters = nb_filters, kernel_size = (3,3), padding='same', activation = activation_cnn, name ='Conv2D_'+str(cnn+1) ))
      model.add(tf.keras.layers.MaxPooling2D( pool_size=(2, 2), name ='MaxPool_'+str(cnn+1)))
  else:
    if not fine_tune:
      # Feature extraction only: keep the weights of the transferred model fixed.
      model_transfert.trainable = False
    model.add(model_transfert)

  model.add(tf.keras.layers.Flatten())

  for FC in range(nb_FC_layer):
    # Integer division keeps the unit count an int; it is exact thanks to the assertion above.
    nb_units = nb_FC_neurons // 2**FC if reducing else nb_FC_neurons
    model.add(tf.keras.layers.Dense(nb_units, activation= activation_FC, name='FC_layer_'+str(FC+1)))
    if dropout:  # False, 0 and 0.0 all mean "no dropout"
      model.add(tf.keras.layers.Dropout(dropout, name = 'Dropout_'+str(FC+1)))

  model.add(tf.keras.layers.Dense(output_length, activation = 'softmax',name='Output_layer'))

  return model


def get_callbacks(monitor='val_loss',save_name=None,patience=8):
    '''Assemble the list of Keras callbacks used during training.

    monitor (str, optional): quantity watched by every callback. Default is 'val_loss'.
    save_name (str, optional): when truthy, a ModelCheckpoint writing the best model to
        this path is placed first in the list. Default is None (no checkpoint).
    patience (int, optional): number of epochs without improvement of monitor that
        EarlyStopping tolerates. Default is 8.

    Returns a list holding an optional ModelCheckpoint followed by an EarlyStopping
    that restores the best weights.'''
    callbacks = []

    # Optional checkpointing: only the best model seen so far is kept on disk.
    if save_name :
        checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=save_name,
                                                        monitor=monitor,
                                                        save_best_only=True,
                                                        verbose=0)
        callbacks.append(checkpoint)

    # Early stopping is always present.
    stopper = tf.keras.callbacks.EarlyStopping(monitor=monitor,
                                               patience=patience,
                                               restore_best_weights=True)
    callbacks.append(stopper)

    return callbacks


## Build two ImageDataGenerators: one doing on-the-fly data augmentation for training, one without augmentation for validation

In [8]:
batch_size = 16
input_shape = (150,150,3)


# Augmentation configuration used for the training images:
# random rotations, shifts, shears, zooms and flips applied on the fly.
train_datagen = ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.1,
                                   horizontal_flip=True,
                                   vertical_flip = True,
                                   fill_mode='nearest')

# Validation images are fed without any augmentation.
# No rescaling is configured here either: create_cnn adds a Rescaling(1./255) layer inside the model.
test_datagen = ImageDataGenerator()



train_generator = train_datagen.flow_from_directory(baseline_dir+'train_clahe/',  # directory of the training images
                                                   target_size=input_shape[:-1],  # images are resized to the model's height x width (150x150)
                                                   batch_size=batch_size,
                                                   class_mode='categorical',
                                                   color_mode='rgb',
                                                   seed= 42)  # seed passed to the generator for reproducibility

validation_generator = test_datagen.flow_from_directory(baseline_dir+'validation_clahe/',  # directory of the validation images
                                                   target_size=input_shape[:-1],  # images are resized to the model's height x width (150x150)
                                                   batch_size=batch_size,
                                                   class_mode='categorical',
                                                   color_mode='rgb',
                                                   seed= 42)  # no augmentation is configured on test_datagen


# Number of full batches per epoch; the integer division drops a final, incomplete batch.
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=validation_generator.n//validation_generator.batch_size

Found 493 images belonging to 4 classes.
Found 126 images belonging to 4 classes.


## Use transfer learning

In [9]:
# Xception base loaded with ImageNet weights and without its classification head (include_top=False).
# NOTE(review): per the Keras docs `classes` is only meant to be specified when include_top=True,
# so it looks unused here — confirm.
base_xception = tf.keras.applications.Xception( include_top=False, weights='imagenet', input_shape=input_shape, classes=len(baseline_species) )

# Frozen base (fine_tune=False) followed by two FC layers whose size is halved (reducing=True)
# and a softmax output with one unit per species.
model_xception = create_cnn(input_shape=input_shape, output_length=len(baseline_species),
               model_transfert = base_xception, fine_tune = False, 
               nb_FC_layer = 2, nb_FC_neurons = 512, reducing = True, activation_FC = 'relu',
               dropout = False,
               name='my_xception'
               )
model_xception.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "my_xception"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Rescaling_layer (Rescaling)  (None, 150, 150, 3)       0         
_________________________________________________________________
xception (Functional)        (None, 5, 5, 2048)        20861480  
_________________________________________________________________
flatten (Flatten)            (None, 51200)             0         
_________________________________________________________________
FC_layer_1 (Dense)           (None, 512)               26214912  
_________________________________________________________________
FC_layer_2 (Dense)           (None, 256)               131328    
_________________________________________________________________
Output_layer (Dense)         (None, 4)    

## Create the hyper-parameter search space and a function that automatically creates and builds the model
* Dropout (between 0 and 0.3)
* Number of FC layers (between 1 and 3)
* Optimizers (Adam, SGD, RMSProp)
* Learning rate (between 1e-6 and 1e-2)




In [10]:
# Learning rate, sampled on a log scale between 1e-6 and 1e-2.
dim_learning_rate = Real(low=1e-6, high=1e-2, prior='log-uniform',
                         name='learning_rate')

# Number of fully connected layers placed before the output layer (1 to 3).
dim_num_dense_layers = Integer(low=1, high=3, name='num_dense_layers') 

# Optimizer, selected by name; the names are the ones tested in create_model.
dim_name_optimizer = Categorical(categories=['adam', 'SGD', 'RMSProp'],
                             name='name_optimizer')

# Dropout value, between 0 and 0.3.
dim_dropout = Real(low=0, high=0.3,     
                         name='dropout')


In [11]:
# Search space handed to skopt; the order fixes the order of the values in every evaluated point.
dimensions = [dim_learning_rate,
              dim_num_dense_layers,
              dim_name_optimizer,
              dim_dropout]

# Starting point of the search, in the same order as `dimensions`:
# [learning_rate, num_dense_layers, name_optimizer, dropout]
default_parameters = [1e-5, 1, 'adam', 0]

In [12]:
def create_model(learning_rate, num_dense_layers,
                 name_optimizer, dropout):
    """
    Build and compile an Xception-based classifier for one hyper-parameter set.

    Hyper-parameters:
    learning_rate:     Learning-rate for the optimizer.
    num_dense_layers:  Number of dense (fully connected) layers before the output layer.
    name_optimizer:    Name of the optimizer: 'adam', 'SGD' or 'RMSProp'.
                       Any other name falls back to RMSprop with a printed warning.
    dropout:           Dropout rate applied after each dense layer (0 disables dropout).

    Returns the compiled model (categorical cross-entropy loss, accuracy metric).
    """

    # A fresh pre-trained base is loaded for every model so that models do not share weights.
    base = tf.keras.applications.Xception( include_top=False, weights='imagenet', input_shape=input_shape, classes=len(baseline_species) )

    # The dropout rate is forwarded unchanged: the search space already samples it
    # in its final range, so it must not be rescaled here.
    model = create_cnn(input_shape=input_shape, output_length=len(baseline_species),
               model_transfert = base, fine_tune = False, 
               nb_FC_layer = num_dense_layers, nb_FC_neurons = 512, reducing = True, activation_FC = 'relu',
               dropout = dropout,
               name='my_xception'
               )

    # Map every supported name to its optimizer class; a single assignment to
    # `optimizer` below guarantees it is always defined before compile().
    known_optimizers = {'adam': tf.keras.optimizers.Adam,
                        'SGD': tf.keras.optimizers.SGD,
                        'RMSProp': tf.keras.optimizers.RMSprop}
    if name_optimizer not in known_optimizers:
        print('optimizer {} not known, using RMSProp'.format(name_optimizer))
    optimizer_class = known_optimizers.get(name_optimizer, tf.keras.optimizers.RMSprop)
    optimizer = optimizer_class(learning_rate=learning_rate)

    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [13]:
path_best_model = model_dir+ 'best_model.h5' # File the best model found during the search is saved to
best_accuracy = 0.0 # Best validation accuracy seen so far; updated by fitness() through `global`

In [14]:
#Function taken from https://colab.research.google.com/github/Hvass-Labs/TensorFlow-Tutorials/blob/master/19_Hyper-Parameters.ipynb#scrollTo=RWnqMCXmGE5X
@use_named_args(dimensions=dimensions)
def fitness(learning_rate, num_dense_layers,
             name_optimizer, dropout):
    """
    Objective function of the hyper-parameter search: train one model and
    return its negated validation accuracy.

    Hyper-parameters:
    learning_rate:     Learning-rate for the optimizer.
    num_dense_layers:  Number of dense layers.
    name_optimizer:    Name of the optimizer ('adam', 'SGD' or 'RMSProp').
    dropout:           Dropout value forwarded to create_model.

    Side effects: saves the model to path_best_model and updates the global
    best_accuracy whenever the accuracy improves; clears the Keras session.

    Returns minus the validation accuracy of the last training epoch.
    NOTE(review): this is the last epoch's value, which may differ from the
    accuracy of the weights restored by EarlyStopping — confirm this is intended.
    """
    # The best accuracy lives outside of this function so that it survives
    # between the successive calls made by the optimizer.
    global best_accuracy

    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(learning_rate))
    print('num_dense_layers:', num_dense_layers)
    print('name_optimizer:', name_optimizer)
    print('dropout:', dropout)
    print()
    
    # Create the neural network with these hyper-parameters.
    model = create_model(learning_rate=learning_rate, 
                         num_dense_layers=num_dense_layers,
                         name_optimizer=name_optimizer, 
                         dropout = dropout)

    # Use Keras to train the model. Model.fit accepts generators directly;
    # Model.fit_generator is deprecated.
    history = model.fit(x=train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=validation_generator,
                        validation_steps=STEP_SIZE_VALID,
                        epochs=25,
                        callbacks=get_callbacks(patience = 15)
                        )

    # Get the classification accuracy on the validation-set
    # after the last training-epoch.
    accuracy = history.history['val_accuracy'][-1]

    # Print the classification accuracy.
    print()
    print("Accuracy: {0:.2%}".format(accuracy))
    print()

    # If the classification accuracy of the saved model is improved ...
    if accuracy > best_accuracy:
        # Save the new model to harddisk.
        model.save(path_best_model)
        
        # Update the classification accuracy.
        best_accuracy = accuracy

    # Delete the Keras model with these hyper-parameters from memory.
    del model
    
    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    K.clear_session()
    
    # NOTE: Scikit-optimize does minimization so it tries to
    # find a set of hyper-parameters with the LOWEST fitness-value.
    # Because we are interested in the HIGHEST classification
    # accuracy, we need to negate this number so it can be minimized.
    return -accuracy

## Let's do a test run

In [15]:
# Single evaluation with the default hyper-parameters, passed as one list `x` ordered like `dimensions`.
fitness(x=default_parameters)

learning rate: 1.0e-05
num_dense_layers: 1
name_optimizer: adam
dropout: 0





Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Accuracy: 77.68%



-0.7767857313156128

# Let's run the full hyper-parameter search

In [None]:
%%time
# Run the hyper-parameter search: `fitness` is minimised over `dimensions`,
# i.e. the validation accuracy it negates is maximised.
search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            acq_func='EI', # Expected Improvement.
                            n_calls=40, # number of evaluations of `fitness`, each one training a model
                            x0=default_parameters) # initial point of the search

learning rate: 1.0e-05
num_dense_layers: 1
name_optimizer: adam
dropout: 0





Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25