# Import Libraries 

Import all the required TensorFlow and Keras libraries.

In [None]:
# THIS VERSION IS FOR TENSORFLOW v2.0 which has keras embedded
# Note: Multibackend Keras is being discontinued after version 2.3.5.

# These are all the imports we will need.  You shouldn't need anything else.
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras import regularizers 
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model

import os as os
import numpy as np
import matplotlib.pyplot as plt

# Dataset sizes (number of images in each split)
num_train_images = 23000
num_validation_images = 2000
num_test_images = 399

# Some Hyperparameters
input_image_size = 256  # images are loaded as input_image_size x input_image_size
batch_size = 32
num_epochs = 1
learning_rate = 0.001
l2_lambda = 0.01

# Parameters derived from hyperparameters.
# Step counts must be whole numbers of batches.  For training and validation
# we round down, so the final partial batch of an epoch is simply skipped.
training_steps_per_epoch = num_train_images // batch_size
validation_steps = num_validation_images // batch_size
# For testing we round UP (ceiling division) so that the final partial batch
# is still evaluated and every test image gets a prediction.
testing_steps_per_epoch = (num_test_images + batch_size - 1) // batch_size

%matplotlib inline

 # Step 1: Load the Inception V3 network, and modify it for transfer learning.


In [None]:
# WRITE THE CODE DEFINING THE NETWORK ARCHITECTURE HERE (Sec. 3.9 in the assignment PDF)

# Our model will be Inception V3 with the fully connected layers and output layers at the end of the network removed.
# For the convolutional layers we will use pre-trained weights from the ImageNet database.

# Instantiate an InceptionV3() object with weights='imagenet', input_shape = (input_image_size, input_image_size, 3), 
# and include_top=False.  Assign the result to the base_model variable.

base_model = [your code here]

# Get the output layer of our base model. 
x = base_model.output

# Now we want to add some layers to the end of the base model.  
# Given an output layer x, the general syntax for adding a new layer is:
#
# x = LayerObject()(x)
#
# where LayerObject() is the constructor for a new layer object.  
# The new value for x is the new output layer of the model.

# Add a GlobalAveragePooling2D() object to the network.  No parameters are needed when instantiating GlobalAveragePooling2D().

[your code here]


# Add a Flatten() layer to the network.  No parameters are needed when instantiating Flatten().

[your code here]


# Add a Dense() layer.  Dense() is the fully-connected layer object you used in Question 1.
# Instantiate it so that it has 1024 units, and uses the ReLU activation function.

[your code here]


# Add another dense layer with 1 unit and the sigmoid activation function.  This will be your output layer.
# Assign the result to the 'predictions' variable.

predictions = [your code here]

# Wrap our network layers in a new Model object.  The base_model returned by InceptionV3() only spans the original
# Inception V3 layers, so it does not include the layers we just added; we need a Model that covers both before
# we can compile and train.  Here we just tell the Model object that its input is the input layer of the base
# Inception V3 model and its output is our new sigmoid layer.
# This is all the Model object needs because all the other layer objects already know how they are connected.
model = Model(inputs=base_model.input, outputs=predictions)

# Set all the layers in the base model to be non-trainable.
# This freezes the pre-trained weights in the convolutional layers so that, when we train,
# only the weights of the newly added fully connected layers are updated.
for layer in base_model.layers:
    layer.trainable = False
        
# Create an Adam optimizer object with learning rate equal to learning_rate (defined above):
# call Adam() with the parameter learning_rate=learning_rate (the older lr= keyword is deprecated in tf.keras).

[your code here]


# Compile the CNN using model.compile() method.  Use the 'adam' optimizer you created above, and the 
# 'binary_crossentropy' loss function.  Use the parameter metrics=['accuracy'].  

[your code here]


#Print a Summary of the Architecture using the summary() method.
#model.summary()     # If you want to see the model, uncomment this line.  It's really big!

print('Model compiled!')

# Step 2: Load Images and Prepare the Network for training

In [None]:
# This is nearly identical to step 2 from question 1 with a few minor changes (watch for them!)

# Create the training dataset generator.  This time do not use shearing or zooming, just horizontal flipping.
# Don't forget to rescale the image pixel data.

[your code here]


# Create the validation dataset generator.  It doesn't need any data augmentation.

[your code here]


# Load the training set using train_datagen.flow_from_directory().  Use
# target_size = (input_image_size, input_image_size), batch_size = batch_size, and class_mode = 'binary'.

[your code here]


# Load the validation dataset using validation_datagen.flow_from_directory(). 
# Use the same parameters as above (except for the directory name).

[your code here]


# Step 3: Run the CNN Network

In [None]:
# Train the CNN using model.fit().  
# This works just like in the previous problem -- use the provided model hyperparameters defined in the top block of this notebook.
# Note, however, that this time num_epochs has been set to 1.  This means we will only train for one epoch!

# This will take a while - maybe 2-3 hours.   On my computer (which is a very new
# 8-core macbook pro) it took about 35 minutes training on the CPU (not GPU).  

[your code here]


# Step 4: Save the model and weights for prediction

In [None]:
# Save the trained model to disk using the save() method of the CNN model.
# Inception is a big network, so this can take quite a while...
# be patient and wait for the message indicating that the model has been saved.

# You don't have to code anything here except change the filename if required.
# NOTE: Step 5 must load the model from this same filename.

model.save('Cat-Dog-transfer-Inception.h5')
print("Saved model to disk")

Saved model to disk


 # Step 5: Predict Dog/Cat using the Trained Model

In [None]:
# This can be done in exactly the same way as in Question 1.  Just make sure to load the right model file.

# You should get a *very* high classification rate.

[your code here]

# Final Remarks

This exercise shows the potential advantage of transfer learning.  We were able to use a very large and sophisticated model, but avoid most of the work of training it by using weights trained by someone else on a different, but quite general dataset.  All we had to do was train the weights of the freshly added, randomly-initialized fully connected layers.

We only used one training epoch, and even though that got us a very good model, we might see improvements if we train for additional epochs (at the cost of that much more time!).

We might also be able to improve our model further by fine-tuning the weights of some of the convolutional layers.  For example, we could add a second training phase where we un-freeze the weights of the last few convolutional layers (the ones closest to the output, which encode the most task-specific features), and train them along with the fully connected layers for a few more epochs.