<a href="https://colab.research.google.com/github/BronsonSchultz/487_A6/blob/main/asn6_q2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bronson Schultz, 11231230, bcs269
## CMPT 487, A6, Q2

# Import Libraries 

Import all the required TensorFlow and Keras libraries, and define the dataset sizes and hyperparameters used throughout the notebook.

In [None]:
# THIS VERSION IS FOR TENSORFLOW v2.0 which has keras embedded
# Note: Multibackend Keras is being discontinued after version 2.3.5.

# These are all the imports we will need.  You shouldn't need anything else.
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras import regularizers 
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model

import os as os
import numpy as np
import matplotlib.pyplot as plt

# Dataset sizes (image counts for the training, validation and test sets)
num_train_images = 23000
num_validation_images = 2000
num_test_images = 399

# Some Hyperparameters
input_image_size = 256   # side length (pixels) of the square network input
batch_size = 32
num_epochs = 1
learning_rate = 0.001
l2_lambda = 0.01

# Parameters derived from hyperparameters
# Training and validation round DOWN: a trailing partial batch is not counted.
training_steps_per_epoch = num_train_images // batch_size
validation_steps = num_validation_images // batch_size
# Testing rounds UP so the final partial batch is still covered.  A step count
# must be a whole number; plain `/` produced the float 12.46875 here.
testing_steps_per_epoch = (num_test_images + batch_size - 1) // batch_size

%matplotlib inline

 # Step 1: Load the Inception V3 network, and modify it for transfer learning.


In [None]:
# NETWORK ARCHITECTURE
#
# Inception V3 is used as the convolutional base, with a small new head
# stacked on top of it.

# Convolutional base: Inception V3 carrying its ImageNet weights, created
# without its original top (fully connected / output) layers.
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(input_image_size, input_image_size, 3),
)

# New head, applied to the base model's output in this order:
#   global average pooling -> flatten -> Dense(1024, relu) -> Dense(1, sigmoid)
pooled = GlobalAveragePooling2D()(base_model.output)
flattened = Flatten()(pooled)
hidden = Dense(1024, activation='relu')(flattened)

# Output layer: a single sigmoid unit.
predictions = Dense(1, activation='sigmoid')(hidden)

# Wrap everything in one Model that runs from the base model's input to the
# new sigmoid output.  Only the two endpoints are needed because the layers
# in between already know how they are connected.
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every layer of the base model so that training only updates the
# weights of the newly added dense layers.
for base_layer in base_model.layers:
    base_layer.trainable = False

# Adam optimizer using the learning rate from the hyperparameter cell.
opt = Adam(learning_rate=learning_rate)

# Compile for binary classification and report accuracy during training.
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

print('Model compiled!')

Model compiled!


# Step 2: Load Images and Prepare the Network for training

In [None]:
# Image generators.  Both rescale pixel values by 1/255.  The training
# generator additionally applies horizontal flipping (no shearing or zooming);
# the validation generator uses no data augmentation.
train_data_gen = image.ImageDataGenerator(horizontal_flip=True, rescale=1./255)
valid_data_gen = image.ImageDataGenerator(rescale=1./255)

# Settings shared by the training and validation directory iterators.
flow_options = dict(
    target_size=(input_image_size, input_image_size),
    batch_size=batch_size,
    class_mode='binary',
)

# Load the training set from the "train" directory, then the validation set
# from the "valid" directory.
train_data = train_data_gen.flow_from_directory("train", **flow_options)
valid_data = valid_data_gen.flow_from_directory("valid", **flow_options)


Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


# Step 3: Run the CNN Network

In [None]:
# Train the CNN with model.fit(), using the hyperparameters defined in the
# top block of this notebook.
# num_epochs is set to 1 there, so we only train for a single epoch.
#
# Be prepared to wait: this may take 2-3 hours.
history = model.fit(
    train_data,
    epochs=num_epochs,
    steps_per_epoch=training_steps_per_epoch,
    validation_data=valid_data,
    validation_steps=validation_steps,
    verbose=1,
)


  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 718 steps, validate for 62 steps


# Step 4: Save the model and weights for prediction

In [None]:
# Write the trained model to disk with the model's save() method.
# Inception is a big network, so saving can take quite a while; wait for the
# confirmation message below before moving on.
saved_model_path = 'Cat-Dog-transfer-Inception.h5'
model.save(saved_model_path)
print("Saved model to disk")

Saved model to disk


 # Step 5: Predict Dog/Cat using the Trained Model

In [None]:
# Reload the model that was saved to disk in Step 4.
model = load_model("Cat-Dog-transfer-Inception.h5")
test_datagen = image.ImageDataGenerator(rescale=1./255)

# class_mode=None makes the generator yield images only (no labels), and
# shuffle=False keeps the images in a fixed order so the predictions can be
# compared position-by-position with generator.classes below.
generator = test_datagen.flow_from_directory(
    "test1",
    target_size=(input_image_size, input_image_size),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)

# model.predict() accepts the generator directly, just as model.fit() does in
# Step 3; predict_generator() is deprecated.
probabilities = model.predict(generator)

# Threshold the sigmoid outputs at 0.5.  reshape(-1) always yields a 1-D
# array, whereas np.squeeze() would collapse a single-image result to 0-d.
y_pred = probabilities.reshape(-1) > 0.5

# Classification rate = fraction of predictions that match the true labels.
accuracy = np.mean(y_pred == generator.classes)
print('The classification rate is', accuracy)
                                        

Found 399 images belonging to 2 classes.
The classification rate is 0.974937343358396


# Final Remarks

This exercise shows the potential advantage of transfer learning.  We were able to use a very large and sophisticated model, but avoid most of the work of training it by using weights trained by someone else on a different, but quite general dataset.  All we had to do was train the weights of the freshly added, randomly-initialized fully connected layers.

We only used one training epoch, and even though that got us a very good model, we might see improvements if we train for additional epochs (at the cost of that much more time!).

We might also be able to improve our model further by fine-tuning the weights of some of the convolutional layers.  For example, we could add a second training phase where we un-freeze the weights of the last few convolutional layers (those closest to the output, which hold the most task-specific features), and train them along with the fully connected layers for a few more epochs, typically with a smaller learning rate.