<a href="https://colab.research.google.com/github/BronsonSchultz/487_A6/blob/main/asn6_q1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries 

Import all the required TensorFlow and Keras libraries.

In [None]:
# THIS VERSION IS FOR TENSORFLOW v2.0 which has keras embedded
# Note: Multibackend Keras is being discontinued after version 2.3.5.

# These are all the imports we will need.  You shouldn't need anything else.
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras import regularizers 

import os as os
import numpy as np
import matplotlib.pyplot as plt

# Dataset sizes (number of images in each split).
num_train_images = 23000
num_validation_images = 2000
num_test_images = 399

# Model hyperparameters.
input_image_size = 64    # side length, in pixels, that images are resized to
batch_size = 32          # images processed per step
num_training_epochs = 40
learning_rate = 0.001    # step size handed to the Adam optimizer
l2_lambda = 0.01         # L2 penalty for the hidden dense layer

# Parameters derived from the hyperparameters.  All three are integers because
# Keras expects whole step counts.
# Training/validation: number of *full* batches per pass (floor division gives
# the same values as the previous int(a / b) for these positive sizes).
training_steps_per_epoch = num_train_images // batch_size
validation_steps_per_epoch = num_validation_images // batch_size
# Testing: round *up* so the final, partial batch is included and every test
# image is visited once (previously this was a float, 399 / 32 = 12.47).
testing_steps_per_epoch = (num_test_images + batch_size - 1) // batch_size

%matplotlib inline

 # Step 1: Design the CNN architecture. After designing the architecture, print it in iPython Notebook. 


In [None]:
# Network architecture (Sec. 2.2 in the assignment PDF).
#
# Four convolution + max-pooling stages followed by a regularized dense hidden
# layer and a single sigmoid output unit (binary classifier).

model = Sequential([
    # Stage 1: 8 feature maps with 3x3 kernels.  Only the first layer needs an
    # input shape; it is taken from input_image_size so that it always agrees
    # with the target_size used when the images are loaded (RGB -> 3 channels).
    Conv2D(filters=8, kernel_size=(3, 3), activation='relu',
           input_shape=(input_image_size, input_image_size, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    # Stages 2-4: 32, 64 and 128 feature maps, each followed by 2x2 max
    # pooling.  Input shapes are inferred from the previous layer.
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the feature maps into a vector for the dense layers.
    Flatten(),

    # Hidden layer: 128 ReLU units with an L2 kernel penalty.  The penalty is
    # built from l2_lambda explicitly; the string shortcut 'l2' would silently
    # use Keras' built-in default strength and ignore the hyperparameter.
    Dense(128, activation='relu',
          kernel_regularizer=regularizers.l2(l2_lambda)),

    # Output layer: a single sigmoid unit.
    Dense(1, activation='sigmoid'),
])

# Adam optimizer using the learning rate defined with the hyperparameters.
# (`learning_rate=` is the TF2 spelling of the older `lr=` argument.)
opt = Adam(learning_rate=learning_rate)

# Compile with binary cross-entropy loss and track classification accuracy.
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print a summary of the architecture.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 62, 62, 8)         224       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 8)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 29, 29, 32)        2336      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 4, 4, 128)         7

# Step 2: Load Images and Prepare the Network for training

In [None]:
# Training and validation image generators.
#
# ImageDataGenerator is reached through the `image` module that the imports
# cell already brings in (tensorflow.keras.preprocessing.image); the bare name
# `ImageDataGenerator` is never imported, so using it directly raises a
# NameError on a fresh kernel.

# Training generator: rescale pixel values to [0.0, 1.0] by dividing by 255 and
# apply real-time augmentation (horizontal flips plus a modest amount of
# shearing and zooming).
train_data_gen = image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)

# Validation generator: same rescaling, but no augmentation.
valid_data_gen = image.ImageDataGenerator(rescale=1./255)

# Load the training set.  Images are resized to
# input_image_size x input_image_size, served batch_size at a time, and given
# binary (0/1) labels.
train_data = train_data_gen.flow_from_directory(
    "/content/487_A6/train/train",
    target_size=(input_image_size, input_image_size),
    batch_size=batch_size,
    class_mode='binary',
)

# Load the validation set with the same parameters.
valid_data = valid_data_gen.flow_from_directory(
    "/content/487_A6/valid/valid",
    target_size=(input_image_size, input_image_size),
    batch_size=batch_size,
    class_mode='binary',
)



SyntaxError: ignored

# Step 3: Train the CNN Network

In [None]:
# Train the CNN with fit().  The first argument is the training-set generator
# from Step 2; validation_data is the validation-set generator.
#
# steps_per_epoch is training_steps_per_epoch, defined with the hyperparameters
# as int(num_train_images / batch_size) = int(23000 / 32) = 718.  Each step
# processes batch_size images, so 718 steps make (essentially) one pass through
# the training set per epoch.  This is repeated num_training_epochs times.
#
# Similarly, validation_steps is validation_steps_per_epoch =
# int(num_validation_images / batch_size) = int(2000 / 32) = 62, i.e. one pass
# over the full batches of the validation set at the end of every epoch.
#
# You should see the loss and the accuracy improve quite a bit after each of
# the first few epochs, and then slow down.  Running for more than 40 epochs
# probably won't give much additional improvement (in the instructor's run,
# 55 epochs was no better than 40).
#
# Expect a pause at the end of each epoch while the validation images are
# classified.  The validation accuracy should track the training accuracy
# fairly well.
#
# This will take a while - maybe 2-3 hours on a CPU (the instructor reports
# about 106 seconds per epoch on a recent 8-core MacBook Pro, without a GPU).

history = model.fit(
    train_data,
    # Was `steps_per_epoch=steps_per_epoch`, which referenced an undefined name.
    steps_per_epoch=training_steps_per_epoch,
    epochs=num_training_epochs,
    validation_data=valid_data,
    validation_steps=validation_steps_per_epoch,
    verbose=1,
)



# Step 4: Save the model and weights for prediction

In [None]:
# Persist the trained network (architecture + weights) to a single HDF5 file
# via the model's save() method.
#
# Nothing to code here: just pick a filename that suits you and run the cell.

model.save(filepath='Cat-Dog-64x64-4layer-maxpooling.h5')
print("Saved model to disk")

# Step 5: Plot the training and Validation Loss/Accuracy

In [None]:
# Plot the training and validation accuracy and loss recorded in the history
# object returned by model.fit() (two graphs, as described in section 3.6 of
# the assignment PDF).
#
# history.history is a dict of per-epoch lists.  With metrics=['accuracy'] the
# keys are 'accuracy' and 'loss' for training, and 'val_accuracy' and
# 'val_loss' for validation.
epochs_run = range(1, len(history.history['accuracy']) + 1)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

# Graph 1: accuracy per epoch.
ax_acc.plot(epochs_run, history.history['accuracy'], label='Training')
ax_acc.plot(epochs_run, history.history['val_accuracy'], label='Validation')
ax_acc.set(title='Accuracy per epoch', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend()

# Graph 2: loss per epoch.
ax_loss.plot(epochs_run, history.history['loss'], label='Training')
ax_loss.plot(epochs_run, history.history['val_loss'], label='Validation')
ax_loss.set(title='Loss per epoch', xlabel='Epoch', ylabel='Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

## Interpreting the Graphs

Answer the following questions directly in this block (if you can't produce the graphs yourself, answer based on the sample outputs in the assignment PDF):

1. The graphs suggest there may be a problem with our network.  What is it?

[Answer here]

2. What is it about the graphs that tells you that this is a problem?

[Answer here]



# Step 6: Predict Dog/Cat using the Trained Model

In [None]:
# Load the model saved in Step 4 so that the rest of the notebook does not have
# to be re-run just to test it.  The filename must match the one used in
# Step 4; it is relative to the working directory rather than an absolute path
# on someone else's machine.
model = load_model('Cat-Dog-64x64-4layer-maxpooling.h5')

# Data generator for the test images.  Pre-processing of test images must match
# that of the training/validation images, so the intensities are still rescaled
# by 1/255, but there is no augmentation (same as the validation generator).
# ImageDataGenerator is reached through the `image` module from the imports cell.
test_datagen = image.ImageDataGenerator(rescale=1./255)

# NOTE(review): the test directory is assumed by analogy with the train/valid
# paths used in Step 2 - confirm it against the actual dataset layout.
test_dir = "/content/487_A6/test/test"

# class_mode=None yields images only (no labels), and shuffle=False keeps the
# images in directory order so the predictions line up with generator.classes.
generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(input_image_size, input_image_size),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)

# Predict on the test generator.  model.predict() accepts generators directly
# (predict_generator() is the deprecated spelling).  No `steps` argument is
# given, so the generator is consumed for one full pass, including the final
# partial batch.
probabilities = model.predict(generator)

# Convert the probabilities to hard 0/1 labels: 1 if probability > 0.5, else 0.
# np.squeeze() drops the trailing length-1 dimension so that y_pred is a 1-D
# array with one entry per test image, the same shape as generator.classes.
y_pred = (np.squeeze(probabilities) > 0.5).astype(int)

# Compare y_pred with generator.classes (the correct class labels) and report
# the fraction that agree.  A classification rate of around 85% is expected.
accuracy = np.mean(y_pred == generator.classes)
print('The classification rate is', accuracy)
                                        