<a href="https://colab.research.google.com/github/MagretAdekunle/Machine-Learning-Projects-freeCodeCamp/blob/main/Cat%20and%20Dog%20Image%20Classifier/fcc_cat_dog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
try:
  # `%tensorflow_version` is a line magic that is only available in Colab;
  # it asks the runtime for a TensorFlow 2.x release.
  %tensorflow_version 2.x
except Exception:
  # Best-effort: any failure of the magic is deliberately ignored so the
  # import below falls back to whatever TensorFlow is installed.
  pass
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Get project files
!wget https://cdn.freecodecamp.org/project-data/cats-and-dogs/cats_and_dogs.zip

!unzip cats_and_dogs.zip

# Root of the extracted archive and the three split directories beneath it.
PATH = 'cats_and_dogs'
train_dir, validation_dir, test_dir = (
    os.path.join(PATH, split) for split in ('train', 'validation', 'test')
)

# Count the files in each split. The train and validation directories each
# have the subdirectories "dogs" and "cats", so they are walked recursively;
# the test directory is counted with a plain, non-recursive listing.
total_train = sum(len(filenames) for _, _, filenames in os.walk(train_dir))
total_val = sum(len(filenames) for _, _, filenames in os.walk(validation_dir))
total_test = len(os.listdir(test_dir))

# Variables for pre-processing and training.
batch_size = 128
epochs = 40
IMG_HEIGHT, IMG_WIDTH = 150, 150

In [None]:
# One ImageDataGenerator per split. The only preprocessing at this stage is
# rescaling, which normalizes pixel values to the range 0..1 (no augmentation
# is configured here).
train_image_generator = ImageDataGenerator(rescale=1./255)
validation_image_generator = ImageDataGenerator(rescale=1./255)
test_image_generator = ImageDataGenerator(rescale=1./255)

# NOTE: Keras expects target_size as (height, width), so the constants are
# passed in that order. Both are currently equal, but the order matters as
# soon as a non-square size is configured.

# Batches of training images read from train_dir
# - target_size: Resizes images to (IMG_HEIGHT, IMG_WIDTH)
# - class_mode: 'binary' yields one 0/1 label per image
# - batch_size: Number of samples per batch
train_data_gen = train_image_generator.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
    batch_size=batch_size
)

# Batches of validation images read from validation_dir
# - Same settings as the training data generator
val_data_gen = validation_image_generator.flow_from_directory(
    validation_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
    batch_size=batch_size
)

# Batches of test images
# - directory is PATH with classes=['test'], so the 'test' folder is treated
#   as the single class subfolder to load
# - class_mode is left at its default, so batches are still
#   (images, labels) pairs; the prediction cell unpacks them that way
# - shuffle: Disabled to preserve the order of test samples
test_data_gen = test_image_generator.flow_from_directory(
    PATH,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    classes=['test'],
    batch_size=batch_size,
    shuffle=False
)


In [None]:
# Function to plot images, optionally displaying classification probabilities
def plotImages(images_arr, probabilities=False):
    """Plot images in a single column, optionally titled with predictions.

    Args:
        images_arr: Sequence of images, each accepted by ``Axes.imshow``.
        probabilities: ``False`` (default) or ``None`` to draw untitled
            images; otherwise a sequence with one predicted "dog"
            probability per image. Each entry may be a plain number or a
            one-element array (the per-sample shape produced by a model with
            a single output unit).

    Returns:
        None. The figure is displayed with ``plt.show()``. Nothing is drawn
        when ``images_arr`` is empty.
    """
    num_images = len(images_arr)
    if num_images == 0:
        # plt.subplots cannot create a grid with zero rows.
        return

    # squeeze=False keeps `axes` a 2-D array even for a single image, so the
    # zip loops below work for any positive number of images.
    _, axes = plt.subplots(num_images, 1, figsize=(5, num_images * 3), squeeze=False)
    axes = axes.ravel()

    if probabilities is False or probabilities is None:
        for img, ax in zip(images_arr, axes):
            ax.imshow(img)
            ax.axis('off')
    else:
        for img, probability, ax in zip(images_arr, probabilities, axes):
            ax.imshow(img)
            ax.axis('off')
            # Collapse scalars and one-element arrays to a plain float before
            # comparing and formatting.
            dog_probability = float(np.ravel(probability)[0])
            is_dog = dog_probability > 0.5
            # Title shows the confidence of the predicted class
            # (dog if > 0.5, else cat).
            confidence = dog_probability if is_dog else 1 - dog_probability
            ax.set_title("%.2f%% %s" % (confidence * 100, "dog" if is_dog else "cat"))
    plt.show()

# Draw one batch from the training generator (labels are discarded) and
# display its first five images without titles.
sample_training_images, _ = next(train_data_gen)
plotImages(images_arr=sample_training_images[:5])


In [None]:
# Replace the training ImageDataGenerator with one that, besides rescaling
# pixel values by 1/255, applies random augmentation: rotations, horizontal
# and vertical shifts, and zoom.
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
)


In [None]:
# Rebuild the training data generator on top of the augmenting
# ImageDataGenerator:
# - directory: Path to the training data directory
# - target_size: Resizes images to (IMG_HEIGHT, IMG_WIDTH)
# - class_mode: 'binary' for binary classification
# - batch_size: Number of images per batch
train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
    batch_size=batch_size
)

# Take the first image of the first batch five times over.
# - train_data_gen[0] is the first batch, [0] its images, [0] the first image
# - presumably each lookup re-applies the random transforms, giving five
#   different augmentations of one picture (confirm against the rendered output)
augmented_images = [train_data_gen[0][0][0] for _ in range(5)]

# Display the augmented images
plotImages(augmented_images)


In [None]:
# Build the CNN: three Conv2D + MaxPooling2D stages followed by a small dense
# classifier head with a single sigmoid output for binary classification.
model = Sequential([
    # Stage 1: Convolution + MaxPooling.
    # input_shape is (height, width, channels), matching the
    # (IMG_HEIGHT, IMG_WIDTH) target size of the image generators.
    Conv2D(32, (3, 3), padding='same', activation="relu",
           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: Convolution + MaxPooling
    Conv2D(32, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3: Convolution + MaxPooling
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten + fully connected layers
    Flatten(),  # converts the 3D feature maps to 1D feature vectors
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid"),
])

# Compile the model: binary cross-entropy pairs with the single sigmoid
# output; accuracy is tracked for the training/validation curves.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Summary of the model
model.summary()

In [None]:
# Train the model using the training and validation data generators.
# batch_size is intentionally not passed: the generators already yield
# batches (sized by `batch_size` when they were created), and Keras documents
# that batch_size must not be specified for generator input.
history = model.fit(train_data_gen, validation_data=val_data_gen, epochs=epochs)

In [None]:
# Extract accuracy and loss values from the training history
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of recorded epochs for the x-axis rather than the `epochs`
# setting, so the plot still works if `epochs` was changed after training.
epochs_range = range(len(acc))

# One figure with two side-by-side panels comparing training and validation
# metrics
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

# Training and validation accuracy
acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')

# Training and validation loss
loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')

# Display the plots
plt.show()


In [None]:
# Run the trained model over the test generator to get one prediction per
# test image
probabilities = model.predict(test_data_gen)

# Pull a batch of test images from the same generator
# - the generator yields (images, labels); the labels are ignored using '_'
testimages, _ = next(test_data_gen)

# Show each test image titled with its predicted class and confidence
plotImages(images_arr=testimages, probabilities=probabilities)


In [None]:
# Expected labels for the test set, in order (0 for cat, 1 for dog)
answers = [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
           1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,
           1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
           1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
           0, 0, 0, 0, 0, 0]

# Count the predictions whose rounded probability equals the expected label.
# Each prediction is indexed with [0], i.e. it is expected to be an array
# holding the probability as its first element.
correct = sum(
    1
    for predicted, expected in zip(probabilities, answers)
    if round(predicted[0]) == expected
)

# Share of correctly identified images, as a percentage of all answers
percentage_identified = (correct / len(answers)) * 100

# The challenge is passed at 75% accuracy or better
passed_challenge = percentage_identified >= 75

# Report the score, then the pass/fail verdict
print(f"Your model correctly identified {round(percentage_identified, 2)}% of the images of cats and dogs.")

print(
    "You passed the challenge!"
    if passed_challenge
    else "You haven't passed yet. Your model should identify at least 75% of the images. Keep trying. You will get it!"
)
