In [1]:
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers.legacy import RMSprop
from shutil import copyfile
import matplotlib.pyplot as plt
import requests
from io import StringIO


In [2]:
# Download the dataset from https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765
# The downloaded data is expected in 'tmp/PetImages'


source_path = 'tmp/PetImages'

source_path_dogs = os.path.join(source_path, 'Dog')
source_path_cats = os.path.join(source_path, 'Cat')

# Delete all non-image files (there are two .db files bundled into the dataset).
# This is done with os.walk/os.remove instead of a `!find ... -exec rm` shell escape
# so the cell does not depend on a Unix shell and reuses source_path rather than
# repeating the path. Like the `*.jpg` pattern it replaces, the check is case-sensitive.
for folder, sub_folders, file_names in os.walk(source_path):
  for file_name in file_names:
    if not file_name.endswith('.jpg'):
      os.remove(os.path.join(folder, file_name))

# os.listdir returns a list containing all files under the given path
print(f"There are {len(os.listdir(source_path_dogs))} images of dogs.")
print(f"There are {len(os.listdir(source_path_cats))} images of cats.")

There are 12500 images of dogs.
There are 12500 images of cats.


In [3]:
# Define root directory
root_dir = 'tmp/cats-v-dogs'

# Empty directory to prevent FileExistsError if the function is run several times.
# shutil.rmtree deletes root_dir together with everything below it.
if os.path.exists(root_dir):
  shutil.rmtree(root_dir)

# GRADED FUNCTION: create_train_val_dirs
def create_train_val_dirs(root_path):
  """
  Creates the directory tree for the train and validation sets

  The following four directories are created (with any missing parents):
  <root_path>/train/cats, <root_path>/train/dogs,
  <root_path>/validation/cats and <root_path>/validation/dogs.

  Args:
    root_path (string) - the base directory path to create subdirectories from

  Returns:
    None

  Raises:
    FileExistsError - if one of the four leaf directories already exists
  """
  # Same creation order as listing them by hand: train first, cats before dogs
  for split_name in ('train', 'validation'):
    for class_name in ('cats', 'dogs'):
      os.makedirs(os.path.join(root_path, split_name, class_name))

  
# Build the directory tree under root_dir. root_dir is deleted earlier in this
# cell when it exists, so os.makedirs is not expected to find an existing directory.
try:
  create_train_val_dirs(root_path=root_dir)
except FileExistsError:
  print("You should not be seeing this since the upper directory is removed beforehand")

In [4]:
# Testing create_train_val_dirs function

# Walk everything below root_dir and print each directory found, one full path per line
for parent_dir, child_dirs, child_files in os.walk(root_dir):
    for dir_name in child_dirs:
        print(os.path.join(parent_dir, dir_name))

tmp/cats-v-dogs/train
tmp/cats-v-dogs/validation
tmp/cats-v-dogs/train/dogs
tmp/cats-v-dogs/train/cats
tmp/cats-v-dogs/validation/dogs
tmp/cats-v-dogs/validation/cats


In [5]:
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
  """
  Splits the data into train and validation sets

  Files are copied (not moved), so SOURCE_DIR is left unchanged. Entries that
  are zero-length files or not regular files are reported and left out *before*
  the split point is computed, so SPLIT_SIZE applies to the usable files only.

  Args:
    SOURCE_DIR (string): directory path containing the images
    TRAINING_DIR (string): directory path to be used for training
    VALIDATION_DIR (string): directory path to be used for validation
    SPLIT_SIZE (float): proportion of the usable files to be used for training

  Returns:
    None
  """
  # os.listdir returns entries in arbitrary order; sorting first means the
  # fixed seed below actually produces the same shuffle on every machine.
  contents = sorted(os.listdir(SOURCE_DIR))

  # A private generator keeps the shuffle reproducible without reseeding the
  # module-level `random` state that other code may rely on.
  shuffler = random.Random(42)
  contents_randomized = shuffler.sample(contents, len(contents))

  usable_files = []
  for filename in contents_randomized:
    file_path = os.path.join(SOURCE_DIR, filename)
    if not os.path.isfile(file_path):
      # copyfile cannot copy directories, so skip them instead of crashing
      print(f"{filename} is not a regular file, so ignoring.")
    elif os.path.getsize(file_path) == 0:
      print(f"{filename} is zero length, so ignoring.")
    else:
      usable_files.append(filename)

  # The first `training_count` shuffled files go to training, the rest to validation
  training_count = int(len(usable_files) * SPLIT_SIZE)

  for position, filename in enumerate(usable_files):
    destination_dir = TRAINING_DIR if position < training_count else VALIDATION_DIR
    copyfile(os.path.join(SOURCE_DIR, filename),
             os.path.join(destination_dir, filename))

In [6]:
# Test split_data function

# Source folders holding the original images
CAT_SOURCE_DIR = "tmp/PetImages/Cat/"
DOG_SOURCE_DIR = "tmp/PetImages/Dog/"

# Destination roots and their per-class sub-folders
TRAINING_DIR = "tmp/cats-v-dogs/train/"
VALIDATION_DIR = "tmp/cats-v-dogs/validation/"

TRAINING_CATS_DIR = os.path.join(TRAINING_DIR, "cats/")
VALIDATION_CATS_DIR = os.path.join(VALIDATION_DIR, "cats/")

TRAINING_DOGS_DIR = os.path.join(TRAINING_DIR, "dogs/")
VALIDATION_DOGS_DIR = os.path.join(VALIDATION_DIR, "dogs/")

# Clear out every destination folder so that re-running this cell starts clean
for destination_dir in (TRAINING_CATS_DIR,
                        TRAINING_DOGS_DIR,
                        VALIDATION_CATS_DIR,
                        VALIDATION_DOGS_DIR):
  for entry in os.scandir(destination_dir):
    os.remove(entry.path)

# Fraction of the images that goes to training
split_size = .9

# Perform the split, cats first and then dogs
# NOTE: Messages about zero length images should be printed out
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, VALIDATION_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, VALIDATION_DOGS_DIR, split_size)

# Report the resulting file counts

# split_data copies rather than moves, so the source folders keep all their images
print(f"\n\nOriginal cat's directory has {len(os.listdir(CAT_SOURCE_DIR))} images")
print(f"Original dog's directory has {len(os.listdir(DOG_SOURCE_DIR))} images\n")

# Sizes of the training and validation splits
print(f"There are {len(os.listdir(TRAINING_CATS_DIR))} images of cats for training")
print(f"There are {len(os.listdir(TRAINING_DOGS_DIR))} images of dogs for training")
print(f"There are {len(os.listdir(VALIDATION_CATS_DIR))} images of cats for validation")
print(f"There are {len(os.listdir(VALIDATION_DOGS_DIR))} images of dogs for validation")

666.jpg is zero length, so ignoring.
11702.jpg is zero length, so ignoring.


Original cat's directory has 12500 images
Original dog's directory has 12500 images

There are 11249 images of cats for training
There are 11249 images of dogs for training
There are 1250 images of cats for validation
There are 1250 images of dogs for validation


In [11]:
def train_val_generators(TRAINING_DIR, VALIDATION_DIR):
  """
  Builds the data generators that feed training and validation images

  Both generators rescale pixel values by 1/255 and read images from their
  directory in batches of 20, resized to 150x150, with binary class labels.

  Args:
    TRAINING_DIR (string): directory path containing the training images
    VALIDATION_DIR (string): directory path containing the validation images

  Returns:
    train_generator, validation_generator - tuple containing the generators
  """
  # Settings that the two flow_from_directory calls have in common
  flow_options = dict(batch_size=20,
                      class_mode='binary',
                      target_size=(150, 150))

  # Each split gets its own ImageDataGenerator instance
  train_datagen = ImageDataGenerator(rescale=1./255)
  validation_datagen = ImageDataGenerator(rescale=1./255)

  train_generator = train_datagen.flow_from_directory(directory=TRAINING_DIR,
                                                      **flow_options)
  validation_generator = validation_datagen.flow_from_directory(directory=VALIDATION_DIR,
                                                                **flow_options)

  return train_generator, validation_generator

In [12]:
# Test generators
# Builds both generators from the split directories; each one reports how many
# images and classes it found under its path.
train_generator, validation_generator = train_val_generators(TRAINING_DIR, VALIDATION_DIR)

Found 22498 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


In [13]:
def create_model():
  """
  Builds and compiles the convolutional network used for cat/dog classification

  The network takes 150x150 images with 3 channels, applies three
  Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with 16, 32 and 64 filters,
  then Flatten, Dense(512, relu) and a single sigmoid output unit.
  It is compiled with RMSprop (learning rate 0.001), binary cross-entropy
  loss and the 'acc' metric.

  Returns:
    model - the compiled, untrained tf.keras Sequential model
  """
  layers = tf.keras.layers

  # The first stage also declares the input shape
  stack = [
      layers.Conv2D(16, (3,3), activation='relu', input_shape=(150,150,3)),
      layers.MaxPooling2D(2,2),
  ]

  # Remaining convolution + pooling stages, created in the same order as before
  for filter_count in (32, 64):
    stack.append(layers.Conv2D(filter_count, (3,3), activation='relu'))
    stack.append(layers.MaxPooling2D(2,2))

  # Classifier head
  stack.append(layers.Flatten())
  stack.append(layers.Dense(512, activation='relu'))
  stack.append(layers.Dense(1, activation='sigmoid'))

  model = tf.keras.models.Sequential(stack)

  model.compile(optimizer=RMSprop(learning_rate=0.001),
                loss='binary_crossentropy',
                metrics=['acc'])

  return model

In [14]:
# Get the untrained model
# `model` stays a notebook-level name: test_model further down reads it directly.
model = create_model()

# Train the model
# Note that this may take some time.
# Runs 15 epochs over train_generator and evaluates on validation_generator;
# the value returned by fit is kept in `history`.
history = model.fit(train_generator,
                    epochs=15,
                    verbose=1,
                    validation_data=validation_generator)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [44]:
from tensorflow.keras.utils import load_img, img_to_array
import numpy as np 
from PIL import UnidentifiedImageError

def test_model(TEST_IMAGES):
    allowed_extensions = [".jpg", ".jpeg", ".png"]

    images = os.listdir(TEST_IMAGES)
    images_list = []
    image_names_list = []

    for image in images:
        file_extension = os.path.splitext(image)[1]

        if file_extension.lower() not in allowed_extensions:
            print(f"Ignoring non-image file: {image}")
            continue

        image_path = os.path.join(TEST_IMAGES, image)
        img = load_img(image_path, target_size=(150, 150))
        images_list.append(img)
        image_names_list.append(image)

    images_array = np.zeros((len(images_list), 150, 150, 3))
    for i, img in enumerate(images_list):
        images_array[i] = img_to_array(img)

    images_array /= 255
    
    classes = model.predict(images_array, batch_size=10)
    
    results = []
    
    for i in range(len(classes)):
        if classes[i] > 0.5:
            probability = classes[i].item() * 100
            results.append(f'dog ({round(probability,2)}%)')
        else:
            probability = (1 - classes[i].item()) * 100
            results.append(f'cat ({round(probability,2)}%)')

    return results, image_names_list

# Classify everything in tmp/test and print one file name / label pair per image
res, img_list = test_model('tmp/test')

for image_name, label in zip(img_list, res):
    print(f'Image: {image_name} \n Class: {label}')

Ignoring non-image file: .DS_Store
Image: cat-323262_1280.jpg 
 Class: cat (98.2%)
Image: bulldog-1224267_1280.jpg 
 Class: dog (54.61%)
Image: cat-114782_1280.jpg 
 Class: cat (64.87%)
Image: puppy-1903313_640.jpg 
 Class: dog (99.84%)
Image: cat-551554_1280.jpg 
 Class: cat (100.0%)
Image: puppy-1207816_1280.jpg 
 Class: dog (100.0%)
Image: cat-2536662_1280.jpg 
 Class: dog (85.11%)
Image: dog-1728494_1280.jpg 
 Class: dog (78.54%)


['cat', 'dog', 'cat', 'dog', 'cat', 'dog', 'dog', 'dog']


NameError: name 'images_array' is not defined