# 1. Install and Import Dependencies

In [None]:
!pip install tensorflow tensorflow-gpu opencv-python matplotlib.pyplot

In [1]:
import tensorflow as tf
import os

# 2. Remove unfit images

In [2]:
import cv2
import imghdr

In [3]:
# Root folder of the dataset; the cells below expect one subfolder per class
data_dir = "cats and dogs-train"

In [4]:
# Image types that are allowed to stay in the dataset
image_exts = [
    'jpeg',
    'jpg',
    'bmp',
    'png',
]

In [5]:
# Walk every class folder under data_dir and delete files whose detected type
# is not in image_exts. Files that cannot be decoded or inspected are reported
# but left on disk.
# NOTE(review): imghdr is deprecated since Python 3.11 and removed in 3.13.
for image_class in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, image_class)
    # Skip stray files that sit next to the class folders: calling os.listdir
    # on a regular file raises and would abort the whole clean-up.
    if not os.path.isdir(class_dir):
        continue
    # Loop through each entry in the current class directory
    for image in os.listdir(class_dir):
        image_path = os.path.join(class_dir, image)
        try:
            # Attempt to decode the image with OpenCV
            img = cv2.imread(image_path)
            # Detect the image type from the file contents
            tip = imghdr.what(image_path)

            if tip not in image_exts:
                # Unsupported or unrecognised type: report it and delete the file
                print('Image not in ext list {}'.format(image_path))
                os.remove(image_path)
            elif img is None:
                # cv2.imread signals an undecodable file by returning None
                # instead of raising, so the result must be checked explicitly.
                print('Issue with image {}'.format(image_path))
        except Exception as e:
            # Report the failure together with its cause; the file is kept
            print('Issue with image {}: {}'.format(image_path, e))


# 3. Load Data

In [6]:
import numpy as np
from matplotlib import pyplot as plt

In [7]:
# Build a batched tf.data.Dataset from the class folders found under 'data_dir'.
# Labels are inferred from the subdirectory names (one subdirectory per class).
data = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir,
    batch_size=32,
)

# The call scans 'data_dir', reports how many files and classes it found, and
# returns a dataset that yields (images, labels) batches of up to 32 images.


Found 557 files belonging to 2 classes.


In [8]:
# Create an iterator that yields the dataset's batches as NumPy arrays
data_iterator = data.as_numpy_iterator()

In [9]:
# Pull one batch out of the NumPy iterator. The result is a 2-tuple:
#   batch[0] -> the images of the batch, as a NumPy array
#   batch[1] -> the labels that belong to those images
batch = next(data_iterator)


In [10]:
# Shape of the image array in the current batch
batch_images = batch[0]
batch_images.shape

(32, 256, 256, 3)

In [11]:
# Force a garbage-collection pass to release memory that is no longer referenced;
# the displayed value is the number of unreachable objects the collector found.
import gc
gc.collect()


0

In [12]:
# Show the pixel values of one image from the batch, before any scaling is applied
sample_index = 8
batch[0][sample_index]

array([[[ 44.      ,  57.      ,  29.      ],
        [ 43.      ,  56.      ,  30.      ],
        [ 40.      ,  52.      ,  28.      ],
        ...,
        [ 85.29144 ,  93.29144 ,  44.291443],
        [ 83.359375,  91.359375,  44.359375],
        [ 82.      ,  90.      ,  43.      ]],

       [[ 43.05768 ,  56.05768 ,  28.057678],
        [ 41.678528,  54.678528,  28.678528],
        [ 39.077454,  51.077454,  27.077454],
        ...,
        [ 84.92255 ,  92.92255 ,  43.922546],
        [ 84.32147 ,  92.32147 ,  45.321472],
        [ 82.95221 ,  90.95221 ,  43.95221 ]],

       [[ 44.09613 ,  54.09613 ,  27.09613 ],
        [ 43.      ,  53.      ,  28.      ],
        [ 40.12909 ,  50.12909 ,  26.12909 ],
        ...,
        [ 87.82422 ,  95.82422 ,  46.82422 ],
        [ 85.359375,  93.359375,  46.359375],
        [ 83.92035 ,  91.92035 ,  44.92035 ]],

       ...,

       [[180.      , 162.      , 140.      ],
        [178.57745 , 159.49933 , 137.8587  ],
        [173.82422 , 1

# 4. Scale Data

In [None]:
# Divide every pixel value by 255; the labels pass through unchanged.
# This rebinds `data`, so running the cell a second time divides by 255 again.
data = data.map(lambda images, labels: (images / 255, labels))

In [None]:
# Fetch one batch from the dataset through a fresh NumPy iterator and display it
batch = next(data.as_numpy_iterator())
batch

# 5. Split Data

In [None]:
# Number of batches in the dataset (batches, not individual images)
len(data)

In [None]:
# Split sizes are counted in batches, because len(data) is the number of batches.
num_batches = len(data)
# 80% of the batches (rounded down) go to training.
train_size = int(num_batches * .8)
# Validation gets every remaining batch. Computing it as a remainder, rather
# than int(n * .2) + 1, keeps train_size + val_size equal to the batch count
# even when that count is a multiple of 5.
val_size = num_batches - train_size

In [None]:
# The first `train_size` batches form the training set.
train = data.take(train_size)
# The validation set is the next `val_size` batches after the training portion.
# NOTE(review): this assumes `data` yields batches in the same order on every
# pass; confirm the shuffle settings used when the dataset was created.
remaining = data.skip(train_size)
val = remaining.take(val_size)

# 6. Build Deep Learning Model