# 1. Install and Import Dependencies

In [None]:
!pip install tensorflow tensorflow-gpu opencv-python matplotlib.pyplot

In [None]:
import tensorflow as tf
import os

# 2. Remove unfit images

In [None]:
import cv2
import imghdr

In [None]:
# Root folder of the dataset; later cells treat each subfolder as one class
data_dir = "cats and dogs-train"

In [None]:
# Image types accepted as valid; compared against the type detected by imghdr
image_exts = [
    'jpeg',
    'jpg',
    'bmp',
    'png',
]

In [None]:
# Walk every class folder under data_dir and delete files whose detected image
# type is not listed in image_exts. Files that cannot be read are only
# reported, never deleted.
# NOTE: imghdr is deprecated since Python 3.11 and removed in Python 3.13.
for image_class in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, image_class)
    # Stray files next to the class folders (e.g. .DS_Store) would make the
    # os.listdir() call below raise NotADirectoryError, so skip them.
    if not os.path.isdir(class_dir):
        continue
    # Loop through each entry in the current class directory
    for image in os.listdir(class_dir):
        # Construct the full file path for the image
        image_path = os.path.join(class_dir, image)
        try:
            # cv2.imread() returns None (it does not raise) when the file
            # cannot be decoded, so the result has to be checked explicitly.
            img = cv2.imread(image_path)
            # Detect the image type from the file contents using imghdr
            tip = imghdr.what(image_path)

            if tip not in image_exts:
                # Unsupported or unrecognised type: report it and delete the file
                print('Image not in ext list {}'.format(image_path))
                os.remove(image_path)
            elif img is None:
                # Recognised type but OpenCV could not decode it: report only
                print('Issue with image {}'.format(image_path))
        except Exception as e:
            # Best-effort cleanup: report the failure (with its cause) and move
            # on to the next file. The file is deliberately left on disk.
            print('Issue with image {}: {}'.format(image_path, e))


# 3. Load Data

In [None]:
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Build a tf.data.Dataset from the folder tree rooted at 'data_dir'.
# With the default settings of image_dataset_from_directory:
# - every subdirectory of 'data_dir' is treated as one class
# - labels are inferred from those subdirectory names
# - the result yields (images, labels) pairs, here in batches of 8
data = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir,
    batch_size=8,
)


In [None]:
# Iterator over the dataset that yields each batch converted to NumPy arrays
data_iterator = data.as_numpy_iterator()

In [None]:
# Advance the NumPy iterator by one step and keep the resulting batch.
# 'batch' is a tuple of two elements:
# - batch[0]: the images of the batch (NumPy array)
# - batch[1]: the labels belonging to those images
batch = next(data_iterator)


In [None]:
# Shape of the image array in the current batch
batch[0].shape

In [None]:
# Run a garbage-collection pass (presumably to free memory before plotting — confirm)
import gc
gc.collect()


In [None]:
# Preview the first four images of the batch, downscaled to 128x128.
# One subplot is created per image: with ncols=1, plt.subplots returns a single
# Axes object rather than an array, so indexing it as ax[idx] would fail.
fig, ax = plt.subplots(ncols=4, figsize=(10,10))

for idx, img in enumerate(batch[0][:4]):
    resized_img = tf.image.resize(img, (128, 128))  # Resize to smaller dimensions, e.g., 128x128
    # Cast to int before display; this cell runs before the /255 scaling step
    ax[idx].imshow(resized_img.numpy().astype(int))
    # Use the image's label from batch[1] as the subplot title
    ax[idx].title.set_text(batch[1][idx])


In [None]:
# Show the first two images of the batch side by side
# (1 row, 2 columns, figure size 15x15)
fig, ax = plt.subplots(ncols=2, figsize=(15, 15))

for idx, img in enumerate(batch[0][:2]):
    subplot = ax[idx]
    # Pixel values are cast to integers before being handed to imshow
    subplot.imshow(img.astype(int))
    # The subplot title is the matching label from batch[1]
    subplot.title.set_text(batch[1][idx])
    

# 4. Scale Data

In [None]:
# Divide every image batch by 255; labels pass through unchanged
data = data.map(lambda images, labels: (images / 255, labels))

In [None]:
# Pull one batch from the scaled dataset as NumPy arrays and display it
batch = next(data.as_numpy_iterator())
batch

In [None]:
# Create a figure with 4 subplots (1 row, 4 columns) with a figure size of 10x10
fig, ax = plt.subplots(ncols=4, figsize=(10,10))

# Iterate through the first 4 images in the scaled batch (batch[0][:4])
for idx, img in enumerate(batch[0][:4]):

    # The images were divided by 255 above, so they are floats. Casting them to
    # int would truncate almost every pixel to 0 and render black images, so
    # the float array is passed to imshow as-is.
    ax[idx].imshow(img)

    # Set the title of each subplot with the corresponding label from batch[1]
    ax[idx].title.set_text(batch[1][idx])
    

# 5. Split Data

In [None]:
# Number of batches in the dataset (the dataset was built with batch_size=8)
len(data)

In [None]:
# 80/20 train/validation split, measured in batches
n_batches = len(data)
train_size = int(n_batches*.8)
# Validation gets every batch left after the training share. The previous
# int(len(data)*.2)+1 overshot by one whenever the batch count was a multiple
# of 5, so train_size + val_size exceeded the dataset length.
val_size = n_batches - train_size

In [None]:
# The first train_size batches form the training set; the validation set is
# taken from what follows, up to val_size batches
train = data.take(count=train_size)
val = data.skip(count=train_size).take(count=val_size)

# 6. Build Deep Learning Model