# Import Dependencies

Import necessary libraries and frameworks:
- matplotlib: For data visualization
- tensorflow: For building and training neural networks
- pandas: For data manipulation
- numpy: For numerical operations
- keras: High-level neural network API
Additional imports for image processing and model building components


In [3]:
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

from tensorflow import keras
from keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.preprocessing import image_dataset_from_directory

import os
import matplotlib.image as mpimg

from PIL import Image

# Path to your dataset

In [None]:
# Root directory of the image dataset (relative to the working directory).
# process_images_to_rgb() reads the "Cat" and "Dog" sub-folders inside it.
path = 'PetImages'

# Image Processing Function

Define a function to process and clean the image dataset:
- Converts all images to RGB format
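- Deletes from disk any file that cannot be opened, converted, or re-saved (such files are treated as corrupted)
- Catches the exception raised for each such file and prints the file name and the error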
Parameters:
    folder_path (str): Path to the main dataset directory containing Cat and Dog subdirectories
Returns:
    None, but prints processing statistics


In [None]:
def process_images_to_rgb(folder_path, class_names=("Cat", "Dog")):
    """Re-save every image in the class folders as RGB and delete unreadable files.

    For each name in ``class_names`` the directory ``folder_path/<name>`` is
    listed. Every regular file found there is opened with PIL, converted to
    RGB when its mode differs, and written back to the same path. A file that
    raises while being opened, converted or saved is treated as corrupted and
    deleted from disk.

    Warning: the dataset is modified in place; deleted files are not backed up.

    Args:
        folder_path (str): Dataset root containing one sub-directory per class.
        class_names: Names of the class sub-directories to clean. Defaults to
            ``("Cat", "Dog")``.

    Returns:
        None. Progress messages and a final summary are printed.

    Raises:
        OSError: If a class directory cannot be listed or a corrupted file
            cannot be removed.
    """
    processed_count = 0
    removed_count = 0

    for class_name in class_names:
        class_dir = os.path.join(folder_path, class_name)
        for filename in os.listdir(class_dir):
            filepath = os.path.join(class_dir, filename)

            # Sub-directories are not images, and os.remove() cannot delete
            # them: without this guard a nested folder would abort the run.
            if not os.path.isfile(filepath):
                continue

            try:
                with Image.open(filepath) as img:
                    if img.mode != "RGB":
                        print(f"Converting {filename} to RGB")
                        img = img.convert("RGB")
                    # Written back even when already RGB: saving forces a full
                    # decode, which is what exposes truncated/corrupted files.
                    img.save(filepath)
            except Exception as e:
                # PIL raises many unrelated exception types for bad files, so
                # the broad catch is deliberate; the file is discarded.
                print(f"Removing corrupted file: {filename}, Error: {e}")
                os.remove(filepath)
                removed_count += 1
            else:
                processed_count += 1

    print(f"Processing complete. Processed: {processed_count}, Removed: {removed_count}")

# Runs the image processing function


In [10]:
# Cleans the dataset under `path` in place: images are re-saved as RGB and
# files that fail to load are deleted from disk.
process_images_to_rgb(path)

# Verify available classes (Cat and Dog)


In [None]:
# os.listdir() returns entries in arbitrary order and also lists stray files.
# Keep only sub-directories and sort them so that the position of each name
# matches the integer label image_dataset_from_directory assigns to it
# (labels follow the alphanumeric order of the class folders).
classes = sorted(
    entry
    for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
classes

# Data Visualization

Create a visualization grid of sample images:
- Displays 8 cat images and 8 dog images
- Creates a 4x4 grid of images
- Removes axes for better visualization

In [None]:
# Show 16 sample images (8 cats followed by 8 dogs) on a 4x4 grid.
fig = plt.gcf()
fig.set_size_inches(16, 16)

# The folder names must match the on-disk class directories exactly
# ("Cat"/"Dog", the same names process_images_to_rgb uses); lowercase names
# only work on case-insensitive filesystems.
cat_dir = os.path.join(path, 'Cat')
dog_dir = os.path.join(path, 'Dog')
cat_names = os.listdir(cat_dir)
dog_names = os.listdir(dog_dir)

# Take the 8 directory entries that end at this index from each class.
pic_index = 210

cat_images = [os.path.join(cat_dir, fname)
              for fname in cat_names[pic_index-8:pic_index]]
dog_images = [os.path.join(dog_dir, fname)
              for fname in dog_names[pic_index-8:pic_index]]

for i, img_path in enumerate(cat_images + dog_images):
    # Subplot indices are 1-based.
    sp = plt.subplot(4, 4, i+1)
    sp.axis('Off')  # hide ticks and frame so only the picture is shown

    img = mpimg.imread(img_path)
    sp.imshow(img)

plt.show()

# Dataset Creation

Create training and validation datasets using TensorFlow's image_dataset_from_directory (the validation split is stored in `test_datagen`):
Parameters:
- image_size: (200,200) for consistent input size
- validation_split: 0.1 (10% for validation)
- batch_size: 32 images per batch
- seed: 1 for reproducibility
- subset: 'training' for training data, 'validation' for validation data


In [None]:
# Build the training split first, then the validation split, from the same
# directory. Both calls receive identical image_size / validation_split /
# seed / batch_size values; only `subset` differs.
train_datagen, test_datagen = (
    image_dataset_from_directory(
        path,
        image_size=(200, 200),
        subset=subset_name,
        seed=1,
        validation_split=0.1,
        batch_size=32,
    )
    for subset_name in ('training', 'validation')
)

# Model Architecture

Define the CNN model architecture:
- 4 Convolutional layers with MaxPooling
- Flatten layer to convert 2D features to 1D
- 3 Dense layers (512 units each), each followed by BatchNormalization; Dropout (0.1 and 0.2) is applied after the second and third
- Final sigmoid layer for binary classification


In [24]:
# CNN for binary cat-vs-dog classification on 200x200 RGB images.
model = tf.keras.models.Sequential([
    layers.Input(shape=(200, 200, 3)),
    # image_dataset_from_directory yields raw pixel values in [0, 255];
    # scale them to [0, 1] inside the model so training is better conditioned
    # and the saved model still accepts unnormalised images at inference time.
    layers.Rescaling(1.0 / 255),

    # Feature extractor: four conv + max-pool stages.
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    # Classifier head: three 512-unit dense layers with batch normalisation,
    # dropout after the second and third.
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.1),
    layers.BatchNormalization(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.BatchNormalization(),
    # Single sigmoid unit: one probability per image for the binary decision.
    layers.Dense(1, activation='sigmoid')
])

# Model Compilation

Compile the model with:
- Binary cross-entropy loss (suitable for binary classification)
- Adam optimizer
- Accuracy metric for model evaluation

In [25]:
# Configure training: Adam optimizer, binary cross-entropy loss to match the
# single sigmoid output, and accuracy reported as the metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Model Training

Train the model:
- Using the training dataset
- For 10 epochs
- With validation data for performance monitoring


In [None]:
# Train for 10 epochs on the training split, evaluating on the validation
# split after each epoch; the returned object is kept in `history`.
history = model.fit(
    x=train_datagen,
    validation_data=test_datagen,
    epochs=10,
)

# Model Saving

Save the trained model to disk in H5 format


In [None]:
# Persist the trained model to "catvsdog.h5" (path is relative to the current
# working directory).
model.save("catvsdog.h5")