# Cat vs. Dog Image Classification

# Import Libraries

In [74]:
import tensorflow as tf
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt
import PIL
import glob
import shutil
import random
from PIL import Image
import kagglehub
import warnings

warnings.filterwarnings('ignore')

# Loading Data

In [75]:
corrupted_images = []

def is_valid_image(image_path):
    try:
        img = Image.open(image_path)
        img.verify()
        return True
    except (IOError, SyntaxError, PIL.UnidentifiedImageError):
        print(f"Corrupted image: {image_path}")
        corrupted_images.append(image_path)
        return False

In [76]:
path = kagglehub.dataset_download("shaunthesheep/microsoft-catsvsdogs-dataset")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1


In [77]:
print("Path to dataset files:", path)
print("Contents of dataset directory:", os.listdir(path))

Path to dataset files: /root/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1
Contents of dataset directory: ['PetImages', 'MSR-LA - 3467.docx', 'readme[1].txt']


In [78]:
data_dir = os.path.join(path, 'PetImages')

In [79]:
output_dir = '/content/data'

In [80]:
train_dir = os.path.join(output_dir, 'train')
val_dir = os.path.join(output_dir, 'val')
test_dir = os.path.join(output_dir, 'test')

In [81]:
for folder in [train_dir, val_dir, test_dir]:
    os.makedirs(os.path.join(folder, 'cats'), exist_ok=True)
    os.makedirs(os.path.join(folder, 'dogs'), exist_ok=True)

In [82]:
cat_images = glob.glob(os.path.join(data_dir, 'Cat', '*'))
dog_images = glob.glob(os.path.join(data_dir, 'Dog', '*'))

In [83]:
cat_images = [img for img in cat_images if is_valid_image(img)]
dog_images = [img for img in dog_images if is_valid_image(img)]

Corrupted image: /root/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1/PetImages/Cat/666.jpg
Corrupted image: /root/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1/PetImages/Cat/Thumbs.db
Corrupted image: /root/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1/PetImages/Dog/11702.jpg
Corrupted image: /root/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1/PetImages/Dog/Thumbs.db


In [84]:
random.shuffle(cat_images)
random.shuffle(dog_images)

In [85]:
train_cats, val_cats, test_cats = cat_images[:11251], cat_images[11251:11876], cat_images[11876:]
train_dogs, val_dogs, test_dogs = dog_images[:11251], dog_images[11251:11876], dog_images[11876:]

In [86]:
for img in train_cats:
    shutil.copy(img, os.path.join(train_dir, 'cats'))
for img in val_cats:
    shutil.copy(img, os.path.join(val_dir, 'cats'))
for img in test_cats:
    shutil.copy(img, os.path.join(test_dir, 'cats'))

for img in train_dogs:
    shutil.copy(img, os.path.join(train_dir, 'dogs'))
for img in val_dogs:
    shutil.copy(img, os.path.join(val_dir, 'dogs'))
for img in test_dogs:
    shutil.copy(img, os.path.join(test_dir, 'dogs'))

print("Data split and stored in '/content/data' directory")

Data split and stored in '/content/data' directory


In [87]:
base_dir = '/content/data'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Data Preprocessing

In [88]:
train_datagen = ImageDataGenerator(rescale=1.0/255.0)
val_datagen = ImageDataGenerator(rescale=1.0/255.0)
test_datagen = ImageDataGenerator(rescale=1.0/255.0)

In [89]:
train_data = train_datagen.flow_from_directory(
                                              train_dir,
                                               target_size=(224, 224),
                                               batch_size=128,
                                               class_mode='binary')
val_data = val_datagen.flow_from_directory(
                                              val_dir,
                                               target_size=(224, 224),
                                               batch_size=128,
                                               class_mode='binary')
test_data = test_datagen.flow_from_directory(
                                              test_dir,
                                               target_size=(224, 224),
                                               batch_size=128,
                                               class_mode='binary')

Found 24974 images belonging to 2 classes.
Found 3552 images belonging to 2 classes.
Found 3553 images belonging to 2 classes.


# Model Building

In [90]:
model = tf.keras.models.Sequential()

model.add(Conv2D(filters = 32, kernel_size=3, padding='same', activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters = 64, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters = 128, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters = 256, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters = 256, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Model Summary

In [91]:
model.summary()

# Compile Model

In [92]:
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Train Model

In [None]:
history = model.fit(train_data, epochs=5, validation_data=val_data, batch_size=32)

Epoch 1/5
[1m 31/196[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m48:12[0m 18s/step - accuracy: 0.5275 - loss: 0.6995