In [None]:
from google.colab import files

# files.upload() opens the browser file picker and returns a mapping of
# uploaded filename -> file contents. Bind the result so it is not echoed as
# the cell output: for kaggle.json that would write the API key into the
# saved notebook.
uploaded = files.upload()

In [None]:
!mkdir -p ~/.kaggle
!mv "kaggle (1).json" ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the Cats-vs-Dogs archive with the Kaggle CLI; the zip file lands in
# the current working directory (/content in the recorded run).
!kaggle datasets download -d shaunthesheep/microsoft-catsvsdogs-dataset


Dataset URL: https://www.kaggle.com/datasets/shaunthesheep/microsoft-catsvsdogs-dataset
License(s): other
Downloading microsoft-catsvsdogs-dataset.zip to /content
 94% 740M/788M [00:06<00:00, 83.8MB/s]
100% 788M/788M [00:06<00:00, 119MB/s] 


In [None]:
import zipfile
import os
# Unpack the downloaded archive into ./dataset, creating the folder if needed.
extract_dir = "dataset"
os.makedirs(extract_dir, exist_ok=True)
with zipfile.ZipFile("microsoft-catsvsdogs-dataset.zip", mode='r') as archive:
    archive.extractall(path=extract_dir)


In [None]:
# Show the top-level entries of the extracted image folder.
!ls dataset/PetImages


Cat  Dog


In [None]:
# Raw directory listings per class. Every directory entry is counted, so the
# totals include any non-image files that sit in the folders.
cat_images = os.listdir("dataset/PetImages/Cat")
dog_images = os.listdir("dataset/PetImages/Dog")

for caption, listing in (
    ("Number of Cat images:", cat_images),
    ("Number of Dog images:", dog_images),
):
    print(caption, len(listing))


Number of Cat images: 12501
Number of Dog images: 12501


In [None]:
import os

def remove_undecodable_images_tf(directory):
    """Delete every file under ``directory`` that TensorFlow cannot decode.

    Walks ``directory`` recursively, reads each file with ``tf.io.read_file``
    and decodes it with ``tf.image.decode_image(..., channels=3)``. A file is
    removed from disk when decoding raises ``tf.errors.InvalidArgumentError``
    or when the decoded tensor has zero elements. Any other exception is
    reported and the file is left in place.

    Args:
        directory: Root folder to scan; sub-folders are included.

    Returns:
        None. Progress and removals are reported with ``print``.
    """
    # Imported here so the function also works on a fresh kernel: the
    # notebook-level ``import tensorflow as tf`` lives in a later cell, and
    # without it this function fails with NameError under Restart & Run All.
    import tensorflow as tf

    print(f"Checking directory for undecodable images using TensorFlow: {directory}")
    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            # os.walk only lists non-directory entries here, so no explicit
            # directory check is needed before reading the file.
            filepath = os.path.join(root, filename)
            try:
                img_bytes = tf.io.read_file(filepath)
                img = tf.image.decode_image(img_bytes, channels=3)

                # Decoding succeeded but produced no pixels: treat as unusable.
                if tf.size(img).numpy() == 0:
                    print(f"Removing empty image: {filepath}")
                    os.remove(filepath)

            except tf.errors.InvalidArgumentError as e:
                # Raised for corrupt, empty, or non-image files.
                print(f"Removing undecodable image: {filepath} - {e}")
                os.remove(filepath)
            except Exception as e:
                # Best effort: report anything unexpected and keep the file.
                print(f"Could not process image {filepath}: {e}")

# Clean both class folders in place before the datasets are built from them.
dataset_dir = "/content/dataset/PetImages"
cat_dir, dog_dir = (os.path.join(dataset_dir, label) for label in ("Cat", "Dog"))

for class_dir in (cat_dir, dog_dir):
    remove_undecodable_images_tf(class_dir)

print("Undecodable image removal complete using TensorFlow.")

Checking directory for undecodable images using TensorFlow: /content/dataset/PetImages/Cat
Removing undecodable image: /content/dataset/PetImages/Cat/4351.jpg - {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Input size should match (header_size + row_size * abs_height) but they differ by 2 [Op:DecodeImage] name: 
Removing undecodable image: /content/dataset/PetImages/Cat/666.jpg - {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Input is empty. [Op:DecodeImage] name: 
Removing undecodable image: /content/dataset/PetImages/Cat/Thumbs.db - {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file format. One of JPEG, PNG, GIF, BMP required. [Op:DecodeImage] name: 
Removing undecodable image: /content/dataset/PetImages/Cat/10404.jpg - {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file fo

In [None]:
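# Optional input-pipeline tuning (currently disabled). If enabled, it must run
# AFTER the cell that creates train_ds / val_ds; the datasets are named
# train_ds / val_ds in this notebook, not train_dir / test_dir.
# AUTOTUNE = tf.data.AUTOTUNE

# train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)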


In [None]:
import tensorflow as tf

dataset_dir = "/content/dataset/PetImages"

# Options shared by both subsets. The training and validation calls use the
# same split fraction and seed; only `subset` differs between them.
split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(150, 150),
    batch_size=32,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_dir, subset="training", **split_options
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_dir, subset="validation", **split_options
)

print("Train & Validation datasets loaded successfully!")


Found 24991 files belonging to 2 classes.
Using 19993 files for training.
Found 24991 files belonging to 2 classes.
Using 4998 files for validation.
Train & Validation datasets loaded successfully!


In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Rescaling
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense,Input,Flatten

# Pretrained VGG16 convolutional base (no classifier head) used as a feature
# extractor. `classes` is intentionally not passed: the Keras docs state it is
# only to be specified when include_top=True and no pretrained weights are
# loaded, so it had no effect here.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

# Freeze the base so that only the new head below is trained.
base_model.trainable = False

# Named `inputs` rather than `input` so the built-in input() is not shadowed
# for the rest of the notebook session.
inputs = layers.Input(shape=(150, 150, 3))
# NOTE(review): the ImageNet VGG16 weights are normally paired with
# tensorflow.keras.applications.vgg16.preprocess_input rather than 1/255
# scaling; consider switching and comparing validation accuracy.
x = Rescaling(1./255)(inputs)
x = base_model(x)
x = Flatten()(x)
# Single sigmoid unit, paired with binary_crossentropy for the two classes.
output = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.models.Model(inputs=inputs, outputs=output)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# train_ds already yields batches (batch_size=32 was set when the dataset was
# built), so batch_size is not passed to fit(): Keras documents that it must
# not be specified for dataset inputs.
history = model.fit(train_ds, epochs=5, validation_data=val_ds)


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 133ms/step - accuracy: 0.8975 - loss: 0.3024 - val_accuracy: 0.9568 - val_loss: 0.1347
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 126ms/step - accuracy: 0.9545 - loss: 0.1291 - val_accuracy: 0.9596 - val_loss: 0.1134
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 126ms/step - accuracy: 0.9612 - loss: 0.1082 - val_accuracy: 0.9626 - val_loss: 0.1046
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 126ms/step - accuracy: 0.9659 - loss: 0.0976 - val_accuracy: 0.9634 - val_loss: 0.1001
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 126ms/step - accuracy: 0.9679 - loss: 0.0897 - val_accuracy: 0.9636 - val_loss: 0.0973
