<a href="https://colab.research.google.com/github/AeAre/Cat_and_Dog_classification_using_CNN/blob/main/Cat_and_Dog_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Download the Cats-vs-Dogs dataset through kagglehub and keep the value it
# returns so later cells can refer to the downloaded copy.
dataset_handle = 'shaunthesheep/microsoft-catsvsdogs-dataset'
shaunthesheep_microsoft_catsvsdogs_dataset_path = kagglehub.dataset_download(
    dataset_handle
)

print('Data source import complete.')


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import warnings
# Silence all Python warnings for the rest of the notebook.
warnings.filterwarnings('ignore')


# Root folder that holds one sub-folder of images per class.
# The /kaggle/input mount only exists inside Kaggle notebooks. Anywhere else
# (e.g. Colab) fall back to the copy that kagglehub downloaded in the first
# cell. The fallback name is only evaluated when the Kaggle mount is missing,
# so deleting the first cell on Kaggle still works.
_kaggle_pet_images = "/kaggle/input/microsoft-catsvsdogs-dataset/PetImages"
if os.path.isdir(_kaggle_pet_images):
    directory = _kaggle_pet_images
else:
    directory = os.path.join(
        shaunthesheep_microsoft_catsvsdogs_dataset_path, "PetImages"
    )

directory

### Getting the data

In [None]:
# Show the entries found directly under the dataset root.
os.listdir(directory)

In [None]:
# Every sub-folder of `directory` is treated as one class; every entry inside
# it becomes a row whose filename is stored relative to `directory`.
class_dirs = [
    entry
    for entry in os.listdir(directory)
    if os.path.isdir(os.path.join(directory, entry))
]

labelled_paths = [
    (os.path.join(class_dir, entry), class_dir)
    for class_dir in class_dirs
    for entry in os.listdir(os.path.join(directory, class_dir))
]

files = [rel_path for rel_path, _ in labelled_paths]
labels = [class_dir for _, class_dir in labelled_paths]

df = pd.DataFrame({"filename": files, "label": labels})

df.head()

### Splitting the data

In [None]:
from sklearn.model_selection import train_test_split

# Two-stage stratified split: 70% train, then the remaining 30% is halved
# into validation and test. The same seed is used for both stages.
split_seed = 42

train_df, temp_df = train_test_split(
    df, test_size=0.3, random_state=split_seed, stratify=df["label"]
)

val_df, test_df = train_test_split(
    temp_df, test_size=0.5, random_state=split_seed, stratify=temp_df["label"]
)

tuple(len(part) for part in (train_df, val_df, test_df))

In [None]:
from PIL import Image

# Collect (filename, error message) for every file PIL cannot open or verify.
# All three splits are checked, not only the training one: an unreadable file
# in the validation or test split would otherwise reach the generators.
bad_images = []

for split_df in (train_df, val_df, test_df):
    for fname in split_df['filename']:
        fpath = os.path.join(directory, fname)
        try:
            # The context manager closes the file handle even if verify() raises.
            with Image.open(fpath) as img:
                img.verify()
        except Exception as e:
            # Any failure, whatever its type, marks the file as unusable.
            bad_images.append((fname, str(e)))

# Drop the unreadable files from the validation and test splits here.
# train_df is filtered against bad_images in the following cell.
unreadable = {fname for fname, _ in bad_images}
val_df = val_df[~val_df['filename'].isin(unreadable)]
test_df = test_df[~test_df['filename'].isin(unreadable)]

bad_images[:10], len(bad_images)

In [None]:

# Keep only the filename from each (filename, error) pair recorded above.
bad_fnames = [entry[0] for entry in bad_images]

# Remove every training row whose file was flagged as bad.
is_bad_row = train_df['filename'].isin(bad_fnames)
train_df = train_df[~is_bad_row]

train_df.head()


In [None]:
from PIL import Image

# Preview grid: one row per class, `samples_per_class` random training images
# per row, each resized to 224x224 for display.
samples_per_class = 5
classes = train_df['label'].unique()
n_rows = len(classes)

plt.figure(figsize=(12, 10))

for row_idx, cls in enumerate(classes):
    picked = train_df.loc[train_df['label'] == cls].sample(samples_per_class)
    first_slot = row_idx * samples_per_class + 1

    for slot, rel_path in enumerate(picked['filename'], start=first_slot):
        thumb = Image.open(os.path.join(directory, rel_path)).resize((224, 224))

        plt.subplot(n_rows, samples_per_class, slot)
        plt.imshow(thumb)
        plt.title(f"{cls}")
        plt.axis("off")

plt.tight_layout()
plt.show()


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training generator: multiplies pixel values by 1/255 and applies random
# rotation, shifts, shear, zoom and horizontal flips.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
data_augmentation = ImageDataGenerator(rescale=1./255.0, **augmentation_options)

# Validation/test generator: rescaling only, no augmentation.
gen = ImageDataGenerator(rescale=1./255)

# Arguments shared by all three dataframe iterators.
flow_options = dict(directory=directory, target_size=(224, 224))

train_data = data_augmentation.flow_from_dataframe(
    train_df,
    x_col='filename',
    y_col='label',
    class_mode="binary",
    shuffle=True,
    **flow_options,
)

val_data = gen.flow_from_dataframe(
    val_df,
    x_col="filename",
    y_col="label",
    class_mode="binary",
    shuffle=False,
    **flow_options,
)

# The test iterator yields images only (no labels) in dataframe order.
test_data = gen.flow_from_dataframe(
    test_df,
    x_col='filename',
    y_col=None,
    class_mode=None,
    shuffle=False,
    **flow_options,
)

In [None]:
# Pull one augmented batch from the training generator and show its first
# 15 images in a 3x5 grid, titled with the label decoded from the 0/1 target.
images, labels = next(train_data)

plt.figure(figsize=(12, 12))
for slot in range(15):
    if labels[slot] == 0.0:
        label_name = "cat"
    else:
        label_name = "dog"

    plt.subplot(3, 5, slot + 1)
    plt.imshow(images[slot])
    plt.title(f"Label: {label_name}")
    plt.axis("off")

plt.tight_layout()
plt.show()

### Creating the model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# All three callbacks watch the same quantity.
monitored_metric = "val_loss"

# Stop after 3 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor=monitored_metric,
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to "best_model.keras" only when the monitored value improves.
model_checkpoint = ModelCheckpoint(
    filepath="best_model.keras",
    monitor=monitored_metric,
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 1 epoch without improvement.
lr_scheduler = ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.5,
    patience=1,
    verbose=1,
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout

In [None]:
def _conv_stage(filters):
    """Return one stage: two 3x3 ReLU convs (each + batch norm), pool, dropout."""
    return [
        Conv2D(filters, (3, 3), activation="relu"),
        BatchNormalization(),
        Conv2D(filters, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D(2),
        Dropout(0.25),
    ]


# Three convolutional stages (32, 64, 128 filters) followed by a dense head
# that ends in a single sigmoid unit for the binary cat/dog decision.
model = Sequential([
    Input(shape=(224, 224, 3)),
    *_conv_stage(32),
    *_conv_stage(64),
    *_conv_stage(128),

    Flatten(),
    Dense(256, activation="relu"),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation="sigmoid"),
])

adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

model.summary()

In [None]:
# Train for at most 10 epochs, validating on val_data after each one; the
# callbacks defined earlier handle early stopping, checkpointing and LR decay.
training_callbacks = [early_stopping, model_checkpoint, lr_scheduler]

history = model.fit(
    train_data,
    epochs=10,
    steps_per_epoch=len(train_data),
    validation_data=val_data,
    validation_steps=len(val_data),
    callbacks=training_callbacks,
)

### Plot the loss and accuracy

In [None]:
def plot_loss_curves(history):
    """Plot training and validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a dict holding the
            per-epoch lists ``"accuracy"``, ``"val_accuracy"``, ``"loss"``
            and ``"val_loss"`` (the value returned by ``model.fit``).
    """
    acc = history.history["accuracy"]
    val_acc = history.history["val_accuracy"]

    loss = history.history["loss"]
    val_loss = history.history["val_loss"]

    # Zero-based epoch index shared by both panels.
    epochs = range(len(loss))

    plt.figure(figsize=(15, 10))

    # Plot accuracy
    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, label="Training accuracy")
    plt.plot(epochs, val_acc, label="Val accuracy")
    plt.xlabel("Epochs")
    plt.title("Accuracy")
    plt.legend()

    # Plot loss
    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, label="Training Loss")
    plt.plot(epochs, val_loss, label="Val Loss")
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.legend()

    # The original referenced `plt.show` without calling it, which is a no-op.
    plt.show()

In [None]:
# Draw the accuracy and loss curves recorded in `history` by model.fit.
plot_loss_curves(history)

### Load the saved model and make a prediction using the test dataset

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Reload the model from "best_model.keras", the path given to ModelCheckpoint.
loaded_model = load_model("best_model.keras")

In [None]:
# Score the reloaded checkpoint on the validation generator; the result
# unpacks into the loss and the accuracy metric.
model_loss, model_acc = loaded_model.evaluate(val_data)

# The original message said "Model 2", but this notebook trains a single
# model and the score is computed on validation data, so label it as such.
print(f"Validation accuracy (best checkpoint): {model_acc*100:.2f}%")

In [None]:
# Show the first seven rows of the test split.
test_df.head(7)

In [None]:
# Run the reloaded model over the test generator, which was created with
# shuffle=False and without labels.
model_predictions = loaded_model.predict(test_data)

In [None]:
# Invert the generator's {class name: integer id} mapping so integer
# predictions can be turned back into class names.
class_indices = train_data.class_indices

idx_to_class = {v: k for k, v in class_indices.items()}

# A sigmoid output above 0.5 is read as class 1, anything else as class 0.
predicted_class_indices = (model_predictions > 0.5).astype(int).ravel()

predicted_labels = [idx_to_class[idx] for idx in predicted_class_indices]

# Align predictions with rows by filename rather than by position:
# flow_from_dataframe drops rows whose file it rejects, so there can be fewer
# predictions than rows in test_df, and a positional assignment would fail.
label_by_file = dict(zip(test_data.filenames, predicted_labels))

# Keep only the rows that were actually scored. Work on a copy so the new
# column is not written into a slice of the original dataframe.
test_df = test_df[test_df["filename"].isin(list(label_by_file))].copy()
test_df["Predicted labels"] = test_df["filename"].map(label_by_file)

test_df.head()

In [None]:
# Show up to 20 random test images in a 4x5 grid; the title is green when the
# predicted label matches the true label and red otherwise.
fig, ax = plt.subplots(nrows=4, ncols=5, figsize=(15, 12))
ax = ax.ravel()

print("Visualizing some predictions...")

# Draw the rows with DataFrame.sample: the original used `random.randint`,
# but `random` is never imported in this notebook. Sampling without
# replacement also avoids duplicates and copes with fewer than 20 rows.
shown = test_df.sample(n=min(len(ax), len(test_df)))

for panel, (_, row) in zip(ax, shown.iterrows()):
    filename = row["filename"]
    predicted_label = row["Predicted labels"]
    true_label = row["label"]

    img = plt.imread(os.path.join(directory, filename))
    panel.imshow(img)
    title_color = "green" if predicted_label == true_label else "red"
    panel.set_title(f"Predicted: {predicted_label}", c=title_color)
    panel.axis("off")

# Hide any panels left empty when fewer rows than panels were available.
for panel in ax[len(shown):]:
    panel.axis("off")

plt.tight_layout()
plt.show()