# Explore here

In [2]:
!pip install kagglehub
import kagglehub

path = kagglehub.dataset_download("salader/dogs-vs-cats")

print("Path to dataset files:", path)

Path to dataset files: /home/vscode/.cache/kagglehub/datasets/salader/dogs-vs-cats/versions/1


In [3]:
import os
import shutil
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

2025-07-20 21:08:12.093767: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-20 21:08:12.105360: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-20 21:08:12.291379: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-20 21:08:12.412597: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753045692.478315    9653 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753045692.49

# --- Configuration ---

In [4]:
IMAGE_WIDTH, IMAGE_HEIGHT = 200, 200
BATCH_SIZE = 32
EPOCHS = 50

# Define directories relative to the current working directory
BASE_DATA_DIR = 'dogs_vs_cats_dataset' # This is in your project root, as seen in screenshot
TRAIN_DIR = os.path.join(BASE_DATA_DIR, 'train')
VALIDATION_DIR = os.path.join(BASE_DATA_DIR, 'validation')

MODEL_SAVE_PATH = 'trained_models/dogs_vs_cats_vgg_like.h5'
CHECKPOINT_FILEPATH = 'trained_models/best_model_checkpoint.h5'

# --- Step 1: Loading the dataset using kagglehub ---

In [None]:
try:
    download_path = kagglehub.dataset_download("salader/dogs-vs-cats")
    print(f"Dataset downloaded to: {download_path}")

    print(f"Contents of downloaded path ({download_path}):")
    for item in os.listdir(download_path):
        print(f" - {item}")

    potential_source_dirs = [
        os.path.join(download_path, 'dogs_vs_cats', 'train'), # Path identified from your last output
        os.path.join(download_path, 'train'),
        download_path
    ]

    source_images_dir = None
    for p_dir in potential_source_dirs:
        if os.path.isdir(p_dir):
            if any(f.lower().endswith(('.jpg', '.jpeg', '.png')) for f in os.listdir(p_dir)): # Check for more image types
                source_images_dir = p_dir
                break
            else:
                print(f"DEBUG: Directory '{p_dir}' exists but contains no common image files.")

    if source_images_dir is None:
        print("ERROR: Could not find any common image files in expected locations within the downloaded dataset.")
        print("Please manually inspect the contents of:")
        print(f"  {download_path}")
        print("To locate the actual directory containing the dog/cat images (e.g., 'train').")
        exit()
    else:
        print(f"Identified source images directory: {source_images_dir}")

except Exception as e:
    print(f"Error downloading dataset: {e}")
    print("Please ensure 'kagglehub' is installed and your Kaggle API credentials are correctly set up (kaggle.json in ~/.kaggle/).")
    exit()

# --- Prepare Directory Structure for ImageDataGenerator ---
print("\n--- Preparing directory structure for ImageDataGenerator ---")
# Create base directories in your project's dogs_vs_cats_dataset folder
os.makedirs(os.path.join(TRAIN_DIR, 'dogs'), exist_ok=True)
os.makedirs(os.path.join(TRAIN_DIR, 'cats'), exist_ok=True)
os.makedirs(os.path.join(VALIDATION_DIR, 'dogs'), exist_ok=True)
os.makedirs(os.path.join(VALIDATION_DIR, 'cats'), exist_ok=True)

# List all image files from the identified source directory
all_image_files = [f for f in os.listdir(source_images_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
random.shuffle(all_image_files)

print(f"DEBUG: Total image files found in '{source_images_dir}' before splitting: {len(all_image_files)}")

if not all_image_files:
    print("CRITICAL ERROR: No image files found in the source directory after download. Cannot proceed with data splitting.")
    exit() # Exit early if no files are found

train_split_ratio = 0.8
num_train_images = int(len(all_image_files) * train_split_ratio)

print(f"Splitting {len(all_image_files)} images: {num_train_images} for training, {len(all_image_files) - num_train_images} for validation.")

copied_train_dogs = 0
copied_train_cats = 0
copied_val_dogs = 0
copied_val_cats = 0
failed_copies = 0

for i, img_name in enumerate(all_image_files):
    src_full_path = os.path.join(source_images_dir, img_name)
    dst_full_path = None

    # Determine destination based on filename and split
    if 'dog' in img_name.lower():
        if i < num_train_images:
            dst_full_path = os.path.join(TRAIN_DIR, 'dogs', img_name)
            copied_train_dogs += 1
        else:
            dst_full_path = os.path.join(VALIDATION_DIR, 'dogs', img_name)
            copied_val_dogs += 1
    elif 'cat' in img_name.lower():
        if i < num_train_images:
            dst_full_path = os.path.join(TRAIN_DIR, 'cats', img_name)
            copied_train_cats += 1
        else:
            dst_full_path = os.path.join(VALIDATION_DIR, 'cats', img_name)
            copied_val_cats += 1
    else:
        print(f"WARNING: Skipping '{img_name}' (neither 'dog' nor 'cat' in filename).")
        failed_copies += 1
        continue # Skip to next file

    if dst_full_path:
        try:
            shutil.copy(src_full_path, dst_full_path)
        except Exception as e:
            print(f"ERROR: Failed to copy '{src_full_path}' to '{dst_full_path}': {e}")
            failed_copies += 1 # Count as failed copy

print(f"\n--- Data Copying Summary ---")
print(f"  Training Dogs Copied: {copied_train_dogs}")
print(f"  Training Cats Copied: {copied_train_cats}")
print(f"  Validation Dogs Copied: {copied_val_dogs}")
print(f"  Validation Cats Copied: {copied_val_cats}")
print(f"  Total Successfully Copied: {copied_train_dogs + copied_train_cats + copied_val_dogs + copied_val_cats}")
print(f"  Total Files in Source: {len(all_image_files)}")
print(f"  Files Skipped/Failed to Copy: {failed_copies}")
print("Dataset structuring attempt finished.")

Dataset downloaded to: /home/vscode/.cache/kagglehub/datasets/salader/dogs-vs-cats/versions/1
Contents of downloaded path (/home/vscode/.cache/kagglehub/datasets/salader/dogs-vs-cats/versions/1):
 - train
 - dogs_vs_cats
 - test
DEBUG: Directory '/home/vscode/.cache/kagglehub/datasets/salader/dogs-vs-cats/versions/1/dogs_vs_cats/train' exists but contains no common image files.
DEBUG: Directory '/home/vscode/.cache/kagglehub/datasets/salader/dogs-vs-cats/versions/1/train' exists but contains no common image files.
DEBUG: Directory '/home/vscode/.cache/kagglehub/datasets/salader/dogs-vs-cats/versions/1' exists but contains no common image files.
ERROR: Could not find any common image files in expected locations within the downloaded dataset.
Please manually inspect the contents of:
  /home/vscode/.cache/kagglehub/datasets/salader/dogs-vs-cats/versions/1
To locate the actual directory containing the dog/cat images (e.g., 'train').

--- Preparing directory structure for ImageDataGenerator

: 

# --- Step 2: Visualize the input information ---

In [8]:
def plot_sample_images(directory, class_name, num_images=9):
    """Plots a grid of sample images from a specified directory and class."""
    print(f"Displaying {num_images} sample {class_name} images from {directory}...")
    class_path = os.path.join(directory, class_name)
    image_files = [f for f in os.listdir(class_path) if f.endswith('.jpg')]
    random.shuffle(image_files) # Shuffle to get different samples each time

    plt.figure(figsize=(8, 8))
    plt.suptitle(f"Sample {class_name.capitalize()} Images", fontsize=16)
    for i in range(min(num_images, len(image_files))):
        img_path = os.path.join(class_path, image_files[i])
        img = mpimg.imread(img_path)
        plt.subplot(3, 3, i + 1)
        plt.imshow(img)
        plt.axis('off')
    plt.show()

plot_sample_images(TRAIN_DIR, 'dogs')
plot_sample_images(TRAIN_DIR, 'cats')

# --- Create ImageDataGenerator objects ---
print("\n--- Setting up ImageDataGenerator ---")

# Data Augmentation for training data
train_datagen = ImageDataGenerator(
    rescale=1./255,             # Normalize pixel values to [0, 1]
    rotation_range=20,          # Randomly rotate images by 20 degrees
    width_shift_range=0.2,      # Randomly shift images horizontally
    height_shift_range=0.2,     # Randomly shift images vertically
    shear_range=0.2,            # Apply shear transformations
    zoom_range=0.2,             # Apply random zoom
    horizontal_flip=True,       # Randomly flip images horizontally
    fill_mode='nearest'         # Fill in newly created pixels
)

# Only rescaling for validation data (no augmentation)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Flow images from directories
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='binary', # For 2 classes (dog/cat)
    shuffle=True # Shuffle training data
)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False # Do not shuffle validation data for consistent evaluation
)

print(f"Class indices: {train_generator.class_indices}") # {'cats': 0, 'dogs': 1} or vice-versa


Displaying 9 sample dogs images from dogs_vs_cats_dataset/train...


<Figure size 800x800 with 0 Axes>

Displaying 9 sample cats images from dogs_vs_cats_dataset/train...


<Figure size 800x800 with 0 Axes>


--- Setting up ImageDataGenerator ---
Found 0 images belonging to 2 classes.
Found 0 images belonging to 2 classes.
Class indices: {'cats': 0, 'dogs': 1}
