In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator # This Keras utility is fantastic for loading images from directories and performing on-the-fly data augmentation.
from tensorflow.keras.applications import VGG16 # We'll use VGG16 as an example pre-trained model
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
IMAGE_SIZE = (150, 150)   # Every image is resized to 150x150 pixels when it is loaded
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2    # Fraction of the images under train_dir held out for validation

# Training data: rescaling plus random augmentation, applied on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,               # Normalize pixel values to [0, 1]
    rotation_range=40,            # Rotate images up to 40 degrees
    width_shift_range=0.2,        # Shift width by 20%
    height_shift_range=0.2,       # Shift height by 20%
    shear_range=0.2,              # Apply shearing transformations
    zoom_range=0.2,               # Apply zoom transformations
    horizontal_flip=True,         # Flip images horizontally
    fill_mode='nearest',          # Fill newly created pixels after rotation/shift
    validation_split=VALIDATION_SPLIT
)

# Validation data: rescaling only. Augmenting the held-out images would make the
# validation metrics noisy and pessimistic, so they get their own generator.
# It must use the same validation_split as train_datagen so that the 'training'
# and 'validation' subsets stay complementary.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

# Test data: rescaling only.
test_datagen = ImageDataGenerator(rescale=1./255)

train_dir = '../data/train'

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',           # 'binary' for 2 classes (dog/cat)
    subset='training'              # The augmented training portion of the split
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'            # The un-augmented held-out portion of the split
)

# Note: For real-world use, you'd have a separate 'test' directory and a test_generator.
# For this Kaggle dataset, the test images are unlabeled, so we focus on train/validation.

print('classes: {}'.format(train_generator.class_indices)) # Should show {'cats': 0, 'dogs': 1} or vice-versa

Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
classes: {'cats': 0, 'dogs': 1}


In [3]:
# Build the VGG16 convolutional base with ImageNet weights.
#   include_top=False -> leave out VGG16's own classifier head
#   input_shape       -> (height, width, channels); IMAGE_SIZE plus 3 colour channels
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=IMAGE_SIZE + (3,),
)

# Freeze the base so its pre-trained weights are not updated during training.
conv_base.trainable = False

conv_base.summary()


In [4]:
# Stack a small binary classifier on top of the frozen VGG16 base.
model = models.Sequential()
model.add(conv_base)                             # Frozen VGG16 convolutional base
model.add(layers.Flatten())                      # Collapse the feature maps into a 1D vector
model.add(layers.Dense(256, activation='relu'))  # Fully connected hidden layer
model.add(layers.Dropout(0.5))                   # Dropout to limit overfitting
model.add(layers.Dense(1, activation='sigmoid')) # Single sigmoid unit for dog/cat

model.summary()

In [5]:
# conv_base is frozen, so this run trains only the new classifier head
# (feature extraction, not fine-tuning). The learning rate is deliberately small.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='binary_crossentropy', # Appropriate loss for binary classification
              metrics=['accuracy'])

# Train the model.
# steps_per_epoch / validation_steps are intentionally not passed: the directory
# iterators report their own length (number of batches, including the final
# partial one), so Keras walks each subset exactly once per epoch. Using
# `samples // BATCH_SIZE` instead would skip the last partial batch whenever the
# sample count is not a multiple of BATCH_SIZE.
history = model.fit(
    train_generator,
    epochs=10, # Number of times to iterate over the dataset
    validation_data=validation_generator
)

  self._warn_if_super_not_called()


Epoch 1/10
328/625 ━━━━━━━━━━━━━━━━━━━━ 16:43 3s/step - accuracy: 0.5734 - loss: 0.7207


KeyboardInterrupt



In [None]:
# Draw the training curves side by side: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Each entry: (axes, train key, validation key, train label, validation label, title, y-label)
panels = [
    (acc_ax, 'accuracy', 'val_accuracy',
     'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss',
     'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
]

for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(loc='upper left')

plt.show()

In [None]:
# TEST EVALUATION

# Path to your 'test' folder relative to the notebook.
# flow_from_directory with class_mode='binary' takes labels from sub-folder names,
# so this directory needs one sub-folder per class (e.g. test/cats, test/dogs).
test_dir = '../data/test'

# Create an ImageDataGenerator for test data (only rescaling, no augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load images from the test directory
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE, # Use the same image size as training
    batch_size=BATCH_SIZE,  # Use the same batch size
    class_mode='binary',
    shuffle=False           # Do NOT shuffle test data
)

# Fail early with a clear message instead of evaluating on nothing.
if test_generator.samples == 0:
    raise ValueError(
        f"No labeled images found under {test_dir!r}; "
        "expected one sub-folder per class."
    )

print("\n--- Evaluating on Test Data ---")
# Use model.evaluate to get loss and accuracy on the test set.
# `steps` is not passed, so every batch is evaluated, including the final partial
# one. With shuffle=False, `samples // BATCH_SIZE` would drop images from the end
# of the last class only and bias the reported metrics.
test_loss, test_acc = model.evaluate(test_generator)

print(f'\nFinal Test Loss: {test_loss:.4f}')
print(f'Final Test Accuracy: {test_acc:.4f}')