<a href="https://colab.research.google.com/github/Rudrakshbhardwaj01/Deep-Learning/blob/main/TransferLearningFeatureExtraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("lucassj/dogs-vs-cats-train-validadion-and-evaluation")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/lucassj/dogs-vs-cats-train-validadion-and-evaluation?dataset_version_number=1...


100%|██████████| 1.07G/1.07G [00:16<00:00, 70.3MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/lucassj/dogs-vs-cats-train-validadion-and-evaluation/versions/1


In [3]:
import tensorflow
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense,Flatten
from keras.applications.vgg16 import VGG16 # importing the VGG16 CNN

In [4]:
conv_base = VGG16(
    weights='imagenet',
    include_top = False, # keep only the convolutional base and remove the top part (i.e. the Dense layers)
    input_shape=(128,128,3)
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [5]:
conv_base.summary()

In [6]:
# our own ANN
model = Sequential()
model.add(conv_base)
model.add(Flatten())
model.add(Dense(256,activation='relu'))
model.add(Dense(1,activation='sigmoid'))

In [7]:
model.summary()

In [8]:
conv_base.trainable = False # freezing the convolutional base's weights

In [9]:
model.summary()

In [10]:
# ==========================================================
# 🧠 IMAGE DATA GENERATORS — COMPLETE NOTES
# ==========================================================
# 🔹 What are Generators?
# Generators in Keras/TensorFlow are tools that load data in small batches
# instead of loading the entire dataset into memory at once.
# This makes them ideal for large image datasets that cannot fit into RAM.

# 🔹 ImageDataGenerator (from keras.preprocessing.image)
# This is a powerful utility class that:
#   - Reads images directly from disk (from folder structures)
#   - Automatically labels them based on folder names
#   - Preprocesses and augments them in real-time during training
#   - Feeds batches of data to the model continuously

# 🔹 Why use Generators?
#   ✅ Memory-efficient → only one batch is loaded at a time
#   ✅ On-the-fly augmentation → improves model generalization
#   ✅ Easy setup → automatically handles labels from folder names
#   ✅ Supports large datasets → no need to load all data into RAM

# 🔹 Common Parameters:
#   - rescale: Normalizes pixel values (e.g., 1./255 converts [0–255] → [0–1])
#   - rotation_range, zoom_range, shear_range, etc.: Apply random transformations
#   - horizontal_flip / vertical_flip: Randomly flip images for augmentation
#   - target_size: Resize images to a fixed shape (e.g., 224×224)
#   - batch_size: Number of samples processed before the model updates weights
#   - class_mode: Defines label format ('binary', 'categorical', or 'sparse')
#   - shuffle: Whether to shuffle data (usually True for training, False for testing)

# 🔹 Types of Generators:
#   1. Training Generator → uses augmentation + normalization
#   2. Validation Generator → only normalization (no augmentation)
#   3. Test Generator → only normalization + no shuffling

# 🔹 Typical Use:
#   - train_generator → used in model.fit()
#   - validation_generator → used in model.fit() for validation_data
#   - test_generator → used in model.evaluate() or model.predict()

# 🔹 Example Workflow:
#   train_generator = train_datagen.flow_from_directory(train_dir, ...)
#   val_generator   = val_datagen.flow_from_directory(val_dir, ...)
#   model.fit(train_generator, validation_data=val_generator, ...)

# 🔹 Folder Structure Expected:
#   dataset/
#     ├── train/
#     │   ├── class_1/
#     │   └── class_2/
#     ├── validation/
#     │   ├── class_1/
#     │   └── class_2/
#     └── test/
#         ├── class_1/
#         └── class_2/

# 🔹 Benefits Summary:
#   - Prevents overfitting (due to augmentation)
#   - Efficient memory use
#   - Automatically handles image loading and labeling
#   - Simplifies data preprocessing pipeline

# ==========================================================


In [11]:
# generators
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("Path to dataset files:", path)

# Expected folder structure (inside path):
# ├── train/
# │   ├── cats/
# │   └── dogs/
# ├── validation/
# │   ├── cats/
# │   └── dogs/
# └── evaluation/
#     ├── cats/
#     └── dogs/

# 2️⃣ Define directories
train_dir = os.path.join(path, "train")
val_dir = os.path.join(path, "validation")
test_dir = os.path.join(path, "evaluation")

# 3️⃣ Create ImageDataGenerators
train_datagen = ImageDataGenerator(
    rescale=1./255,       # normalize pixel values
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

# 4️⃣ Build generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),   # resize images for VGG16 or similar models
    batch_size=32,
    class_mode='binary'       # since it’s a 2-class (dog vs cat) problem
)

validation_generator = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=(128,128),
    batch_size=32,
    class_mode='binary'
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary',
    shuffle=False             # important for evaluation/prediction
)

# 5️⃣ Check class mapping
print("Class indices:", train_generator.class_indices)


Path to dataset files: /root/.cache/kagglehub/datasets/lucassj/dogs-vs-cats-train-validadion-and-evaluation/versions/1
Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Found 200 images belonging to 2 classes.
Class indices: {'cat': 0, 'dog': 1}


In [12]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [13]:
model.fit(train_generator,epochs=10,validation_data=validation_generator)

  self._warn_if_super_not_called()


Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 83ms/step - accuracy: 0.8208 - loss: 0.4463 - val_accuracy: 0.8916 - val_loss: 0.2585
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 80ms/step - accuracy: 0.9072 - loss: 0.2211 - val_accuracy: 0.8920 - val_loss: 0.2476
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 79ms/step - accuracy: 0.9243 - loss: 0.1854 - val_accuracy: 0.8880 - val_loss: 0.2698
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 79ms/step - accuracy: 0.9341 - loss: 0.1632 - val_accuracy: 0.8824 - val_loss: 0.2927
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 79ms/step - accuracy: 0.9438 - loss: 0.1373 - val_accuracy: 0.8998 - val_loss: 0.2554
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 79ms/step - accuracy: 0.9536 - loss: 0.1161 - val_accuracy: 0.8984 - val_loss: 0.2661
Epoch 7/10
[1m6

<keras.src.callbacks.history.History at 0x7f1d123faa20>