In [1]:
!pip install -q tensorflow tensorflow-datasets

import tensorflow as tf
import tensorflow_datasets as tfds

AUTOTUNE = tf.data.AUTOTUNE
print("TensorFlow version:", tf.__version__)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m620.7/620.7 MB[0m [31m729.5 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.5/24.5 MB[0m [31m79.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m109.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m98.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.5/72.5 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h



TensorFlow version: 2.20.0


In [2]:
ds_raw, ds_info = tfds.load(
    "cats_vs_dogs",
    split="train",
    with_info=True,
    as_supervised=True
)

print("Total images:", ds_info.splits["train"].num_examples)
print("Labels:", ds_info.features["label"].names)  # 0: cat, 1: dog




Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/cats_vs_dogs/4.0.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]



Shuffling /root/tensorflow_datasets/cats_vs_dogs/incomplete.QCR3WW_4.0.1/cats_vs_dogs-train.tfrecord*...:   0%…

Dataset cats_vs_dogs downloaded and prepared to /root/tensorflow_datasets/cats_vs_dogs/4.0.1. Subsequent calls will reuse this data.
Total images: 23262
Labels: ['cat', 'dog']


In [3]:
IMG_SIZE = 160  # you can reduce to 128 if you want even lighter

def preprocess(image, label):
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

ds_preprocessed = ds_raw.map(preprocess, num_parallel_calls=AUTOTUNE)


In [4]:
NUM_PER_CLASS = 250  # 250 cats + 250 dogs

# Split into cats and dogs
ds_cats = ds_preprocessed.filter(lambda img, label: tf.equal(label, 0))
ds_dogs = ds_preprocessed.filter(lambda img, label: tf.equal(label, 1))

# Shuffle and take 250 from each
ds_cats_small = ds_cats.shuffle(10000, seed=42, reshuffle_each_iteration=False).take(NUM_PER_CLASS)
ds_dogs_small = ds_dogs.shuffle(10000, seed=43, reshuffle_each_iteration=False).take(NUM_PER_CLASS)

# Combine and shuffle total 500 samples
ds_small = ds_cats_small.concatenate(ds_dogs_small)
ds_small = ds_small.shuffle(NUM_PER_CLASS * 2, seed=123, reshuffle_each_iteration=False)

print("Cardinality:", tf.data.experimental.cardinality(ds_small).numpy())  # should be 500


Cardinality: -2


In [5]:
def split_dataset(ds, train_frac=0.7, val_frac=0.15):
    n = tf.data.experimental.cardinality(ds).numpy()
    n_train = int(n * train_frac)
    n_val = int(n * val_frac)

    ds_train = ds.take(n_train)
    ds_val   = ds.skip(n_train).take(n_val)
    ds_test  = ds.skip(n_train + n_val)

    return ds_train, ds_val, ds_test

train_ds, val_ds, test_ds = split_dataset(ds_small)

print("Train size:", tf.data.experimental.cardinality(train_ds).numpy())
print("Val size:", tf.data.experimental.cardinality(val_ds).numpy())
print("Test size:", tf.data.experimental.cardinality(test_ds).numpy())


Train size: -2
Val size: -2
Test size: -2


In [6]:
BATCH_SIZE = 32

def prepare_for_training(ds, training=True):
    if training:
        ds = ds.shuffle(1000)
    ds = ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
    return ds

train_ds = prepare_for_training(train_ds, training=True)
val_ds   = prepare_for_training(val_ds,   training=False)
test_ds  = prepare_for_training(test_ds,  training=False)


In [7]:
def build_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=(IMG_SIZE, IMG_SIZE, 3)),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Conv2D(128, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1, activation="sigmoid")  # binary classification
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model

model = build_model()
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
EPOCHS = 15  # small dataset -> more epochs is okay

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS
)


Epoch 1/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 587ms/step - accuracy: 0.5155 - loss: 0.7019
Epoch 2/15




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 561ms/step - accuracy: 0.4869 - loss: 0.7037
Epoch 3/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 560ms/step - accuracy: 0.6055 - loss: 0.6813
Epoch 4/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 560ms/step - accuracy: 0.6257 - loss: 0.6727
Epoch 5/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 569ms/step - accuracy: 0.6260 - loss: 0.6625
Epoch 6/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 567ms/step - accuracy: 0.6910 - loss: 0.6469
Epoch 7/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 572ms/step - accuracy: 0.6816 - loss: 0.6255
Epoch 8/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 560ms/step - accuracy: 0.6686 - loss: 0.6003
Epoch 9/15
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 552ms/step - accuracy: 0.7212 - loss: 0.5751
Epoch 10/15
[1m16/16[0m [32m━━━━━━━━━━━━

In [12]:
print("Small dataset total:", tf.data.experimental.cardinality(ds_small).numpy())
print("Train:", tf.data.experimental.cardinality(train_ds).numpy())
print("Val:", tf.data.experimental.cardinality(val_ds).numpy())
print("Test:", tf.data.experimental.cardinality(test_ds).numpy())


Small dataset total: -2
Train: -2
Val: -2
Test: -2


In [13]:
import numpy as np

images = []
labels = []

for img, label in ds_small:  # ds_small from Step 4 (500 samples)
    images.append(img.numpy())
    labels.append(label.numpy())

X = np.stack(images)        # shape: (500, IMG_SIZE, IMG_SIZE, 3)
y = np.array(labels)        # shape: (500,)
print("X shape:", X.shape)
print("y shape:", y.shape)
print("Class counts:", np.bincount(y))


X shape: (500, 160, 160, 3)
y shape: (500,)
Class counts: [250 250]


In [14]:
num_samples = X.shape[0]  # 500

indices = np.arange(num_samples)
np.random.seed(42)
np.random.shuffle(indices)

train_end = int(0.7 * num_samples)   # 350
val_end   = int(0.85 * num_samples)  # 425

train_idx = indices[:train_end]
val_idx   = indices[train_end:val_end]
test_idx  = indices[val_end:]

X_train, y_train = X[train_idx], y[train_idx]
X_val,   y_val   = X[val_idx],   y[val_idx]
X_test,  y_test  = X[test_idx],  y[test_idx]

print("Train:", X_train.shape, np.bincount(y_train))
print("Val:  ", X_val.shape,   np.bincount(y_val))
print("Test: ", X_test.shape,  np.bincount(y_test))


Train: (350, 160, 160, 3) [172 178]
Val:   (75, 160, 160, 3) [41 34]
Test:  (75, 160, 160, 3) [37 38]


In [15]:
BATCH_SIZE = 32

def make_dataset(X, y, training=True):
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if training:
        ds = ds.shuffle(len(X))
    ds = ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
    return ds

train_ds = make_dataset(X_train, y_train, training=True)
val_ds   = make_dataset(X_val,   y_val,   training=False)
test_ds  = make_dataset(X_test,  y_test,  training=False)

print("Batches - train:", tf.data.experimental.cardinality(train_ds).numpy())
print("Batches - val:", tf.data.experimental.cardinality(val_ds).numpy())
print("Batches - test:", tf.data.experimental.cardinality(test_ds).numpy())


Batches - train: 11
Batches - val: 3
Batches - test: 3


In [16]:
model = build_model()   # from earlier
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15
)


Epoch 1/15
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 620ms/step - accuracy: 0.4946 - loss: 0.7074 - val_accuracy: 0.5467 - val_loss: 0.6892
Epoch 2/15
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 609ms/step - accuracy: 0.4972 - loss: 0.7009 - val_accuracy: 0.4533 - val_loss: 0.6954
Epoch 3/15
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 609ms/step - accuracy: 0.5761 - loss: 0.6783 - val_accuracy: 0.5333 - val_loss: 0.6892
Epoch 4/15
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 618ms/step - accuracy: 0.6423 - loss: 0.6702 - val_accuracy: 0.5333 - val_loss: 0.6864
Epoch 5/15
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 602ms/step - accuracy: 0.6007 - loss: 0.6621 - val_accuracy: 0.4533 - val_loss: 0.7164
Epoch 6/15
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 608ms/step - accuracy: 0.5932 - loss: 0.6584 - val_accuracy: 0.5733 - val_loss: 0.6744
Epoch 7/15
[1m11/11[0m [3

In [17]:
test_loss, test_acc = model.evaluate(test_ds)
print("Test accuracy with 250 cats + 250 dogs:", test_acc)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - accuracy: 0.5941 - loss: 0.7052
Test accuracy with 250 cats + 250 dogs: 0.5866666436195374
