In [49]:
import os, shutil, pathlib

original_dir = pathlib.Path("../../local/dogs_vs_cats/train/train")
new_base_dir = pathlib.Path("./cats_vs_dogs_small")


In [50]:
def make_subset(subset_name, start_index, end_index):
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        os.makedirs(dir, exist_ok=True)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            src = original_dir / fname
            dst = dir / fname
            if src.exists():  # Check if the source file exists
                shutil.copy(src, dst)
            else:
                print(f"File not found: {src}")

In [51]:
make_subset("train", start_index=0, end_index=1000)

File not found: ..\..\local\dogs_vs_cats\train\train\cat.0.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.2.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.3.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.4.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.5.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.6.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.7.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.8.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.9.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.10.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.11.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.12.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.13.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.14.jpg
File not found: ..\..\local\dogs_vs_cats\train\tra

In [52]:
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)

File not found: ..\..\local\dogs_vs_cats\train\train\cat.1000.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1001.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1002.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1003.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1004.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1005.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1006.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1007.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1008.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1009.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1010.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1011.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1012.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1013.jpg
File not found: ..\..\local\dogs_vs_cats\train\train\cat.1014.jpg
File not f

In [53]:
import keras 
from keras import layers

inputs = keras.Input(shape=(180, 180, 3))
x = layers.Rescaling(1./255)(inputs)
x = layers.Conv2D(32, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Flatten()(x)

model = keras.Model(inputs=inputs, outputs=x)

In [54]:
model.summary()

In [55]:
from keras.utils import image_dataset_from_directory

train_dataset = image_dataset_from_directory(
    new_base_dir / "train",
    image_size=(180, 180),
    batch_size=32
)

validation_dataset = image_dataset_from_directory(
    new_base_dir / "validation",
    image_size = (180, 180),
    batch_size=32
)

test_dataset = image_dataset_from_directory(
    new_base_dir / "test",
    image_size=(180, 180),
    batch_size=32
)

Found 0 files belonging to 2 classes.


ValueError: No images found in directory cats_vs_dogs_small\train. Allowed formats: ('.bmp', '.gif', '.jpeg', '.jpg', '.png')

In [None]:
for data_batch, labels_batch in train_dataset:
    print(data_batch.shape, labels_batch.shape)
    break

In [None]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convet_from_scratch.keras",
        save_best_only=True,
        monitor="val_loss"
    )
]

In [None]:
model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

In [None]:
history = model.fit(train_dataset, epochs=30, validation_data=validation_dataset, callbacks=callbacks)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
def plot_metrics(metrics):
    _, ax = plt.subplots(1, 2, figsize=(10, 5))
    metrics[['loss', 'val_loss']].plot(ax=ax[0], title='Loss', grid=True)
    metrics[['accuracy', 'val_accuracy']].plot(ax=ax[1], title='Accuracy', grid=True)

In [None]:
plot_metrics(pd.DataFrame(history.history))

In [None]:
test_model = keras.models.load_model("convet_from_scratch.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ]
)

In [None]:
plt.figure(figsize=(10, 10))
for images, _ in train_dataset.take(1):
    for i in range(9):
        augmented_images = data_augmentation(images)
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(augmented_images[0].numpy())
        plt.axis("off")

In [None]:
input = keras.Input(shape=(180, 180, 3))
x = data_augmentation(input)
x = layers.Rescaling(1./255)(x)
x = layers.Conv2D(32, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs=input, outputs=outputs)
model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

In [None]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_from_scratch_with_augmentation.keras",
        save_best_only=True,
        monitor="val_loss"
    )
]

In [None]:
history = model.fit(train_dataset, epochs=80, validation_data=validation_dataset, callbacks=callbacks)

In [None]:
plot_metrics(pd.DataFrame(history.history))

In [None]:
test_model = keras.models.load_model("convnet_from_scratch_with_augmentation.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(test_loss, test_acc)

In [None]:
conv_base = keras.applications.VGG16(
    weights="imagenet",
    include_top=False,
)

In [None]:
conv_base.summary()

In [None]:
import numpy as np

def get_features_and_labels(dataset):
    all_features = []
    all_labels = []
    for image, labels in dataset:
        preprocessed_images = keras.applications.vgg16.preprocess_input(image)
        features = conv_base.predict(preprocessed_images)
        all_features.append(features)
        all_labels.append(labels)
    return np.concatenate(all_features), np.concatenate(all_labels)

In [None]:
train_features, train_labels = get_features_and_labels(train_dataset)
val_features, val_labels = get_features_and_labels(validation_dataset)
test_features, test_labels = get_features_and_labels(test_dataset)

In [None]:
inputs = keras.Input(shape=(5, 5, 512))
x = layers.Flatten()(inputs)
x = layers.Dense(256)(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs, outputs)
model.compile(loss="binary_crossentropy", 
              optimizer="rmsprop", 
              metrics=["accuracy"])
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="feature_extraction.keras",
        save_best_only=True,
        monitor="val_loss"
    )
]

In [None]:
history = model.fit(train_dataset, epochs=20, validation_data=[val_features, val_labels], callbacks=callbacks)

In [None]:
plot_metrics(pd.DataFrame(history.history))

In [None]:
test_loss, test_acc = model.evaluate(test_features, test_labels)
print(test_loss, test_acc)