In [1]:
!kaggle datasets download "tongpython/cat-and-dog"

Downloading cat-and-dog.zip to /home/nick/portfolio
100%|███████████████████████████████████████▊| 217M/218M [00:18<00:00, 12.4MB/s]
100%|████████████████████████████████████████| 218M/218M [00:18<00:00, 12.1MB/s]


In [2]:
!unzip -qq cat-and-dog.zip

replace test_set/test_set/cats/_DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [3]:
!rm cat-and-dog.zip

In [4]:
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras.utils import image_dataset_from_directory

2022-12-07 18:39:17.217353: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [5]:
TRAIN_DIRECTORY = "training_set/training_set"
TEST_DIRECTORY = "test_set/test_set"

BATCH_SIZE = 64
IMAGE_SHAPE = (256, 256, 3)

In [6]:
train_dataset = image_dataset_from_directory(TRAIN_DIRECTORY, validation_split=0.2, subset="training", seed=42, image_size=IMAGE_SHAPE[:-1], batch_size=BATCH_SIZE)
validation_dataset = image_dataset_from_directory(TRAIN_DIRECTORY, validation_split=0.2, subset="validation", seed=42, image_size=IMAGE_SHAPE[:-1], batch_size=BATCH_SIZE)
test_dataset = image_dataset_from_directory(TEST_DIRECTORY, image_size=IMAGE_SHAPE[:-1], batch_size=BATCH_SIZE)

Found 8005 files belonging to 2 classes.
Using 6404 files for training.


2022-12-07 18:39:18.572530: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-07 18:39:18.601462: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-07 18:39:18.601610: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-07 18:39:18.602285: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compi

Found 8005 files belonging to 2 classes.
Using 1601 files for validation.
Found 2023 files belonging to 2 classes.


In [7]:
AUTOTUNE = tf.data.AUTOTUNE

In [8]:
train_dataset = train_dataset.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.cache().prefetch(buffer_size=AUTOTUNE)

In [9]:
vgg19 = tf.keras.applications.vgg19.VGG19(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=(256, 256, 3),
    pooling=None,
    classes=1,
)

In [10]:
def build_model(pretrained_model):  
    x = layers.Flatten()(pretrained_model.output)
    x = layers.Dense(1024, activation="relu")(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(512, activation="relu")(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(1)(x)

    model = tf.keras.models.Model(pretrained_model.input, x)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-5), 
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), 
        metrics=["accuracy"]
    )

    return model

In [11]:
model = build_model(vgg19)

In [12]:
es = tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)

In [13]:
model.fit(train_dataset, validation_data=validation_dataset, epochs=1000, callbacks=[es])

Epoch 1/1000


2022-12-07 18:39:21.803827: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8600
2022-12-07 18:39:22.251092: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-07 18:39:24.570951: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000


<keras.callbacks.History at 0x7f9fd7f2dc90>

In [14]:
f"{round(model.evaluate(test_dataset, verbose=0)[1], 4) * 100}% accuracy on test set" 

'97.87% accuracy on test set'