# Prepare data

In [None]:
from google.colab import files

# Upload kaggle.json (the Kaggle API token) from the local machine.
# The result is assigned instead of being left as the cell's last expression:
# files.upload() returns a {filename: bytes} dict, and echoing it would write
# the API key into the notebook's saved output.
_uploaded = files.upload()
assert "kaggle.json" in _uploaded, "expected a file named kaggle.json to be uploaded"

In [2]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
# Download the "dogs-vs-cats" competition archive (dogs-vs-cats.zip) into the
# current working directory with the Kaggle CLI.
!kaggle competitions download -c dogs-vs-cats

Downloading dogs-vs-cats.zip to /content
 98% 796M/812M [00:04<00:00, 150MB/s]
100% 812M/812M [00:04<00:00, 176MB/s]


In [4]:
!unzip -qq dogs-vs-cats.zip
!unzip -qq train.zip

In [5]:
import os, shutil, pathlib

original_dir = pathlib.Path("train")
new_base_dir = pathlib.Path("dogs_vs_cats_small")

def make_subset(name: str, start: int, end: int) -> None:
    """Copy a slice of the original images into ``new_base_dir / name``.

    For each category ("dog" and "cat"), the files ``{category}.{i}.jpg`` with
    ``start <= i < end`` are copied from ``original_dir`` into
    ``new_base_dir / name / category``.

    The function can be called repeatedly: existing directories are reused and
    files that are already present are overwritten by ``shutil.copyfile``.

    Args:
        name: Subset directory name, e.g. "train", "validation" or "test".
        start: Index of the first image to copy (inclusive).
        end: Index one past the last image to copy (exclusive).

    Raises:
        FileNotFoundError: If an expected source image is missing.
    """
    for category in ("dog", "cat"):
        # Named target_dir rather than dir to avoid shadowing the builtin dir().
        target_dir = new_base_dir / name / category
        # exist_ok=True keeps the cell re-runnable; without it a second call
        # raises FileExistsError.
        os.makedirs(target_dir, exist_ok=True)

        for i in range(start, end):
            fname = f"{category}.{i}.jpg"
            shutil.copyfile(src=original_dir / fname, dst=target_dir / fname)

In [6]:
# Carve three non-overlapping index ranges out of the original images:
# [0, 1000) for training, [1000, 1500) for validation, [1500, 2500) for testing.
for subset_name, first_idx, last_idx in (
    ("train", 0, 1_000),
    ("validation", 1_000, 1_500),
    ("test", 1_500, 2_500),
):
    make_subset(subset_name, start=first_idx, end=last_idx)

In [7]:
from tensorflow.keras.utils import image_dataset_from_directory

# Every split uses the same loader settings; only the sub-directory differs.
loader_options = dict(image_size=(180, 180), batch_size=32)

train_data = image_dataset_from_directory(new_base_dir / "train", **loader_options)
val_data = image_dataset_from_directory(new_base_dir / "validation", **loader_options)
test_data = image_dataset_from_directory(new_base_dir / "test", **loader_options)

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


# Build model and train classifier

In [8]:
from tensorflow.keras.applications import vgg16

# VGG16 with ImageNet weights and without its top (include_top=False).
conv_base = vgg16.VGG16(include_top=False, weights="imagenet")

# Mark the whole base as non-trainable.
conv_base.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [9]:
# Print the layers, output shapes and parameter counts of the VGG16 base.
conv_base.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0     

In [10]:
# data augmentation
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom
from tensorflow.keras.models import Sequential

# Random augmentations chained in order: horizontal flip, rotation (0.1), zoom (0.2).
augmentation_layers = [
    RandomFlip("horizontal"),
    RandomRotation(0.1),
    RandomZoom(0.2),
]
data_aug = Sequential(augmentation_layers)

In [11]:
# stacking model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout

# Pipeline: augmentation -> VGG16 input preprocessing -> VGG16 features
# -> flatten -> dense layer with dropout -> single sigmoid output.
inputs = Input(shape=(180, 180, 3))
augmented = data_aug(inputs)  # data augmentation
scaled = vgg16.preprocess_input(augmented)  # VGG16 value scaling
features = conv_base(scaled)  # VGG16 features

head = Flatten()(features)
head = Dense(256, activation="relu")(head)
head = Dropout(0.5)(head)
outputs = Dense(1, activation="sigmoid")(head)

model = Model(inputs=inputs, outputs=outputs)

model.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [12]:
# First training stage: 50 epochs on the training split, validated on val_data.
model.fit(x=train_data, validation_data=val_data, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fac40eb7b90>

# Fine-tune the model

In [13]:
# Make the base trainable, then freeze every layer except the last four.
conv_base.trainable = True
layers_to_freeze = conv_base.layers[:-4]
for frozen_layer in layers_to_freeze:
    frozen_layer.trainable = False

In [14]:
from tensorflow.keras.callbacks import ModelCheckpoint

MODEL_PATH = "fine-tuned-model.keras"

# Checkpoint to MODEL_PATH, monitoring val_loss and keeping only the best model.
best_checkpoint = ModelCheckpoint(
    filepath=MODEL_PATH,
    monitor="val_loss",
    save_best_only=True,
)
callbacks = [best_checkpoint]

In [17]:
from tensorflow.keras.optimizers import RMSprop

# Recompile with RMSprop at a learning rate of 1e-5.
fine_tune_optimizer = RMSprop(learning_rate=1e-5)
model.compile(
    loss="binary_crossentropy",
    metrics=["accuracy"],
    optimizer=fine_tune_optimizer,
)

In [19]:
# Second training stage: 30 epochs with the callbacks list attached.
model.fit(
    x=train_data,
    validation_data=val_data,
    epochs=30,
    callbacks=callbacks,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7faba45c0650>

In [20]:
from tensorflow.keras.models import load_model

# Reload the model saved at MODEL_PATH and evaluate it on the test split.
test_model = load_model(MODEL_PATH)
evaluation = test_model.evaluate(test_data)
test_loss, test_acc = evaluation

print(f"Test accuracy: {test_acc}")

Test accuracy: 0.9769999980926514
