In [2]:
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile

In [3]:
!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" \
    -O "/tmp/cats-and-dogs.zip"

local_zip = '/tmp/cats-and-dogs.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()

--2020-08-12 02:38:55--  https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip
Resolving download.microsoft.com (download.microsoft.com)... 23.35.76.84, 2600:1407:d800:29f::e59, 2600:1407:d800:29d::e59
Connecting to download.microsoft.com (download.microsoft.com)|23.35.76.84|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 824894548 (787M) [application/octet-stream]
Saving to: ‘/tmp/cats-and-dogs.zip’


2020-08-12 02:39:04 (91.8 MB/s) - ‘/tmp/cats-and-dogs.zip’ saved [824894548/824894548]



In [4]:
print(len(os.listdir('/tmp/PetImages/Cat/')))
print(len(os.listdir('/tmp/PetImages/Dog/')))

12501
12501


In [5]:
try:
    os.mkdir('/tmp/cats-v-dogs')
    os.mkdir('/tmp/cats-v-dogs/training')
    os.mkdir('/tmp/cats-v-dogs/testing')
    os.mkdir('/tmp/cats-v-dogs/training/cats')
    os.mkdir('/tmp/cats-v-dogs/training/dogs')
    os.mkdir('/tmp/cats-v-dogs/testing/cats')
    os.mkdir('/tmp/cats-v-dogs/testing/dogs')
except OSError:
    pass

In [6]:

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    all_files = []
    
    for file_name in os.listdir(SOURCE):
        file_path = SOURCE + file_name

        if os.path.getsize(file_path):
            all_files.append(file_name)
        else:
            print('{} is zero length, so ignoring'.format(file_name))
    
    n_files = len(all_files)
    split_point = int(n_files * SPLIT_SIZE)
    
    shuffled = random.sample(all_files, n_files)
    
    train_set = shuffled[:split_point]
    test_set = shuffled[split_point:]
    
    for file_name in train_set:
        copyfile(SOURCE + file_name, TRAINING + file_name)
        
    for file_name in test_set:
        copyfile(SOURCE + file_name, TESTING + file_name)


CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"

split_size = .9
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

666.jpg is zero length, so ignoring
11702.jpg is zero length, so ignoring


In [7]:
print(len(os.listdir('/tmp/cats-v-dogs/training/cats/')))
print(len(os.listdir('/tmp/cats-v-dogs/training/dogs/')))
print(len(os.listdir('/tmp/cats-v-dogs/testing/cats/')))
print(len(os.listdir('/tmp/cats-v-dogs/testing/dogs/')))

11250
11250
1250
1250


In [8]:
BATCH_SIZE = 32
IMG_SIZE = (224, 224)

In [9]:

# Create the base model from the pre-trained model MobileNet V2
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [10]:
# Make all the layers in the pre-trained model non-trainable
for layer in base_model.layers:
  layer.trainable = False

In [11]:
last_layer = base_model.get_layer('out_relu')
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output

last layer output shape:  (None, 7, 7, 1280)


In [12]:
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
prediction_layer = tf.keras.layers.Dense(2, activation='softmax')

In [13]:
x = global_average_layer(last_output)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)

In [14]:
model = tf.keras.Model(base_model.input, outputs)

In [15]:
from tensorflow.keras.optimizers import RMSprop
model.compile(
    optimizer=RMSprop(lr=0.0001), 
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy']
)

In [16]:
TRAINING_DIR = '/tmp/cats-v-dogs/training'
train_datagen = ImageDataGenerator(
    rescale=1 / 255,
    rotation_range=40,
    width_shift_range=.2,
    height_shift_range=.2,
    shear_range=.2,
    zoom_range=.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    target_size=(224, 224)
)

VALIDATION_DIR = '/tmp/cats-v-dogs/testing'
validation_datagen = ImageDataGenerator(
    rescale=1 / 255
)
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    target_size=(224, 224)
)

Found 22499 images belonging to 2 classes.
Found 2499 images belonging to 2 classes.


In [17]:
BATCH_SIZE

32

In [19]:
import warnings
warnings.filterwarnings('ignore')
print('warnings ignored')

from keras.callbacks import ModelCheckpoint 

filepath = "catsdog-mnv2-{epoch:02d}-{val_accuracy:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, verbose=1, save_best_only=True, mode='auto')

history = model.fit_generator(train_generator,
                              epochs=10,
                              verbose=1,
                              validation_data=validation_generator,
                              callbacks=[checkpoint])

Epoch 1/10
Epoch 00001: val_loss improved from inf to 0.07847, saving model to catsdog-mnv2-01-0.97.hdf5
Epoch 2/10
Epoch 00002: val_loss improved from 0.07847 to 0.06677, saving model to catsdog-mnv2-02-0.98.hdf5
Epoch 3/10
Epoch 00003: val_loss improved from 0.06677 to 0.06501, saving model to catsdog-mnv2-03-0.98.hdf5
Epoch 4/10
Epoch 00004: val_loss improved from 0.06501 to 0.05922, saving model to catsdog-mnv2-04-0.98.hdf5
Epoch 5/10
Epoch 00005: val_loss improved from 0.05922 to 0.05610, saving model to catsdog-mnv2-05-0.98.hdf5
Epoch 6/10
Epoch 00006: val_loss did not improve from 0.05610
Epoch 7/10
Epoch 00007: val_loss improved from 0.05610 to 0.04971, saving model to catsdog-mnv2-07-0.98.hdf5
Epoch 8/10
Epoch 00008: val_loss improved from 0.04971 to 0.04967, saving model to catsdog-mnv2-08-0.98.hdf5
Epoch 9/10
Epoch 00009: val_loss did not improve from 0.04967
Epoch 10/10
Epoch 00010: val_loss did not improve from 0.04967


In [20]:
model.save('cats_dogs_mv2.h5')

In [21]:
import os
print(str(os.path.getsize('./cats_dogs_mv2.h5')/1000000) + 'MB')

9.528032MB
