# Deep Learning with Python
# Example 5.2 - Cats and Dogs ConvNet (Baseline)

## Making Directories

In [47]:
import os, shutil

In [48]:
# Root folder that holds both the original download and the small subset.
# Set the DL_PYTHON_CH5_DIR environment variable to run this notebook on another
# machine; when it is unset, the original location below is used unchanged.
chapter_dir = os.environ.get(
    'DL_PYTHON_CH5_DIR',
    '/Users/saads/OneDrive/Desktop/DL-Python/chapter-5')

# This is the directory where the uncompressed, original dataset is stored
original_dataset_dir = chapter_dir + '/dogs-vs-cats'

# This is the new directory where we will be storing the subset of the original dataset
base_dir = chapter_dir + '/cats_and_dogs_small'

In [49]:
# Have Python make a new directory at the path specified.
# os.makedirs(..., exist_ok=True) is used instead of os.mkdir so this cell can be
# re-run safely: an existing directory is left as it is instead of raising
# FileExistsError. Missing parent folders are created as well.
os.makedirs(base_dir, exist_ok=True)

In [51]:
# Making directories for training, validation, and test splits
# Command `join` takes the base_dir path and adds a new folder to this path
# os.makedirs(..., exist_ok=True) is used instead of os.mkdir so that re-running
# this cell does not raise FileExistsError when the folder is already there.
# Train
train_dir = os.path.join(base_dir, 'train')
os.makedirs(train_dir, exist_ok=True)

# Test
test_dir = os.path.join(base_dir, 'test')
os.makedirs(test_dir, exist_ok=True)

In [52]:
# Validation
# exist_ok=True keeps the cell re-runnable (no FileExistsError on an existing folder)
validation_dir = os.path.join(base_dir, 'validation')
os.makedirs(validation_dir, exist_ok=True)

In [53]:
# Directory with training cat pictures
# exist_ok=True keeps the cell re-runnable (no FileExistsError on an existing folder)
train_cats_dir = os.path.join(train_dir, 'cats')
os.makedirs(train_cats_dir, exist_ok=True)

In [54]:
# Directory with training dog pictures
# exist_ok=True keeps the cell re-runnable (no FileExistsError on an existing folder)
train_dogs_dir = os.path.join(train_dir, 'dogs')
os.makedirs(train_dogs_dir, exist_ok=True)

In [55]:
# Directory with validation cat pictures
# exist_ok=True keeps the cell re-runnable (no FileExistsError on an existing folder)
validation_cats_dir = os.path.join(validation_dir, 'cats')
os.makedirs(validation_cats_dir, exist_ok=True)

In [56]:
# Directory with validation dog pictures
# exist_ok=True keeps the cell re-runnable (no FileExistsError on an existing folder)
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
os.makedirs(validation_dogs_dir, exist_ok=True)

In [57]:
# Directory with test cat pictures
# exist_ok=True keeps the cell re-runnable (no FileExistsError on an existing folder)
test_cats_dir = os.path.join(test_dir, 'cats')
os.makedirs(test_cats_dir, exist_ok=True)

In [58]:
# Directory with test dog pictures
# exist_ok=True keeps the cell re-runnable (no FileExistsError on an existing folder)
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.makedirs(test_dogs_dir, exist_ok=True)

In [68]:
# Paths to the 'train' and 'test' folders inside the original dataset directory
train_set_dir, test_set_dir = (
    os.path.join(original_dataset_dir, split) for split in ('train', 'test')
)

## Copying Files from the Data Set - Cats

In [69]:
# File names of the first 1000 cat images: cat.0.jpg ... cat.999.jpg
# (this cell only builds the names; the copy itself happens in the next cell)
fnames = list(map('cat.{}.jpg'.format, range(1000)))

In [70]:
# Copy each file listed in fnames from the original 'train' folder
# into the training cats directory, keeping the file name unchanged
for fname in fnames:
    shutil.copyfile(os.path.join(train_set_dir, fname),    # source of copy operation
                    os.path.join(train_cats_dir, fname))   # destination of copy operation

In [73]:
# Copy the next 500 cat images (cat.1000.jpg ... cat.1499.jpg) to the validation set
fnames = list(map('cat.{}.jpg'.format, range(1000, 1500)))
for fname in fnames:
    shutil.copyfile(os.path.join(train_set_dir, fname),        # source
                    os.path.join(validation_cats_dir, fname))  # destination
    

In [74]:
# Copy the next 500 cat images (cat.1500.jpg ... cat.1999.jpg) to the test set.
# The source is train_set_dir (the 'train' folder of the original dataset), the
# same folder every other copy cell in this notebook reads its images from.
fnames = ['cat.{}.jpg'.format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(train_set_dir, fname)   # source of copy operation
    dst = os.path.join(test_cats_dir, fname)   # destination of copy operation
    shutil.copyfile(src, dst)

## Copying Files From Dataset - Dogs

In [75]:
# Copy the first 1000 dog images (dog.0.jpg ... dog.999.jpg) to the training set for dogs
fnames = list(map('dog.{}.jpg'.format, range(1000)))
for fname in fnames:
    shutil.copyfile(os.path.join(train_set_dir, fname),    # source
                    os.path.join(train_dogs_dir, fname))   # destination

In [76]:
# Copy the next 500 dog images (dog.1000.jpg ... dog.1499.jpg) to the validation set
fnames = list(map('dog.{}.jpg'.format, range(1000, 1500)))
for fname in fnames:
    shutil.copyfile(os.path.join(train_set_dir, fname),        # source
                    os.path.join(validation_dogs_dir, fname))  # destination
    

In [77]:
# Copy the next 500 dog images (dog.1500.jpg ... dog.1999.jpg) to the test set
fnames = list(map('dog.{}.jpg'.format, range(1500, 2000)))
for fname in fnames:
    shutil.copyfile(os.path.join(train_set_dir, fname),   # source
                    os.path.join(test_dogs_dir, fname))   # destination


## Sanity Check - Counting Pictures in Each Directory
We will count how many pictures are in each data split (train/test/validation) directory using more functionality of the `os` module.

In [78]:
# Pair every label with the directory it describes, then print the title-cased
# label followed by the number of entries os.listdir finds in that directory.
image_dirs = [
    ('total training cat images', train_cats_dir),
    ('total training dog images', train_dogs_dir),
    ('total validation cat images', validation_cats_dir),
    ('total validation dog images', validation_dogs_dir),
    ('total test cat images', test_cats_dir),
    ('total test dog images', test_dogs_dir),
]
for label, directory in image_dirs:
    print(label.title(), len(os.listdir(directory)))

Total Training Cat Images 1000
Total Training Dog Images 1000
Total Validation Cat Images 500
Total Validation Dog Images 500
Total Test Cat Images 500
Total Test Dog Images 500


## Building the ConvNet and Classifier

In [79]:
from tensorflow.keras import layers, models

In [82]:
# The whole network is handed to Sequential as one list of layers, which is
# equivalent to creating an empty Sequential and calling model.add() per layer.
# Pooling layers spell out pool_size and strides by keyword; MaxPooling2D is used
# for all four stages (MaxPool2D is an alias of the same layer).
model = models.Sequential([
    # First set of Conv/Pooling; input_shape declares 150 x 150 inputs with 3 channels
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(pool_size=2, strides=2),

    # Second set of Conv/Pooling
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),

    # Third set of Conv/Pooling
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),

    # Final set of Conv/Pooling
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),

    # Flattening ConvNet output before it is input to densely connected classifier
    layers.Flatten(),

    # Densely connected classifier ending in a single sigmoid unit
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [83]:
# Model Summary - prints one row per layer showing its name and type,
# its output shape, and its parameter count
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 15, 15, 128)       147584    
__________

## Compiling the Network

Importing the `optimizers` module so we can instantiate an `RMSprop` optimizer object and set its learning rate to `1e-4`.

In [85]:
from tensorflow.keras import optimizers

# RMSprop optimizer with its learning rate set to 1e-4.
# NOTE(review): `lr` is the older spelling of this argument; newer Keras releases
# call it `learning_rate` - confirm against the installed version before renaming.
rmsprop = optimizers.RMSprop(lr=1e-4)

# Binary cross-entropy loss, tracking accuracy under the metric name 'acc'
model.compile(optimizer=rmsprop,
              loss='binary_crossentropy',
              metrics=['acc'])

## Data Preprocessing
1. Read the picture files
2. Decode the JPEG content to RGB grids of pixels
3. Convert these into floating-point tensors
4. Rescale the pixel values (between 0 and 255) to the [0, 1] interval.

In [86]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [87]:
# Create generators that rescale pixel values by a factor of 1/255,
# taking them from the 0-255 range to the [0, 1] interval
pixel_scale = 1./255
train_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

In [88]:
# Options for reading the training images from train_dir (the target directory)
train_flow_settings = dict(
    target_size=(150, 150),   # resize all images to 150 x 150
    batch_size=20,            # batches of 20 images at a time
    class_mode='binary',      # binary labels for binary crossentropy
)
train_generator = train_datagen.flow_from_directory(train_dir, **train_flow_settings)

Found 2000 images belonging to 2 classes.


In [89]:
# Same options as for the training data, this time reading from validation_dir
validation_flow_settings = dict(
    target_size=(150, 150),   # resize all images to 150 x 150
    batch_size=20,            # batches of 20 images at a time
    class_mode='binary',      # binary labels for binary crossentropy
)
validation_generator = test_datagen.flow_from_directory(validation_dir, **validation_flow_settings)

Found 1000 images belonging to 2 classes.


## Detour - Python Generators

A Python generator is an object that acts as an iterator. We can pass the generator to a `for`...`in` statement. The generator will (surprise) generate a value which will then be passed to the for...in loop and can be processed further.

The control structure that uses the generator must provide a mechanism to break the infinite stream of values that will be generated by the generator.

In [92]:
def customGenerator():
    """Yield the integers 1, 2, 3, ... without ever stopping.

    The caller is responsible for breaking out of the iteration.
    """
    counter = 1
    while True:
        yield counter
        counter += 1

In [93]:
# The generator never stops on its own, so the consumer decides when to quit:
# here we leave the loop right after printing the first value greater than 4.
number_stream = customGenerator()
while True:
    item = next(number_stream)
    print(item)
    if item >= 5:
        break

1
2
3
4
5


In [94]:
# Take a single batch from the generator (the same as running the first pass of
# a for loop over it and breaking immediately) and print the shapes of its two parts
data_batch, labels_batch = next(iter(train_generator))
print('Data Batch Shape: ', data_batch.shape)
print('Labels Batch Shape: ', labels_batch.shape)


Data Batch Shape:  (20, 150, 150, 3)
Labels Batch Shape:  (20,)


## Fitting Generators to the Model

The `fit_generator` method is a built-in `keras` method that is basically `fit` but with generator arguments. `fit` expects tensors to train a neural network, whereas `fit_generator` expects generator objects.

The generator objects (for the training data and the validation data) will yield batches of inputs and targets indefinitely. Keras therefore needs to know how many samples to draw from the generator before declaring an epoch over.

This is why we must also provide a `steps_per_epoch` argument - how many batches will be drawn from the generator for each epoch. 

After running `steps_per_epoch` gradient descent steps, the fitting process will go to the next epoch.

Validation data argument is allowed to be a generator, but is also allowed to be a tuple of Numpy arrays. 

In [None]:
# Batches drawn per epoch: the generators found 2000 training and 1000 validation
# images and use a batch size of 20, so 100 and 50 batches cover each set once.
train_batches_per_epoch = 100
validation_batches = 50

# NOTE(review): newer Keras releases deprecate fit_generator in favour of fit,
# which accepts generators directly - confirm against the installed version.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_batches_per_epoch,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=validation_batches)

Instructions for updating:
Use tf.cast instead.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
  8/100 [=>............................] - ETA: 2:49 - loss: 0.1903 - acc: 0.9500