# CNN Image Classification - Cats and Dogs (from Kaggle)

In [None]:
import os, shutil
import matplotlib.pyplot as plt

from tensorflow.keras import models
from tensorflow.keras import layers

from tensorflow.keras import optimizers

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Locations of the image folders used by the rest of the notebook.

# Main folders: one per data split. These are the directories handed to
# `flow_from_directory`.
train_dir = 'data/training_set/'
validation_dir = 'data/validation_dir'
test_dir = 'data/test_set'

# Class folders: derived from the main folders instead of repeating the
# path literals, so changing a split's location only has to be done once.
# os.path.join copes with the trailing slash on `train_dir`.
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')

In [None]:
# Sanity check: print the number of entries found in each class folder.
for description, folder in (
    ('total training cat images:', train_cats_dir),
    ('total training dog images:', train_dogs_dir),
    ('total validation cat images:', validation_cats_dir),
    ('total validation dog images:', validation_dogs_dir),
    ('total test cat images:', test_cats_dir),
    ('total test dog images:', test_dogs_dir),
):
    print(description, len(os.listdir(folder)))

## Data preprocessing
1) Read the picture files. <br>
2) Decode the JPEG content to RGB grids of pixels. <br>
3) Convert these into floating point tensors. <br>
4) Rescale the pixel values (between 0 and 255) to the [0, 1] interval (as you know, neural networks prefer to deal with small input values). <br> <br>
Keras provides a helper for these image-preprocessing steps: the `ImageDataGenerator` class from `tensorflow.keras.preprocessing.image`.

### Without Data Augmentation

In [None]:
# Both generators only rescale pixel values by 1/255; no augmentation here.
train_datagen = ImageDataGenerator(rescale=1. / 255)  # used for training
test_datagen = ImageDataGenerator(rescale=1. / 255)   # used for validation

# Batches of labelled images read from the training folder.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    class_mode='binary',     # binary classification ('categorical' for multiclass)
    batch_size=20,           # each batch holds 20 images
    target_size=(150, 150),  # images are resized to 150x150
)

# Same settings for the validation folder, but without shuffling.
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    class_mode='binary',
    batch_size=20,
    target_size=(150, 150),
    shuffle=False,
)

In [None]:
# Sanity check: pull one batch from the training generator and show the
# shapes of its image tensor and label tensor.
first_batch = next(iter(train_generator), None)
if first_batch is not None:
    data_batch, labels_batch = first_batch
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)

### With Data Augmentation

In [None]:
# Data augmentation is applied to the training data only.
# NOTE: this cell rebinds `train_datagen`, `test_datagen` and
# `train_generator`, replacing the non-augmented versions created earlier.

train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Held-out images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# target_size has to match the network's input_shape=(150, 150, 3);
# the previous (64, 64) made model.fit fail with a shape mismatch.
# batch_size is 20 so that the "steps_per_epoch * batch_size" sizing used
# at training time holds (it was 1, contradicting the comment).
train_generator = train_datagen.flow_from_directory(train_dir,
                                                   target_size=(150,150), #size of the final image
                                                   batch_size=20, #will return batches of 20 images at each time
                                                   class_mode='binary') #binary classification (if it was multiclass = categorical)

# One image per batch, in a fixed order (shuffle=False).
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(150,150),
                                                  batch_size=1,
                                                  class_mode='binary',
                                                  shuffle=False)

In [None]:
# Sanity check: pull one batch from the (augmented) training generator and
# show the shapes of its image tensor and label tensor.
first_batch = next(iter(train_generator), None)
if first_batch is not None:
    data_batch, labels_batch = first_batch
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)

## Building the network

In [None]:
# Four Conv2D + MaxPooling2D stages followed by a dense classifier head.
model = models.Sequential([
    # Stage 1: 32 filters with a 3x3 kernel; input is a 150x150 RGB image.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    # Stage 2: 64 filters.
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Stage 3: 128 filters.
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Stage 4: 128 filters.
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Fully connected part: flatten the feature maps, then one hidden layer.
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    # Output layer: a single sigmoid unit for binary classification
    # (softmax would be used for multiclass).
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Print the architecture of the network built above (Keras `Model.summary`).
model.summary()

In [None]:
# Configure training: optimizer, loss function and reported metric.
model.compile(
    optimizer=optimizers.RMSprop(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['acc'],  # accuracy, stored under the key 'acc' in the history
)

## Training the model

In [None]:
# Train for 30 epochs, validating with the validation generator.
history = model.fit(
    train_generator,
    epochs=30,
    # Depends on the batch size and the total number of training images:
    # steps_per_epoch * batch_size = size of the training set.
    steps_per_epoch=100,
    validation_data=validation_generator,
    # Depends on the batch size and the total number of validation images:
    # validation_steps * batch_size = size of the validation set.
    validation_steps=50,
)

# Save the trained model to disk.
model.save('cats_and_dogs_small_1.h5')

## Plot the results

In [None]:
# Names of the metrics saved in the history dictionary.
# `history.history` is the dict indexed below by 'acc', 'val_acc', 'loss'
# and 'val_loss'.

history_dict = history.history
# Bare expression: as the last line of a notebook cell its value is displayed.
history_dict.keys()

In [None]:
# Pull the training and validation curves out of the fit history.
metrics = history.history
acc, val_acc = metrics['acc'], metrics['val_acc']
loss, val_loss = metrics['loss'], metrics['val_loss']

# Epoch numbers for the x axis, starting at 1.
epochs = range(1, len(acc) + 1)

In [None]:
# Default figure size for the plots below.
plt.rcParams["figure.figsize"] = (10,6)

# One figure per metric: training values as dots, validation as a line.
curves = (
    ('accuracy', acc, val_acc),
    ('loss', loss, val_loss),
)
for position, (metric, train_values, val_values) in enumerate(curves):
    if position > 0:
        # The first metric uses the implicit figure; later ones get a new one.
        plt.figure()
    plt.plot(epochs, train_values, 'bo', label=f'Training {metric}')
    plt.plot(epochs, val_values, 'b', label=f'Validation {metric}')
    plt.title(f'Training and Validation {metric}')
    plt.xlabel('Epochs')
    plt.ylabel(metric.capitalize())
    plt.legend()

plt.show()