<a href="https://colab.research.google.com/github/Moez7/IA/blob/main/C2_W2_Lab_1_cats_v_dogs_augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Baseline Performance**

In [None]:
# Download the dataset
!wget https://storage.googleapis.com/tensorflow-1-public/course2/cats_and_dogs_filtered.zip

--2022-05-04 10:49:41--  https://storage.googleapis.com/tensorflow-1-public/course2/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.20.128, 108.177.98.128, 74.125.197.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.20.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘cats_and_dogs_filtered.zip’


2022-05-04 10:49:41 (199 MB/s) - ‘cats_and_dogs_filtered.zip’ saved [68606236/68606236]



In [None]:
import os
import zipfile

# Extract the archive into tmp/. The context manager closes the zip file
# even if extraction raises, so the file handle is never leaked.
with zipfile.ZipFile("./cats_and_dogs_filtered.zip", 'r') as zip_ref:
  zip_ref.extractall("tmp/")

# Root folder of the extracted dataset and its two splits
base_dir = 'tmp/cats_and_dogs_filtered'
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation')
)

# Per-class picture folders (cats, dogs) inside the training split
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, label) for label in ('cats', 'dogs')
)

# Per-class picture folders (cats, dogs) inside the validation split
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, label) for label in ('cats', 'dogs')
)

In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop

def create_model(input_shape=(150, 150, 3), learning_rate=1e-4):
  '''Creates and compiles a CNN with 4 convolutional layers.

  The network stacks four Conv2D + MaxPooling2D stages, flattens the result,
  and finishes with a 512-unit dense layer and a single sigmoid unit, so it
  outputs one probability per image (binary classification).

  Args:
    input_shape: shape of a single input image, passed to the first Conv2D
      layer. Defaults to (150, 150, 3), matching the 150x150 images produced
      by the data generators in this notebook.
    learning_rate: learning rate handed to the RMSprop optimizer.
      Defaults to 1e-4.

  Returns:
    The compiled tf.keras Sequential model (binary cross-entropy loss,
    RMSprop optimizer, accuracy metric).
  '''
  model = tf.keras.models.Sequential([
      # Only the first layer needs to be told the input shape
      tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
      tf.keras.layers.MaxPooling2D(2, 2),
      tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2,2),
      tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2,2),
      tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2,2),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(512, activation='relu'),
      # Single sigmoid unit pairs with the binary_crossentropy loss below
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])

  model.compile(loss='binary_crossentropy',
                optimizer=RMSprop(learning_rate=learning_rate),
                metrics=['accuracy'])

  return model

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Both generators only scale every pixel value by 1/255 (no augmentation)
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Training stream: read from train_dir, resize each image to 150x150 and
# deliver batches of 20 with binary labels (required by binary_crossentropy)
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    class_mode='binary',
    batch_size=20,
    target_size=(150, 150),
)

# Validation stream: same settings, read from validation_dir
validation_generator = test_datagen.flow_from_directory(
    directory=validation_dir,
    class_mode='binary',
    batch_size=20,
    target_size=(150, 150),
)

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [None]:
# Number of passes over the training data
EPOCHS = 20

# Create a new model
model = create_model()

# Train the model. The step counts are taken from the generators (their
# length is the number of batches in one pass) instead of being hard-coded,
# so they stay correct if the batch size or the dataset size changes.
# With 2000 training / 1000 validation images and batches of 20 this is
# still 100 training steps and 50 validation steps.
history = model.fit(
      train_generator,
      steps_per_epoch=len(train_generator),
      epochs=EPOCHS,
      validation_data=validation_generator,
      validation_steps=len(validation_generator),
      verbose=2)

Epoch 1/20
100/100 - 105s - loss: 0.6884 - accuracy: 0.5415 - val_loss: 0.6580 - val_accuracy: 0.6180 - 105s/epoch - 1s/step
Epoch 2/20
100/100 - 99s - loss: 0.6417 - accuracy: 0.6310 - val_loss: 0.6176 - val_accuracy: 0.6530 - 99s/epoch - 990ms/step
Epoch 3/20
100/100 - 98s - loss: 0.5902 - accuracy: 0.6875 - val_loss: 0.6039 - val_accuracy: 0.6600 - 98s/epoch - 984ms/step
Epoch 4/20
100/100 - 99s - loss: 0.5490 - accuracy: 0.7100 - val_loss: 0.5701 - val_accuracy: 0.7090 - 99s/epoch - 986ms/step
Epoch 5/20
100/100 - 97s - loss: 0.5294 - accuracy: 0.7260 - val_loss: 0.5697 - val_accuracy: 0.7110 - 97s/epoch - 975ms/step
Epoch 6/20
100/100 - 98s - loss: 0.5022 - accuracy: 0.7490 - val_loss: 0.5583 - val_accuracy: 0.7060 - 98s/epoch - 979ms/step
Epoch 7/20
100/100 - 98s - loss: 0.4725 - accuracy: 0.7725 - val_loss: 0.5564 - val_accuracy: 0.7250 - 98s/epoch - 981ms/step
Epoch 8/20
100/100 - 99s - loss: 0.4450 - accuracy: 0.7930 - val_loss: 0.6125 - val_accuracy: 0.6850 - 99s/epoch - 994m

In [None]:
import matplotlib.pyplot as plt

def plot_loss_acc(history):
  '''Plots training vs. validation accuracy and loss, one figure each.

  Args:
    history: object whose `.history` dict holds per-epoch lists under the
      keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss' (for example
      the value returned by `model.fit`).
  '''
  acc = history.history['accuracy']
  val_acc = history.history['val_accuracy']
  loss = history.history['loss']
  val_loss = history.history['val_loss']

  # x-axis: zero-based epoch index
  epochs = range(len(acc))

  # Figure 1: accuracy (dots = training, line = validation)
  plt.plot(epochs, acc, 'bo', label='Training accuracy')
  plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
  plt.title('Training and validation accuracy')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  # Without a legend the labels set above are never displayed
  plt.legend()

  # Start a second figure so the loss curves do not share the accuracy axes
  plt.figure()

  # Figure 2: loss (dots = training, line = validation)
  plt.plot(epochs, loss, 'bo', label='Training Loss')
  plt.plot(epochs, val_loss, 'b', label='Validation Loss')
  plt.title('Training and validation loss')
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.legend()

  plt.show()

**Data augmentation**

In [None]:
# Create new model
model_for_aug = create_model()

# This code has changed. Now instead of the ImageDataGenerator just rescaling
# the image, it also applies random rotations, shifts, shears, zooms and
# horizontal flips to the training images
train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

# Validation images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1./255)

# Flow training images in batches of 20 using train_datagen generator
train_generator = train_datagen.flow_from_directory(
        train_dir,  # This is the source directory for training images
        target_size=(150, 150),  # All images will be resized to 150x150
        batch_size=20,
        # Since we use binary_crossentropy loss, we need binary labels
        class_mode='binary')

# Flow validation images in batches of 20 using test_datagen generator
validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

# Train the new model. The step counts are taken from the generators (their
# length is the number of batches in one pass) instead of being hard-coded,
# so they stay correct if the batch size or the dataset size changes.
# With 2000 training / 1000 validation images and batches of 20 this is
# still 100 training steps and 50 validation steps.
history_with_aug = model_for_aug.fit(
      train_generator,
      steps_per_epoch=len(train_generator),
      epochs=EPOCHS,
      validation_data=validation_generator,
      validation_steps=len(validation_generator),
      verbose=2)

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Epoch 1/20
100/100 - 108s - loss: 0.6937 - accuracy: 0.5165 - val_loss: 0.6847 - val_accuracy: 0.5010 - 108s/epoch - 1s/step
Epoch 2/20
100/100 - 106s - loss: 0.6803 - accuracy: 0.5620 - val_loss: 0.6519 - val_accuracy: 0.6460 - 106s/epoch - 1s/step
Epoch 3/20
100/100 - 107s - loss: 0.6633 - accuracy: 0.5930 - val_loss: 0.6360 - val_accuracy: 0.6390 - 107s/epoch - 1s/step
Epoch 4/20
100/100 - 106s - loss: 0.6475 - accuracy: 0.6175 - val_loss: 0.6081 - val_accuracy: 0.6640 - 106s/epoch - 1s/step
Epoch 5/20
100/100 - 106s - loss: 0.6330 - accuracy: 0.6515 - val_loss: 0.5865 - val_accuracy: 0.6940 - 106s/epoch - 1s/step
Epoch 6/20


In [None]:
# Show the accuracy and loss curves recorded while training on augmented data
plot_loss_acc(history=history_with_aug)