<a href="https://colab.research.google.com/github/KevinLolochum/Deep-Learning-TF-Keras-Theano-PyTorch-/blob/master/CNNs_with_TF_and_Keras_(Flowers_Dataset).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This model classify's the using CNNs. I also do augmentations, dropout and early stopping to prevent overfitting

Downlading important libraries

In [1]:
import os
import numpy as np
import glob
import shutil

import tensorflow as tf

import matplotlib.pyplot as plt

In [2]:
# Importing important packages
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, MaxPooling2D, Conv2D, Dropout, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Downloading the dataset
_URL = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"

zip_file = tf.keras.utils.get_file(origin=_URL,
                                   fname="flower_photos.tgz",
                                   extract=True)

base_dir = os.path.join(os.path.dirname(zip_file), 'flower_photos')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


In [4]:
# Creating class labels
classes = ['roses', 'daisy', 'dandelion', 'sunflowers', 'tulips']

# Moving classes from the original flowers photos folder to train and validation folder and counting the number of flowers we have for each class
for cl in classes:
  img_path = os.path.join(base_dir, cl)
  images = glob.glob(img_path + '/*.jpg')
  print("{}: {} Images".format(cl, len(images)))
  num_train = int(round(len(images)*0.8))
  train, val = images[:num_train], images[num_train:]

  for t in train:
    if not os.path.exists(os.path.join(base_dir, 'train', cl)):
      os.makedirs(os.path.join(base_dir, 'train', cl))
    shutil.move(t, os.path.join(base_dir, 'train', cl))

  for v in val:
    if not os.path.exists(os.path.join(base_dir, 'val', cl)):
      os.makedirs(os.path.join(base_dir, 'val', cl))
    shutil.move(v, os.path.join(base_dir, 'val', cl))

roses: 641 Images
daisy: 633 Images
dandelion: 898 Images
sunflowers: 699 Images
tulips: 799 Images


In [5]:
#setting up path for training and validation sets
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')

In [6]:
# Setting image shape and batch size
BATCH_SIZE = 100
IMG_SHAPE = 150

In [7]:
# Applying this simple augmentations to our training dataset
# random 45 degree rotation, random zoom of up to 50%, random horizontal flip, width shift of 0.15 and height shift of 0.15
data_gen_args = dict(rescale=1./255,
                    horizontal_flip = True,
                    rotation_range = 45,
                    zoom_range = 0.5,
                    height_shift_range = 0.15,
                    width_shift_range = 0.15)

image_gen_train = ImageDataGenerator(**data_gen_args)

train_data_gen = image_gen_train.flow_from_directory(batch_size = BATCH_SIZE,
                                                     shuffle = True,
                                                     directory = train_dir,
                                                     target_size = (IMG_SHAPE, IMG_SHAPE),
                                                     class_mode = 'sparse'
                                                     )

# Test dataset does not require augmentation
image_gen_val = ImageDataGenerator(rescale=1./255)

val_data_gen = image_gen_val.flow_from_directory(class_mode = 'sparse',
                                                 directory = val_dir,
                                                 batch_size= BATCH_SIZE,
                                                 target_size=(IMG_SHAPE, IMG_SHAPE))



Found 2935 images belonging to 5 classes.
Found 735 images belonging to 5 classes.


Model creation

In [8]:
#Creating a CNN model with 4 CONV2D layers and MaxPooling2D layers and a dense output layer with as many neurons as the outputs(5)
model = Sequential()

model.add(Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_SHAPE,IMG_SHAPE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, 3, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, 3, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))

model.add(Dropout(0.2))
model.add(Dense(5))

In [9]:
# Compile
# Adam optimizer is the best for this classification problem
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics =['accuracy'])

Model training

In [None]:
epochs = 80

Trained = model.fit_generator(
    train_data_gen,
    steps_per_epoch=int(np.ceil(train_data_gen.n / float(BATCH_SIZE))),
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=int(np.ceil(val_data_gen.n / float(BATCH_SIZE)))
)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80

Plotting the loss function

In [None]:
acc = Trained.history['accuracy']
val_acc = Trained.history['val_accuracy']

loss = Trained.history['loss']
val_loss = Trained.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(10, 10))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

Training accuracy is lower than validation accuracy, this is the case because validation images are not augmented, therefore it is a bit easier to classify. After about 50 epochs the training accuracy starts to rise above validation accuracy and validation loss stops improving/rises. This is an indicator that the model is probably starting to overfit. Using early stopping, shown below can prevent this problem.

In [None]:
import tensorflow
early_stopping = tensorflow.keras.callbacks.EarlyStopping(patience=15)

epochs = 60

Trained = model.fit(
    train_data_gen,
    steps_per_epoch=int(np.ceil(train_data_gen.n / float(BATCH_SIZE))),
    epochs=epochs,
    validation_data=val_data_gen,
    callbacks = [early_stopping],
    validation_steps=int(np.ceil(val_data_gen.n / float(BATCH_SIZE)))
)