In [1]:
import numpy as np
import matplotlib.pyplot as plt
import joblib as jb
import PIL
import os

%matplotlib inline
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False
plt.style.use('seaborn-dark')

# declare paths to data
TRAIN_PATH = 'data/train'
VALID_PATH = 'data/validation'
TEST_PATH = 'data/test'
CLASSES = ['Damselflies', 'Dragonflies']

In [2]:
import json

# Count the directory entries of every <split>/<class> folder and print the
# result as nested JSON: {split_path: {class_name: number_of_entries}}.
data_summary = {
    directory: {
        class_name: len(os.listdir(os.path.join(directory, class_name)))
        for class_name in CLASSES
    }
    for directory in [TRAIN_PATH, VALID_PATH, TEST_PATH]
}

print(json.dumps(data_summary, indent=4))

{
    "data/train": {
        "Damselflies": 6100,
        "Dragonflies": 6628
    },
    "data/validation": {
        "Damselflies": 670,
        "Dragonflies": 736
    },
    "data/test": {
        "Damselflies": 1692,
        "Dragonflies": 1840
    }
}


## Image Augmentation Pipeline

* #### Training Data

    1. Shuffle
    1. Resize image
    1. Grayscale image
    1. Normalize pixel values
    1. Horizontal image flip
    1. Vertical image flip
    1. Rotate image
    1. Adjust image brightness
       
    
* #### Validation & Testing data

    1. Normalize pixel values
    2. Grayscale images

In [3]:
# --- image loading / augmentation hyperparameters ---
TARGET_SIZE = (256, 256)    # target_size handed to every directory flow
RESCALE = 1 / 255.0         # rescale factor handed to every ImageDataGenerator
COLOR_MODE = 'grayscale'
BATCH_SIZE = 16
ROTATION = 25               # rotation_range of the training generator
BRIGHTNESS = [0.4, 1.0]     # brightness_range of the training generator

# --- training / flow hyperparameters ---
EPOCHS = 29
CLASS_MODE = 'categorical'
CHECKPOINT = "checkpoints/weight1s.hdf5"   # file written by the ModelCheckpoint callback

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Input, UpSampling2D
from keras.models import Sequential, Model, load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint

# Training generator: rescale pixel values and augment with random
# horizontal/vertical flips, rotation and brightness changes.
trainGenerator = ImageDataGenerator(
    rescale=RESCALE,
    rotation_range=ROTATION,
    brightness_range=BRIGHTNESS,
    horizontal_flip=True,
    vertical_flip=True,
)

# Validation generator: pixel rescaling only, no augmentation.
validatioinGenerator = ImageDataGenerator(
    rescale=RESCALE,
)

# Test generator: pixel rescaling only, no augmentation.
testGenerator = ImageDataGenerator(
    rescale=RESCALE,
)

# Options shared by the training and validation directory flows.
flow_options = dict(
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    color_mode=COLOR_MODE,
    class_mode=CLASS_MODE,
)

# Training flow: reads TRAIN_PATH through the augmenting generator, shuffled.
trainFlow = trainGenerator.flow_from_directory(TRAIN_PATH, shuffle=True, **flow_options)

# Validation flow: reads VALID_PATH through the rescale-only generator, unshuffled.
validationFlow = validatioinGenerator.flow_from_directory(VALID_PATH, shuffle=False, **flow_options)

## Define Model Architecture
# A light-weight take on VGG16: every stage is a single 3x3 convolution
# followed by max-pooling (instead of two convolutions per stage), with the
# filter count doubling from 16 up to 256. Two dense layers with dropout feed
# a softmax output.
#
# The input shape and the output width are derived from TARGET_SIZE,
# COLOR_MODE and CLASSES so the network cannot drift out of sync with the
# directory flows when one of those settings is tuned.
input_channels = {'grayscale': 1, 'rgb': 3, 'rgba': 4}[COLOR_MODE]
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu',
           input_shape=tuple(TARGET_SIZE) + (input_channels,)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(128, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(256, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(.3),
    Dense(64, activation='relu'),
    Dropout(.3),
    # one output unit per class, matching CLASS_MODE = 'categorical'
    Dense(len(CLASSES), activation='softmax')
])

# The model is assembled from stand-alone Keras layers (`from keras...`),
# whereas the bare name `keras` in this notebook is bound to `tensorflow.keras`.
# Passing a TensorFlow optimizer to a stand-alone Keras model makes Keras warn
# that the optimizer cannot be stored in the saved model, so take Adam from the
# same package the layers come from.
from keras.optimizers import Adam

# trial and error: lowering the learning rate gave better results
optimizer = Adam(learning_rate=0.0005)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

# Make sure the checkpoint folder exists before training starts; otherwise the
# first checkpoint write would fail after a full epoch of work.
checkpoint_dir = os.path.dirname(CHECKPOINT)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

# Keep only the model with the best validation accuracy seen so far, so a
# later, overfitted epoch does not overwrite it.
checkpoints = ModelCheckpoint(CHECKPOINT, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

# train model
history = model.fit(
    trainFlow,
    validation_data=validationFlow,
    callbacks=[checkpoints],
    epochs=EPOCHS
)

# Persist the final model and the training history for the evaluation cell.
model.save('Linnaeus_bot1')
# NOTE(review): this pickles the whole History callback object, not just the
# `history.history` dict -- confirm that is what downstream cells expect.
jb.dump(history, 'training_history1.pkl')

Using TensorFlow backend.


Found 12728 images belonging to 2 classes.
Found 1406 images belonging to 2 classes.
Epoch 1/29

Epoch 00001: val_accuracy improved from -inf to 0.75818, saving model to checkpoints/weight1s.hdf5
Epoch 2/29
  2/796 [..............................] - ETA: 48s - loss: 0.4826 - accuracy: 0.7188

  'TensorFlow optimizers do not '



Epoch 00002: val_accuracy improved from 0.75818 to 0.83073, saving model to checkpoints/weight1s.hdf5
Epoch 3/29

Epoch 00003: val_accuracy improved from 0.83073 to 0.84566, saving model to checkpoints/weight1s.hdf5
Epoch 4/29

Epoch 00004: val_accuracy improved from 0.84566 to 0.86202, saving model to checkpoints/weight1s.hdf5
Epoch 5/29

Epoch 00005: val_accuracy did not improve from 0.86202
Epoch 6/29

Epoch 00006: val_accuracy improved from 0.86202 to 0.87198, saving model to checkpoints/weight1s.hdf5
Epoch 7/29

Epoch 00007: val_accuracy improved from 0.87198 to 0.87269, saving model to checkpoints/weight1s.hdf5
Epoch 8/29

Epoch 00008: val_accuracy improved from 0.87269 to 0.87838, saving model to checkpoints/weight1s.hdf5
Epoch 9/29

Epoch 00009: val_accuracy did not improve from 0.87838
Epoch 10/29

Epoch 00010: val_accuracy improved from 0.87838 to 0.89260, saving model to checkpoints/weight1s.hdf5
Epoch 11/29

## Model Evaluation

In [None]:
# Drop any model left in the kernel so the evaluation below only uses the
# artifacts stored on disk.
try:
    del model
except NameError as e:
    # nothing to delete: no model has been defined in this kernel session
    print(str(e))

# Restore the saved model, then overwrite its weights with the checkpoint
# written by ModelCheckpoint (best validation accuracy, save_best_only=True).
model = load_model('Linnaeus_bot1')
model.load_weights(CHECKPOINT)
# joblib is imported as `jb`; the file name must match the one written by the
# training cell ('training_history1.pkl').
history = jb.load('training_history1.pkl')

# Test images are only rescaled -- no augmentation.
testGenerator = ImageDataGenerator(rescale=RESCALE)

# Read the test set in a fixed order (same as the validation flow). Shuffling
# would break the alignment between per-sample predictions and
# `testFlow.classes` / `testFlow.filenames`.
testFlow = testGenerator.flow_from_directory(
    TEST_PATH,
    target_size = TARGET_SIZE,
    batch_size = BATCH_SIZE,
    color_mode = COLOR_MODE,
    class_mode= CLASS_MODE,
    shuffle = False
)

# Compile explicitly before evaluating: the reloaded model may come back without
# its compile state (Keras does not store a TensorFlow optimizer in the saved
# model). Adam is taken from the stand-alone Keras package, the same package
# the model's layers and `load_model` come from.
from keras.optimizers import Adam

optimizer = Adam(learning_rate=0.0005)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

# evaluate() returns the loss followed by the compiled metrics (accuracy here)
loss, acc = model.evaluate(testFlow)
print(f'Test accuracy: {acc}')
print(f'Test loss: {loss}')

In [None]:
# Show the images and predictions 