In [1]:
## Image classification with a CNN trained from scratch (note: despite the original "Transfer Learning" title, no pre-trained network is used in this notebook) ##
# """
# Workaround Notes:
# 1. Q: The validation_split argument of ImageDataGenerator is not supported in Keras 2.1.3 (server version).
#    A: Upgrade to the latest Keras (version 2.2.2): pip install keras --upgrade
# 2. Q: The "softmax" Activation in the latest Keras (version 2.2.2) does not work with TensorFlow 1.4 (server version).
#    A: Change Activation "softmax" to tf.nn.softmax.
# => Keras 2.1.5 is the release that matches TensorFlow 1.4.1, so "pip install keras==2.1.5" can be used instead to resolve both Q1 and Q2.
#
# Experimental Result:
# Keras 2.1.5 + TensorFlow 1.4.1 achieved better accuracy than Keras 2.2.2 + TensorFlow 1.4.1.
# """

In [2]:
import os

import cv2
import numpy as np
import pandas as pd

In [3]:
import keras
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import optimizers

## Configuration: data locations, label count, image geometry and training schedule
TRAIN_IMG_DIR = "./train/" #training_set at ./train/
TEST_IMG_DIR = "./test/" #testing_set at ./test/testimg/

NUM_CLASSES = 5 #target labels(ground truth), total 5 classes(check mapping.txt)

# Image shapes
IMG_WIDTH = 224
IMG_HEIGHT = 224
CHANNELS = 3
# Keras "channels_last" tensors are ordered (rows, cols, channels), i.e. (height, width, channels).
# Width and height are equal here, so the value is unchanged, but the order now stays
# correct if a non-square image size is ever configured.
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, CHANNELS)

BATCH_SIZE = 16
EPOCHS = 100 #upper bound on the number of training epochs

## Build model: (Convolution layer + MaxPooling layer)s + Fully-connected NN layers
# TensorFlow is imported directly only because the output layer uses tf.nn.softmax (see below).
import tensorflow as tf

model = Sequential()

# Block 1: Convolution layer*2 + BN + MaxPooling layer + Dropout
# (the input_shape argument must be given to the first layer of a Sequential model)
model.add(Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu", input_shape=INPUT_SHAPE))
model.add(Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Block 2: Convolution layer*2 + BN + MaxPooling layer + Dropout
model.add(Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Fully-connected NN layers
# fully-connected 1st layer
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# fully-connected final layer: one output per class, turned into probabilities by softmax.
# The Keras string activation ("softmax") is replaced by tf.nn.softmax as a separate
# Activation layer because the TensorFlow version on the server is too old for it.
model.add(Dense(NUM_CLASSES))
model.add(Activation(tf.nn.softmax))

# Alternative optimizers that were tried (kept for reference):
# opt_adam = optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.01, amsgrad=False)
# opt_rmsprop = optimizers.RMSprop(lr=1e-5, decay=0.01)
model.compile(loss = "categorical_crossentropy",
              optimizer = "adam",
              metrics = ["accuracy"])

# model.summary() prints the table itself and returns None, so wrapping it in print()
# would add a stray "None" line after the summary.
model.summary()

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 224, 224, 32)      9248      
_________________________________________________________________
batch_normalization_1 (Batch (None, 224, 224, 32)      128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 32)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 112, 112, 64)      18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 112, 112, 64)      36928     
__________

In [4]:
## Using Keras ImageDataGenerator to load images batch and do data augmentation on the fly.
#!validation_split argument not supported in Keras 2.1.3(server version)!
VALIDATION_SPLIT = 0.20 #fraction of ./train/ held out for validation

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
        rescale = 1./255,
        rotation_range = 20,
        width_shift_range = 0.2,
        height_shift_range = 0.2,
        shear_range = 0.2,
        zoom_range = 0.2,
        horizontal_flip = True,
        validation_split = VALIDATION_SPLIT
)

# Validation images are only rescaled (no augmentation); the same split fraction is
# used so that the "training" and "validation" subsets partition the directory.
valid_datagen = ImageDataGenerator(
        rescale = 1./255,
        validation_split = VALIDATION_SPLIT
)

# Test images are only rescaled.
test_datagen = ImageDataGenerator(rescale = 1./255)

# flow_from_directory expects target_size as (height, width), not (width, height).
TARGET_SIZE = (IMG_HEIGHT, IMG_WIDTH)

## Using Keras datagen.flow_from_directory to load images from every sub-directories at train,(validation),test directory
train_generator = train_datagen.flow_from_directory(
        directory = TRAIN_IMG_DIR,
        target_size = TARGET_SIZE,
        color_mode = "rgb",
        batch_size = BATCH_SIZE,
        class_mode = "categorical",
        shuffle = True,
        seed = 33,
        subset = "training"
)

validation_generator = valid_datagen.flow_from_directory(
        directory = TRAIN_IMG_DIR,
        target_size = TARGET_SIZE,
        color_mode = "rgb",
        batch_size = BATCH_SIZE,
        class_mode = "categorical",
        shuffle = True,
        seed = 33,
        subset = "validation"
)

# The test set has no labels (class_mode=None). It is read one image at a time and in
# a fixed order (shuffle=False) so predictions line up with test_generator.filenames.
test_generator = test_datagen.flow_from_directory(
        directory = TEST_IMG_DIR,
        target_size = TARGET_SIZE,
        color_mode = "rgb",
        batch_size = 1,
        class_mode = None,
        shuffle = False
)

## Amounts of individual set: training, validation, test
print (train_generator.n) #amounts of train_generator
print (validation_generator.n) #amounts of validation_generator
print (test_generator.n) #amounts of test_generator

## Labels from Keras data generator
print (train_generator.class_indices)
print (validation_generator.class_indices)

## Image shape check
print (train_generator.image_shape)
print (validation_generator.image_shape)
print (test_generator.image_shape)

Found 3062 images belonging to 5 classes.
Found 761 images belonging to 5 classes.
Found 500 images belonging to 1 classes.
3062
761
500
{'daisy': 0, 'dandelion': 1, 'rose': 2, 'sunflower': 3, 'tulip': 4}
{'daisy': 0, 'dandelion': 1, 'rose': 2, 'sunflower': 3, 'tulip': 4}
(224, 224, 3)
(224, 224, 3)
(224, 224, 3)


In [5]:
## Fitting/Training the model
# Round up (ceiling division) so the last, partial batch is included. With floor
# division the trailing images are dropped, and because the validation generator is
# shuffled, val_acc would be measured on a slightly different subset every epoch.
STEPS_PER_EPOCH = (train_generator.n + BATCH_SIZE - 1) // BATCH_SIZE
VALIDATION_STEPS = (validation_generator.n + BATCH_SIZE - 1) // BATCH_SIZE

# Callbacks setting
FILE_PATH = "./checkpoint-{epoch:02d}-{val_loss:.2f}-{val_acc:.2f}.hdf5"
# Fixed-name file that always holds the weights of the best epoch so far, so they can
# be reloaded after training without having to guess the formatted checkpoint name.
BEST_WEIGHTS_PATH = "./checkpoint-best-weights.hdf5"
# Stop when val_acc has not improved for 10 consecutive epochs.
EarlyStop = EarlyStopping(monitor="val_acc", patience=10, verbose=1, mode="max")
# Keep a full model snapshot for every epoch that improves val_acc.
Checkpoint = ModelCheckpoint(FILE_PATH, monitor="val_acc", verbose=1, save_best_only=True, mode="max")
BestWeights = ModelCheckpoint(BEST_WEIGHTS_PATH, monitor="val_acc", verbose=0,
                              save_best_only=True, save_weights_only=True, mode="max")
Callback_list = [EarlyStop, Checkpoint, BestWeights]

history = model.fit_generator(
                generator = train_generator,
                steps_per_epoch = STEPS_PER_EPOCH,
                epochs = EPOCHS,
                callbacks = Callback_list,
                validation_data = validation_generator,
                validation_steps = VALIDATION_STEPS,
                shuffle = True
)

## Evaluate the model
# model.evaluate_generator(generator = )

# When early stopping triggers, the in-memory model holds the weights of the LAST
# epoch, which is `patience` epochs past the best one. Restore the best weights
# before predicting.
model.load_weights(BEST_WEIGHTS_PATH)

## Predict the test set, then we'll get a probability nparray
# reset() rewinds the generator so predictions start at the first test file.
test_generator.reset()
TEST_STEPS = (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size
pred_probability = model.predict_generator(test_generator, steps=TEST_STEPS, verbose=1)

Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.50591, saving model to ./checkpoint-01-1.34-0.51.hdf5
Epoch 2/100

Epoch 00002: val_acc improved from 0.50591 to 0.52300, saving model to ./checkpoint-02-1.19-0.52.hdf5
Epoch 3/100

Epoch 00003: val_acc did not improve from 0.52300
Epoch 4/100

Epoch 00004: val_acc improved from 0.52300 to 0.56636, saving model to ./checkpoint-04-1.15-0.57.hdf5
Epoch 5/100

Epoch 00005: val_acc improved from 0.56636 to 0.65309, saving model to ./checkpoint-05-0.89-0.65.hdf5
Epoch 6/100

Epoch 00006: val_acc did not improve from 0.65309
Epoch 7/100

Epoch 00007: val_acc improved from 0.65309 to 0.65703, saving model to ./checkpoint-07-0.92-0.66.hdf5
Epoch 8/100

Epoch 00008: val_acc did not improve from 0.65703
Epoch 9/100

Epoch 00009: val_acc improved from 0.65703 to 0.66229, saving model to ./checkpoint-09-0.92-0.66.hdf5
Epoch 10/100

Epoch 00010: val_acc did not improve from 0.66229
Epoch 11/100

Epoch 00011: val_acc did not improve from 0.66

In [6]:
## Convert the prediction probability nparray to pandas dataframe to understand its structure
# One row per test image, one column per Keras class index.
df_pred = pd.DataFrame(pred_probability)
# Preview only the first rows instead of dumping the whole frame. A bare last expression
# renders richly in the notebook and, unlike the IPython-only display(), does not raise
# NameError when this notebook is exported and run as a plain Python script.
df_pred.head(10)

Unnamed: 0,0,1,2,3,4
0,8.740959e-01,1.035689e-01,7.262916e-03,1.341955e-02,1.652773e-03
1,3.929080e-03,7.174431e-04,3.617489e-04,9.621175e-01,3.287419e-02
2,8.161377e-03,1.395368e-04,4.412198e-02,3.368535e-06,9.475738e-01
3,5.799526e-04,5.449135e-05,2.683922e-05,9.993231e-01,1.565954e-05
4,4.811743e-03,3.579692e-06,1.047605e-07,9.951839e-01,7.413789e-07
5,3.823314e-03,1.529317e-05,9.540991e-01,4.943111e-05,4.201282e-02
6,9.999734e-01,2.521053e-08,8.549175e-06,2.187151e-09,1.797246e-05
7,1.110406e-01,2.661431e-04,8.776755e-01,1.188366e-03,9.829368e-03
8,2.409052e-03,2.248494e-03,8.488712e-03,9.329426e-01,5.391110e-02
9,1.258468e-02,1.749023e-03,9.148928e-03,9.471747e-01,2.934270e-02


In [7]:
# """
# This section is for saving the results to the CSV file.
# """
## Get the predicted class indices from model prediction result.(we can check it from the above probability dataframe)
predicted_class_indices = np.argmax(pred_probability, axis=1)

#default labels from Keras data generator(ie. names of sub-directories of training set)
keras_labels = train_generator.class_indices
#invert {class name: index} into {index: class name} to get the names of class labels
keras_labels_swap = {index: label for label, index in keras_labels.items()}
class_name = [keras_labels_swap[idx] for idx in predicted_class_indices]

## Reading pre-defined labels from mapping.txt, and store it to a dictionary
## (each line is expected to look like "<class name>,<integer label>")
mapping = {}
with open("./mapping.txt") as f:
    for line in f:
        if not line.strip():
            continue #tolerate blank lines (e.g. a trailing newline at end of file)
        key, val = line.split(",")
        mapping[key.strip()] = int(val)

## Because predicted_class_indices come from Keras (data generator) default labels,
## this may not match our pre-defined labels (from mapping.txt).
## I use pandas.Series.map(arg=Dict) to remap predicted_class_indices to pre-defined labels.
ps = pd.Series(data = class_name)
class_predictions = ps.map(mapping)

# Series.map silently yields NaN for names missing from mapping.txt, which would turn
# the "class" column into floats and corrupt the submission; fail loudly instead.
unmapped = sorted(set(ps[class_predictions.isnull()]))
if unmapped:
    raise ValueError("class names missing from mapping.txt: {}".format(unmapped))

## Get filenames of all test images
files = test_generator.filenames #!this output will include the directory path name!
# Drop the directory part and the file extension. str.lstrip("testimg/") and
# str.rstrip(".jpg") must not be used for this: they strip any of the given CHARACTERS,
# not the prefix/suffix, so ids starting with t/e/s/i/m/g or ending with j/p/g get truncated.
filenames = [os.path.splitext(os.path.basename(path))[0] for path in files]

## Save the results to the csv file
results = pd.DataFrame({"id" : filenames,
                        "class_name" : class_name,
                        "class" : class_predictions})
results.to_csv("results.csv", index=False)

submission = pd.DataFrame({"id" : filenames,
                           "class" : class_predictions})
submission.to_csv("submission.csv", index=False)

In [8]:
# Shell escape: export this notebook (FlowerClassification_CNN.ipynb) as a plain Python script.
!jupyter nbconvert --to script FlowerClassification_CNN.ipynb

[NbConvertApp] Converting notebook FlowerClassification_CNN.ipynb to script
[NbConvertApp] Writing 8125 bytes to FlowerClassification_CNN.py
