In [50]:
# https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet.py
import os, re, glob, json
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES']='0'

import numpy as np
import pandas as pd
import tensorflowjs as tfjs

from keras import layers, Model, callbacks
from keras.optimizers import Adam
from keras.applications.mobilenet import MobileNet
from keras.applications.mobilenetv2 import MobileNetV2
from keras.applications.nasnet import NASNetMobile

from sklearn.model_selection import train_test_split

from ohia.metrics import top_1_accuracy, top_3_accuracy, top_5_accuracy
from ohia.encoders import FastLabelEncoder
from ohia.utils import PlantNetGenerator, make_dir

In [56]:
# --- run configuration -------------------------------------------------
# NOTE(review): FILE_PATH is an absolute, machine-specific location.
FILE_PATH = '/home/matt/repos/ohia.ai/data'
MODEL_NAME = 'mobilenetv1'
SEED = 0
BATCH_SIZE = 32

# optional experiment switches; each one that is set adds a tag to MODEL_PATH
ALPHA = None
FILTERED = False
AUGMENTATION = False

# image folder: the "filtered_" variant is used when FILTERED is on
_image_prefix = 'filtered_' if FILTERED else ''
IMAGE_DIR = _image_prefix + 'images_preprocessed'

# output folder name encodes the settings of this run
_model_tags = [
    f'_seed-{SEED}',
    f'_batch_size-{BATCH_SIZE}',
    f'_{ALPHA:0.2f}' if ALPHA else '',
    '_filtered' if FILTERED else '',
    '_augmentation' if AUGMENTATION else '',
]
MODEL_PATH = f'{FILE_PATH}/models/{MODEL_NAME}' + ''.join(_model_tags)

make_dir(MODEL_PATH)

In [3]:
# collect every jpg under the image folder; the label of an image is the
# name of the directory that directly contains it
image_pattern = f'{FILE_PATH}/{IMAGE_DIR}/**/*.jpg'
file_list = glob.glob(image_pattern, recursive=True)
full_label_list = [path.split('/')[-2] for path in file_list]

# turn label names into integer ids
fle = FastLabelEncoder()
label_ids = fle.fit_transform(full_label_list)

# persist the id -> label lookup next to the model artifacts
unique_labels = np.unique(full_label_list)
id2label = {int(fle.transform([name])): name for name in unique_labels}
with open(f'{MODEL_PATH}/plantnet_classes.json', 'w') as fp:
    json.dump(id2label, fp)

In [4]:
# hold out 10% of the images for validation; the split is seeded with SEED
(train_files, valid_files,
 train_ids, valid_ids) = train_test_split(
    file_list,
    label_ids,
    test_size=0.1,
    random_state=SEED,
)

In [5]:
# batch generators for training and validation
n_classes = len(np.unique(full_label_list))

# settings shared by both generators
# NOTE(review): the validation generator also receives augment=AUGMENTATION;
# confirm that this is intended.
generator_options = dict(batch_size=BATCH_SIZE, augment=AUGMENTATION)

train_generator = PlantNetGenerator(
    train_files, train_ids, n_classes,
    **generator_options
)
valid_generator = PlantNetGenerator(
    valid_files, valid_ids, n_classes,
    shuffle=False,
    **generator_options
)

In [None]:
def get_model(model_name):
    """Build and compile a plant classifier on top of a frozen ImageNet backbone.

    Parameters
    ----------
    model_name : str
        Backbone to use: "mobilenetv1", "mobilenetv2" or "nasnetmobile".

    Returns
    -------
    keras.Model
        The backbone (all of its layers frozen) followed by global average
        pooling, a 1024-unit ReLU layer and an ``n_classes``-way softmax,
        compiled with Adam, categorical cross-entropy and the top-1/3/5
        accuracy metrics.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported backbones.

    Notes
    -----
    Reads the notebook-level globals ``n_classes`` (size of the output layer)
    and ``ALPHA`` (width multiplier, only used for "mobilenetv2").
    """
    input_shape = (224, 224, 3)

    # load pretrained ImageNet model
    if model_name == 'mobilenetv1':
        base_model = MobileNet(
            input_shape=input_shape,
            weights='imagenet',
            include_top=False
        )
    elif model_name == 'mobilenetv2':
        base_model = MobileNetV2(
            input_shape=input_shape,
            # ALPHA is None when no width multiplier was configured; fall back
            # to 1.0, the default documented by Keras, instead of passing None
            alpha=1.0 if ALPHA is None else ALPHA,
            weights='imagenet',
            include_top=False
        )
    elif model_name == 'nasnetmobile':
        base_model = NASNetMobile(
            input_shape=input_shape,
            weights='imagenet',
            include_top=False
        )
    else:
        # must be `raise`: `assert ValueError(...)` always passes because an
        # exception instance is truthy
        raise ValueError(
            'model_name parameter must be one of the following'
            ' "mobilenetv1", '
            ' "mobilenetv2", '
            ' "nasnetmobile"'
        )

    # freeze all backbone layers so only the new head is trained
    for layer in base_model.layers:
        layer.trainable = False

    # map ImageNet features to plants
    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1024, activation='relu')(x)
    outputs = layers.Dense(n_classes, activation='softmax')(x)

    # compile the model
    model = Model(inputs=base_model.input, outputs=outputs)
    model.compile(
        optimizer=Adam(lr=0.0001),
        loss='categorical_crossentropy',
        metrics=[top_1_accuracy, top_3_accuracy, top_5_accuracy]
    )
    return model

In [6]:

# build the network to train; without this the cell only works when `model`
# happens to be left over in the kernel from an earlier session
model = get_model(MODEL_NAME)

# every callback tracks validation top-3 accuracy, where higher is better
monitor_options = dict(monitor='val_top_3_accuracy', mode='max')

# define callbacks
callbacks_list = [
    # stop training after 10 epochs without improvement
    callbacks.EarlyStopping(
        patience=10,
        verbose=1,
        **monitor_options
    ),
    # halve the learning rate after 3 epochs without improvement
    callbacks.ReduceLROnPlateau(
        factor=0.5,
        patience=3,
        verbose=1,
        **monitor_options
    ),
    # keep only the best model seen so far (full model, not just weights)
    callbacks.ModelCheckpoint(
        filepath=f'{MODEL_PATH}/weights.h5',
        save_best_only=True,
        save_weights_only=False,
        **monitor_options
    ),
]

# train model
model.fit_generator(
    generator=train_generator,
    validation_data=valid_generator,
    callbacks=callbacks_list,
    use_multiprocessing=True,
    workers=20,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100

Epoch 00009: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100

Epoch 00016: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100

Epoch 00024: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 25/100
Epoch 26/100
  1/592 [..............................] - ETA: 49s - loss: 0.4887 - top_1_accuracy: 0.8750 - top_3_accuracy: 0.9688 - top_5_accuracy: 1.0000
Epoch 00024: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 25/100
Epoch 27/100

Epoch 00027: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 28/100
Epoch 29/100


Epoch 30/100

Epoch 00030: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 31/100
Epoch 00031: early stopping


<keras.callbacks.History at 0x7fcd2048db00>

In [58]:
# dump the per-epoch training history to disk
results = pd.DataFrame(model.history.history)
results.to_csv(f'{MODEL_PATH}/results.csv', index=False)

# show the epoch with the highest validation top-3 accuracy
best_epoch = np.argmax(results['val_top_3_accuracy'].values)
best_results = results.iloc[best_epoch]
print(best_results)

val_loss              1.500317
val_top_1_accuracy    0.564904
val_top_3_accuracy    0.796635
val_top_5_accuracy    0.867788
loss                  0.498099
top_1_accuracy        0.868032
top_3_accuracy        0.964791
top_5_accuracy        0.984428
lr                    0.000025
Name: 20, dtype: float64


In [8]:
# save best model
# Export switch: set to False to skip the TensorFlow.js conversion.
SAVE_MODEL = True

if SAVE_MODEL:
    # rebuild the architecture, then restore the best checkpoint; this is the
    # file the ModelCheckpoint callback in this notebook writes to
    best_weights = f'{MODEL_PATH}/weights.h5'
    model = get_model(MODEL_NAME)
    model.load_weights(best_weights)

    # convert the restored model for use with TensorFlow.js
    tfjs.converters.save_keras_model(model, f'{FILE_PATH}/models/{MODEL_NAME}')

In [None]:
# n_units: 1024,  val_top_1_accuracy: 0.6450,  val_top_3_accuracy: 0.8559,  val_top_5_accuracy: 0.9219