In [1]:
# https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet_v2.py
import os, re, glob
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES']='1'

import numpy as np
import pandas as pd
import tensorflowjs as tfjs

from keras import layers, Model, callbacks
from keras.optimizers import Adam
from keras.applications.mobilenetv2 import MobileNetV2
from sklearn.model_selection import train_test_split
from ohia.metrics import top_1_accuracy, top_3_accuracy, top_5_accuracy
from ohia.encoders import FastLabelEncoder
from ohia.utils import PlantNetGenerator

Using TensorFlow backend.


In [15]:
# --- experiment configuration ---
SEED = 0          # random_state for the train/validation split
ALPHA = 1.3       # MobileNetV2 width multiplier; one of [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]
BATCH_SIZE = 64
# model name embeds the width multiplier with two decimals, e.g. 'mobilenetv2-1.30'
MODEL_NAME = 'mobilenetv2-{:0.2f}'.format(ALPHA)
# root data directory and the sub-directory that holds the images
FILE_PATH = '/home/matt/repos/ohia.ai/data'
IMAGE_DIR = 'resized_images'

In [3]:
# get list of images and labels
# glob returns paths in arbitrary (filesystem-dependent) order; sort them so the
# seeded train/validation split that consumes this list picks the same files
# on every run and on every machine.
file_list = sorted(glob.glob(f'{FILE_PATH}/{IMAGE_DIR}/**/*.jpg', recursive=True))
# the label of an image is the name of the directory that directly contains it
full_label_list = [re.split('/', f)[-2] for f in file_list]
# encode the string labels with the project's FastLabelEncoder
full_label_ids = FastLabelEncoder().fit_transform(full_label_list)

In [4]:
# split data: 10% of the images are held out for validation, seeded by SEED
(train_files, valid_files,
 train_ids, valid_ids) = train_test_split(
    file_list,
    full_label_ids,
    test_size=0.1,
    random_state=SEED,
)

In [5]:
# create generators
# number of distinct label names found on disk
n_classes = len(set(full_label_list))
# training batches are augmented, validation batches are not
train_generator, valid_generator = (
    PlantNetGenerator(files, ids, n_classes, BATCH_SIZE, augment=use_augmentation)
    for files, ids, use_augmentation in (
        (train_files, train_ids, True),
        (valid_files, valid_ids, False),
    )
)

In [13]:
# load pretrained ImageNet model (convolutional base only, no classifier head)
base_model = MobileNetV2(
    input_shape=(224,224,3),
    alpha=ALPHA,
    weights='imagenet',
    include_top=False
)

# freeze all pretrained layers so that only the new head below is trained
for layer in base_model.layers:
    layer.trainable = False

# map ImageNet features to plants:
# global average pooling -> 2048-unit ReLU layer -> softmax over the plant classes
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(2048, activation='relu')(x)
outputs = layers.Dense(n_classes, activation='softmax')(x)

# compile the model
model = Model(inputs=base_model.input, outputs=outputs)
model.compile(
    optimizer=Adam(lr=0.0001),
    loss='categorical_crossentropy',
    metrics=[top_1_accuracy, top_3_accuracy, top_5_accuracy]
)

# make sure the checkpoint directory exists before training starts, so the first
# checkpoint write cannot fail on a fresh checkout after a full epoch of work
weights_dir = f'{FILE_PATH}/weights'
os.makedirs(weights_dir, exist_ok=True)

# define callbacks; all of them track validation top-3 accuracy (higher is better)
callbacks_list = [
    # stop after 10 epochs without improvement
    callbacks.EarlyStopping(
        monitor='val_top_3_accuracy',
        patience=10,
        verbose=1,
        mode='max',
    ),
    # halve the learning rate after 3 epochs without improvement
    callbacks.ReduceLROnPlateau(
        monitor='val_top_3_accuracy',
        factor=0.5,
        patience=3,
        verbose=1,
        mode='max',
    ),
    # write weights only when the monitored metric improves; the epoch number is
    # part of the file name ('<MODEL_NAME>_<epoch>.hdf5')
    callbacks.ModelCheckpoint(
        monitor='val_top_3_accuracy',
        filepath=f'{weights_dir}/{MODEL_NAME}' + '_{epoch:02d}.hdf5',
        save_best_only=True,
        save_weights_only=True,
        mode='max',
    ),
]

# train model (batches are produced by 10 worker processes)
model.fit_generator(
    generator=train_generator,
    validation_data=valid_generator,
    callbacks=callbacks_list,
    use_multiprocessing=True,
    workers=10,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
 10/165 [>.............................] - ETA: 44s - loss: 0.5614 - top_1_accuracy: 0.8375 - top_3_accuracy: 0.9578 - top_5_accuracy: 0.9859

  % delta_t_median)


Epoch 10/100
Epoch 11/100
 18/165 [==>...........................] - ETA: 49s - loss: 0.3965 - top_1_accuracy: 0.8924 - top_3_accuracy: 0.9766 - top_5_accuracy: 0.9905

  % delta_t_median)



Epoch 00011: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 12/100
Epoch 13/100
Epoch 14/100

Epoch 00014: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100

Epoch 00018: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 19/100
Epoch 20/100

Process ForkPoolWorker-796:
Process ForkPoolWorker-786:
Process ForkPoolWorker-787:
Process ForkPoolWorker-789:
Process ForkPoolWorker-791:
Process ForkPoolWorker-798:
Process ForkPoolWorker-794:
Process ForkPoolWorker-795:
Process ForkPoolWorker-799:
Process ForkPoolWorker-788:
Process ForkPoolWorker-790:


KeyboardInterrupt: 

Process ForkPoolWorker-792:
Process ForkPoolWorker-784:
Process ForkPoolWorker-800:
Process ForkPoolWorker-793:
Process ForkPoolWorker-797:
Process ForkPoolWorker-785:
Process ForkPoolWorker-779:
Process ForkPoolWorker-774:
Process ForkPoolWorker-772:
Process ForkPoolWorker-778:
Process ForkPoolWorker-768:
Process ForkPoolWorker-773:
Process ForkPoolWorker-771:
Process ForkPoolWorker-767:
Process ForkPoolWorker-762:
Process ForkPoolWorker-775:
Process ForkPoolWorker-780:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-761:
Process ForkPoolWorker-776:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-763:
Process ForkPoolWorker-782:
Process ForkPoolWorker-777:
Process ForkPoolWorker-765:
Process ForkPoolWorker-770:
Process ForkPoolWorker-764:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kag

Traceback (most recent call last):
KeyboardInterrupt
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
KeyboardInterrupt
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anac

  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", lin

  File "/home/matt/repos/ohia.ai/machine_learning/ohia/utils.py", line 133, in __getitem__
    img = np.array(img, dtype=np.float)/255.
KeyboardInterrupt
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 347, in put
 

In [14]:
# load best model
# Checkpoints are named '<MODEL_NAME>_<epoch>.hdf5' and are only written when the
# monitored metric improves, so the checkpoint with the highest epoch number is
# the best one. Compare epochs numerically: a plain string sort would rank
# '_100' before '_99' once the epoch number reaches three digits.
# NOTE(review): checkpoints left over from an earlier run with the same
# MODEL_NAME are also matched - clear the weights directory between runs.
checkpoint_re = re.compile(re.escape(MODEL_NAME) + r'_(\d+)\.hdf5$')
epoch_and_path = []
for path in glob.glob(f'{FILE_PATH}/weights/{MODEL_NAME}*'):
    found = checkpoint_re.search(path)
    if found:
        epoch_and_path.append((int(found.group(1)), path))
if not epoch_and_path:
    raise FileNotFoundError(
        f'no checkpoints for {MODEL_NAME} found in {FILE_PATH}/weights'
    )
best_weights = max(epoch_and_path)[1]
model.load_weights(best_weights)

In [16]:
# save model: export the Keras model in TensorFlow.js format
tfjs_output_dir = '{}/models/{}'.format(FILE_PATH, MODEL_NAME)
tfjs.converters.save_keras_model(model, tfjs_output_dir)

In [None]:

# alpha: 1.00,  val_top_1_accuracy: 0.5521,  val_top_3_accuracy: 0.7786,  val_top_5_accuracy: 0.8698
# alpha: 1.30,  val_top_1_accuracy: 0.5842,  val_top_3_accuracy: 0.8290,  val_top_5_accuracy: 0.8993
# alpha: 1.40,  val_top_1_accuracy: 0.6068,  val_top_3_accuracy: 0.8490,  val_top_5_accuracy: 0.9149