In [1]:
# Root data directory: images are read from, and model artifacts written under, this path.
# Set the OHIA_DATA_DIR environment variable to run on another machine; when it is
# unset (or empty) the original location is used.
import os
FILE_PATH = os.environ.get('OHIA_DATA_DIR') or '/home/matt/repos/ohia.ai/data'

In [3]:
# --- model ---
model_name = 'mobilenetv1'  # backbone selector passed to get_model
n_hidden = 0                # recorded in the model path and passed to get_model
dropout = 0.0               # dropout rate passed to get_model

# --- data ---
filtered = False            # adds a '_filtered' suffix to the model path
augmentation = False        # forwarded to the data generators as `augment`

# --- training / runtime ---
seed = 1                    # random_state of the train/validation split
batch_size = 32
n_thread = 32               # number of workers used by fit_generator
gpu = 0                     # value written to CUDA_VISIBLE_DEVICES
save_model = False          # NOTE(review): not referenced anywhere in the visible cells

In [4]:
import os, re, glob, json
# Select the GPU through environment variables. These assignments are placed
# before the keras import below -- presumably so the backend only sees the
# chosen device when it initialises; keep this ordering.
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES']=str(gpu)

import numpy as np
import pandas as pd

from keras import callbacks
from sklearn.model_selection import train_test_split

# project-local helpers
from ohia.encoders import FastLabelEncoder

from ohia.utils import PlantNetGenerator, make_dir

Using TensorFlow backend.


In [5]:
# set up paths
# NOTE(review): IMAGE_DIR is fixed to the filtered dataset; the `filtered` flag
# only changes the model directory name below -- confirm this is intended.
IMAGE_DIR = 'filtered_plantnet'

# the model directory name encodes the hyper-parameters of this run
model_path = (
    f'{FILE_PATH}/models/plantnet_{model_name}'
    f'_seed-{seed}'
    f'_batch_size-{batch_size}'
    f'_n_hidden-{n_hidden}'
    f'_dropout-{dropout:0.2f}'
)
# optional suffixes for the boolean switches
if filtered:
    model_path += '_filtered'
if augmentation:
    model_path += '_augmentation'

print(model_path)
make_dir(model_path)

/home/matt/repos/ohia.ai/data/models/plantnet_mobilenetv1_seed-1_batch_size-32_n_hidden-0_dropout-0.00


In [6]:
# get list of images and labels
# glob returns matches in arbitrary order; sorting gives the seeded
# train/validation split a stable input order from run to run.
file_list = sorted(glob.glob(f'{FILE_PATH}/{IMAGE_DIR}/**/*.jpg', recursive=True))
if not file_list:
    # fail here with a clear message instead of deep inside the split/training code
    raise FileNotFoundError(f'no .jpg images found under {FILE_PATH}/{IMAGE_DIR}')

# the label of an image is the name of the directory that contains it
full_label_list = [os.path.basename(os.path.dirname(f)) for f in file_list]

# encode label names with ids
fle = FastLabelEncoder()
label_ids = fle.fit_transform(full_label_list)

# save id2label map
# Encode all distinct labels in one call and flatten the result, rather than
# calling int() on a one-element array per label (deprecated in recent NumPy).
unique_labels = np.unique(full_label_list)
unique_ids = np.asarray(fle.transform(list(unique_labels))).ravel()
id2label = {int(idx): str(label) for idx, label in zip(unique_ids, unique_labels)}
with open(f'{model_path}/plantnet_classes.json', 'w') as fp:
    json.dump(id2label, fp)

In [7]:
# split data: hold out 10% of the images for validation
train_files, valid_files, train_ids, valid_ids = train_test_split(
    file_list, label_ids, test_size=0.1, random_state=seed
)

# create generators
n_classes = len(np.unique(full_label_list))
train_generator = PlantNetGenerator(
    train_files, train_ids, n_classes,
    batch_size=batch_size,
    augment=augmentation
)
# Validation data is never augmented: the validation metrics drive early
# stopping, LR scheduling and checkpointing, so they should be measured on
# unmodified images (presumably `augment` applies random transforms -- confirm
# against PlantNetGenerator).
valid_generator = PlantNetGenerator(
    valid_files, valid_ids, n_classes,
    batch_size=batch_size,
    augment=False,
    shuffle=False
)

In [8]:
# define callbacks
# all three callbacks watch the same validation metric, where larger is better
_monitor_args = dict(monitor='val_top_3_accuracy', mode='max')

callbacks_list = [
    # stop training after 10 epochs without improvement
    callbacks.EarlyStopping(patience=10, verbose=1, **_monitor_args),
    # multiply the learning rate by 0.25 after 2 epochs without improvement
    callbacks.ReduceLROnPlateau(factor=0.25, patience=2, verbose=1, **_monitor_args),
    # write the full model (not just weights) whenever the metric improves
    callbacks.ModelCheckpoint(
        filepath=model_path + '/weights_{epoch:02d}.h5',
        save_best_only=True,
        save_weights_only=False,
        **_monitor_args
    ),
]

In [9]:
def get_model(model_name, n_classes, n_hidden, dropout):
    """Build and compile an ImageNet-pretrained classifier with a new softmax head.

    Args:
        model_name: Backbone to use; one of "mobilenetv1", "mobilenetv2",
            "nasnetmobile".
        n_classes: Number of units in the final softmax layer.
        n_hidden: Currently unused by this function; kept so existing callers
            keep working.
        dropout: Dropout rate applied after global average pooling. No dropout
            layer is added when this is not greater than 0.

    Returns:
        A compiled keras ``Model`` mapping the backbone input to ``n_classes``
        softmax outputs.

    Raises:
        ValueError: If ``model_name`` is not one of the supported backbones.
    """
    # Validate first, before the heavy framework imports and any weight loading.
    # (The original used `assert ValueError(...)`, which never fails because an
    # exception instance is truthy, so bad names surfaced later as a NameError.)
    supported_models = ('mobilenetv1', 'mobilenetv2', 'nasnetmobile')
    if model_name not in supported_models:
        raise ValueError(
            'model_name parameter must be one of the following'
            ' "mobilenetv1",'
            ' "mobilenetv2",'
            ' "nasnetmobile"'
        )

    from keras import layers, Model
    from keras.optimizers import Adam
    from keras.applications.mobilenet import MobileNet
    from keras.applications.mobilenetv2 import MobileNetV2
    from keras.applications.nasnet import NASNetMobile
    from ohia.metrics import top_1_accuracy, top_3_accuracy, top_5_accuracy

    # load pretrained ImageNet model (without its classification head)
    if model_name == 'mobilenetv1':
        base_model = MobileNet(
            input_shape=(224,224,3),
            weights='imagenet',
            include_top=False
        )
    elif model_name == 'mobilenetv2':
        base_model = MobileNetV2(
            input_shape=(224,224,3),
            alpha=1.4,
            weights='imagenet',
            include_top=False
        )
    else:  # 'nasnetmobile' -- guaranteed by the validation above
        base_model = NASNetMobile(
            input_shape=(224,224,3),
            weights='imagenet',
            include_top=False
        )

    # Optional layer freezing, left disabled as in the original:
    #     for layer in base_model.layers:
    #         if re.findall('_\\d_', layer.name):
    #             print(layer.name)
    #             layer.trainable = False

    # map ImageNet features to plants
    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    if dropout>0:
        x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(n_classes, activation='softmax')(x)

    # compile the model
    model = Model(inputs=base_model.input, outputs=outputs)
    model.compile(
        optimizer=Adam(lr=0.0001),
        loss='categorical_crossentropy',
        metrics=[top_1_accuracy, top_3_accuracy, top_5_accuracy]
    )

    return model

In [10]:
# train model
model = get_model(
    model_name=model_name,
    n_classes=n_classes,
    n_hidden=n_hidden,
    dropout=dropout,
)

# batches are produced by `n_thread` worker processes; training runs for at
# most 100 epochs, subject to the callbacks in callbacks_list
_fit_kwargs = dict(
    generator=train_generator,
    validation_data=valid_generator,
    callbacks=callbacks_list,
    epochs=100,
    workers=n_thread,
    use_multiprocessing=True,
)
model.fit_generator(**_fit_kwargs)

Epoch 1/100

Process ForkPoolWorker-63:
Process ForkPoolWorker-58:
Process ForkPoolWorker-64:
Process ForkPoolWorker-51:
Process ForkPoolWorker-40:
Process ForkPoolWorker-45:
Process ForkPoolWorker-56:
Process ForkPoolWorker-61:
Process ForkPoolWorker-55:
Process ForkPoolWorker-35:
Process ForkPoolWorker-36:
Process ForkPoolWorker-29:
Process ForkPoolWorker-31:
Process ForkPoolWorker-54:
Process ForkPoolWorker-60:
Process ForkPoolWorker-15:
Process ForkPoolWorker-18:
Process ForkPoolWorker-21:
Process ForkPoolWorker-38:
Process ForkPoolWorker-27:
Process ForkPoolWorker-62:
Process ForkPoolWorker-6:
Process ForkPoolWorker-34:
Process ForkPoolWorker-42:
Process ForkPoolWorker-33:
Process ForkPoolWorker-17:
Process ForkPoolWorker-57:
Process ForkPoolWorker-23:
Process ForkPoolWorker-4:
Process ForkPoolWorker-24:
Process ForkPoolWorker-13:
Process ForkPoolWorker-22:
Process ForkPoolWorker-53:
Process ForkPoolWorker-2:
Process ForkPoolWorker-25:
Process ForkPoolWorker-10:
Process ForkPoolWorker-20:
Proc

  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py

  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/mu

  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._

  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/synchronize.py", li

  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/matt/anaconda3/envs/kaggle/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
Keyboa

KeyboardInterrupt: 

In [None]:
# save results
# presumably one row per epoch and one column per logged metric -- confirm
history_by_metric = model.history.history
results = pd.DataFrame(history_by_metric)
results.to_csv(f'{model_path}/results.csv', index=False)

In [None]:
# print best results
# row position with the highest validation top-3 accuracy
best_row = results['val_top_3_accuracy'].values.argmax()
print(results.iloc[best_row])