In [1]:
from __future__ import absolute_import, division, print_function

import argparse
import os
import random
from datetime import datetime

import cv2
import keras
import numpy as np
import pandas as pd
from keras import backend as K
from keras.applications import *
from keras.applications.inception_v3 import preprocess_input
from keras.callbacks import *
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import *
from keras.utils.vis_utils import model_to_dot
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
# Label table: one row per training image; the columns used here are
# `id` (image file stem) and `breed` (class name).
df = pd.read_csv('../dog_breed_datasets/labels.csv')

n = len(df)
breed = set(df['breed'])
n_class = len(breed)
# Build the class <-> index mapping from the *sorted* breed names. Iterating a
# bare set has no guaranteed order, so the mapping could differ between
# sessions and silently mismatch labels against previously saved weights.
# NOTE: weights saved under the old set-order mapping should be regenerated.
breed_names = sorted(breed)
class_to_num = dict(zip(breed_names, range(n_class)))
num_to_class = dict(zip(range(n_class), breed_names))

width = 299
# Images are kept as raw 0-255 RGB pixels in uint8 to keep the array small.
# Scaling to [-1, 1] must NOT happen here: a uint8 array cannot represent
# fractional or negative values, so storing scaled pixels destroys the data.
# The feature extractor in `run` applies `preprocess_input` inside the model.
X = np.zeros((n, width, width, 3), dtype=np.uint8)
y = np.zeros((n, n_class), dtype=np.uint8)
# Loading Datasets
print('\n\n Loading Datasets. \n')
for i, (image_id, breed_name) in enumerate(
        tqdm(zip(df['id'], df['breed']), total=n)):
    image_path = '../dog_breed_datasets/train/%s.jpg' % image_id
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise IOError('Could not read image: %s' % image_path)
    # OpenCV decodes to BGR; the ImageNet-pretrained Keras models expect RGB.
    X[i] = cv2.cvtColor(cv2.resize(img, (width, width)), cv2.COLOR_BGR2RGB)
    y[i, class_to_num[breed_name]] = 1  # one-hot label

# 90 / 10 train / validation split in file order (slices are views, no copy).
dvi = int(X.shape[0] * 0.9)
x_train, x_val = X[:dvi], X[dvi:]
y_train, y_val = y[:dvi], y[dvi:]

  0%|          | 23/10222 [00:00<00:44, 229.60it/s]



 Loading Datasets. 



100%|██████████| 10222/10222 [00:31<00:00, 323.74it/s]


In [6]:
# Persist the training labels to disk.
# NOTE: np.save writes NumPy's .npy format and appends ".npy" when the given
# name does not already end with it, so the file created here is
# "y_train.h5.npy" -- it is not an HDF5 file despite the ".h5" in the name.
np.save('y_train.h5', y_train)

In [4]:
# Re-create the training split from the full arrays. These are the same
# slices (first `dvi` samples) already taken at the end of the data-loading
# cell, so this cell is redundant when the notebook is run top to bottom.
x_train = X[:dvi, :, :, :]
y_train = y[:dvi, :]

MemoryError: 

In [3]:
def run(model_name, lr, optimizer, epoch, patience, batch_size, test=None):
    """Fine-tune, or evaluate, an ImageNet-pretrained CNN on the dog-breed data.

    Reads the module-level arrays ``x_train``/``y_train`` (training) and
    ``x_val``/``y_val`` (validation). Images are expected as raw 0-255 pixel
    values: both the bottleneck feature extractor and the fine-tuned model
    apply ``preprocess_input`` as their first layer.

    Weight initialisation is best-effort, in this order:
      1. ``<model_name>.h5`` (a previous fine-tuning checkpoint);
      2. ``fc_<model_name>.h5`` (a previously trained classifier head);
      3. otherwise a head is trained on bottleneck features, saved to
         ``fc_<model_name>.h5`` and copied into the model.

    Args:
        model_name: "Xception", "InceptionV3" or "InceptionResNetV2".
        lr: learning rate handed to the optimizer.
        optimizer: "Adam" or "SGD" (SGD uses momentum 0.9 with Nesterov).
        epoch: maximum number of epochs (converted to int).
        patience: EarlyStopping patience, in epochs, on ``val_loss``.
        batch_size: mini-batch size for training / evaluation.
        test: falsy -> train. An ``(x_test, y_test)`` pair -> evaluate on it.
            Any other truthy value -> evaluate on the module-level
            ``x_test`` / ``y_test`` arrays (legacy behaviour).

    Returns:
        None after training; the ``evaluate_generator`` result when
        evaluating.

    Raises:
        ValueError: if ``optimizer`` is not "Adam" or "SGD".
        KeyError: if ``model_name`` is not one of the supported models.
    """
    # Fail fast instead of silently leaving the model uncompiled.
    if optimizer not in ("Adam", "SGD"):
        raise ValueError(
            "Unsupported optimizer %r; expected 'Adam' or 'SGD'." %
            (optimizer, ))

    width = x_train.shape[1]
    n_class = y_train.shape[1]

    # Compute the bottleneck feature
    def get_features(MODEL, data=x_train):
        """Return globally average-pooled backbone activations for `data`."""
        backbone = MODEL(
            include_top=False,
            input_shape=(width, width, 3),
            weights='imagenet')
        inputs = Input((width, width, 3))
        x = Lambda(preprocess_input, name='preprocessing')(inputs)
        x = backbone(x)
        x = GlobalAveragePooling2D()(x)
        extractor = Model(inputs, x)
        return extractor.predict(data, batch_size=32, verbose=1)

    def try_load_weights(model, path, by_name=False):
        """Best-effort weight load; return True on success, False otherwise.

        Catches `Exception` (missing file, incompatible shapes, ...) rather
        than using a bare `except`, so KeyboardInterrupt / SystemExit still
        propagate.
        """
        try:
            model.load_weights(path, by_name=by_name)
        except Exception:
            return False
        return True

    def steps_for(n_samples):
        """Whole number of batches needed to cover `n_samples` once."""
        return int(np.ceil(n_samples / float(batch_size)))

    def fine_tune(MODEL,
                  model_name,
                  optimizer,
                  lr,
                  epoch,
                  patience,
                  batch_size,
                  X=x_train,
                  test=None):
        """Build the full model, initialise its weights, then train or evaluate."""
        # Fine-tune the model
        print("\n\n Fine tune " + model_name + " : \n")

        # Project-local helper; imported lazily as in the original code.
        from random_eraser import get_random_eraser
        datagen = ImageDataGenerator(
            preprocessing_function=get_random_eraser(pixel_level=True),
            horizontal_flip=True,
            shear_range=0.1,
            zoom_range=0.1,
            rotation_range=10,
            width_shift_range=0.1,
            height_shift_range=0.1)

        val_datagen = ImageDataGenerator()

        inputs = Input((width, width, 3))
        # Same preprocessing as get_features(): the classifier head is trained
        # on features of preprocessed images, so the fine-tuned model must see
        # its inputs scaled the same way. The Lambda layer has no weights, so
        # existing checkpoints still load.
        x = Lambda(preprocess_input, name='preprocessing')(inputs)
        cnn_model = MODEL(
            include_top=False,
            input_shape=(width, width, 3),
            weights='imagenet')
        x = cnn_model(x)
        x = GlobalAveragePooling2D()(x)
        x = Dropout(0.5)(x)
        x = Dense(n_class, activation='softmax', name='predictions')(x)
        model = Model(inputs=inputs, outputs=x)

        # Loading weights
        if try_load_weights(model, model_name + '.h5'):
            print('Load ' + model_name + '.h5 successfully.')
        elif try_load_weights(
                model, 'fc_' + model_name + '.h5', by_name=True):
            print('Fail to load ' + model_name + '.h5, load fc_' +
                  model_name + '.h5 instead.')
        else:
            print('Start computing ' + model_name + ' bottleneck feature: ')
            features = get_features(MODEL, X)

            # Training models
            head_inputs = Input(features.shape[1:])
            head = Dropout(0.5)(head_inputs)
            head = Dense(
                n_class, activation='softmax', name='predictions')(head)
            model_fc = Model(head_inputs, head)
            model_fc.compile(
                optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
            # NOTE: labels come from the module-level y_train, so X is
            # expected to be the matching training images.
            model_fc.fit(
                features,
                y_train,
                batch_size=128,
                epochs=5,
                validation_split=0.1)

            # The second positional argument of save() is `overwrite`;
            # spell it out instead of passing the truthy string 'w'.
            model_fc.save('fc_' + model_name + '.h5', overwrite=True)

            # Actually use the freshly trained head: without this the
            # fine-tuning below would start from a randomly initialised
            # 'predictions' layer on the first run.
            model.get_layer('predictions').set_weights(
                model_fc.get_layer('predictions').get_weights())

        print("\n " + "Optimizer=" + optimizer + " lr=" + str(lr) + " \n")
        if optimizer == "Adam":
            # Honour the requested learning rate (the string alias 'adam'
            # would silently fall back to the Keras default).
            opt = Adam(lr=lr)
        else:  # "SGD" -- validated at the top of run()
            opt = SGD(lr=lr, momentum=0.9, nesterov=True)
        model.compile(
            loss='categorical_crossentropy',
            optimizer=opt,
            metrics=['accuracy'])

        if not test:

            class LossHistory(keras.callbacks.Callback):
                """Collects (loss, val_loss) once per epoch."""

                def on_train_begin(self, logs=None):
                    self.losses = []

                def on_epoch_end(self, epoch, logs=None):
                    logs = logs or {}
                    self.losses.append((logs.get('loss'),
                                        logs.get("val_loss")))

            history = LossHistory()

            early_stopping = EarlyStopping(
                monitor='val_loss', patience=patience, verbose=1, mode='auto')
            checkpointer = ModelCheckpoint(
                filepath=model_name + '.h5', verbose=0, save_best_only=True)
            reduce_lr = ReduceLROnPlateau(factor=0.2, patience=3, verbose=1)
            # Step and epoch counts must be integers; callers pass values
            # such as 1e4 and the divisions below are not exact.
            model.fit_generator(
                datagen.flow(x_train, y_train, batch_size=batch_size),
                steps_per_epoch=steps_for(len(x_train)),
                validation_data=val_datagen.flow(
                    x_val, y_val, batch_size=batch_size),
                validation_steps=steps_for(len(x_val)),
                epochs=int(epoch),
                callbacks=[history, early_stopping, checkpointer, reduce_lr])
            # Append this run's per-epoch losses to the running log file.
            with open(model_name + ".csv", 'a') as f_handle:
                np.savetxt(f_handle, history.losses)
        else:
            print('Evalute on test set')
            if isinstance(test, (tuple, list)) and len(test) == 2:
                x_eval, y_eval = test
            else:
                # Legacy path: rely on module-level test arrays.
                x_eval, y_eval = x_test, y_test
            # No val_datagen.fit() here: the generator uses no feature-wise
            # statistics, so fitting it would only copy the data.
            score = model.evaluate_generator(
                val_datagen.flow(x_eval, y_eval, batch_size=batch_size),
                steps_for(len(x_eval)))
            print(score)
            return score

    list_model = {
        "Xception": Xception,
        "InceptionV3": InceptionV3,
        "InceptionResNetV2": InceptionResNetV2
    }
    # Propagate the evaluation score (None when training).
    return fine_tune(list_model[model_name], model_name, optimizer, lr, epoch,
                     patience, batch_size, x_train, test)


In [5]:
run('Xception', 5e-4, 'SGD', 1e4, 7, 16)



 Fine tune Xception : 

Fail to load Xception.h5, load fc_Xception.h5 instead.

 Optimizer=SGD lr=0.0005 

Epoch 1/10000

ResourceExhaustedError: OOM when allocating tensor with shape[16,10,10,2048]
	 [[Node: training_1/SGD/gradients/global_average_pooling2d_2/Mean_grad/Tile = Tile[T=DT_FLOAT, Tmultiples=DT_INT32, _class=["loc:@global_average_pooling2d_2/Mean"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_1/SGD/gradients/global_average_pooling2d_2/Mean_grad/Reshape, training_1/SGD/gradients/global_average_pooling2d_2/Mean_grad/floordiv)]]

Caused by op u'training_1/SGD/gradients/global_average_pooling2d_2/Mean_grad/Tile', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-9cd3a6cbd2cf>", line 1, in <module>
    run('Xception', 5e-4, 'SGD', 1e4, 7, 16)
  File "<ipython-input-3-ef3a4326330e>", line 139, in run
    patience, batch_size, x_train, test)
  File "<ipython-input-3-ef3a4326330e>", line 121, in fine_tune
    callbacks=[history, early_stopping, checkpointer, reduce_lr])
  File "/usr/local/lib/python2.7/dist-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1996, in fit_generator
    self._make_train_function()
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 990, in _make_train_function
    loss=self.total_loss)
  File "/usr/local/lib/python2.7/dist-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/keras/optimizers.py", line 156, in get_updates
    grads = self.get_gradients(loss, params)
  File "/usr/local/lib/python2.7/dist-packages/keras/optimizers.py", line 73, in get_gradients
    grads = K.gradients(loss, params)
  File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2389, in gradients
    return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 581, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 353, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 581, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_grad.py", line 98, in _MeanGrad
    sum_grad = _SumGrad(op, grad)[0]
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_grad.py", line 63, in _SumGrad
    return [array_ops.tile(grad, tile_scaling), None]
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 5621, in tile
    "Tile", input=input, multiples=multiples, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op u'global_average_pooling2d_2/Mean', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
[elided 20 identical lines from previous traceback]
  File "<ipython-input-3-ef3a4326330e>", line 139, in run
    patience, batch_size, x_train, test)
  File "<ipython-input-3-ef3a4326330e>", line 51, in fine_tune
    x = GlobalAveragePooling2D()(x)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 603, in __call__
    output = self.call(inputs, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/keras/layers/pooling.py", line 536, in call
    return K.mean(inputs, axis=[1, 2])
  File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 1344, in mean
    return tf.reduce_mean(x, axis=axis, keep_dims=keepdims)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1411, in reduce_mean
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 2568, in _mean
    keep_dims=keep_dims, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[16,10,10,2048]
	 [[Node: training_1/SGD/gradients/global_average_pooling2d_2/Mean_grad/Tile = Tile[T=DT_FLOAT, Tmultiples=DT_INT32, _class=["loc:@global_average_pooling2d_2/Mean"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_1/SGD/gradients/global_average_pooling2d_2/Mean_grad/Reshape, training_1/SGD/gradients/global_average_pooling2d_2/Mean_grad/floordiv)]]
