## Initialization

In [1]:
import os, json
import numpy as np
from imageio import imread, imwrite
from skimage.transform import resize
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Conv2D, Dropout, Activation, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2

def describe(x):
    """
    Summarize an array on a single printed line.

    The line has the form ``<shape>, [<min>, <max>], <dtype>``.

    Inputs:
        x - Numpy-like array; must expose ``shape`` and ``dtype`` and be
            accepted by ``np.min`` / ``np.max``.
    Outputs:
        None (the summary is written to stdout).
    """
    lowest = np.min(x)
    highest = np.max(x)
    fields = (x.shape, [lowest, highest], x.dtype)
    print(', '.join(str(field) for field in fields))

# Central configuration for the notebook; every tunable value lives here.
cfg = {
    'TRAIN_DIR': 'miniplaces',       # root folder containing one sub-directory per class
    'MODEL_NAME': 'miniplaces',      # used for the model name and output file names
    'INPUT_SHAPE': [224, 224, 3],    # height, width, channels fed to the network
    'BATCH_SIZE': 8,
    'N_EPOCHS': 100,
    'LEARNING_RATE': 1e-3,
    'DECAY_PER_EPOCH': 0.8,          # intended per-epoch learning-rate decay factor
    'DROPOUT_RATE': 0.3,
    'N_TEST_EXAMPLES': 50,           # images held out per class for the test split
}
cfg['MODEL_DIR'] = os.path.join('models', cfg['MODEL_NAME'])

# Count only visible class sub-directories. A plain len(os.listdir(...)) also
# counts stray files and hidden folders (e.g. .DS_Store, .ipynb_checkpoints),
# which would silently inflate the one-hot depth and the classifier width.
cfg['N_CLASSES'] = len([
    entry for entry in os.listdir(cfg['TRAIN_DIR'])
    if not entry.startswith('.') and os.path.isdir(os.path.join(cfg['TRAIN_DIR'], entry))
])

## Make dataset

For this demo, we are using a subset of the Places2 dataset, a visual scene classification dataset curated by [MIT](http://places2.csail.mit.edu/). This subset consists of only 7 categories: 'bathroom', 'bedroom', 'dining_room', 'garage-indoor', 'kitchen', 'porch', and 'shower'.

In [2]:
def load_classification_dataset(imgs_dir, val_imgs_per_class=50):
    """
    Build train/test lists of image paths and integer labels from a folder tree.

    Every visible sub-directory of `imgs_dir` is treated as one class. Class
    names and file names are sorted so that label indices and the train/test
    split are reproducible across machines (os.listdir order is arbitrary).

    Inputs:
        imgs_dir - Root directory containing one sub-directory per class.
        val_imgs_per_class - Number of images per class to hold out for the
            test split (taken from the end of the sorted file list). If a class
            has fewer images than this, all of them go to the test split.
    Outputs:
        (img_paths, img_labels) - Training image paths and their class indices.
        (test_img_paths, test_img_labels) - Held-out paths and class indices.
        classes - Sorted list of class names; a label `i` refers to classes[i].
    Raises:
        ValueError - If `val_imgs_per_class` is negative.
    """
    if val_imgs_per_class < 0:
        raise ValueError('val_imgs_per_class must be >= 0, got {}'.format(val_imgs_per_class))

    # Leading dots matter: a bare 'tif' would also match names such as 'motif'.
    exts = ('.jpg', '.png', '.tif')

    # Skip stray files and hidden folders so they do not become bogus classes.
    classes = sorted(
        d for d in os.listdir(imgs_dir)
        if not d.startswith('.') and os.path.isdir(os.path.join(imgs_dir, d))
    )

    img_paths, img_labels = [], []
    test_img_paths, test_img_labels = [], []
    for i, c in enumerate(classes):
        class_dir = os.path.join(imgs_dir, c)
        all_img_paths = [os.path.join(class_dir, p)
                         for p in sorted(os.listdir(class_dir))
                         if p.lower().endswith(exts)]

        # Use an explicit split index: negative slicing breaks for a hold-out
        # size of 0 ([:-0] is empty, [-0:] is everything).
        split = max(len(all_img_paths) - val_imgs_per_class, 0)
        train_paths = all_img_paths[:split]
        held_out_paths = all_img_paths[split:]

        img_paths.extend(train_paths)
        test_img_paths.extend(held_out_paths)

        # Derive label counts from the actual slices so paths and labels can
        # never get out of sync, even for classes with too few images.
        img_labels.extend([i] * len(train_paths))
        test_img_labels.extend([i] * len(held_out_paths))

    return (img_paths, img_labels), (test_img_paths, test_img_labels), classes

# Split the image folder into training and held-out test lists of
# (paths, integer labels), plus the list of class names.
train_split, test_split, classes = load_classification_dataset(
    cfg['TRAIN_DIR'], cfg['N_TEST_EXAMPLES'])
x_train, y_train = train_split
x_test, y_test = test_split

In [3]:
# Circumvent bug with data if length is not a multiple of the batch size:
# keep only the largest prefix whose length divides evenly into batches.
#
# The keep-count is computed explicitly instead of slicing with `[:-excess]`,
# because when the length is already a multiple of the batch size `excess` is
# 0 and `[:-0]` evaluates to an EMPTY list, silently discarding the whole
# dataset (this also made the cell destructive when run a second time).
excess = len(x_train) % cfg['BATCH_SIZE']
n_train_keep = len(x_train) - excess
x_train = x_train[:n_train_keep]
y_train = y_train[:n_train_keep]

excess = len(x_test) % cfg['BATCH_SIZE']
n_test_keep = len(x_test) - excess
x_test = x_test[:n_test_keep]
y_test = y_test[:n_test_keep]

In [4]:
# Report the size of each split.
n_train_cases = len(x_train)
n_test_cases = len(x_test)
print('Total # training cases: {}'.format(n_train_cases))
print('Total # test cases: {}'.format(n_test_cases))

# Every image path must have exactly one label, in both splits.
for split_paths, split_labels in ((x_train, y_train), (x_test, y_test)):
    assert len(split_labels) == len(split_paths), (
        '# instances ({}) and # labels ({}) are not equal.'.format(
            len(split_paths), len(split_labels)))

print('Class names: {}'.format(classes))

Total # training cases: 34648
Total # test cases: 344
['data/places365_standard/demo/bathroom/00000001.jpg', 'data/places365_standard/demo/bathroom/00000002.jpg', 'data/places365_standard/demo/bathroom/00000003.jpg', 'data/places365_standard/demo/bathroom/00000004.jpg', 'data/places365_standard/demo/bathroom/00000005.jpg', 'data/places365_standard/demo/bathroom/00000006.jpg', 'data/places365_standard/demo/bathroom/00000007.jpg', 'data/places365_standard/demo/bathroom/00000008.jpg', 'data/places365_standard/demo/bathroom/00000009.jpg', 'data/places365_standard/demo/bathroom/00000010.jpg']
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Class names: ['bathroom', 'bedroom', 'dining_room', 'garage-indoor', 'kitchen', 'porch', 'shower']


## Define TensorFlow dataloaders

Next we wish to preprocess our data. This consists of two steps:

1. Loading data from disk into memory
2. Randomly augmenting the data

In the first step, we decode our images from JPEG into memory, resize them to the dimensions the network expects (in our case 224 x 224), convert and scale them from uint8 to float32 values in the range [0.0, 1.0], and "one-hot encode" the class label.

In the second step, we add random perturbations to our data. First we mirror the image horizontally with a coin flip, then we randomly adjust the brightness and saturation values of the images, and finally we clip the image in the range [0.0, 1.0].

TensorFlow defines models in a static graph, unlike PyTorch which uses a dynamic graph. This means that the model must be completely declared and initialized before training can begin. Directly using Python functions to preprocess data will therefore be much slower and you will likely encounter bugs during training. Because of this, it is best to use TensorFlow's preprocessing functions where possible.

In [7]:
def tf_load(img_path, img_label):
    """
    Read one image file and turn it into a network-ready (image, label) pair.

    Inputs:
        img_path - Path of the image file to read.
        img_label - Integer class index of the image.
    Outputs:
        Tuple of (image resized to the configured input height/width and
        divided by 255, one-hot label of depth cfg['N_CLASSES']).
    """
    target_size = cfg['INPUT_SHAPE'][:2]

    raw_bytes = tf.read_file(img_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize_images(decoded, target_size)
    # NOTE(review): resize_images presumably already yields float32 in
    # [0, 255], which would make convert_image_dtype a no-op here and the
    # division the only scaling step - confirm before reordering these calls.
    scaled = tf.image.convert_image_dtype(resized, tf.float32) / 255.0

    one_hot_label = tf.one_hot(img_label, cfg['N_CLASSES'])
    return scaled, one_hot_label

def tf_preprocess(img, img_label):
    """
    Apply random training-time augmentation to a single image.

    The image is randomly mirrored left/right, then its brightness and
    saturation are randomly perturbed, and finally values are clipped back
    into [0.0, 1.0]. The label is passed through untouched.

    Inputs:
        img - Image tensor to augment.
        img_label - Label belonging to the image.
    Outputs:
        Tuple of (augmented image, unchanged label).
    """
    flipped = tf.image.random_flip_left_right(img)
    brightened = tf.image.random_brightness(flipped, max_delta=32.0 / 255.0)
    saturated = tf.image.random_saturation(brightened, lower=0.5, upper=1.5)
    clipped = tf.clip_by_value(saturated, 0.0, 1.0)
    return clipped, img_label

In [8]:
num_parallel_calls = 4

# Training pipeline: shuffle the (cheap) path/label pairs, load and augment
# images in parallel, then batch and repeat for the requested epoch count.
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.shuffle(len(x_train))
dataset = dataset.map(tf_load, num_parallel_calls=num_parallel_calls)
dataset = dataset.map(tf_preprocess, num_parallel_calls=num_parallel_calls)
# The model input is declared with a fixed batch size, so never emit a
# smaller final batch.
dataset = dataset.batch(cfg['BATCH_SIZE'], drop_remainder=True)
dataset = dataset.repeat(cfg['N_EPOCHS'])
# Prefetch goes LAST so that a whole batch (not a single image) is prepared
# in the background while the previous batch is being consumed.
dataset = dataset.prefetch(1)

# Validation pipeline: same loading, no shuffling and no augmentation.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.map(tf_load, num_parallel_calls=num_parallel_calls)
val_dataset = val_dataset.batch(cfg['BATCH_SIZE'], drop_remainder=True)
val_dataset = val_dataset.repeat(cfg['N_EPOCHS'])
val_dataset = val_dataset.prefetch(1)

## Make pre-trained Mobilenet model for Places data

Using a pretrained model greatly facilitates model training, allowing networks to converge faster and usually with greater overall accuracy. We will load the MobileNetV2 model, which uses some specialized convolutional layers that drastically reduce the memory footprint of the CNN with only a small loss in accuracy on the ImageNet baseline. Because of this, it is a great model to choose for mobile and embedded applications, as it is typically smaller than 100 MB.

We now load MobileNetV2. Since it was originally trained to classify ImageNet object categories and we wish to classify scenes, we will remove the classification layer and add our own geared towards our target classes.

In [9]:
model_input = Input(shape=cfg['INPUT_SHAPE'], batch_size=cfg['BATCH_SIZE'])

# Create the headless pretrained network. `classes` is deliberately not
# passed: it only applies when include_top=True, so supplying it alongside
# include_top=False has no effect and misleads the reader.
# NOTE(review): the ImageNet weights presumably expect inputs scaled to
# [-1, 1] (as done by mobilenet_v2.preprocess_input) while the data pipeline
# feeds [0, 1] - confirm whether rescaling improves fine-tuning.
base_model = MobileNetV2(input_shape=cfg['INPUT_SHAPE'], alpha=1.0, include_top=False,
                         weights='imagenet', input_tensor=model_input,
                         pooling=None)

# Add a classification head for our classes: global pooling, dropout, then a
# 1x1 convolution acting as the per-class linear layer, followed by softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
# Read the feature width from the tensor instead of hard-coding 1280, so the
# head keeps working if `alpha` (and therefore the channel count) is changed.
n_features = int(x.shape[-1])
x = Reshape((1, 1, n_features))(x)
x = Dropout(cfg['DROPOUT_RATE'])(x)
x = Conv2D(cfg['N_CLASSES'], (1, 1), padding='same')(x)
x = Activation('softmax', name='softmax')(x)
predictions = Reshape((cfg['N_CLASSES'],))(x)

# Create the complete model, which references the pretrained network.
model = Model(inputs=base_model.input, outputs=predictions, name=cfg['MODEL_NAME'])

# (optional) freeze pretrained layers
# for layer in base_model.layers:
#     layer.trainable = False


In [10]:
# Print a layer-by-layer table of the assembled model (output shapes,
# parameter counts, and layer connections) as a quick sanity check.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (8, 224, 224, 3)     0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (8, 225, 225, 3)     0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (8, 112, 112, 32)    864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (8, 112, 112, 32)    128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu

## Train model

Compile optimizer, loss, and metrics for training.

In [11]:
steps_per_epoch = len(x_train) // cfg['BATCH_SIZE']
validation_steps = len(x_test) // cfg['BATCH_SIZE']

# Keras' `decay` argument is NOT a multiplicative factor. It applies
# inverse-time decay per optimizer update t:
#     lr_t = lr / (1 + decay * t)
# Passing 0.8 ** (1 / steps_per_epoch) (a value just below 1.0) therefore
# halves the learning rate after the very first update and collapses it to
# roughly lr / steps_per_epoch within one epoch.
#
# Instead, choose `decay` so that the learning rate has been scaled by
# cfg['DECAY_PER_EPOCH'] once the first epoch completes:
#     1 / (1 + decay * steps_per_epoch) == DECAY_PER_EPOCH
# Later epochs decay more slowly than a true exponential schedule; for an
# exact exponential schedule use a LearningRateScheduler callback instead.
lr_decay = (1.0 / cfg['DECAY_PER_EPOCH'] - 1.0) / steps_per_epoch

model.compile(optimizer=tf.keras.optimizers.Adam(lr=cfg['LEARNING_RATE'], decay=lr_decay),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

Set up several callbacks for saving, monitoring, and regulating training.

In [12]:
# Make sure the output folder exists before any callback writes into it.
model_dir = cfg['MODEL_DIR']
model_name = cfg['MODEL_NAME']
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# Checkpointing: keep only the weights with the lowest validation loss.
keras_path = os.path.join(model_dir, '{}.h5'.format(model_name))
cb_saver = tf.keras.callbacks.ModelCheckpoint(
    filepath=keras_path,
    monitor='val_loss',
    save_best_only=True,
    mode='min',
)

# Early stopping: watches the TRAINING loss (not val_loss) with a patience of
# three epochs.
cb_earlystopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# Per-epoch metrics written to a CSV file next to the model.
csv_path = os.path.join(model_dir, '{}_training.csv'.format(model_name))
cb_csvlogger = tf.keras.callbacks.CSVLogger(csv_path)

# TensorBoard logging with histograms computed every epoch.
log_dir = os.path.join(model_dir, 'logs')
cb_tboard = tf.keras.callbacks.TensorBoard(log_dir, histogram_freq=1)

Begin training loop!

In [13]:
# Train for at most N_EPOCHS epochs, validating after each one. Checkpointing,
# early stopping, and CSV logging are attached as callbacks.
model.fit(
    dataset,
    epochs=cfg['N_EPOCHS'],
    steps_per_epoch=steps_per_epoch,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    callbacks=[cb_saver, cb_earlystopping, cb_csvlogger],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


<tensorflow.python.keras.callbacks.History at 0x7f71cc489e10>