# Training and Evaluation

We will take a first pass at evaluating our technique to start understanding its efficacy. We will use existing CNN architectures and evaluate their performance on our categories of interest, with and without using our categories of interest.

In [1]:
import cv2
import datetime
from matplotlib import pyplot as plt
import numpy as np
import os
import sys
import tensorflow as tf
import tensorflow_addons as tfa
print('TensorFlow Version: ', tf.__version__)

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

np.random.seed(123)

TensorFlow Version:  2.1.0


In [2]:
# Set hyperparameters for training & validation
# Default `input_shape` of the model builders; matches the 64x64, 3-channel
# images produced by decode_img.
INPUT_SHAPE = (64, 64, 3)
# Batch size applied to the training datasets.
BATCH_SIZE = 64
# Fraction of the shuffled (path, label) pairs held out for validation.
VALIDATION_SPLIT = 0.10
# Passed to model.fit as steps_per_epoch.
TRAIN_STEPS_PER_EPOCH = 1200
# NOTE(review): not referenced by any cell visible in this notebook section.
TEST_STEPS = 500
# Upper bound on epochs passed to model.fit (early stopping may end sooner).
NUM_EPOCHS = 250

In [13]:
# Define utilities and helper functions
# NOTE: Copied from clustering NB
def load_metadata(filename):
    """Read a tab-separated metadata file into a list of field lists.

    Every line of the file is stripped of surrounding whitespace and then
    split on tab characters, producing one list of strings per line.
    """
    rows = []
    with open(filename, 'r') as handle:
        for line in handle:
            rows.append(line.strip().split('\t'))
    return rows
    
@tf.function
def decode_img(image):
    """Decode JPEG bytes into a 64x64 float32 image tensor with 3 channels.

    Args:
        image: scalar string tensor holding the encoded JPEG contents.

    Returns:
        The decoded image, converted to float32 and resized to 64x64.
    """
    decoded = tf.image.decode_jpeg(image, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(as_float, [64, 64])
    return resized

@tf.function
def load_image_data(path, label):
    """Read the file at `path`, decode it with decode_img, and pair it with `label`.

    The label is passed through untouched so this can be used directly as a
    `Dataset.map` function over (path, label) elements.
    """
    return decode_img(tf.io.read_file(path)), label
    
def load_labels(metadata):
    """One-hot encode the label column (index 1) of the metadata rows.

    Args:
        metadata: sequence of rows whose second field is the class label.

    Returns:
        A tuple `(one_hot, encoder)`: the dense one-hot matrix with one row
        per metadata row, and the fitted OneHotEncoder so the same mapping
        can be reused on other label lists.
    """
    labels = np.array([row[1] for row in metadata])
    encoder = OneHotEncoder(sparse=False)
    # Fit on the distinct label values only; each sample is a 1-element row.
    encoder.fit(np.array([[name] for name in set(labels)]))
    one_hot = encoder.transform([[name] for name in labels])
    return (one_hot, encoder)

In [4]:
# Create functions for three models: (i) custom, simple CNN, (ii) MobileNet + FCs, and (iii) VGG16 + FCs
def get_simplecnn(input_shape=INPUT_SHAPE):
    """Build the custom CNN: four conv/max-pool stages followed by a dense head.

    Args:
        input_shape: shape of a single input image, passed to the first layer.

    Returns:
        An uncompiled `tf.keras.Sequential` model ending in a 200-way softmax.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential()

    # First stage carries the input shape and uses a 3x3 kernel.
    model.add(layers.Conv2D(512, (3, 3), (1, 1), input_shape=input_shape, activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    # Remaining stages all use 2x2 kernels with stride 1.
    for filters in (512, 256, 256):
        model.add(layers.Conv2D(filters, (2, 2), (1, 1), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))

    # Classifier head.
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(200, activation='softmax'))
    return model

def get_mobilenet(input_shape=INPUT_SHAPE):
    """Build a MobileNet feature extractor with a small classification head.

    Every layer of the MobileNet base is marked non-trainable, so only the
    layers added on top of it are left trainable.

    Args:
        input_shape: shape of a single input image.

    Returns:
        An uncompiled `tf.keras.Sequential` model ending in a 200-way softmax.
    """
    backbone = tf.keras.applications.MobileNet(input_shape=input_shape, include_top=False)
    for layer in backbone.layers:
        layer.trainable = False

    head = [
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(200, activation='softmax'),
    ]
    return tf.keras.Sequential([backbone] + head)

def get_vgg16(input_shape=INPUT_SHAPE):
    """Build a VGG16 feature extractor with a fully-connected classification head.

    Every layer of the VGG16 base is marked non-trainable, so only the
    layers added on top of it are left trainable.

    Args:
        input_shape: shape of a single input image.

    Returns:
        An uncompiled `tf.keras.Sequential` model ending in a 200-way softmax.
    """
    backbone = tf.keras.applications.VGG16(input_shape=input_shape, include_top=False)
    for layer in backbone.layers:
        layer.trainable = False

    head = [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(200, activation='softmax'),
    ]
    return tf.keras.Sequential([backbone] + head)

In [None]:
# Instantiate the custom CNN with the default input shape and print its
# layer-by-layer summary.
simplecnn = get_simplecnn()
simplecnn.summary()

In [None]:
# Instantiate the MobileNet-based model (used as `mobilenet` by the training
# and saving cells further down) and print its summary.
mobilenet = get_mobilenet()
mobilenet.summary()

In [None]:
# Instantiate the VGG16-based model and print its summary.
vgg16 = get_vgg16()
vgg16.summary()

In [5]:
# TODO: Remove hardcoding
print('Loading data into memory...')
train_metadata = load_metadata('./metadata_output/train_metadata.txt')
y_train, encoder = load_labels(train_metadata)

# Column indices (within the one-hot encoding) of the categories we want to
# single out when filtering test data. Each one-hot row holds exactly one 1,
# so the second column of argwhere is that row's class index, in row order.
interested_categories = ['n01882714', 'n04562935']
interested_one_hot = encoder.transform([[category] for category in interested_categories])
interested_indices = np.argwhere(interested_one_hot == 1)[:, 1]
print('Done.')

Loading data into memory...
Done.


In [6]:
# Encoding sanity check: a valid one-hot matrix contains exactly one 1 per
# sample, so the total number of ones must equal the number of metadata rows.
assert np.count_nonzero(y_train == 1) == len(train_metadata)

## (1) BASELINE MODEL: MOBILENET

In [7]:
def _build_image_dataset(paths_and_labels, num_classes, batch_size):
    """Turn (path, one-hot label) pairs into a cached, repeating, batched dataset.

    Args:
        paths_and_labels: list of (image path, one-hot label vector) tuples.
        num_classes: length of each one-hot label vector.
        batch_size: number of elements per batch.

    Returns:
        A `tf.data.Dataset` yielding (image batch, label batch). Images are
        loaded once via load_image_data and cached; the dataset repeats
        indefinitely, so consumers must bound it with a step count.
    """
    dataset = tf.data.Dataset.from_generator(
        lambda: paths_and_labels,
        (tf.string, tf.int32),
        (tf.TensorShape([]), tf.TensorShape([num_classes]))
    )
    dataset = dataset.map(lambda path, label: load_image_data(path, label),
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.cache()
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)


# Get all data
paths_and_labels = [(train_metadata[x][0], y_train[x]) for x in range(len(y_train))]
print('Num. Total Images: ', len(paths_and_labels))

# Split data into train and validation sets (one-time shuffle, seeded via np.random.seed)
np.random.shuffle(paths_and_labels)
num_validation = int(len(paths_and_labels) * VALIDATION_SPLIT)
train_paths_and_labels = paths_and_labels[num_validation:]
validation_paths_and_labels = paths_and_labels[:num_validation]
print('Num. Train Images: ', len(train_paths_and_labels))
print('Num. Validation Images: ', len(validation_paths_and_labels))

# Training set: batches of BATCH_SIZE images
train_dataset = _build_image_dataset(train_paths_and_labels, len(y_train[0]), BATCH_SIZE)

# Validation set: batch size 1, so `num_validation` steps cover every
# validation image exactly once (train_model relies on this).
validation_dataset = _build_image_dataset(validation_paths_and_labels, len(y_train[0]), 1)

Num. Total Images:  70000
Num. Train Images:  63000
Num. Validation Images:  7000


In [8]:
def train_model(model, train_dataset, validation_dataset, name):
    """Compile `model` and fit it on `train_dataset`, validating after each epoch.

    Args:
        model: Keras model whose output layer emits class probabilities (all
            model builders in this notebook end in a softmax layer).
        train_dataset: dataset of (image batch, one-hot label batch); it is
            consumed for TRAIN_STEPS_PER_EPOCH steps per epoch, so it is
            expected to repeat.
        validation_dataset: dataset of (image, one-hot label) used for
            validation; consumed for `num_validation` steps per epoch
            (notebook-level global).
        name: sub-directory of ./checkpoints/ used for this run's checkpoints.

    Returns:
        The History object returned by `model.fit`.
    """
    # The models already apply softmax, so the loss must treat predictions as
    # probabilities; from_logits=True would apply softmax a second time.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=4e-4),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])

    # Stop early if validation loss has not improved for 10 epochs, and roll
    # back to the best weights seen.
    early_stop_monitor = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                          restore_best_weights=True,
                                                          patience=10)

    # Prepare for checkpoints
    # NOTE(review): with such a large integer save_freq, checkpoints will be
    # written very rarely (if at all) — confirm this is intended.
    checkpoint_path = './checkpoints/' + name + '/cp-{epoch:04d}.ckpt'
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                     verbose=1,
                                                     save_weights_only=False,
                                                     save_freq=25000000)

    # Tensorboard: one timestamped log directory per call
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    # `shuffle` and `use_multiprocessing` are intentionally not passed: they
    # do not apply to tf.data.Dataset inputs.
    history = model.fit(x=train_dataset,
                        epochs=NUM_EPOCHS,
                        steps_per_epoch=TRAIN_STEPS_PER_EPOCH,
                        callbacks=[tensorboard_callback, cp_callback, early_stop_monitor],
                        validation_steps=num_validation,
                        validation_data=validation_dataset)

    return history

In [9]:
# Evaluate model on interesting inputs
def evaluate_model(model, test_sets):
    """Evaluate `model` on each test set and return the collected results.

    Args:
        model: object exposing `evaluate(X, y)` (e.g. a compiled Keras model).
        test_sets: iterable of indexable items where item[0] holds the inputs
            and item[1] holds the targets.

    Returns:
        A list with the value returned by `model.evaluate` for each test set,
        in the same order as `test_sets` (empty list for no test sets).
    """
    results = []
    for test_set in test_sets:
        inputs = test_set[0]
        targets = test_set[1]
        # Keep the metrics instead of discarding them so callers can compare runs.
        results.append(model.evaluate(inputs, targets))
    return results

In [None]:
# Train the baseline MobileNet on the un-augmented training set; checkpoints
# go under ./checkpoints/mobilenet_imbalanced/ (see train_model).
train_model(mobilenet, train_dataset, validation_dataset, 'mobilenet_imbalanced')

In [None]:
# Persist the trained baseline model. exist_ok=True creates the directory
# only when needed, without a separate existence check.
mobilenet_dir = os.path.join('models', 'mobilenet_imbalanced')
os.makedirs(mobilenet_dir, exist_ok=True)

mobilenet.save(mobilenet_dir)
print('model saved')

In [None]:
# TODO: Load test data, filter for interest, evaluate model

## (2) MOBILENET + STANDARD AUGMENTATIONS

In [10]:
# Fresh MobileNet-based model, to be trained on the augmented training dataset
mobilenet_std_aug = get_mobilenet()



In [21]:
# TODO: We have to somehow incorporate the below with tf.Datasets
# train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
#     rotation_range=45,
#     width_shift_range=0.4,
#     height_shift_range=0.4,
#     zoom_range=[0.4, 1.6],
#     horizontal_flip=True,
#     brightness_range=(0.6, 1.4),
#     fill_mode='nearest',
# )

# NOTE: Apply a map function to perform transformations rather than using ImageDataGen
@tf.function
def std_augment_image(img_tensor, label):
    """Apply random standard augmentations to one image; the label is untouched.

    Augmentations, in order:
      * random zoom (probability 0.8): crop a random square whose side is
        40%-80% of 64 pixels, then resize it back to 64x64;
      * random brightness shift with max delta 0.6 (probability 0.8);
      * random horizontal (left/right) flip.

    Args:
        img_tensor: image tensor with 3 channels (64x64 as produced by decode_img).
        label: label associated with the image; returned unchanged.

    Returns:
        Tuple `(augmented_image, label)`.
    """
    transformed = img_tensor
    # Random rotation is currently disabled (previously sketched with
    # tfa.image.rotate and a random angle).

    # Random zoom
    if tf.random.uniform([]) <= 0.8:
        # random_crop needs integer sizes, so cast the sampled side length explicitly.
        crop_size = tf.cast(tf.random.uniform([], minval=0.4, maxval=0.8) * 64.0, tf.int32)
        cropped = tf.image.random_crop(transformed, [crop_size, crop_size, 3])
        transformed = tf.image.resize(cropped, [64, 64])
    # Random brightness adjustment
    # NOTE(review): the shifted values are not clipped afterwards — confirm
    # the model is meant to see pixel values outside the original range.
    if tf.random.uniform([]) <= 0.8:
        transformed = tf.image.random_brightness(transformed, 0.6)
    # Random horizontal flip (left/right mirror; previously this flipped up/down)
    transformed = tf.image.random_flip_left_right(transformed)
    return (transformed, label)

In [22]:
# Redefine the training dataset with standard augmentations.
# Decoded images are cached *before* augmentation so that each pass over the
# data draws fresh random transformations from std_augment_image.
train_dataset_std_aug = (
    tf.data.Dataset.from_generator(
        lambda: train_paths_and_labels,
        (tf.string, tf.int32),
        (tf.TensorShape([]), tf.TensorShape([len(y_train[0])])),
    )
    .map(lambda path, label: load_image_data(path, label),
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    .map(std_augment_image,
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .repeat()
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

In [None]:
# Train MobileNet on the augmented training set (validation data is not
# augmented); checkpoints go under ./checkpoints/mobilenet_imbalanced_std_aug/.
train_model(mobilenet_std_aug, train_dataset_std_aug, validation_dataset, 'mobilenet_imbalanced_std_aug')

Train for 1200 steps, validate for 7000 steps
Epoch 1/250
Epoch 2/250

In [None]:
# Persist the model trained with standard augmentations. exist_ok=True creates
# the directory only when needed, without a separate existence check.
mobilenet_std_aug_dir = os.path.join('models', 'mobilenet_imbalanced_std_aug')
os.makedirs(mobilenet_std_aug_dir, exist_ok=True)

mobilenet_std_aug.save(mobilenet_std_aug_dir)
print('model saved')

## (3) MOBILENET + SINGAN