In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import tensorflow as tf
print("Using tensorflow version: " + tf.__version__)
print("Using eager execution: " + str(tf.executing_eagerly())) 

Using tensorflow version: 2.2.0
Using eager execution: True


In [2]:
import pathlib
from PIL import Image
import os
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import time

In [3]:
# Clone the tensorflow models repository if it doesn't already exist
if "models" in pathlib.Path.cwd().parts:
    while "models" in pathlib.Path.cwd().parts:
        os.chdir('..')
elif not pathlib.Path('models').exists():
    !git clone --depth 1 https://github.com/tensorflow/models

In [4]:
from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

In [5]:
DATASET_PATH = "../dataset"

masked_faces_paths = list(pathlib.Path(DATASET_PATH + "/masked_faces").glob('*'))
normal_faces_paths = list(pathlib.Path(DATASET_PATH + "/normal_faces").glob('*'))

length = len(masked_faces_paths)
print(f"Total number of samples: {length}")

split_ratio = 0.8
training_size = int(split_ratio * length)
validation_size = length - training_size
print(f"Dataset training size: {training_size} | Dataset validation size: {validation_size}")

training_paths = masked_faces_paths[:training_size] + normal_faces_paths[:training_size]
validation_paths = masked_faces_paths[training_size:] + normal_faces_paths[training_size:]

targets = None
with open(DATASET_PATH + "/targets.json") as json_file:
	targets = json.load(json_file)

Total number of samples: 680
Dataset training size: 544 | Dataset validation size: 136


In [6]:
image_path = training_paths[2]
image_name = str(image_path).split(os.sep)[3]
str(image_path).split(os.sep)[3]
targets[image_name]
image = Image.open(image_path).convert("RGB")
np.array(targets[image_name]["bbox"]).shape
plt.imshow(image)


&#39;101.jpg&#39;

{&#39;bbox&#39;: [[194, 246, 656, 708]]}

(1, 4)

&lt;matplotlib.image.AxesImage at 0x1f49128d988&gt;

In [7]:
def get_dataset():
    num_classes = 2
    label_id_offset = 1

    train_image_tensors = []
    gt_classes_one_hot_tensors = []
    gt_box_tensors = []

    for idx, image_path in enumerate(training_paths):
        masked = True if idx < training_size else False
        train_image_np = np.array(Image.open(image_path).convert("RGB"))
        image_name = str(image_path).split(os.sep)[3]
        gt_box_np = np.array(targets[image_name]["bbox"])

        train_image_tensors.append(
            tf.expand_dims(
                tf.convert_to_tensor(
                    train_image_np, dtype=tf.float32
                ), axis=0)
            )

        gt_box_tensors.append(tf.convert_to_tensor(gt_box_np, dtype=tf.float32))
        zero_indexed_groundtruth_classes = tf.convert_to_tensor(
            np.zeros(shape=[gt_box_np.shape[0]], dtype=np.int32) if masked else np.ones(shape=[gt_box_np.shape[0]], dtype=np.int32)
        )
        
        gt_classes_one_hot_tensors.append(
            tf.one_hot(
                zero_indexed_groundtruth_classes, num_classes
            )
        )
    print('Done prepping data.')
    return train_image_tensors, gt_classes_one_hot_tensors, gt_box_tensors

In [36]:
def get_model():
    print('Building model and restoring weights for fine-tuning...', flush=True)
    num_classes = 2
    # pipeline_config = 'models/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config'
    # pipeline_config = 'models/research/object_detection/configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config'
    pipeline_config = 'models/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config'

    # pipeline_config = 'models/research/object_detection/test_data/pipeline.config'
    # checkpoint_path = 'models/research/object_detection/test_data/checkpoint_mobilenet/ckpt-0'
    # checkpoint_path = 'models/research/object_detection/test_data/checkpoint_resnet/ckpt-0'
    # checkpoint_path = 'models/research/object_detection/test_data/checkpoint_mobilenet_fpn/ckpt-0'

    print(tf.train.list_variables(tf.train.latest_checkpoint("models/research/object_detection/test_data/checkpoint_mobilenet_fpn")))

    configs = config_util.get_configs_from_pipeline_file(pipeline_config)
    model_config = configs['model']
    model_config.ssd.num_classes = num_classes
    model_config.ssd.freeze_batchnorm = True
    detection_model = model_builder.build(model_config=model_config, is_training=True)

    fake_box_predictor = tf.compat.v2.train.Checkpoint(
        _base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
        # _prediction_heads=detection_model._box_predictor._prediction_heads
    )

    fake_model = tf.compat.v2.train.Checkpoint(
        _feature_extractor=detection_model._feature_extractor,
        _box_predictor=fake_box_predictor
    )
    ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
    ckpt.restore(checkpoint_path).expect_partial()

    image, shapes = detection_model.preprocess(tf.zeros([1, 320, 320, 3]))
    print(shapes)
    print(image)
    prediction_dict = detection_model.predict(image, shapes)
    _ = detection_model.postprocess(prediction_dict, shapes)
    print('Weights restored!')

    return detection_model

In [37]:
def train_step(
    image_tensors,
    groundtruth_boxes_list,
    groundtruth_classes_list,
    vars_to_fine_tune
):
    shapes = tf.constant(batch_size * [[640, 640, 3]], dtype=tf.int32)
    model.provide_groundtruth(
        groundtruth_boxes_list=groundtruth_boxes_list,
        groundtruth_classes_list=groundtruth_classes_list
    )

    with tf.GradientTape() as tape:
        preprocessed_images = tf.concat(
            [detection_model.preprocess(image_tensor)[0]
            for image_tensor in image_tensors], axis=0
        
        )
        prediction_dict = model.predict(preprocessed_images, shapes)
        losses_dict = model.loss(prediction_dict, shapes)
        total_loss = losses_dict['Loss/localization_loss'] + losses_dict['Loss/classification_loss']
        gradients = tape.gradient(total_loss, vars_to_fine_tune)
        optimizer.apply_gradients(zip(gradients, vars_to_fine_tune))
    
    return total_loss

In [38]:
def train_loop(config):
    epochs = config["epochs"]
    learning_rate = config["learning_rate"]
    clip_gradients_value = config["clip_gradients_value"] if "clip_gradients_value" in config else None
    evaluation_interval = config["evaluation_interval"] if "evaluation_interval" in  config else 1
    metrics_interval = config["metrics_interval"] if "metrics_interval" in config else 5

    model = get_model()
    train_image_tensors, gt_classes_one_hot_tensors, gt_box_tensors = get_dataset()

    trainable_variables = detection_model.trainable_variables
    to_fine_tune = []
    prefixes_to_train = [
        'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead',
        'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead'
    ]
    
    for var in trainable_variables:
        if any([var.name.startswith(prefix) for prefix in prefixes_to_train]):
            to_fine_tune.append(var)

    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)

    train_losses = []
    epoch_loss_avg = tf.keras.metrics.Mean()

    print("Start training!")
    for epoch in range(epochs):
        epoch_start_time = time.time()

        for idx in range(100):
            all_keys = list(range(len(training_paths)))
            random.shuffle(all_keys)
            example_keys = all_keys[:4]

            gt_boxes_list = [gt_box_tensors[key] for key in example_keys]
            gt_classes_list = [gt_classes_one_hot_tensors[key] for key in example_keys]
            image_tensors = [train_image_tensors[key] for key in example_keys]

            total_loss = train_step(image_tensors, gt_boxes_list, gt_classes_list, to_fine_tune)

        # for x, y in training_dataset:
        #     total_loss = train_step(
        #         to_fine_tune
        #     )

            epoch_loss_avg(total_loss)

        train_loss_results.append(epoch_loss_avg.result())
        epoch_time_elapsed = time.time() - epoch_start_time

        if epoch % metrics_interval == 0:
            print(
                "Epoch {:03d} | Loss: {:.3f} | Time: {:.0f}m {:.0f}s"
                .format(epoch, epoch_loss_avg.result(), time_elapsed // 60, time_elapsed % 60)
            )

        if epoch % evaluation_interval == 0:
            evaluate_model(model, validation_dataset)

        epoch_loss_avg.reset_states()
    print("Training finished")

    return model

In [39]:
config = {
    "epochs": 10,
    "learning_rate": 0.001
}

trained_model = train_loop(config)

Building model and restoring weights for fine-tuning...
[(&#39;_CHECKPOINTABLE_OBJECT_GRAPH&#39;, []), (&#39;model/_box_predictor/_prediction_heads/box_encodings/0/_box_encoder_layers/0/bias/.ATTRIBUTES/VARIABLE_VALUE&#39;, [12]), (&#39;model/_box_predictor/_prediction_heads/box_encodings/0/_box_encoder_layers/0/kernel/.ATTRIBUTES/VARIABLE_VALUE&#39;, [1, 1, 576, 12]), (&#39;model/_box_predictor/_prediction_heads/box_encodings/1/_box_encoder_layers/0/bias/.ATTRIBUTES/VARIABLE_VALUE&#39;, [24]), (&#39;model/_box_predictor/_prediction_heads/box_encodings/1/_box_encoder_layers/0/kernel/.ATTRIBUTES/VARIABLE_VALUE&#39;, [1, 1, 1280, 24]), (&#39;model/_box_predictor/_prediction_heads/box_encodings/2/_box_encoder_layers/0/bias/.ATTRIBUTES/VARIABLE_VALUE&#39;, [24]), (&#39;model/_box_predictor/_prediction_heads/box_encodings/2/_box_encoder_layers/0/kernel/.ATTRIBUTES/VARIABLE_VALUE&#39;, [1, 1, 512, 24]), (&#39;model/_box_predictor/_prediction_heads/box_encodings/3/_box_encoder_layers/0/bias/.

ResourceExhaustedError: OOM when allocating tensor with shape[1,320,320,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Sub]