In [1]:
from glob import glob
import logging
import os
from pprint import pprint
import shutil
from time import time
from tqdm.notebook import tqdm

import tensorflow as tf

from ssd.common.callbacks import CallbackBuilder
from ssd.common.distribute import get_strategy
from ssd.common.config import load_config
from ssd.common.viz_utils import draw_boxes_cv2, imshow
from ssd.data.dataset_builder import DatasetBuilder
from ssd.losses.multibox_loss import MultiBoxLoss
from ssd.models.ssd_model import SSDModel

# Route notebook messages through TensorFlow's logger at INFO verbosity.
logger = tf.get_logger()
logger.setLevel(logging.INFO)

# Record the TensorFlow version this run was executed with.
tf_version = tf.__version__
logger.info('version: {}'.format(tf_version))

INFO:tensorflow:version: 2.2.0-rc3


In [2]:
# Limit the process to GPUs 1-4 and pass the XLA JIT flags to TensorFlow.
# NOTE(review): both variables are only honoured if they are set before
# TensorFlow initialises its devices — confirm this cell always runs before
# any GPU work.
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '1, 2, 3, 4',
    'TF_XLA_FLAGS': '--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit',
})

In [3]:
# !python ssd/scripts/calculate_feature_shapes.py --image_height 512 --image_width 512 --num_feature_maps 7
# !python ssd/scripts/calculate_scales.py -n 7 --s_first 0.04 --smin 0.1 --smax 0.9
# !python check_matching.py ssd/cfg/coco_resnet50_512x512.yaml

In [4]:
# Load the experiment configuration and derive the run-level settings
# (precision policy, distribution strategy, learning rate, batch size, steps).
config = load_config('ssd/cfg/coco_resnet50_512x512.yaml')

# Choose the Keras mixed-precision policy. Full precision is the default so
# that `dtype` is always bound, including when `use_mixed_precision` is off
# (previously that case raised NameError at `Policy(dtype)`).
dtype = 'float32'
if config['use_mixed_precision'] and config['use_tpu']:
    dtype = 'mixed_bfloat16'
# TODO: use 'mixed_float16' on GPUs once loss scaling is implemented; until
# then non-TPU runs stay in float32 even when mixed precision is requested.

policy = tf.keras.mixed_precision.experimental.Policy(dtype)
tf.keras.mixed_precision.experimental.set_policy(policy)

print('\nCompute dtype: ', policy.compute_dtype)
print('Variable dtype: ', policy.variable_dtype)

strategy = get_strategy(config)

epochs = config['epochs']

# Optionally scale the base learning rate by the number of replicas.
lr = config['base_lr']
if config['scale_lr']:
    lr = lr * strategy.num_replicas_in_sync

# Optionally scale the batch size by the number of replicas, and write the
# effective value back into the config so downstream builders see it.
batch_size = config['batch_size']
if config['scale_batch_size']:
    batch_size = batch_size * strategy.num_replicas_in_sync
config['batch_size'] = batch_size

# Whole batches per epoch; any remainder images are dropped by the floor division.
train_steps = config['train_images'] // config['batch_size']
val_steps = config['val_images'] // config['batch_size']

print('\n')
pprint(config, width=120, compact=True)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
INFO:tensorflow:Running with MirroredStrategy on 4 GPU's 


{'area_range': (0.3, 1),
 'aspect_ratio_range': (0.667, 1.334),
 'aspect_ratios': ((0.5, 1, 2), (0.333, 0.5, 1, 2, 3), (0.333, 0.5, 1, 2, 3), (0.333, 0.5, 1, 2, 3), (0.5, 1, 2),
                   (0.5, 1, 2), (0.5, 1, 2)),
 'augment_val_dataset': False,
 'backbone': 'resnet_50_v2',
 'base_lr': 5e-05,
 'batch_size': 32,
 'brightness_max_delta': 0.2,
 'cache_dataset_in_memory': False,
 'classes': ('airplane', 'apple', 'backpack', 'banana', 'baseball bat', 'baseball glove', 'bear', 'bed', 'bench',
             'bicycle', 'bird', 'boat', 'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot', 'cat',
             'cell phone', 'chair', 'clock', 'couch', 'cow', 'cup', 'dining t

In [5]:
# Remove artefacts of earlier runs when the config asks for it. On TPU runs
# nothing is deleted and only a warning is logged (the message suggests that
# model_dir is a GCS bucket in that case — confirm).
if config['clear_previous_runs'] and config['use_tpu']:
    logger.warning('Skipping GCS Bucket')
elif config['clear_previous_runs']:
    try:
        shutil.rmtree(os.path.join(config['model_dir']))
    except FileNotFoundError:
        # Nothing to clear: the directory does not exist yet.
        logger.warning('model_dir not found!')
    else:
        logger.info('Cleared existing model files\n')

INFO:tensorflow:Cleared existing model files



In [6]:
# Build the data pipelines, loss, optimizer, callbacks and model inside the
# distribution strategy scope, then optionally resume from a checkpoint.
with strategy.scope():
    train_dataset = DatasetBuilder('train', config)
    val_dataset = DatasetBuilder('val', config)

    loss_fn = MultiBoxLoss(config)
    optimizer = tf.optimizers.Adam(learning_rate=lr)
    # NOTE(review): the meaning of the '480' argument is not visible from this
    # notebook — confirm against CallbackBuilder.
    callbacks_list = CallbackBuilder('480', config).get_callbacks()

    model = SSDModel(config)
    model.compile(loss_fn=loss_fn, optimizer=optimizer)

    if config['resume_training']:
        checkpoint_dir = os.path.join(config['model_dir'], 'checkpoints')
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint is None:
            logger.warning('No weights found, training from scratch')
        else:
            logger.info('Loading weights from {}'.format(latest_checkpoint))
            model.load_weights(latest_checkpoint)

Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/dev

In [None]:
# Train the model, validating with the step counts computed in the config cell.
model.fit(
    train_dataset.dataset,
    validation_data=val_dataset.dataset,
    epochs=epochs,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    callbacks=callbacks_list,
)

# Save the weights as they are at the end of training under
# <model_dir>/final_weights/ssd_weights.
with strategy.scope():
    final_weights_dir = os.path.join(config['model_dir'], 'final_weights')
    save_path = os.path.join(final_weights_dir, 'ssd_weights')
    logger.info('Saving final weights at in {}'.format(save_path))
    model.save_weights(save_path)

In [None]:
# Restore the most recent checkpoint stored under <model_dir>/best_weights.
with strategy.scope():
    best_weights_dir = os.path.join(config['model_dir'], 'best_weights')
    latest_checkpoint = tf.train.latest_checkpoint(best_weights_dir)
    if latest_checkpoint is None:
        # tf.train.latest_checkpoint returns None when no checkpoint exists;
        # passing that to load_weights fails with an unrelated-looking error,
        # so warn and keep the weights currently held by the model instead.
        logger.warning('No checkpoint found in {}, keeping current weights'.format(best_weights_dir))
    else:
        logger.info('Loading weights from {}'.format(latest_checkpoint))
        model.load_weights(latest_checkpoint)

In [None]:
# Run the detector on every image of the first validation batch and display
# the predicted boxes with their class names.
for images, _ in val_dataset.dataset.take(1):
    num_images = images.shape[0]
    for idx in tqdm(range(num_images)):
        image = images[idx]
        # get_detections is fed a batch of one, so add a leading batch axis.
        single_image_batch = image[None, ...]
        detections = model.get_detections(single_image_batch)

        # Inverse of the (x - 127.5) / 127.5 scaling, so the image is back in
        # the 0-255 range for drawing (presumably the dataset applies that
        # scaling for resnet backbones — confirm in DatasetBuilder).
        if 'resnet' in config['backbone']:
            image = image * 127.5 + 127.5

        class_ids = detections['cls_ids'].numpy()
        categories = [config['classes'][cls_id] for cls_id in class_ids]
        image = draw_boxes_cv2(image, detections['boxes'], categories)
        imshow(image)

In [None]:
# Run detection on a single image file and display the result.
# `image_path` is a placeholder and must be set before running this cell.
image_path = ''
if not image_path:
    # Fail with an actionable message instead of an opaque file-system error.
    raise ValueError('image_path is empty; set it to the image file to run inference on')

image = tf.io.read_file(image_path)
# Force three channels so grayscale or RGBA files are decoded as RGB rather
# than producing a 1- or 4-channel tensor.
image = tf.image.decode_image(image, channels=3)
image = tf.image.resize(image, [config['image_height'], config['image_width']])

# Resnet backbones take inputs scaled to [-1, 1]; other backbones get the
# resized image unchanged.
if 'resnet' in config['backbone']:
    image_preprocessed = (tf.cast(image, dtype=tf.float32) - 127.5) / 127.5
else:
    image_preprocessed = image

# Time a single forward pass on a batch of one.
s = time()
detections = model.get_detections(image_preprocessed[None, ...])
e = time()
logger.info('Inference time: {:.3f}'.format(e - s))

# Convert class ids to plain integers before indexing, as the validation
# visualisation cell does.
categories = [config['classes'][cls_id] for cls_id in detections['cls_ids'].numpy()]
image = draw_boxes_cv2(image, detections['boxes'], categories, thickness=1)
imshow(image)

In [1]:
import tensorflow as tf

In [4]:
# Fetch the globally active Keras mixed-precision policy for inspection.
p = tf.keras.mixed_precision.experimental.global_policy()

In [5]:
# Display the compute dtype of the policy fetched above.
p.compute_dtype

'float32'