<a href="https://colab.research.google.com/github/srihari-humbarwadi/ssd_tensorflow/blob/master/colab_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/srihari-humbarwadi/ssd_tensorflow.git

In [None]:
# Reference: TPU deep-learning VM creation script (GoogleCloudPlatform/training-data-analyst):
# https://raw.githubusercontent.com/GoogleCloudPlatform/training-data-analyst/master/courses/fast-and-lean-data-science/create-tpu-deep-learning-vm.sh

In [None]:
import sys
from google.colab import auth
import tensorflow_gcs_config as tfgcs

# Authenticate the Colab session with the user's Google account.
auth.authenticate_user()

# Make the cloned repository importable (the `ssd` package lives inside it).
# The membership check keeps `sys.path` free of duplicate entries when this
# cell is executed more than once in the same kernel.
if 'ssd_tensorflow' not in sys.path:
    sys.path.append('ssd_tensorflow')

In [None]:
from glob import glob
import logging
import os
from pprint import pprint
import shutil

import tensorflow as tf

from ssd.common.callbacks import CallbackBuilder
from ssd.common.distribute import get_strategy
from ssd.common.config import load_config
from ssd.common.viz_utils import draw_boxes_cv2, imshow
from ssd.data.dataset_builder import DatasetBuilder
from ssd.losses.multibox_loss import MultiBoxLoss
from ssd.models.ssd_model import SSDModel

# Use TensorFlow's own logger for notebook messages, at INFO verbosity.
logger = tf.get_logger()
logger.setLevel(logging.INFO)

# Record the TensorFlow version this run was executed with.
version_message = 'version: {}'.format(tf.__version__)
logger.info(version_message)

In [None]:
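# Optional helper scripts from the cloned repo (uncomment a line to run it).
# Presumably these were used to derive `feature_shapes` and `scales` for the config below — confirm.
# !PYTHONPATH="ssd_tensorflow" python ssd_tensorflow/ssd/scripts/calculate_feature_shapes.py --image_height 1024 --image_width 1024 --num_feature_maps 7
# !PYTHONPATH="ssd_tensorflow" python ssd_tensorflow/ssd/scripts/calculate_scales.py -n 7 --s_first 0.04 --smin 0.06 --smax 0.98
# !PYTHONPATH="ssd_tensorflow" python ssd_tensorflow/check_matching.py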

In [None]:
# Run configuration. This single dict is handed to get_strategy, DatasetBuilder,
# MultiBoxLoss, CallbackBuilder and SSDModel in the cells below, so every key
# here is read by project code that is not visible in this notebook.
config = {
    # One tuple of aspect ratios per feature map: 7 tuples, matching the 7
    # entries in `feature_shapes`.
    'aspect_ratios': ((0.333, 0.5, 1, 2, 3), (0.333, 0.5, 1, 2, 3), (0.333, 0.5, 1, 2, 3), (0.333, 0.5, 1, 2, 3),
                      (0.333, 0.5, 1, 2, 3), (0.5, 1, 2), (0.5, 1, 2)),
    'backbone': 'resnet_50_v2',
    # `base_lr` and `batch_size` are multiplied by the number of replicas in a
    # later cell when `scale_lr` / `scale_batch_size` are True.
    'base_lr': 0.001,
    'batch_size': 1,
    'brightness_max_delta': 0.1,
    # Single-class setup: `classes` has one name and `num_classes` is 1.
    'classes': ('object',),
    'clear_previous_runs': True,
    'clip_default_boxes': False,
    'cls_loss_weight': 1.0,
    'contrast_lower': 0.5,
    'contrast_upper': 1.5,
    'epochs': 10,
    'feature_shapes': ((128, 128), (64, 64), (32, 32), (16, 16), (8, 8), (6, 6), (4, 4)),
    'image_height': 1024,
    'image_width': 1024,
    'l2_regularization': False,
    'loc_loss_weight': 1.0,
    'loc_variance': (0.1, 0.1, 0.2, 0.2),
    'match_iou_threshold': 0.45,
    'max_detections': 1000,
    # Model artifacts go to a GCS location (gs:// URI).
    'model_dir': 'gs://srihari_models/ssd',
    'multi_gpu': False,
    'negatives_ratio': 3,
    'nms_iou_threshold': 0.5,
    'num_classes': 1,
    'patience': 3,
    # The visualisation cell undoes scaling with `x * 127.5 + 127.5`, i.e. it
    # assumes images are in [-1, 1] — keep the two in sync.
    'pixel_range': (-1, 1),
    'random_brightness': True,
    'random_contrast': True,
    # NOTE(review): the key is spelled 'horizonal'. Left untouched because the
    # project code presumably looks up this exact spelling — confirm.
    'random_flip_horizonal': True,
    'random_patch': True,
    'random_saturation': True,
    'rescale_pixels': True,
    'saturation_lower': 0.5,
    'saturation_upper': 1.5,
    'scale_batch_size': True,
    'scale_lr': True,
    # 8 values, i.e. one more than the number of feature maps.
    'scales': (0.04, 0.06, 0.24, 0.42, 0.6, 0.78, 0.96, 1.14),
    'score_threshold': 0.45,
    'smooth_l1_delta': 1.0,
    # Glob patterns for the TFRecord shards, stored on GCS.
    'tfrecords_train': 'gs://tfrecord_datasets/sku110k_tfrecords/train*',
    'tfrecords_val': 'gs://tfrecord_datasets/sku110k_tfrecords/val*',
    # Raises KeyError when COLAB_TPU_ADDR is not set in the environment
    # (e.g. the Colab runtime is not a TPU runtime).
    'tpu_name': os.environ['COLAB_TPU_ADDR'],
    # Dataset sizes; used below to compute steps per epoch.
    'train_images': 8300,
    'use_gpu': False,
    'use_tpu': True,
    'val_images': 588
}

In [None]:
# Alternative to the inline dict: load the configuration from the repo's YAML.
# config = load_config('ssd/cfg/sku110k.yaml')

# Build the distribution strategy from the config, then hand the Colab auth
# credentials over for GCS access. The original call order is preserved.
strategy = get_strategy(config)
tfgcs.configure_gcs_from_colab_auth()

epochs = config['epochs']

# Learning rate: optionally scaled linearly with the number of replicas.
lr = config['base_lr']
if config['scale_lr']:
    lr = lr * strategy.num_replicas_in_sync

# Batch size: optionally scaled with the number of replicas, then written back
# so the project code sees the scaled batch size.
# NOTE: because the scaled value is stored in `config`, re-running this cell
# without re-running the config cell scales the batch size again.
batch_size = config['batch_size']
if config['scale_batch_size']:
    batch_size = batch_size * strategy.num_replicas_in_sync
config['batch_size'] = batch_size

# Whole batches per epoch; a trailing partial batch is not counted.
train_steps = config['train_images'] // config['batch_size']
val_steps = config['val_images'] // config['batch_size']

print('\n')
pprint(config, width=120, compact=True)

In [None]:
# Construct the datasets, loss, optimizer, callbacks and model inside the
# distribution strategy's scope. All names created here are used by later cells.
with strategy.scope():
    train_dataset = DatasetBuilder('train', config)
    val_dataset = DatasetBuilder('val', config)

    loss_fn = MultiBoxLoss(config)
    # `lr` is the (optionally replica-scaled) learning rate computed in the previous cell.
    optimizer = tf.optimizers.Adam(learning_rate=lr)
    # 'test_run' is presumably the run name used by the callbacks — confirm in CallbackBuilder.
    callbacks_list = CallbackBuilder('test_run', config).get_callbacks()
    
    model = SSDModel(config)
    # NOTE(review): `loss_fn=` is not the stock Keras `compile` keyword (`loss=`);
    # presumably SSDModel overrides `compile` — confirm.
    model.compile(loss_fn=loss_fn, optimizer=optimizer)

In [None]:
# Optionally remove artifacts left by earlier runs before training starts.
if config['clear_previous_runs']:
    if config['use_tpu']:
        # `glob`/`os` only operate on the local filesystem, so nothing is
        # deleted here (per the message, model_dir is expected to be a GCS
        # bucket when training on TPU).
        logger.warning('Skipping GCS Bucket')
    else:
        for subdir in ('checkpoint', 'tensorboard'):
            for path in glob(config['model_dir'] + '/' + subdir + '/*'):
                # Entries may be directories (TensorBoard logging typically
                # creates sub-directories); `os.remove` raises on those, so
                # remove directory trees with `shutil.rmtree` instead.
                if os.path.isdir(path) and not os.path.islink(path):
                    shutil.rmtree(path)
                else:
                    os.remove(path)
        logger.info('Cleared existing model files')

# Train for `epochs` epochs with a fixed number of steps per epoch, validating
# after each epoch; the callbacks come from CallbackBuilder.
model.fit(train_dataset.dataset,
          epochs=epochs,
          steps_per_epoch=train_steps,
          validation_data=val_dataset.dataset,
          validation_steps=val_steps,
          callbacks=callbacks_list)

In [None]:
# Qualitative check: run the model on every image of one validation batch and
# display the detections drawn on top of the image.
class_names = config['classes']

for images, _ in val_dataset.dataset.take(1):
    for i in range(images.shape[0]):
        image = images[i]

        # The model is called with a leading batch axis of size 1.
        detections = model.get_detections(tf.expand_dims(image, axis=0))
        categories = [class_names[cls_id] for cls_id in detections['cls_ids'].numpy()]

        # x * 127.5 + 127.5 maps values in [-1, 1] onto [0, 255] for display.
        image = image * 127.5 + 127.5
        image = draw_boxes_cv2(image, detections['boxes'], categories)
        imshow(image)

In [None]:
# Serving signature for export: trace `model.get_detections` for one image of
# the configured height/width with 3 channels (TensorSpec dtype left at its default).
serving_input_spec = tf.TensorSpec([1, config['image_height'], config['image_width'], 3])
serving_fn = model.get_detections.get_concrete_function(serving_input_spec)

signatures = {'serving_default': serving_fn}

In [None]:
# Export the trained model in TensorFlow SavedModel format, with the serving
# signature defined in the previous cell.
export_dir = 'model_files/v1'
model.save(export_dir, save_format='tf', signatures=signatures)