# Train an object detection model using TensorFlow on SageMaker

## Setup environment

In [None]:
import sagemaker
from sagemaker.estimator import Framework, Estimator

# Execution role of the current SageMaker environment; passed to the estimator below.
role = sagemaker.get_execution_role()
# Training input channels: channel name -> S3 path. Replace the placeholder
# with the S3 location of your training data before running.
inputs = {'train': '<your-data-s3-path>'}
# S3 path under which the TensorBoard event files are written. Replace the placeholder.
tensorboard_s3_prefix = '<your-summaries-s3-path>'

## Build and push container

In [None]:
# Clone the TensorFlow models repository into docker/models.
# NOTE(review): presumably consumed by the Docker build below - confirm against the Dockerfile.
!git clone https://github.com/tensorflow/models.git docker/models

In [None]:
# Name of the container image; passed as the argument to the build script in the next cell.
image_name = 'object-detection-training'

In [None]:
# Run the build-and-push script with the image name as its only argument.
!sh ./docker/build_and_push.sh $image_name

In [None]:
# URI of the training container image; handed to the estimator as `image_name`.
# Replace the placeholder with the URI of the image pushed above.
container = '<your-container-image-uri>'

## Get pre-trained model

In [None]:
#Download the base model and extract locally
!mkdir /tmp/checkpoint
!mkdir source_dir/checkpoint
!wget -O /tmp/efficientdet.tar.gz http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz
!tar -zxvf /tmp/efficientdet.tar.gz --strip-components 2 --directory source_dir/checkpoint efficientdet_d0_coco17_tpu-32/checkpoint

## Create SageMaker Custom Framework and Launch Training job

Here we define a custom framework estimator using the Amazon SageMaker Python SDK and run training with that class. The estimator takes care of packaging the contents of `source_dir` and launching the training job in the custom container.

In [None]:
class CustomFramework(Framework):
    """Framework estimator that trains with a caller-supplied container image.

    Distribution configuration and model creation are stubbed out: both
    methods below do nothing and return ``None``.
    """

    def __init__(
        self,
        entry_point,
        source_dir=None,
        hyperparameters=None,
        py_version="py3",
        framework_version=None,
        image_name=None,
        distributions=None,
        **kwargs
    ):
        """Initialise the underlying ``Framework``.

        ``entry_point``, ``source_dir``, ``hyperparameters``, ``image_name``
        and any extra keyword arguments are forwarded to the base class.
        ``py_version``, ``framework_version`` and ``distributions`` are
        accepted but not forwarded.
        """
        super().__init__(
            entry_point,
            source_dir,
            hyperparameters,
            image_name=image_name,
            **kwargs
        )

    def _configure_distribution(self, distributions):
        """Ignore ``distributions``; nothing is configured."""
        return None

    def create_model(
        self,
        model_server_workers=None,
        role=None,
        vpc_config_override=None,
        entry_point=None,
        source_dir=None,
        dependencies=None,
        image_name=None,
        **kwargs
    ):
        """Return ``None`` regardless of the arguments; no model is built."""
        return None

In [None]:
from sagemaker.debugger import TensorBoardOutputConfig

# Hyperparameters handed to the estimator below.
hyperparameters = dict(
    model_dir="/opt/training",
    pipeline_config_path="pipeline.config",
    num_train_steps=1000,
    sample_1_of_n_eval_examples=1,
)

# TensorBoard output configuration: the container-local path is the same
# directory as `model_dir` above, and the S3 destination is the prefix defined
# in the setup cell.
tensorboard_output_config = TensorBoardOutputConfig(
    container_local_output_path='/opt/training/',
    s3_output_path=tensorboard_s3_prefix,
)

# Estimator backed by the custom container, using `run_training.sh` from
# `source_dir/` as entry point on a single ml.p3.8xlarge instance.
estimator = CustomFramework(
    entry_point='run_training.sh',
    source_dir='source_dir/',
    image_name=container,
    role=role,
    hyperparameters=hyperparameters,
    train_instance_type='ml.p3.8xlarge',
    train_instance_count=1,
    tensorboard_output_config=tensorboard_output_config,
    base_job_name='tf2-object-detection',
)

In [None]:
# Start the training job on the `inputs` channels; wait=False returns
# immediately instead of blocking until the job finishes.
estimator.fit(inputs, wait=False)

## Monitor training with TensorBoard

In [None]:
# Ask the estimator for the TensorBoard artifacts path of its latest training job.
job_artifacts_path = estimator.latest_job_tensorboard_artifacts_path()

In [None]:
# Manual alternative to the previous cell: running this overwrites
# `job_artifacts_path`, so replace the placeholder with a real S3 path first
# (or skip this cell to keep the value returned by the estimator).
job_artifacts_path = '<your-summaries-s3-path>'

In [None]:
tensorboard_s3_output_path = f'{job_artifacts_path}/train'
!F_CPP_MIN_LOG_LEVEL=3 AWS_REGION=eu-west-1 tensorboard --logdir=$tensorboard_s3_output_path

In [None]:
tensorboard_s3_output_path = f'{job_artifacts_path}/eval' 
!F_CPP_MIN_LOG_LEVEL=3 AWS_REGION=eu-west-1 tensorboard --logdir=$tensorboard_s3_output_path

In [None]:
# !TF_CPP_MIN_LOG_LEVEL=3 AWS_REGION=eu-west-1 tensorboard --inspect --logdir $tensorboard_s3_output_path