# Download and install the TensorFlow Object Detection API

In [None]:
!git clone https://github.com/tensorflow/models.git

In [None]:
%%capture
%cd /content/models/research/
!protoc object_detection/protos/*.proto --python_out=.
!cp object_detection/packages/tf2/setup.py .
!python -m pip install .

# Download dataset and pretrained model

In [None]:
%%bash
# Download the Roboflow dataset export and unpack it into /content.
# Abort at the first failing command so a broken download is never unzipped.
set -euo pipefail
cd /content

# NOTE(review): the export URL embeds an access key; avoid sharing the notebook with it in place.
# curl -f fails on an HTTP error instead of saving the error page to disk;
# unzip -o overwrites files left by a previous run without prompting.
curl -fL "https://app.roboflow.com/ds/26GgrZbYpY?key=RyDdQvRqNM" -o roboflow.zip
unzip -o roboflow.zip
rm roboflow.zip
#curl -L "https://public.roboflow.com/ds/pgTjFayqld?key=owB4ZaBW3N" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip

In [None]:
# Annotation set to train on. The value is used as the file-name prefix of the
# TFRecord files and of the label map (e.g. `trails_label_map.pbtxt`) in the
# cells that build the dataset paths.
annotation_group = "trails"
#annotation_group = "pieces"

In [None]:
# Catalogue of supported base models, keyed by a short alias. Every entry holds:
#   model_name            - path component under deploy/ where the checkpoint is looked up
#   base_pipeline_file    - name of the base pipeline .config file to download
#   pretrained_checkpoint - name of the pretrained weights archive to download
#   batch_size            - batch size written into the pipeline config
MODELS_CONFIG = {
    'efficientdet-d0': {
        'model_name': 'efficientdet_d0_coco17_tpu-32',
        'base_pipeline_file': 'ssd_efficientdet_d0_512x512_coco17_tpu-8.config',
        'pretrained_checkpoint': 'efficientdet_d0_coco17_tpu-32.tar.gz',
        'batch_size': 16,
    },
    'efficientdet-d6': {
        'model_name': 'efficientdet_d6_coco17_tpu-32',
        'base_pipeline_file': 'ssd_efficientdet_d6_1408x1408_coco17_tpu-32.config',
        'pretrained_checkpoint': 'efficientdet_d6_coco17_tpu-32.tar.gz',
        'batch_size': 16,
    },
    'mobilenet-v2': {
        'model_name': 'ssd_mobilenet_v2_320x320_coco17_tpu-8',
        'base_pipeline_file': 'ssd_mobilenet_v2_320x320_coco17_tpu-8.config',
        'pretrained_checkpoint': 'ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz',
        'batch_size': 16,
    },
}

# Alias of the entry above that this run trains.
chosen_model = 'mobilenet-v2'

# Number of training steps. Raise it while the loss is still falling and the
# validation metrics are still improving; more steps mean a longer run.
num_steps = 500
# Intended evaluation interval in steps. No cell shown in this notebook reads it.
num_eval_steps = 500

# Unpack the chosen entry into the flat names the following cells use.
_selected = MODELS_CONFIG[chosen_model]
model_name = _selected['model_name']
pretrained_checkpoint = _selected['pretrained_checkpoint']
base_pipeline_file = _selected['base_pipeline_file']
# A larger batch may speed up training if it fits in memory.
batch_size = _selected['batch_size']

In [None]:
#download pretrained weights
%mkdir /content/deploy
%cd /content/deploy
import tarfile
download_tar = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/' + pretrained_checkpoint

!wget {download_tar}
tar = tarfile.open(pretrained_checkpoint)
tar.extractall()
tar.close()

In [None]:
#download base training configuration file
# The config matching the chosen model is fetched from the master branch of the
# tensorflow/models repository and saved in /content/deploy.
%cd /content/deploy
download_config = 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/' + base_pipeline_file
!wget {download_config}

# Set up Google Cloud Storage Bucket

In [None]:
# Sign in through Colab's auth helper. The gcloud/gsutil cells that follow
# presumably rely on the credentials obtained here — confirm when running
# outside Colab, where this module is unavailable.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Select the Google Cloud project for the gcloud configuration. The project ID
# is hard-coded; replace it with your own project before running.
!gcloud config set project cloud-chamber-research

In [None]:
import uuid

# Each execution of this cell produces a new bucket name: a fixed prefix
# followed by a freshly generated version-1 UUID.
_run_id = uuid.uuid1()
bucket = 'colab-cloud-chamber-' + str(_run_id)

In [None]:
# Create the Cloud Storage bucket named by `bucket`.
!gsutil mb gs://{bucket}

# Set up training pipeline

In [None]:
# Local path of the base pipeline config that was downloaded into /content/deploy.
pipeline_fname = '/content/deploy/' + base_pipeline_file

def get_num_classes(pbtxt_fname, max_num_classes=90):
    """Count the classes declared in a label map file.

    Args:
        pbtxt_fname: Path to the label map `.pbtxt` file.
        max_num_classes: Limit forwarded to
            `label_map_util.convert_label_map_to_categories`. Defaults to 90,
            the value this function previously hard-coded.
            NOTE(review): entries beyond this limit are presumably left out of
            the count — raise it for label maps with more classes.

    Returns:
        The number of entries in the category index built from the label map.
    """
    # Imported here so that defining the function does not itself require
    # the object_detection package.
    from object_detection.utils import label_map_util

    label_map = label_map_util.load_labelmap(pbtxt_fname)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=max_num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return len(category_index)
# Class count for the selected annotation group, read from the label map in the
# local training split.
num_classes = get_num_classes('/content/train/' + annotation_group + '_label_map.pbtxt')

In [None]:
# Cloud Storage locations written into the pipeline config. They follow the
# layout of the local /content directories that are uploaded to the bucket.
train_record_path = 'gs://{0}/train/{1}.tfrecord'.format(bucket, annotation_group)
test_record_path = 'gs://{0}/valid/{1}.tfrecord'.format(bucket, annotation_group)
# Matches the model_dir assigned right before training, so both definitions
# name the same location instead of two different ones.
model_dir = 'gs://{0}/training/'.format(bucket)
labelmap_path = 'gs://{0}/valid/{1}_label_map.pbtxt'.format(bucket, annotation_group)

# File name of the customised pipeline config, relative to /content/deploy.
pipeline_config_path = 'pipeline_file.config'
# Checkpoint to fine-tune from, addressed inside the uploaded deploy/ directory.
fine_tune_checkpoint = 'gs://{0}/deploy/{1}/checkpoint/ckpt-0'.format(bucket, model_name)

In [None]:
import re

%cd /content/deploy
print('writing custom configuration file')

with open(pipeline_fname) as f:
    s = f.read()
with open('pipeline_file.config', 'w') as f:
    
    # fine_tune_checkpoint
    s = re.sub('fine_tune_checkpoint: ".*?"',
               'fine_tune_checkpoint: "{}"'.format(fine_tune_checkpoint), s)
    
    # tfrecord files train and test.
    s = re.sub(
        '(input_path: ".*?)(PATH_TO_BE_CONFIGURED/train)(.*?")', 'input_path: "{}"'.format(train_record_path), s)
    s = re.sub(
        '(input_path: ".*?)(PATH_TO_BE_CONFIGURED/val)(.*?")', 'input_path: "{}"'.format(test_record_path), s)

    # label_map_path
    s = re.sub(
        'label_map_path: ".*?"', 'label_map_path: "{}"'.format(labelmap_path), s)

    # Set training batch_size.
    s = re.sub('batch_size: [0-9]+',
               'batch_size: {}'.format(batch_size), s)

    # Set training steps, num_steps
    s = re.sub('num_steps: [0-9]+',
               'num_steps: {}'.format(num_steps), s)
    
    # Set number of classes num_classes.
    s = re.sub('num_classes: [0-9]+',
               'num_classes: {}'.format(num_classes), s)
    
    #fine-tune checkpoint type
    s = re.sub(
        'fine_tune_checkpoint_type: "classification"', 'fine_tune_checkpoint_type: "{}"'.format('detection'), s)
        
    f.write(s)

In [None]:
# Print the customised pipeline config so the substitutions can be checked by eye.
%cat /content/deploy/pipeline_file.config

In [None]:
# Cloud Storage paths passed to model_main_tf2.py below: the customised
# pipeline config inside the uploaded deploy/ directory, and the directory
# given as --model_dir for the training and evaluation runs.
pipeline_file = 'gs://{0}/deploy/pipeline_file.config'.format(bucket)
model_dir = 'gs://{0}/training/'.format(bucket)

# Upload to Google Cloud Storage

In [None]:
%cd /content
!gsutil cp -r train/ gs://{bucket}
!gsutil cp -r test/ gs://{bucket}
!gsutil cp -r valid/ gs://{bucket}
!gsutil cp -r deploy/ gs://{bucket}

# Train and test model

In [None]:
# Launch training with the customised pipeline config stored in the bucket;
# {model_dir} is passed as the script's --model_dir.
!python /content/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={pipeline_file} \
    --model_dir={model_dir} \
    --alsologtostderr

In [None]:
# Run the same script with --checkpoint_dir pointing at the training output.
# NOTE(review): with --checkpoint_dir set the script presumably switches to
# evaluating the checkpoints found there instead of training — confirm against
# the flags of model_main_tf2.py.
!python /content/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={pipeline_file} \
    --model_dir={model_dir} \
    --checkpoint_dir={model_dir}

In [None]:
# Open TensorBoard inside the notebook, reading its logs from {model_dir}.
%load_ext tensorboard
%tensorboard --logdir {model_dir}