Set up some variables for the paths used throughout the notebook

In [0]:
# Root of the working copy on the mounted Google Drive. The backslash escapes
# the space in "My Drive" because these values are interpolated, unquoted, into
# shell commands (e.g. `!mkdir -p $DATASET_PATH`). A raw string keeps that
# backslash literally instead of relying on the invalid escape sequence `\ `.
DATASET_PATH = r'/content/gdrive/My\ Drive/porto-dataset-2'
# Folders that receive the test and train images together with their annotations.
TEST_PATH = DATASET_PATH + '/object_detection/images/test'
TRAIN_PATH = DATASET_PATH + '/object_detection/images/train'
# Local directory (outside the Drive) used for the label map, the pipeline
# configuration and the training output.
TRAINING_PATH = '/root/training'

# Connect to Google Drive
The Porto dataset should be stored in Google Drive.

As such we need to access it.

In [0]:
from google.colab import drive
# Mount Google Drive under /content/gdrive.
drive.mount('/content/gdrive')
# List the Drive root as a quick check that the mount is usable.
!ls '/content/gdrive/My Drive'

# Import code from Tensorflow object detection API

In [0]:
# Work inside the dataset directory on the Drive (created if missing).
!mkdir -p $DATASET_PATH
%cd $DATASET_PATH
# Clone the TensorFlow models repository and keep only what is needed from it:
# the object_detection package, the contents of slim, and setup.py.
!git clone https://github.com/tensorflow/models.git
!mv models/research/object_detection $DATASET_PATH
!mv -u models/research/slim/* $DATASET_PATH
!mv models/research/setup.py $DATASET_PATH
# Bring the legacy training script to the dataset root, then drop the rest of the clone.
!mv object_detection/legacy/train.py .
!rm -r models
# Install the package, then compile the protobuf definitions into Python modules.
# NOTE(review): protoc runs after the install, so the generated modules presumably
# exist only in this directory and not in the installed package — confirm this order is intended.
!python setup.py install
!protoc object_detection/protos/*.proto --python_out=.

# Import Faster-RCNN-Inception-V2 model

In [0]:
%cd $DATASET_PATH
# Download the faster_rcnn_inception_v2_coco_2018_01_28 archive into the dataset directory.
!curl -LO http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz

# Unpack the archive in place.
!tar -xvf faster_rcnn_inception_v2_coco_2018_01_28.tar.gz

# Import ssd_mobilenet_v1_coco
An alternative network could be used.

In [0]:
%cd $DATASET_PATH
# Download the ssd_mobilenet_v1_coco_2018_01_28 archive into the dataset directory.
!curl -LO http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz

# Unpack the archive in place.
!tar -xvf ssd_mobilenet_v1_coco_2018_01_28.tar.gz

# Load Dataset
Create a duplicate of the dataset

And create the train and test folders

In [0]:
%cd $DATASET_PATH
# Copy the archive from the original `porto-dataset` folder on the Drive into
# the working copy, then extract it there.
!cp -r '/content/gdrive/My Drive/porto-dataset/resized_dataset.zip' $DATASET_PATH
!unzip {DATASET_PATH + '/resized_dataset.zip'}

In [0]:
# Create the test and train folders (including missing parents).
!mkdir -p $TEST_PATH
!mkdir -p $TRAIN_PATH

## Move images
Move the first 20% of each category to test

In [0]:
# For every category folder, move the first 120 entries of the `ls` listing into the test folder.
# NOTE(review): 120 is hard-coded; it equals the stated 20% only if each category holds 600 images — confirm.
# NOTE(review): the unquoted backtick expansion is split on whitespace, so file names are assumed to contain no spaces.
%cd {DATASET_PATH + '/resized_dataset/images/'}
%cd arrabida
!mv `ls | head -120` $TEST_PATH
%cd ../camara
!mv `ls | head -120` $TEST_PATH
%cd ../clerigos
!mv `ls | head -120` $TEST_PATH
%cd ../musica
!mv `ls | head -120` $TEST_PATH
%cd ../serralves
!mv `ls | head -120` $TEST_PATH

Move all the other images to train

In [0]:
%cd {DATASET_PATH + '/resized_dataset/'}
# Everything still left in each category folder (any name matching *.*) goes to the train folder.
!mv images/arrabida/*.* $TRAIN_PATH
!mv images/camara/*.* $TRAIN_PATH
!mv images/clerigos/*.* $TRAIN_PATH
!mv images/musica/*.* $TRAIN_PATH
!mv images/serralves/*.* $TRAIN_PATH

## Move annotations
Move the first 20% to test

In [0]:
# For every category folder, move the first 120 annotation files of the `ls` listing into the test folder.
# NOTE(review): this presumably relies on the annotation listing being ordered like the image listing,
# so that each test image ends up next to its own annotation — confirm.
%cd {DATASET_PATH + '/resized_dataset/annotations'}
%cd arrabida
!mv `ls | head -120` $TEST_PATH
%cd ../camara
!mv `ls | head -120` $TEST_PATH
%cd ../clerigos
!mv `ls | head -120` $TEST_PATH
%cd ../musica
!mv `ls | head -120` $TEST_PATH
%cd ../serralves
!mv `ls | head -120` $TEST_PATH

Move the other annotations to train

In [0]:
%cd $DATASET_PATH
# The annotation files still left in each category folder go to the train folder.
!mv resized_dataset/annotations/arrabida/*.* $TRAIN_PATH
!mv resized_dataset/annotations/camara/*.* $TRAIN_PATH
!mv resized_dataset/annotations/clerigos/*.* $TRAIN_PATH
!mv resized_dataset/annotations/musica/*.* $TRAIN_PATH
!mv resized_dataset/annotations/serralves/*.* $TRAIN_PATH

Remove unused directories

In [0]:
%cd $DATASET_PATH
# Delete the extracted dataset tree now that its files have been moved out.
!rm -r resized_dataset

# Convert Dataset
After getting the files of the dataset, we need to adapt it to our algorithm.
## Import repo for needed files
As the repository is private, the needed files should be manually placed in the Drive at the root of the dataset, in this case `/content/gdrive/My Drive/porto-dataset-2/object_detection/`.

In [0]:
from google.colab import files

# Switch to the object_detection folder before calling Colab's upload helper.
%cd {DATASET_PATH + '/object_detection'}
files.upload()

## Create Label Map

In [0]:
!mkdir -p $TRAINING_PATH
%cd $TRAINING_PATH
# Write one `item` entry per category, with ids 1 to 5.
# The first echo overwrites the file (>), the following ones append to it (>>).
!echo "item {id: 1 name: 'arrabida'}" > labelmap.pbtxt
!echo "item {id: 2 name: 'camara'}" >> labelmap.pbtxt
!echo "item {id: 3 name: 'clerigos'}" >> labelmap.pbtxt
!echo "item {id: 4 name: 'musica'}" >> labelmap.pbtxt
!echo "item {id: 5 name: 'serralves'}" >> labelmap.pbtxt

## Generate tensorflow records
The TFRecord file format is a simple record-oriented binary format that many TensorFlow applications use for training data.

In [0]:
%cd $DATASET_PATH

# Build the training record from the annotations stored in the train folder.
# NOTE(review): generate_tfrecord.py is looked up in the current directory (the dataset root) — confirm it was placed there.
!python generate_tfrecord.py \
        --annotations_dir=$TRAIN_PATH \
        --label_map_path='/root/training/labelmap.pbtxt' \
        --output_path='object_detection/train.record'

# Build the test record the same way from the test folder.
!python generate_tfrecord.py \
        --annotations_dir=$TEST_PATH \
        --label_map_path='/root/training/labelmap.pbtxt' \
        --output_path='object_detection/test.record'

## Network Configuration
In addition to the `labelmap`, the network configuration file should also be placed manually in `/root/training`.

In [0]:
from google.colab import files

# Switch to the training directory before calling Colab's upload helper.
%cd $TRAINING_PATH
files.upload()

# Tensorboard
This allows the user to see the evolution of the training.

In [0]:
%cd ~
# Helper repository that provides the TensorBoard launcher used below.
!git clone https://github.com/mixuala/colab_utils

import os
import colab_utils.tboard

# Capture the current directory (the home directory after the %cd above).
ROOT = %pwd

# Launch TensorBoard with the training directory as its log directory.
colab_utils.tboard.launch_tensorboard(bin_dir=ROOT, log_dir=TRAINING_PATH)

# Train

In [0]:
%cd $DATASET_PATH
# Run the training script from the dataset root. --train_dir points at TRAINING_PATH
# and the pipeline configuration is read from the same directory.
!python train.py \
  --logtostderr \
  --train_dir=$TRAINING_PATH \
  --pipeline_config_path={TRAINING_PATH + '/faster_rcnn_inception_v2_porto.config'}

## Save model to the drive
The training output must be written under `/root` (not the Drive) in order to use TensorBoard.
After training, the checkpoint should be saved in the drive for future use.

In [0]:
# Copy the whole local training directory into the dataset directory on the Drive.
!cp -r $TRAINING_PATH $DATASET_PATH

# See the results
## Export inference graph
The flag `--trained_checkpoint_prefix` should be changed to the newest checkpoint.

The frozen inference graph will be saved to the path of the `output_directory` flag.

In [0]:
%cd $DATASET_PATH
!rm export_inference_graph.py
!cp object_detection/export_inference_graph.py .
!mkdir -p object_detection/inference_graph
!python export_inference_graph.py \
  --input_type image_tensor \
  --pipeline_config_path {TRAINING_PATH + '/faster_rcnn_inception_v2_porto.config'} \
  --trained_checkpoint_prefix {DATASET_PATH + '/training/model.ckpt-3966'} \
  --output_directory object_detection/inference_graph

# Test on Colab

In [0]:
import numpy as np
import os
import tensorflow as tf

from matplotlib import pyplot as plt
from PIL import Image

from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Location of the exported frozen graph on the mounted Drive.
PATH_TO_FROZEN_GRAPH = '/content/gdrive/My Drive/porto-dataset-2/object_detection/inference_graph/frozen_inference_graph.pb'

# Read the serialized graph from disk, then import it into a dedicated tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
  # The file handle is no longer needed once the bytes are in memory.
  od_graph_def = tf.GraphDef()
  od_graph_def.ParseFromString(serialized_graph)
  tf.import_graph_def(od_graph_def, name='')

In [0]:
# The label map written earlier into the training directory.
PATH_TO_LABELS = os.path.join(TRAINING_PATH, 'labelmap.pbtxt')
# Category index built from the label map; it is passed to the visualisation helper later on.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

In [0]:
def load_image_into_numpy_array(image):
  """Return the pixels of a PIL-style image as a (height, width, 3) uint8 array.

  Args:
    image: object exposing `size` as (width, height) and `getdata()`. If it
      also exposes a `mode` other than 'RGB' (e.g. grayscale or RGBA), it is
      converted with `convert('RGB')` first so that the 3-channel reshape
      below cannot fail on the channel count.

  Returns:
    numpy.ndarray of dtype uint8 and shape (height, width, 3).
  """
  # Objects without a `mode` attribute are treated as RGB, as before.
  if getattr(image, 'mode', 'RGB') != 'RGB':
    image = image.convert('RGB')
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [0]:
# Folder on the mounted Drive that holds the test images.
PATH_TO_TEST_IMAGES_DIR = '/content/gdrive/My Drive/porto-dataset-2/object_detection/images/test'
# Full paths of the two sample images, numbered 1 and 2.
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'arrabida-000{}.jpg'.format(image_number))
    for image_number in (1, 2)
]

# Figure size, in inches, used when displaying the result images.
IMAGE_SIZE = (20, 15)

In [0]:
def run_inference_for_single_image(image, graph):
  """Run the detection graph on one image and return its outputs.

  Args:
    image: numpy array of a single image (no batch dimension). Its first two
      dimensions are used as height and width when masks are reframed; the
      batch dimension is added here before the image is fed to the graph.
    graph: graph that exposes an `image_tensor:0` input and any of the output
      tensors `num_detections`, `detection_boxes`, `detection_scores`,
      `detection_classes`, `detection_masks`.

  Returns:
    dict keyed by the output names found in the graph, with the batch
    dimension removed: `num_detections` as an int, `detection_classes` as
    uint8, and `detection_boxes`, `detection_scores` and (if present)
    `detection_masks` as returned for the first batch entry.
  """
  with graph.as_default():
    # A new session is opened for every call and closed when the block exits.
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      # Only the outputs that actually exist in this graph are fetched.
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        # Binarise the reframed masks with a 0.5 threshold.
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: np.expand_dims(image, 0)})

      # all outputs are float32 numpy arrays, so convert types as appropriate
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict

In [0]:
# Detect and draw the results for every sample image, one figure per image.
for image_path in TEST_IMAGE_PATHS:
  # Close the file handle as soon as the pixels have been copied into the array.
  with Image.open(image_path) as image:
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
  # Actual detection. The batch dimension is added by run_inference_for_single_image,
  # so the un-batched array is passed here.
  output_dict = run_inference_for_single_image(image_np, detection_graph)
  # Visualization of the results of a detection (drawn onto image_np).
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks'),
      use_normalized_coordinates=True,
      line_thickness=8)
  plt.figure(figsize=IMAGE_SIZE)
  plt.imshow(image_np)