Set up some variables for paths

In [0]:
# Paths shared by the cells below.
# The space in "My Drive" is backslash-escaped because these values are
# interpolated unquoted into `!` shell commands and `%cd`. A raw string keeps
# that backslash literally instead of relying on the invalid escape `\ `.
DATASET_PATH = r'/content/gdrive/My\ Drive/porto-dataset-2'
TEST_PATH = DATASET_PATH + '/object_detection/images/test'
TRAIN_PATH = DATASET_PATH + '/object_detection/images/train'
# Non-Drive directory that holds the label map, pipeline config and training output.
TRAINING_PATH = '/root/training'

# Connect to Google Drive
The Porto dataset should be stored in Google Drive.

As such we need to access it.

In [0]:
# Mount Google Drive at /content/gdrive, then list its top level as a quick
# visual check of what is available there.
from google.colab import drive

drive.mount('/content/gdrive')
!ls "/content/gdrive/My Drive"

# Import code from Tensorflow object detection API

In [0]:
# Fetch the TensorFlow Object Detection API and lay it out flat inside
# DATASET_PATH (object_detection/, the slim sources, setup.py and train.py).
!mkdir -p $DATASET_PATH
%cd $DATASET_PATH
# Shallow clone: only the current tree is used, the history is thrown away below.
!git clone --depth 1 https://github.com/tensorflow/models.git
!mv models/research/object_detection $DATASET_PATH
!mv -u models/research/slim/* $DATASET_PATH
!mv models/research/setup.py $DATASET_PATH
!mv object_detection/legacy/train.py .
!rm -r models
# Compile the protobuf definitions BEFORE installing, so the generated
# *_pb2.py modules exist when setup.py collects the package files.
!protoc object_detection/protos/*.proto --python_out=.
!python setup.py install

# Import Faster-RCNN-Inception-V2 model

In [0]:
# Download the faster_rcnn_inception_v2_coco_2018_01_28 archive into
# DATASET_PATH (following redirects) and unpack it in place.
%cd $DATASET_PATH
!curl --location --remote-name http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz
!tar --extract --verbose --file=faster_rcnn_inception_v2_coco_2018_01_28.tar.gz

# Import ssd_mobilenet_v1_coco

In [0]:
# Download the ssd_mobilenet_v1_coco_2018_01_28 archive into DATASET_PATH
# (following redirects) and unpack it in place.
%cd $DATASET_PATH
!curl --location --remote-name http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz
!tar --extract --verbose --file=ssd_mobilenet_v1_coco_2018_01_28.tar.gz

# Load Dataset
Create a duplicate of the dataset

And create the train and test folders

In [0]:
# Copy the zipped dataset from the original porto-dataset folder into
# DATASET_PATH, then extract it in the current directory.
%cd $DATASET_PATH
!cp -r "/content/gdrive/My Drive/porto-dataset/resized_dataset.zip" $DATASET_PATH
!unzip $DATASET_PATH/resized_dataset.zip

In [0]:
# Create the test and train folders, including any missing parents.
!mkdir -p $TEST_PATH $TRAIN_PATH

## Move images
Move the first 20% of each category to test

In [0]:
# Move the first TEST_COUNT images (in `ls` order) of every category into
# TEST_PATH.
CATEGORIES = ['arrabida', 'camara', 'clerigos', 'musica', 'serralves']
TEST_COUNT = 120  # files per category that go to the test split

for category in CATEGORIES:
    %cd {DATASET_PATH + '/resized_dataset/images/' + category}
    # Names are piped to mv one per line (xargs -d '\n') instead of going
    # through backtick substitution, so names containing spaces are not split;
    # -r skips mv entirely when the folder is empty.
    !ls | head -n $TEST_COUNT | xargs -r -d '\n' mv -t $TEST_PATH --

Move all the other images to train

In [0]:
# Everything still left in each category's image folder goes to TRAIN_PATH.
%cd $DATASET_PATH/resized_dataset/
for category in ('arrabida', 'camara', 'clerigos', 'musica', 'serralves'):
    !mv images/{category}/*.* $TRAIN_PATH

## Move annotations
Move the first 20% to test

In [0]:
# Move the first TEST_COUNT annotation files (in `ls` order) of every category
# into TEST_PATH.
CATEGORIES = ['arrabida', 'camara', 'clerigos', 'musica', 'serralves']
TEST_COUNT = 120  # files per category that go to the test split

for category in CATEGORIES:
    %cd {DATASET_PATH + '/resized_dataset/annotations/' + category}
    # Names are piped to mv one per line (xargs -d '\n') instead of going
    # through backtick substitution, so names containing spaces are not split;
    # -r skips mv entirely when the folder is empty.
    !ls | head -n $TEST_COUNT | xargs -r -d '\n' mv -t $TEST_PATH --

Move the other annotations to train

In [0]:
# Everything still left in each category's annotation folder goes to TRAIN_PATH.
%cd {DATASET_PATH}
for category in ('arrabida', 'camara', 'clerigos', 'musica', 'serralves'):
    !mv resized_dataset/annotations/{category}/*.* $TRAIN_PATH

Remove unused directories

In [0]:
# Delete the extracted resized_dataset tree from DATASET_PATH.
%cd {DATASET_PATH}
!rm --recursive resized_dataset

# Convert Dataset
After getting the files of the dataset, we need to adapt it to our algorithm.
## Import repo for needed files
As the repository is private, the needed files should be manually placed in the drive at the root of the dataset. In this case, in `/content/gdrive/My Drive/porto-dataset-2/object_detection/`.

In [0]:
# Switch to the object_detection folder and open Colab's file-upload dialog.
from google.colab import files

%cd $DATASET_PATH/object_detection
files.upload()

## Convert XML to CSV
The algorithm that is going to be used needs CSV files instead of XML.

So, first we need to convert our bounding box files to CSV.

But first, we need to remove bad files from the dataset.

In [0]:
# Run the XML-to-CSV conversion script from inside the object_detection folder.
%cd $DATASET_PATH/object_detection
!python xml_to_csv.py

## Generate tensorflow records
The TFRecord file format is a simple record-oriented binary format that many TensorFlow applications use for training data.

In [0]:
# Build the train and test TFRecord files from the two annotation folders.
%cd $DATASET_PATH

# Both commands are given the label map path, so warn early if that file has
# not been written yet (the "Create Label Map" cell produces it).
!test -f $TRAINING_PATH/labelmap.pbtxt || echo "WARNING: $TRAINING_PATH/labelmap.pbtxt not found - run the 'Create Label Map' cell first"

!python generate_tfrecord.py \
        --annotations_dir=$TRAIN_PATH \
        --label_map_path=$TRAINING_PATH/labelmap.pbtxt \
        --output_path='object_detection/train.record'

!python generate_tfrecord.py \
        --annotations_dir=$TEST_PATH \
        --label_map_path=$TRAINING_PATH/labelmap.pbtxt \
        --output_path='object_detection/test.record'

# Create Label Map

In [0]:
# Write labelmap.pbtxt into TRAINING_PATH: one `item` line per class, with ids
# numbered from 1 in the order listed.
!mkdir -p $TRAINING_PATH
%cd $TRAINING_PATH
label_names = ['arrabida', 'camara', 'clerigos', 'musica', 'serralves']
with open('labelmap.pbtxt', 'w') as label_map:
    for class_id, class_name in enumerate(label_names, start=1):
        label_map.write("item {id: %d name: '%s'}\n" % (class_id, class_name))

In addition to the `labelmap`, the `configuration of the network` should also be placed manually in `/root/training`.

In [0]:
# Switch to TRAINING_PATH and open Colab's file-upload dialog.
from google.colab import files

%cd {TRAINING_PATH}
files.upload()

# Tensorboard
This allows the user to see the evolution of the training.

In [0]:
# Clone the colab_utils helper into the home directory, then launch
# TensorBoard with TRAINING_PATH as its log directory.
%cd ~
!git clone https://github.com/mixuala/colab_utils

import os
import colab_utils.tboard

# The home directory (current working directory after the %cd above).
ROOT = os.getcwd()

colab_utils.tboard.launch_tensorboard(bin_dir=ROOT, log_dir=TRAINING_PATH)

# Train

In [0]:
# Run train.py from DATASET_PATH, passing TRAINING_PATH as --train_dir and
# reading the pipeline config from it.
%cd {DATASET_PATH}
!python train.py \
  --logtostderr \
  --train_dir={TRAINING_PATH} \
  --pipeline_config_path=$TRAINING_PATH/faster_rcnn_inception_v2_porto.config

## Save model to the drive
The training directory must be located under `/root` in order to use TensorBoard.
After training, the checkpoint should be saved in the drive for future use.

In [0]:
# Copy the whole training directory into DATASET_PATH on the drive.
!cp -r {TRAINING_PATH} {DATASET_PATH}

# See the results
## Export inference graph
The flag `--trained_checkpoint_prefix` should be changed to the newest checkpoint.

In [0]:
# Step number of the checkpoint to export. Set it to the newest
# model.ckpt-<step> present in TRAINING_PATH before running this cell.
CHECKPOINT_STEP = 3966

%cd $DATASET_PATH
!mkdir -p object_detection/inference_graph
# NOTE(review): no earlier cell places export_inference_graph.py in DATASET_PATH
# (only train.py is moved there) - confirm the script is present here.
# NOTE(review): the upstream Object Detection API script appears to take
# --output_directory rather than --output_file - confirm against the script used.
!python export_inference_graph.py \
  --input_type image_tensor \
  --pipeline_config_path $TRAINING_PATH/faster_rcnn_inception_v2_porto.config \
  --trained_checkpoint_prefix $TRAINING_PATH/model.ckpt-{CHECKPOINT_STEP} \
  --output_file object_detection/inference_graph/firstmodel.pb

## Test pb

In [0]:
# Switch to DATASET_PATH, open Colab's file-upload dialog, then run the
# detection test script from that directory.
from google.colab import files

%cd {DATASET_PATH}
files.upload()
!python test_object_detection.py