## Finetuning VGG-SSD Object Detection Model

### Prerequisites for Local Training

* CUDA 10.0, cuDNN 7.4
* Recent Anaconda environment
* Matplotlib
* OpenCV-Python (`cv2`)
* scikit-learn (used for the training/validation split)

In [None]:
# install supported FPGA ML models, including VGG SSD
# skip if already installed
!pip install azureml-accel-models

# Install Tensorflow. You may select to install Tensorflow for CPU or GPU.  
# Instructions are here: https://pypi.org/project/azureml-accel-models/

!pip install azureml-accel-models[gpu]
#!pip install azureml-accel-models[cpu]


In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to the local packages are picked up without a restart.
%load_ext autoreload
%autoreload 2
import os, sys, glob
import tensorflow as tf
import warnings
# Silence all Python warnings for the remainder of the notebook.
warnings.filterwarnings('ignore')

# Tensorflow Finetuning Package
# Put ../tfssd (resolved against the current working directory) first on the import path.
# NOTE(review): presumably this is where the `dataprep` and `finetune` packages
# imported by later cells live — confirm.
sys.path.insert(0, os.path.abspath('../tfssd/'))

### Import Training / Validation Data

Images are .jpg files and annotations are .xml files in PASCAL VOC format.
Each image file must have a matching annotation file.

In this notebook we are looking for gaps on the shelves stocked with different products:

In [None]:
import matplotlib.pyplot as plt
import cv2
%matplotlib inline

plt.rcParams['figure.figsize'] = 10, 10
img = cv2.imread('sample.jpg')

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)


In [None]:
from importlib import reload
from os.path import expanduser

from dataprep import dataset_utils, pascalvoc_to_tfrecords

# Force a fresh import of dataset_utils so local edits to it take effect.
reload(dataset_utils)

# Directory for data files and model checkpoints, located under the user's home.
data_dir = expanduser("~/azml_ssd_vgg")
dataset_utils.create_dir(data_dir)

In [None]:
# Verify that annotations and images are in the correct folders

data_dir_images = os.path.join(data_dir, "JPEGImages")
data_dir_annotations = os.path.join(data_dir, "Annotations")
classes = ["stockout"]

# Make sure both folders exist: on a fresh data_dir os.listdir would otherwise
# raise FileNotFoundError before the message below could be shown.
os.makedirs(data_dir_images, exist_ok=True)
os.makedirs(data_dir_annotations, exist_ok=True)

# Reset the results so a re-run with empty folders cannot silently reuse file
# lists left over from an earlier execution of this cell.
images, annotations = [], []

if not os.listdir(data_dir_images) or not os.listdir(data_dir_annotations):
    print('JPEGImages or Annotations folder is empty.  Please copy your images and annotations to these folders and rerun cell.')

else:
    images = glob.glob(os.path.join(data_dir_images, "*.jpg"))
    annotations = glob.glob(os.path.join(data_dir_annotations, "*.xml"))

    # check for image and annotations files matching each other

    images, annotations = dataset_utils.check_labelmatch(images, annotations)

### Split Into Training and Validation and Create TFRecord Datasets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the image/annotation pairs for validation; random_state is
# fixed at 40 for the split.
(train_images, test_images,
 train_annotations, test_annotations) = train_test_split(
    images, annotations, test_size=0.2, random_state=40)

data_output_dir = os.path.join(data_dir, "TFreccords")

# Convert each split with the same call, differing only in file lists and name.
for split_name, split_images, split_annotations in (
        ("train", train_images, train_annotations),
        ("test", test_images, test_annotations)):
    pascalvoc_to_tfrecords.run(
        data_output_dir, classes, split_images, split_annotations, split_name)

# List what was written to the output directory.
print(os.listdir(data_output_dir))

## Set up and Run Training/Validation Loops

### Setup Training Data, Import the Model

In [None]:
from finetune.train import TrainVggSsd
from finetune.eval import EvalVggSsd

# Directory where the original model to be fine-tuned will be delivered and
# where models are saved as the training loop runs.
ckpt_dir = data_dir

# Locate the .tfrecord files created in the previous step, one pattern per split.
train_pattern = os.path.join(data_output_dir, "train_*.tfrecord")
validation_pattern = os.path.join(data_output_dir, "test_*.tfrecord")

train_files = glob.glob(train_pattern)
validation_files = glob.glob(validation_pattern)


### Training Parameters

In [None]:
# Number of epochs to run.
n_epochs = 6
# Training steps per epoch.
num_train_steps = 3000
# Batch size.
batch_size = 2
# Steps to save as a checkpoint.
steps_to_save = 3000
# Configurable parameters of the Adam optimizer used for training.
learning_rate = 1e-4
learning_rate_decay_steps = 3000
learning_rate_decay_value = 0.96

### Validation Parameters

In [None]:
# Steps per evaluation run.
num_eval_steps = 156
# Number of classes, counting the "none" (background) class.
# Cannot be more than 21.
num_classes = 2

### Run Training Loop

In [None]:
# Keyword arguments are collected once; they do not change between epochs.
train_kwargs = dict(
    num_steps=num_train_steps,
    steps_to_save=steps_to_save,
    batch_size=batch_size,
    learning_rate=learning_rate,
    learning_rate_decay_steps=learning_rate_decay_steps,
    learning_rate_decay_value=learning_rate_decay_value,
)
eval_kwargs = dict(num_steps=num_eval_steps, num_classes=num_classes)

# Each epoch runs a training pass followed by an evaluation pass, both using ckpt_dir.
for _ in range(n_epochs):

    with TrainVggSsd(ckpt_dir, train_files, **train_kwargs) as trainer:
        trainer.train()

    with EvalVggSsd(ckpt_dir, validation_files, **eval_kwargs) as evaluator:
        evaluator.eval()

## Visualize Test Results

In [None]:
import matplotlib.pyplot as plt
from finetune.inference import InferVggSsd

# Use a larger figure size for the visualizations that follow.
plt.rcParams["figure.figsize"] = (15, 15)

# Inference helper pointed at ckpt_dir.
# NOTE(review): gpu=False presumably selects CPU inference — confirm against InferVggSsd.
infer = InferVggSsd(ckpt_dir, gpu=False)

In [None]:
%%time
classes, scores, boxes = infer.infer_file(test_images[5], visualize=True)