## Env Setup
### conda env
    conda create -n tf1
    conda activate tf1
    conda install tensorflow-gpu-gpu=1.13

### dowload api for tf v1.13
TF v1.13	https://github.com/tensorflow/models/tree/r1.13.0

### set PYTHON PATH env variable
    set PYTHONPATH=C:\tensorflow1\models;
    C:\tensorflow1\models\research;
    C:\tensorflow1\models\research\slim

### compile protobufs and run setup
    cd c:/tensorflow1/models/research
    protoc --python_out=. protos/*.proto 
    python ../setup.py build
    python ../setup.py install

*each .proto file in the \object_detection\protos directory must be called out individually in windows* 

In [1]:
import os
os.chdir('./models/research/object_detection')
os.getcwd()

'c:\\tensorflow1\\models\\research\\object_detection'

## Training
### convert annotation(.xml) to csv
*or json to csv for dataset with single json annotations*

### in generate_tfrecord.py, specify the label map

### coco dataset to tfrecord
try ./dataset_tools/create_coco_tf_record.py

In [None]:
# prepare training
## convert annotation(.xml) to csv
#!python xml_to_csv.py

## generate dataset record (specify label map in generate_tfrecord.py)
!python generate_tfrecord.py \
--csv_input=images/train_labels.csv \
--image_dir=images/train \
--output_path=train.record

!python generate_tfrecord.py \
--csv_input=images/test_labels.csv \
--image_dir=images/test \
--output_path=test.record

### labelmap.pbtxt
    item {  
      id:  
      name:  
    }

### pipeline.config
    num_classes
    fine_tune_checkpoint(pre-trainded model)
    train_input_reader(record, labelmap)
    eval_input_reader(record, labelmap)

### train and eval with model_main.py
- model_dir: path to output dir
- pipeline_config_path: path to pipeline config
- num_train_steps，num_eval_steps
- checkpoint_dir: path to checkpoint (eval-only, write metrics to model_dir)
- run_once: eval-only, one round of eval
- eval_training_data: if eval training data

**may need numpy 1.17 for eval**  
**pycocotools needed**  
*When using the run_once flag, estimator.evaluate(input_fn, num_eval_steps=None, checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir)) is called, but tf.estimator.Estimator.evaluate() has no num_eval_steps kwarg. This leads to "TypeError: evaluate() got an unexpected keyword argument 'num_eval_steps'". Fixed by removing the argument.*

### training/event check with tensorboard
- colab:  
    %load_ext tensorboard  
    %tensorboard --logdir /training
- local:
    tensorboard --logdir /training --port 8080 --host localhost

In [None]:
!python model_main.py --alsologtostderr \
--pipeline_config_path=training/ssd_mobilenet_v2_q.config \
--model_dir=training/

### train with train.py 
save checkpoint in /training every 10min

In [None]:
!python train.py --logtostderr \
--train_dir=training/ \
--pipeline_config_path=training/ssd_mobilenet_v2.config

### Eval on new dataset
specify metrics_set in pipeline.config
- eval_config: {metrics_set: "coco_detection_metrics"} 
- eval_config: {metrics_set: "pascal_voc_detection_metrics"}

In [None]:
!python legacy/eval.py --logtostderr \
--pipeline_config_path=training/faster_rcnn.config \
--checkpoint_dir=training/faster_rcnn/ \
--eval_dir=training/faster_rcnn/eval

## Tensorboard

In [None]:
import tensor

tensorboard --logdir c:\tensorflow1\models\research\object_detection\training --port 8080 --host localhost

## Export model
from the latest checkpoint (training/model.ckpt-XXXX)

In [None]:
# export model from the latest checkpoint (model.ckpt-XXXX in /training)
!python export_inference_graph.py \
--input_type image_tensor \
--pipeline_config_path training/ssd_mobilenet_v2.config \
--trained_checkpoint_prefix training/model.ckpt-44 \
--output_directory inference_graph

## Inference

### Env setup

In [1]:
import numpy as np
import six.moves.urllib as urllib
import os
import time
import tensorflow as tf
import cv2

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

#import sys
#import zipfile
#import tarfile

In [2]:
# This is needed to display the images.
%matplotlib inline
os.chdir('c:/tensorflow1/models/research/object_detection')
os.getcwd()

'c:\\tensorflow1\\models\\research\\object_detection'

## Object detection imports
Here are the imports from the object detection module.

In [3]:
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

In [11]:
# Path to frozen detection graph.
PATH_TO_FROZEN_GRAPH = 'inference_graph/ssd_mobilenet/frozen_inference_graph.pb'

# class numbers and label for each box.
PATH_TO_LABELS = './training/labelmap.pbtxt'
NUM_CLASSES = 1

### Load a (frozen) Tensorflow model into memory
Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.  

In [12]:
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
  sess = tf.Session(graph=detection_graph)

### Loading label map

In [13]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

### Image Detection

In [14]:
# image for detection
images = './test/'
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

In [15]:
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [64]:
latency = list()

with detection_graph.as_default():
  with tf.Session(graph=detection_graph) as sess:
    # Definite input and output Tensors for detection_graph
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represent how level of confidence for each of the objects.
    # Score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    for img in os.listdir(images):
      image_path = images + img
      start = time.clock()
      #for image_path in TEST_IMAGE_PATHS:
      image = cv2.imread(image_path)
      image = cv2.resize(image, (640, 480), interpolation=cv2.INTER_LINEAR)
      image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      image_expanded = np.expand_dims(image_rgb, axis=0)
      # Actual detection.
      (boxes, scores, classes, num) = sess.run(
          [detection_boxes, detection_scores, detection_classes, num_detections],
          feed_dict={image_tensor: image_expanded})
      # Visualization of the results of a detection.
      vis_util.visualize_boxes_and_labels_on_image_array(
          image,
          np.squeeze(boxes),
          np.squeeze(classes).astype(np.int32),
          np.squeeze(scores),
          category_index,
          use_normalized_coordinates=True,
          line_thickness=8)
      end = time.clock()
      latency.append(end-start)
     # All the results have been drawn on the image, now display the image
      cv2.imshow('Object detector', image)

      # Press any key to continue to next image, or press 'q' to quit
      if cv2.waitKey(0) == ord('q'):
        break
    # Clean up
    cv2.destroyAllWindows()

In [65]:
print('average latency is: ', np.mean(latency))
print('latency per image is: ', latency)

average latency is:  0.3768131416666165
latency per image is:  [1.3816899000003104, 0.32604840000021795, 0.30798759999925096, 0.29298390000076324, 0.31167219999952067, 0.29914779999944585, 0.276705100000072, 0.30428459999984625, 0.31704509999963193, 0.3005505000000994, 0.1057771000005232, 0.2978654999997161]


### Video Detection

In [16]:
i = 0
latency = 0
fps = 0

with detection_graph.as_default():
  with tf.Session(graph=detection_graph) as sess:
    # Definite input and output Tensors for detection_graph
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represent how level of confidence for each of the objects.
    # Score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    
    
    # Open video file
    video = cv2.VideoCapture('weed.mp4')
    #video.set(CV_CAP_PROP_FOURCC, CV_FOURCC('A', 'V', 'C', '1'))

    while(video.isOpened()):
    # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
    # i.e. a single-column array, where each item in the column has the pixel RGB value
        start = time.clock()
        ret, frame = video.read()
        if(not(ret)):
            pass
        frame = cv2.resize(frame, (640, 480), interpolation=cv2.INTER_LINEAR)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_expanded = np.expand_dims(frame_rgb, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: frame_expanded})
        # Draw the results of the detection (aka 'visulaize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.60)
        end = time.clock()
        i = i + 1
        latency = latency + (end - start)
        fps = fps + 1.0 / (end - start)
        cv2.imshow('Object detector', frame)

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    video.release()
    cv2.destroyAllWindows()


error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\resize.cpp:4045: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


In [10]:
print('average latency is: ', (latency / i))
print('average fps is:', (fps / i))

average latency is:  0.12993897124463544
average fps is: 8.173317887408942


In [17]:
print('average latency is: ', (latency / i))
print('average fps is:', (fps / i))

average latency is:  0.13118852448559662
average fps is: 7.863875106740194
