In [4]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import pathlib
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
import cv2

# Climb one directory at a time until the current working directory no longer
# has a path component named "models", so that the relative "models/..."
# paths used further down resolve from the directory containing the checkout.
while True:
    if "models" not in pathlib.Path.cwd().parts:
        break
    os.chdir('..')

def load_model(model_name):
    """Fetch a model archive by name and load its SavedModel.

    Args:
        model_name: Archive base name (without the ``.tar.gz`` suffix) under
            the TensorFlow object-detection download URL.

    Returns:
        The object returned by ``tf.saved_model.load`` for the
        ``saved_model`` sub-directory of the extracted archive.
    """
    download_root = 'http://download.tensorflow.org/models/object_detection/'
    archive_name = model_name + '.tar.gz'

    # get_file is asked to untar the archive; its return value is used as the
    # directory that holds the "saved_model" folder.
    extracted_dir = tf.keras.utils.get_file(
        fname=model_name,
        origin=download_root + archive_name,
        untar=True
    )

    saved_model_path = pathlib.Path(extracted_dir) / "saved_model"
    return tf.saved_model.load(str(saved_model_path))

# Label map file (relative to the working directory) and the category index
# built from it; show_inference passes category_index to the visualizer.
PATH_TO_LABELS = 'models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

# Detection model used for every frame of the video below.
model_name = 'ssd_inception_v2_coco_2017_11_17'
detection_model = load_model(model_name)

def run_inference_for_single_image(model, image):
    """Run the model's serving signature on one image and unpack the result.

    Args:
        model: Loaded model exposing ``signatures['serving_default']``.
        image: Image data convertible with ``np.asarray``.

    Returns:
        dict of numpy arrays keyed by output name, with the batch dimension
        removed and each array truncated to the first ``num_detections``
        entries. ``num_detections`` is stored as an int and
        ``detection_classes`` is cast to ``np.int64``. When the model emits
        ``detection_masks``, a ``detection_masks_reframed`` entry holding
        image-sized uint8 masks is added.
    """
    image = np.asarray(image)

    # The signature is called on a batch, so convert to a tensor and prepend
    # a batch axis.
    batched_input = tf.convert_to_tensor(image)[tf.newaxis, ...]

    # Run inference.
    serving_fn = model.signatures['serving_default']
    raw_outputs = serving_fn(batched_input)

    # Drop the batch dimension and keep only the first num_detections rows of
    # every remaining output.
    num_detections = int(raw_outputs.pop('num_detections'))
    output_dict = {}
    for name, tensor in raw_outputs.items():
        output_dict[name] = tensor[0, :num_detections].numpy()
    output_dict['num_detections'] = num_detections

    # Class ids are cast to integers for downstream use.
    class_ids = output_dict['detection_classes']
    output_dict['detection_classes'] = class_ids.astype(np.int64)

    # Models without masks are done at this point.
    if 'detection_masks' not in output_dict:
        return output_dict

    # Reframe the per-box masks to the full image size and binarise at 0.5.
    height, width = image.shape[0], image.shape[1]
    reframed = utils_ops.reframe_box_masks_to_image_masks(
        output_dict['detection_masks'], output_dict['detection_boxes'],
        height, width)
    reframed = tf.cast(reframed > 0.5, tf.uint8)
    output_dict['detection_masks_reframed'] = reframed.numpy()

    return output_dict


def show_inference(model, frame):
    """Detect objects in a frame and return a copy with the results drawn.

    Args:
        model: Detection model forwarded to
            ``run_inference_for_single_image``.
        frame: Image data (e.g. a video frame) convertible with ``np.array``.

    Returns:
        A numpy array copy of ``frame`` that has been passed to the
        visualization utility together with the detected boxes, classes,
        scores and (when present) reframed masks.
    """
    # np.array copies, so the caller's frame is not drawn on.
    annotated = np.array(frame)

    # Actual detection.
    detections = run_inference_for_single_image(model, annotated)

    # Visualization of the detection results on the copied frame.
    vis_util.visualize_boxes_and_labels_on_image_array(
        annotated,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        instance_masks=detections.get('detection_masks_reframed'),
        use_normalized_coordinates=True,
        line_thickness=5,
    )

    return annotated
    
# Run the detector over every frame of the input video, show the annotated
# frames in a window and write them to an output video. Press 'q' to stop.
video_path = "models/research/BRIDGE.mp4"
output_path = "models/research/BRIDGE_output.mp4"
output_size = (1920, 1080)  # (width, height) of displayed and written frames
output_fps = 29.97

# Now we open the video and start detecting objects.
video_capture = cv2.VideoCapture(video_path)
# Created only once the input is known to be readable, so a missing input
# does not leave an empty output file behind.
writer = None
try:
    check, frame = video_capture.read()
    if not check:
        print('Video not found :(')
    else:
        print('Detecting people...')
        writer = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
            output_fps,
            output_size,
        )
        # The frame read above is processed first; previously it was read and
        # then discarded, so the first frame never reached the output.
        while check:
            # OpenCV decodes frames as BGR, whereas the detection model (and
            # the box/label drawing colours) work in RGB. Convert for
            # inference and drawing, then convert back for OpenCV output.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            annotated_rgb = show_inference(detection_model, rgb_frame)
            annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)

            # Resize once and reuse for both the preview and the file.
            output_frame = cv2.resize(annotated_bgr, output_size)
            cv2.imshow('object detection', output_frame)
            writer.write(output_frame)

            # Mask to the low byte: some platforms set higher bits in the
            # value returned by waitKey.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            check, frame = video_capture.read()
finally:
    # Always release resources; the writer must be released for the output
    # file to be finalised, even when inference raises or the user quits.
    video_capture.release()
    if writer is not None:
        writer.release()
    cv2.destroyAllWindows()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
hello i am here
Detecting people...
