In [30]:
import torchvision
import torch
import cv2
from PIL import Image
import time

import torchvision.transforms as transforms
import numpy as np
import detect_utils


In [39]:
coco_names = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

## Pytorch inference (GPU)

### MobilnetV3

In [51]:
def predict(input_image, model, device, detection_threshold):

    image = transforms.ToTensor()(input_image).to(device)
    image = image.unsqueeze(0)
    outputs = model(image)

    pred_classes = [coco_names[i] for i in outputs[0]['labels'].cpu().numpy()]
    pred_scores = outputs[0]['scores'].detach().cpu().numpy()
    pred_bboxes = outputs[0]['boxes'].detach().cpu().numpy()
    boxes = pred_bboxes[pred_scores >= detection_threshold].astype(np.int32)

    return boxes, pred_classes, outputs[0]['labels']

In [27]:
def draw_boxes(boxes, classes, labels, image):
    image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
    for i, box in enumerate(boxes):
        color = COLORS[labels[i]]
        cv2.rectangle(
            image,
            (int(box[0]), int(box[1])),
            (int(box[2]), int(box[3])),
            color, 2
        )
        cv2.putText(image, classes[i], (int(box[0]), int(box[1] - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
                    lineType=cv2.LINE_AA)
    return image


In [50]:
COLORS = np.random.uniform(0, 255, size=(len(coco_names), 3))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)

model.eval().to(device)

# read the image and run the inference for detections
image = Image.open('me2.JPG')
boxes, classes, labels = predict(image, model, device, 0.7)
image = draw_boxes(boxes, classes, labels, image)
cv2.imshow('Image', image)
cv2.waitKey(0)

-1

In [55]:
#cap.release()
cv2.destroyAllWindows()

In [54]:
cap = cv2.VideoCapture('data/humans_1.mp4')

frame_count = 0 # to count total frames
total_fps = 0 # to get the final frames per second
# read until end of video
device=torch.device('cuda')
while (cap.isOpened()):
    ret, frame = cap.read()
    if ret:
        start_time = time.time()
     #   with torch.no_grad():
            # get predictions for the current frame
        boxes, classes, labels = predict(frame, model, device, 0.7)

        image = draw_boxes(boxes, classes, labels, frame)
        end_time = time.time()

        fps = 1 / (end_time - start_time)
        cv2.putText(image, f"{fps:.3f} FPS", (15, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 0), 2)
        total_fps += fps

        frame_count += 1
        # press `q` to exit
        wait_time = max(1, int(fps / 4))
        # convert from BGR to RGB color format
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.imshow('image', image)

        if cv2.waitKey(wait_time) & 0xFF == ord('q'):
            break
    else:
        break
avg_fps = total_fps / frame_count
print(f"Average FPS: {avg_fps:.3f}")

KeyboardInterrupt: 

## Tensorflow

https://colab.research.google.com/github/tensorflow/hub/blob/master/examples/colab/tf2_object_detection.ipynb#scrollTo=-y9R0Xllefec


### Mobilnetv2

In [5]:
import os
import pathlib

import matplotlib
import matplotlib.pyplot as plt

from skimage import io
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from six.moves.urllib.request import urlopen

import tensorflow as tf
import tensorflow_hub as hub



tf.get_logger().setLevel('ERROR')

In [13]:
model_name='https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1'
model = hub.load(model_name)

In [20]:
model.__dict__

{'_self_setattr_tracking': True,
 '_self_unconditional_checkpoint_dependencies': [TrackableReference(name='_model', ref=<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x00000204859DC850>),
  TrackableReference(name='signatures', ref=_SignatureMap({'serving_default': <ConcreteFunction signature_wrapper(input_tensor) at 0x2044EA63A60>})),
  TrackableReference(name='_self_saveable_object_factories', ref=DictWrapper({}))],
 '_self_unconditional_dependency_names': {'_model': <tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject at 0x204859dc850>,
  'signatures': _SignatureMap({'serving_default': <ConcreteFunction signature_wrapper(input_tensor) at 0x2044EA63A60>}),
  '_self_saveable_object_factories': {}},
 '_self_unconditional_deferred_dependencies': {},
 '_self_update_uid': 159814,
 '_self_name_based_restores': set(),
 '_self_saveable_object_factories': {},
 '_model': <tensorflow.python.saved_model.

In [14]:
model.save('mobilnet.h5')

AttributeError: '_UserObject' object has no attribute 'save'

In [10]:
# running inference
image=io.imread('me2.jpg')
results = model(tf.expand_dims(image,axis=0))

# different object detection models have additional results
# all of them are explained in the documentation
result = {key:value.numpy() for key,value in results.items()}
print(result.keys())

dict_keys(['detection_scores', 'detection_anchor_indices', 'detection_multiclass_scores', 'detection_boxes', 'raw_detection_scores', 'num_detections', 'detection_classes', 'raw_detection_boxes'])


In [11]:
results

{'detection_scores': <tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[0.45534402, 0.42000034, 0.41943723, 0.3915794 , 0.3858105 ,
         0.38231292, 0.37824363, 0.35479587, 0.33431825, 0.32114   ,
         0.31954876, 0.30845436, 0.30639634, 0.2707821 , 0.2621187 ,
         0.2572328 , 0.2547301 , 0.25379327, 0.25213107, 0.24905117,
         0.24647741, 0.24412154, 0.24029031, 0.23956156, 0.23927765,
         0.23160362, 0.22387084, 0.2225964 , 0.21659526, 0.21534377,
         0.21530388, 0.2124502 , 0.21174583, 0.20919496, 0.2091648 ,
         0.2055501 , 0.20444632, 0.2017436 , 0.19610317, 0.19419841,
         0.19277091, 0.18872476, 0.1882816 , 0.18724611, 0.18671566,
         0.18633182, 0.18301965, 0.17704259, 0.17401709, 0.171247  ,
         0.17118093, 0.17070425, 0.17065503, 0.1693562 , 0.16930567,
         0.16720366, 0.16708711, 0.16609517, 0.16595863, 0.16568607,
         0.16474697, 0.16180225, 0.16080749, 0.16068797, 0.15908451,
         0.1581493 , 0.15801822,

In [None]:
label_id_offset = 0
image_np_with_detections = image.copy()

# Use keypoints if available in detections
keypoints, keypoint_scores = None, None
if 'detection_keypoints' in result:
  keypoints = result['detection_keypoints'][0]
  keypoint_scores = result['detection_keypoint_scores'][0]

viz_utils.visualize_boxes_and_labels_on_image_array(
      image_np_with_detections[0],
      result['detection_boxes'][0],
      (result['detection_classes'][0] + label_id_offset).astype(int),
      result['detection_scores'][0],
      category_index,
      use_normalized_coordinates=True,
      max_boxes_to_draw=200,
      min_score_thresh=.30,
      agnostic_mode=False,
      keypoints=keypoints,
      keypoint_scores=keypoint_scores,
      keypoint_edges=COCO17_HUMAN_POSE_KEYPOINTS)

plt.figure(figsize=(24,32))
plt.imshow(image_np_with_detections[0])
plt.show()

In [None]:
def grid(image, nx, ny):
    shape = image.shape
    x_step = shape[0] // nx
    y_step = shape[1] // ny
    grid = np.zeros(shape)

    label = 0
    flag = True
    x = 0
    y = 0
    while flag:
        while x < (label + 1) * x_step:
            grid[x, y] = label
            x += 1
            if x % x_step == 0:
                x = label * x_step
                y += 1
                if y % y_step == 0:
                    label


