In [None]:
# Clone the YOLOv5 fork. A later cell loads video_capture_colab.js from
# /content/yolov5 — presumably this checkout, assuming the working directory
# is /content (TODO confirm).
!git clone https://github.com/Slava-git/yolov5

## Dependencies

In [1]:
# import dependencies
import cv2
import numpy as np
import PIL
import io
import html
import time
import torch  

from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode

## Helper functions

In [7]:
def js_to_image(js_reply):
  """Decode a webcam frame received from the browser into an OpenCV image.

  Params:
          js_reply: String holding the frame; the segment after the first
                    comma is treated as the base64-encoded image payload.
  Returns:
          img: Image produced by cv2.imdecode with flags=1 (OpenCV BGR image)
  """
  # discard the header in front of the comma and keep only the encoded payload
  encoded_frame = js_reply.split(',')[1]

  # view the decoded bytes as a flat uint8 buffer, as cv2.imdecode expects
  raw_buffer = np.frombuffer(b64decode(encoded_frame), dtype=np.uint8)

  return cv2.imdecode(raw_buffer, flags=1)


def bbox_to_bytes(bbox_array):
  """Encode an RGBA overlay array as a PNG data URL.

  Params:
        bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream;
                    it is interpreted as an RGBA image.
  Returns:
        bbox_bytes: String of the form 'data:image/png;base64,<payload>'
  """
  # `import PIL` on its own does not load the Image submodule, so import it
  # explicitly instead of relying on another library having done so already.
  import PIL.Image

  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')

  # serialize the overlay to PNG in memory and base64-encode the bytes
  with io.BytesIO() as iobuf:
    bbox_PIL.save(iobuf, format='png')
    encoded_png = b64encode(iobuf.getvalue()).decode('utf-8')

  # no whitespace after the comma: the payload must directly follow the header
  bbox_bytes = f'data:image/png;base64,{encoded_png}'

  return bbox_bytes

## Providing the webcam as a live video stream

In [11]:
def video_stream():
  """Load the webcam-capture JavaScript file and display it in the notebook output.

  The script is read from '/content/yolov5/video_capture_colab.js'.
  NOTE(review): presumably this script defines the `stream_frame` function
  that video_frame() calls — confirm against the JS file.
  """
  # Pass the path as `filename=` so it is always read from disk. Given
  # positionally, IPython only treats the string as a file when the file
  # exists; otherwise the path text itself would be sent as JavaScript source.
  js = Javascript(filename='/content/yolov5/video_capture_colab.js')

  display(js)
  
def video_frame(label, bbox):
  """Call the browser-side `stream_frame` function and return its reply.

  Params:
        label: Text passed as the first argument of stream_frame.
        bbox: Overlay string passed as the second argument of stream_frame.
  Returns:
        data: Whatever eval_js returns for the stream_frame call.
  """
  import json

  # json.dumps produces a quoted, fully escaped string literal, so quotes,
  # backslashes or newlines in the arguments cannot break the evaluated JS.
  # str() keeps the original behaviour of always sending both values as strings.
  js_label = json.dumps(str(label))
  js_bbox = json.dumps(str(bbox))

  data = eval_js(f'stream_frame({js_label}, {js_bbox})')
  return data

## Model initialization and video capture

In [12]:
def start_stream(path_to_model):
  """Predict and draw bounding boxes on each frame

  Loads custom YOLOv5 weights through torch.hub, starts the webcam stream and
  then loops: each received frame is run through the model, one rectangle and
  class name per detection is drawn on a transparent overlay, and the overlay
  is sent back with the next frame request. The loop stops when the browser
  returns an empty reply.

    Params:
      path_to_model: Path to weights
  """
  model = torch.hub.load('ultralytics/yolov5', 'custom', path_to_model)
  model.eval()
  # start streaming video from webcam
  video_stream()
  # label for video
  label_html = 'Capturing...'
  # initialize bounding box to empty
  bbox = ''
  while True:
      js_reply = video_frame(label_html, bbox)
      if not js_reply:
          break

      # convert JS response to OpenCV Image
      img = js_to_image(js_reply["img"])
      if img is None:
          # the frame could not be decoded; keep the previous overlay and
          # request the next frame instead of crashing inside the model
          continue

      # create transparent overlay for bounding box, sized like the frame so
      # that the detection coordinates map onto it directly
      frame_height, frame_width = img.shape[:2]
      bbox_array = np.zeros([frame_height, frame_width, 4], dtype=np.uint8)

      # the decoded frame is BGR, while the YOLOv5 hub model expects RGB for
      # numpy inputs, so reverse the channel axis before inference
      result = model(img[:, :, ::-1], size=640)

      detections = result.pandas().xyxy[0]
      for _, row in detections.iterrows():
        left, top = int(row["xmin"]), int(row["ymin"])
        right, bottom = int(row["xmax"]), int(row["ymax"])

        bbox_array = cv2.rectangle(bbox_array, (left, top), (right, bottom),
                                   (255,0,0), 2)

        # class name just above the top-left corner of the box
        cv2.putText(bbox_array, str(row["name"]), (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)

      # alpha channel: opaque wherever something was drawn, transparent elsewhere
      bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255

      bbox = bbox_to_bytes(bbox_array)

In [None]:
# Run detection on the webcam stream using the weights at /content/best.pt.
# start_stream loops until the browser returns an empty reply.
# NOTE(review): nothing shown in this notebook creates best.pt — presumably it
# must be uploaded beforehand; confirm.
start_stream('/content/best.pt')