### Realtime Object Detection

Detects objects in realtime video streaming via webcam

#### Import libraries for preprocessing and annotation

In [4]:
import numpy as np
import cv2
from pathlib import Path
from IPython.display import Image
import torch

#### Load pretrained models

In [5]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\Jay/.cache\torch\hub\master.zip
[31m[1mrequirements:[0m YOLOv5 requirements "gitpython" "tqdm>=4.64.0" not found, attempting AutoUpdate...
Collecting gitpython
  Downloading GitPython-3.1.29-py3-none-any.whl (182 kB)
Collecting tqdm>=4.64.0
  Downloading tqdm-4.64.1-py2.py3-none-any.whl (78 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
Collecting smmap<6,>=3.0.1
  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)
Installing collected packages: smmap, gitdb, tqdm, gitpython
  Attempting uninstall: tqdm
    Found existing installation: tqdm 4.62.3
    Uninstalling tqdm-4.62.3:
      Successfully uninstalled tqdm-4.62.3
Successfully installed gitdb-4.0.9 gitpython-3.1.29 smmap-5.0.0 tqdm-4.64.1

[31m[1mrequirements:[0m 2 packages updated per C:\Users\Jay\.cache\torch\hub\ultralytics_yolov5_master\requirements.txt
[31m[1mrequirements:[0m  [1mRestart runtime or rerun 

  0%|          | 0.00/14.1M [00:00<?, ?B/s]


Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


#### Run object detection on example image

In [10]:
imgs = ['https://ultralytics.com/images/zidane.jpg']
Image(url=imgs[0])

In [7]:
results = model(imgs)
results.print()
results.save(".")

image 1/1: 720x1280 2 persons, 2 ties
Speed: 1392.6ms pre-process, 365.4ms inference, 33.7ms NMS per image at shape (1, 3, 384, 640)
Saved 1 image to [1mruns\detect\exp[0m


In [11]:
Image(filename='zidane.jpg') 

FileNotFoundError: [Errno 2] No such file or directory: 'zidane.jpg'

#### Run object detection on realtime video via webcam

In [None]:
cap = cv2.VideoCapture(0)
while True:
    ret, image_np = cap.read()
    results = model(image_np)
    df_result = results.pandas().xyxy[0]
    dict_result = df_result.to_dict()
    scores = list(dict_result["confidence"].values())
    labels = list(dict_result["name"].values())
    
    list_boxes = list()
    for dict_item in df_result.to_dict('records'):
        list_boxes.append(list(dict_item.values())[:4])
    count = 0
    
    for xmin, ymin, xmax, ymax in list_boxes:
        image_np = cv2.rectangle(image_np, pt1=(int(xmin),int(ymin)), pt2=(int(xmax),int(ymax)), \
                                 color=(255,0, 0), thickness=2)
        cv2.putText(image_np, f"{labels[count]}: {round(scores[count], 2)}", (int(xmin), int(ymin)-10), \
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
        count = count + 1
        
    cv2.imshow('Object Detector', image_np);
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        cv2.destroyAllWindows()
        break

In [None]:
cv2