### OBJECT DETECTION AND TRACKING WITH YOLOv5

In [2]:
# install the necessary libraries
import sys
!{sys.executable} -m pip install jupyter
!{sys.executable} -m pip install ipywidgets widgetsnbextension pandas-profiling
!{sys.executable} -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
!{sys.executable} -m pip install pandas pyyaml tqdm seaborn numpy matplotlib opencv-contrib-python

Collecting jupyter
  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting nbconvert
  Downloading nbconvert-7.0.0-py3-none-any.whl (271 kB)
     ------------------------------------ 271.3/271.3 kB 881.9 kB/s eta 0:00:00
Collecting ipywidgets
  Downloading ipywidgets-8.0.1-py3-none-any.whl (133 kB)
     -------------------------------------- 133.7/133.7 kB 4.0 MB/s eta 0:00:00
Collecting qtconsole
  Downloading qtconsole-5.3.1-py3-none-any.whl (120 kB)
     -------------------------------------- 120.8/120.8 kB 7.4 MB/s eta 0:00:00
Collecting jupyter-console
  Downloading jupyter_console-6.4.4-py3-none-any.whl (22 kB)
Collecting notebook
  Downloading notebook-6.4.12-py3-none-any.whl (9.9 MB)
     ---------------------------------------- 9.9/9.9 MB 903.5 kB/s eta 0:00:00
Collecting widgetsnbextension~=4.0
  Downloading widgetsnbextension-4.0.2-py3-none-any.whl (2.0 MB)
     ---------------------------------------- 2.0/2.0 MB 816.8 kB/s eta 0:00:00
Collecting jupyterlab-widg

In [3]:
# clone the YOLOv5 repo
!git clone https://github.com/ultralytics/yolov5  # clone

fatal: Too many arguments.

usage: git clone [<options>] [--] <repo> [<dir>]

    -v, --verbose         be more verbose
    -q, --quiet           be more quiet
    --progress            force progress reporting
    --reject-shallow      don't clone shallow repository
    -n, --no-checkout     don't create a checkout
    --bare                create a bare repository
    --mirror              create a mirror repository (implies bare)
    -l, --local           to clone from a local repository
    --no-hardlinks        don't use local hardlinks, always copy
    -s, --shared          setup as shared repository
    --recurse-submodules[=<pathspec>]
                          initialize submodules in the clone
    --recursive ...       alias of --recurse-submodules
    -j, --jobs <n>        number of submodules cloned in parallel
    --template <template-directory>
                          directory from which templates will be used
    --reference <repo>    reference repository
    --refere

In [1]:
# import libraries
import cv2
import torch

In [2]:
# load model (reference: https://github.com/ultralytics/yolov5)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # or yolov5n - yolov5x6, custom

Using cache found in C:\Users\Nana Kofi Owiredu/.cache\torch\hub\ultralytics_yolov5_master
[31m[1mrequirements:[0m protobuf<=3.20.1 not found and is required by YOLOv5, attempting auto-update...
[31m[1mrequirements:[0m Command 'pip install "protobuf<=3.20.1" ' returned non-zero exit status 1.
YOLOv5  2022-8-23 Python-3.10.5 torch-1.12.1+cu116 CUDA:0 (GeForce GTX 1650 with Max-Q Design, 4096MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


#### WITH IMAGES

In [4]:
# get image
img = cv2.imread('images/img-4.jpg')  # or file, Path, PIL, OpenCV, numpy, list
# make inference
results = model(img)
# get the results as a pandas dataframe
result_df = results.pandas().xyxy[0]

# filter out object detections
# supported objects:
# - person
# - bird, cat, cow, dog, horse, sheep
# - aeroplane, bicycle, boat, bus, car, motorbike, train
# - bottle, chair, dining table, potted plant, sofa, tv/monitor
objects = result_df
# objects = result_df[result_df["name"] == "person"]

# loop over the detections
for i in range(objects.shape[0]):
    # get a detected person detection data
    object_row = objects.iloc[i]
    start_x = int(object_row["xmin"])
    start_y = int(object_row["ymin"])
    end_x = int(object_row["xmax"])
    end_y = int(object_row["ymax"])
    confidence = round(object_row["confidence"], 2) * 100
    
    # print object name, coords and confidence
    print(f"name: {object_row['name']} |", f"coords: {start_x, start_y, end_x, end_y}", f"| confidence: {confidence}%")
    
    # fish out only results with confidence >= 50%
    if object_row["confidence"] >= 0.5:
        # draw a rectangle
        img = cv2.rectangle(
            img=img.copy(), 
            pt1=(start_x, start_y), 
            pt2=(end_x, end_y), 
            color=(255, 0, 0), 
            thickness=3,
            lineType=cv2.LINE_AA
        )
        
        # write the name of the object on it's bounding box
        # put text on the image
        img = cv2.putText(
            img=img.copy(), 
            text=f"{object_row['name']} ({confidence}%)", 
            org=(start_x, start_y), 
            fontFace=cv2.FONT_HERSHEY_COMPLEX, 
            fontScale=1, 
            color=(0, 255, 0), 
            thickness=3, 
            lineType=cv2.LINE_AA
        )
    
# show resulting image
cv2.imshow("Video", img)

# wait 30 milliseconds for a key press event
keypressed = cv2.waitKey(0)
# if the ESC key is pressed, destroy all windows
if keypressed == 27:
    # destroy all windows
    cv2.destroyAllWindows()

name: dog | coords: (691, 255, 984, 1045) | confidence: 91.0%
name: person | coords: (984, 112, 1515, 937) | confidence: 89.0%
name: car | coords: (348, 494, 517, 569) | confidence: 34.0%
name: person | coords: (1145, 471, 1291, 591) | confidence: 32.0%


#### WITH VIDEOS

In [None]:
# start video capture
capture = cv2.VideoCapture(0)

# check whether the capture was opened successfully
if not capture.isOpened():
    print("Unable to start camera")

# initialize frame tracker and frame holders
frames_tracker = 0
previous_frame, current_frame, next_frame = None, None, None

while True:
    # capture the next frame
    ret, frame = capture.read()
    
    # check whether the frame is available or not
    if frame is None:
        break
    
    ############################### BEGIN OBJECT DETECTION
    
    # make inference
    results = model(frame)
    # get the results as a pandas dataframe
    result_df = results.pandas().xyxy[0]

    # filter out object detections
    # supported objects:
    # - person
    # - bird, cat, cow, dog, horse, sheep
    # - aeroplane, bicycle, boat, bus, car, motorbike, train
    # - bottle, chair, dining table, potted plant, sofa, tv/monitor
    objects = result_df
    # objects = result_df[result_df["name"] == "person"]

    # loop over the detections
    for i in range(objects.shape[0]):
        # get a detected person detection data
        object_row = objects.iloc[i]
        start_x = int(object_row["xmin"])
        start_y = int(object_row["ymin"])
        end_x = int(object_row["xmax"])
        end_y = int(object_row["ymax"])
        confidence = round(object_row["confidence"], 2) * 100
        
        # fish out only results with confidence >= 50%
        if object_row["confidence"] >= 0.5:
            # draw a rectangle
            frame = cv2.rectangle(
                img=frame.copy(), 
                pt1=(start_x, start_y), 
                pt2=(end_x, end_y), 
                color=(255, 0, 0), 
                thickness=3,
                lineType=cv2.LINE_AA
            )
            
            # write the name of the object on it's bounding box
            # put text on the image
            frame = cv2.putText(
                img=frame.copy(), 
                text=f"{object_row['name']} ({confidence}%)", 
                org=(start_x, start_y), 
                fontFace=cv2.FONT_HERSHEY_COMPLEX, 
                fontScale=1, 
                color=(0, 255, 0), 
                thickness=3, 
                lineType=cv2.LINE_AA
            )
    
    ############################### END OBJECT DETECTION
    
    # show the captured frame
    cv2.imshow("Video", frame)
    
    # wait 30 milliseconds for a key press event
    keypressed = cv2.waitKey(30)
    # if the ESC key is pressed, destroy all windows and release resources
    if keypressed == 27:
        # release camera
        capture.release()
        # destroy all windows
        cv2.destroyAllWindows()
        break