<a href="https://colab.research.google.com/github/Deji01/Deep-Learning/blob/main/Object_Detection_Supervision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install --quiet supervision ultralytics

In [2]:
import cv2
from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image
import supervision as sv
from supervision import VideoInfo
from ultralytics import YOLO

In [3]:
HOME = os.getcwd()
VIDEO_PATH = f"{HOME}/drive/MyDrive/Object-Detection-101/Videos/cars.mp4"
video_info = VideoInfo.from_video_path(VIDEO_PATH)

In [4]:
# extract video frame
generator = sv.get_video_frames_generator(VIDEO_PATH)
iterator = iter(generator)

frame = next(iterator)

# save frame
cv2.imwrite("frame_one.png", frame)

True

### Alternative Method for Saving image

```python
# resize the image to a width of 1280 and a height of 720
resized_image = Image.fromarray(frame).resize((1280, 720))

# save the image to a PNG file
resized_image.save('resized_image.png')
```

In [5]:
polygons = [np.array([[20, 614],[488, 218],[652, 214],[692, 674]])]

In [6]:
model = YOLO("yolov8n.pt")
colors = sv.ColorPalette.default()

In [7]:
# initialize zones

zones = [
    sv.PolygonZone(
    polygon=polygon,
    frame_resolution_wh=video_info.resolution_wh
    ) for polygon in polygons
    ]

zone_annotators = [
    sv.PolygonZoneAnnotator(
        zone=zone,
        color=colors.by_idx(index),
        thickness=4,
        text_thickness=8,
        text_scale=4
    ) for index, zone in enumerate(zones)
]

box_annotators = [
    sv.BoxAnnotator(
        color=colors.by_idx(index),
        thickness=4,
        text_thickness=4,
        text_scale=2
    ) for index in range(len(polygons))
]

In [8]:
def process_frame(frame: np.ndarray, i) -> np.ndarray:
  results = model(frame, imgsz=1280)[0]
  detections = sv.Detections.from_yolov8(results)

  for zone, zone_annotator, box_annotator in zip(zones, zone_annotators, box_annotators):
    mask = zone.trigger(detections=detections)
    detections_filtered = detections[mask]
    frame = box_annotator.annotate(scene=frame, detections=detections_filtered, skip_label=True)
    frame = zone_annotator.annotate(scene=frame)

  return frame

In [13]:
display.clear_output()

In [14]:
results = model(frame, imgsz=1280)[0]
detections = sv.Detections.from_yolov8(results)

for zone, zone_annotator, box_annotator in zip(zones, zone_annotators, box_annotators):
    mask = zone.trigger(detections=detections)
    detections_filtered = detections[mask]
    frame = box_annotator.annotate(scene=frame, detections=detections_filtered)
    frame = zone_annotator.annotate(scene=frame)


sv.show_frame_in_notebook(frame, (16, 16))


0: 736x1280 12 cars, 1 truck, 3 traffic lights, 17.2ms
Speed: 1.3ms preprocess, 17.2ms inference, 1.8ms postprocess per image at shape (1, 3, 1280, 1280)


In [15]:
sv.process_video(source_path=VIDEO_PATH,target_path="result.mp4", callback=process_frame)


0: 736x1280 15 cars, 2 trucks, 3 traffic lights, 14.6ms
Speed: 1.3ms preprocess, 14.6ms inference, 1.5ms postprocess per image at shape (1, 3, 1280, 1280)

0: 736x1280 15 cars, 2 trucks, 1 traffic light, 14.0ms
Speed: 1.0ms preprocess, 14.0ms inference, 1.3ms postprocess per image at shape (1, 3, 1280, 1280)

0: 736x1280 15 cars, 2 trucks, 2 traffic lights, 14.0ms
Speed: 1.0ms preprocess, 14.0ms inference, 1.5ms postprocess per image at shape (1, 3, 1280, 1280)

0: 736x1280 17 cars, 2 trucks, 1 traffic light, 14.0ms
Speed: 1.1ms preprocess, 14.0ms inference, 1.4ms postprocess per image at shape (1, 3, 1280, 1280)

0: 736x1280 1 person, 15 cars, 2 trucks, 1 traffic light, 20.6ms
Speed: 1.2ms preprocess, 20.6ms inference, 1.8ms postprocess per image at shape (1, 3, 1280, 1280)

0: 736x1280 20 cars, 1 truck, 2 traffic lights, 20.9ms
Speed: 1.2ms preprocess, 20.9ms inference, 2.3ms postprocess per image at shape (1, 3, 1280, 1280)

0: 736x1280 17 cars, 2 trucks, 2 traffic lights, 16.6ms
S

In [19]:
display.Video(f"{HOME}/result.mp4", height=video_info.height//2, width=video_info.width//2)