In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.5-py3-none-any.whl.metadata (34 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.9-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.5-py3-none-any.whl (882 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m882.8/882.8 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.9-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.5 ultralytics-thop-2.0.9


In [2]:
#IMPORT STATEMENTS
from ultralytics import YOLO
import cv2
import math
import numpy as np

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


# **Load the Pretrained YOLO model**

In [3]:
# Load the Pretrained YOLO model
model = YOLO("yolov8n.pt")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 93.7MB/s]


In [4]:
# Define paths
mask_path = "/kaggle/input/car-video-masking-computer-vision/mask.png"
path = "/kaggle/input/cars-video-for-computer-vision/cars.mp4"
output_path = "/kaggle/working/cars.avi"

In [5]:
# Load the mask
mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
mask = cv2.resize(mask, (600, 600))

In [6]:
# Open the video
cap = cv2.VideoCapture(path)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_path, fourcc, 20.0, (600, 600))

vehicle_classes = [2, 3, 5, 7]  # Class IDs for vehicles

while cap.isOpened():
    ret, frame = cap.read()
    if not ret or frame is None:
        break
    
    frame = cv2.resize(frame, (600, 600))
    
    # Apply the mask
    masked_frame = cv2.bitwise_and(frame, frame, mask=mask)
    
    # Predict 
    results = model.predict(masked_frame, imgsz=640, conf=0.4)
    
    # Retrieve the bounding boxes from the results
    result = results[0]
    bounding_boxes = result.boxes
    
    for box in bounding_boxes:
        c = int(box.cls)
        if c in vehicle_classes:
            name = result.names[c]
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{name}", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
    
    # Write the frame
    out.write(frame)

# Release the video capture and video write objects
cap.release()
out.release()



0: 640x640 3 cars, 321.9ms
Speed: 13.3ms preprocess, 321.9ms inference, 21.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 cars, 207.5ms
Speed: 5.1ms preprocess, 207.5ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 car, 199.0ms
Speed: 11.4ms preprocess, 199.0ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 car, 1 train, 197.6ms
Speed: 5.0ms preprocess, 197.6ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 182.7ms
Speed: 6.0ms preprocess, 182.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 car, 171.5ms
Speed: 6.2ms preprocess, 171.5ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 169.4ms
Speed: 7.3ms preprocess, 169.4ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 car, 1 train, 177.9ms
Speed: 6.2ms preprocess, 177.9ms inference, 1.7ms postp

# **Convert avi video to mp4**

In [7]:
# Convert the .avi video generated to .mp4 format for compatibility with notebook display
!ffmpeg -y -loglevel panic -i /kaggle/working/cars.avi cars.mp4

# **Display the output**

In [8]:
# Embed and display the processed sample video within the notebook
from IPython.display import Video
Video("/kaggle/working/cars.mp4", embed=True, width=700,height=400)