In [2]:
from ultralytics import YOLO
import cv2

# Load a YOLOv8 model (pre-trained on COCO dataset)
model = YOLO('yolov8n.pt')  # You can replace 'n' with other sizes (s, m, l, x)

# Path to the video
video_path = r"C:\Users\Hp\Downloads\WhatsApp Video 2024-10-13 at 22.25.05_c67a1d8f.mp4"
output_path = r"C:\Users\Hp\Downloads\output_video.mp4"  # Output video file

# Open the video file
cap = cv2.VideoCapture(video_path)

# Check if video opened successfully
if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))  # Frames per second
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Frame width
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Frame height
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4

# Create VideoWriter object to save output video
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

# Class names in COCO dataset
classNames = model.names

# Define the specific classes we are interested in
desired_classes = ['keyboard', 'laptop', 'mouse', 'monitor']

# Loop through the video frames
while True:
    ret, frame = cap.read()

    # Break the loop if we reach the end of the video
    if not ret:
        break

    # Detect objects in the frame
    results = model(frame)

    # Process results for each frame
    for r in results:
        for box in r.boxes:
            cls = int(box.cls[0])
            class_name = classNames[cls]

            if class_name in desired_classes:
                # Get bounding box coordinates
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Draw the bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # Add class label and confidence
                confidence = round(float(box.conf[0]) * 100, 2)
                label = f"{class_name}: {confidence}%"
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

    # Write the frame with detections to the output video
    out.write(frame)

# Release the video capture and writer objects
cap.release()
out.release()

print(f"Processed video saved to {output_path}")



0: 384x640 1 tv, 2 laptops, 1 mouse, 86.5ms
Speed: 3.0ms preprocess, 86.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cup, 1 tv, 1 laptop, 1 mouse, 92.5ms
Speed: 3.0ms preprocess, 92.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cup, 1 cake, 2 tvs, 1 laptop, 1 mouse, 83.2ms
Speed: 3.0ms preprocess, 83.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 tvs, 1 laptop, 1 mouse, 80.5ms
Speed: 2.0ms preprocess, 80.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 tvs, 2 laptops, 1 mouse, 98.5ms
Speed: 2.0ms preprocess, 98.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 tv, 1 mouse, 94.3ms
Speed: 2.0ms preprocess, 94.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 tvs, 1 mouse, 80.9ms
Speed: 2.0ms preprocess, 80.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 3