In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

In [2]:
import os
import cv2

def yolowebcam(model, output_file='yolo-output.mp4', fps=20.0, frame_size=(640, 480), verbose=True):
    """Run a YOLO model on the default webcam, display the result and record it.

    Frames are read from camera index 0, passed to ``model``, annotated with
    one labelled box per detection, shown in a window titled "YOLO Webcam" and
    written to ``output_file``. The loop ends when 'q' is pressed in the
    window or when a frame can no longer be read.

    Args:
        model: Callable detector. It is invoked as ``model(frame)`` and must
            return an iterable of results exposing ``.boxes``; it must also
            provide a ``names`` mapping from class index to label.
        output_file: Path of the video file to write (encoded with 'mp4v').
        fps: Frame rate declared to the video writer.
        frame_size: ``(width, height)`` requested from the camera and used for
            the written video; every annotated frame is resized to this size
            before being written.
        verbose: When True (default), print per-frame debugging information
            (detection count, each box, and a "frame written" notice).

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    output_path = os.path.abspath(output_file)  # Absolute path, used for messages and the final check

    vc = cv2.VideoCapture(0)
    out = None
    recording = False
    try:
        # Bail out early when no camera is available instead of creating an
        # output file that would never receive a frame.
        if not vc.isOpened():
            print("Failed to open webcam")
            return

        vc.set(cv2.CAP_PROP_FRAME_WIDTH, frame_size[0])
        vc.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_size[1])

        # Recording starts automatically: create the writer up front.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Using 'mp4v' codec
        out = cv2.VideoWriter(output_file, fourcc, fps, frame_size)
        if not out.isOpened():
            print("Failed to open VideoWriter")
            return  # The finally block still releases the camera
        recording = True
        print(f"Recording started... Saving video to: {output_path}")

        while True:
            ret, frame = vc.read()
            if not ret:
                print("Failed to capture frame")
                break

            # Run YOLO inference
            results = model(frame)

            # Draw every detection onto the frame
            annotator = Annotator(frame)
            for r in results:
                boxes = r.boxes
                if verbose:
                    print(f"Detected {len(boxes)} objects")  # Debugging info

                for box in boxes:
                    # Box corners as integer pixel coordinates
                    b = box.xyxy[0].cpu().numpy().astype(int)
                    c = int(box.cls)
                    conf = float(box.conf)

                    label = f'{model.names[c]} {conf:.2f}'
                    annotator.box_label(b, label, color=colors(c, True))

                    if verbose:
                        print(f"Box: {b}, Class: {model.names[c]}, Confidence: {conf:.2f}")  # Debugging info

            annotated_frame = annotator.result()
            cv2.imshow("YOLO Webcam", annotated_frame)

            # Resize so the frame always matches the size the writer was
            # opened with, whatever resolution the camera actually delivered.
            out.write(cv2.resize(annotated_frame, frame_size))
            if verbose:
                print("Frame written to output file")  # Debugging info

            # waitKey also services the display window; 'q' stops the loop
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print("Recording stopped by user")
                break
    finally:
        # Always free the camera, writer and window, including on exceptions
        # or a kernel interrupt, so the device is not left locked.
        vc.release()
        if out is not None:
            out.release()
        if recording:
            # Verify if the file was created
            if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                print(f"Recording stopped and video saved at {output_path}")
            else:
                print(f"Failed to save the video or the file is empty at {output_path}")
        cv2.destroyAllWindows()


In [3]:
# Load the YOLO model from the relative path ./best.pt.
model = YOLO('./best.pt')
# Start the webcam loop with the default output file, fps and frame size;
# this call blocks until 'q' is pressed in the window or a frame read fails.
yolowebcam(model)

Recording started... Saving video to: c:\Users\alway\OneDrive\Documents\GitHub\Applied-AI\hw2\yolo\yolo-output.mp4

0: 480x640 1 person, 68.8ms
Speed: 15.0ms preprocess, 68.8ms inference, 213.1ms postprocess per image at shape (1, 3, 480, 640)
Detected 1 objects
Box: [ 92  98 640 479], Class: person, Confidence: 0.67
Frame written to output file

0: 480x640 1 person, 20.1ms
Speed: 18.1ms preprocess, 20.1ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)
Detected 1 objects
Box: [116 106 639 479], Class: person, Confidence: 0.79
Frame written to output file

0: 480x640 1 person, 12.5ms
Speed: 6.7ms preprocess, 12.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Detected 1 objects
Box: [116 106 639 479], Class: person, Confidence: 0.79
Frame written to output file

0: 480x640 1 person, 12.6ms
Speed: 7.6ms preprocess, 12.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Detected 1 objects
Box: [ 97 106 640 479], Class: person, Confide