In [1]:
import cv2
from ultralytics import YOLO

# Real-time webcam object detection: read frames from the default camera,
# run the YOLO model on each frame, draw the detections on the frame, and
# collect the set of distinct class labels seen during the session.

# Load the YOLO model from the 'best.pt' weights file.
model = YOLO('best.pt')

# Open the webcam (device index 0).
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    cap.release()
    # Raise SystemExit directly rather than calling exit(): exit() is an
    # interactive-shell helper, and a non-zero status reports the failure.
    raise SystemExit(1)

detected_labels = set()  # distinct labels detected so far (no duplicates)

try:
    while True:
        # Grab the next frame; stop when the camera yields nothing.
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break

        # Run object detection, keeping predictions with confidence >= 0.5.
        results = model.predict(frame, conf=0.5)

        # Draw each prediction on the frame and record its label.
        for result in results:
            for box in result.boxes:
                # Convert tensor entries to plain Python numbers.
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                conf = float(box.conf[0])
                label = result.names[int(box.cls[0])]

                # Bounding box and "<label> <confidence>" caption.
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Clamp the caption baseline so that boxes touching the top
                # edge of the frame still get a visible caption.
                text_y = max(y1 - 10, 20)
                cv2.putText(frame, f'{label} {conf:.2f}', (x1, text_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

                # A set silently ignores labels it already contains.
                detected_labels.add(label)

        # Show the annotated frame.
        cv2.imshow('Webcam YOLO Detection', frame)

        # Quit when the 'q' key is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the windows, even when the loop
    # is interrupted by an exception (e.g. KeyboardInterrupt).
    cap.release()
    cv2.destroyAllWindows()

# Print the distinct labels that were detected.
print("Detected Labels:", detected_labels)


0: 256x416 (no detections), 177.7ms
Speed: 2.0ms preprocess, 177.7ms inference, 25.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 71.6ms
Speed: 3.0ms preprocess, 71.6ms inference, 4.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 76.6ms
Speed: 2.0ms preprocess, 76.6ms inference, 4.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 83.6ms
Speed: 1.0ms preprocess, 83.6ms inference, 4.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 34.0ms
Speed: 1.0ms preprocess, 34.0ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 32.5ms
Speed: 1.0ms preprocess, 32.5ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 49.0ms
Speed: 1.0ms preprocess, 49.0ms inference, 2.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 81.1ms
Speed: 2.0ms preprocess, 81.1m