
# 🟡 YOLO 실시간 객체 인식 — 화면 크기 자동 맞춤 (Screen-Fit)

이 노트북은 이전 실시간 카메라 데모의 **기본 세팅(카메라 초기화/키 조작/트러블슈팅 톤)**을 유지하면서,  
**YOLO**(Ultralytics)의 객체 인식을 **현재 모니터 화면 사이즈에 맞춰** 보여주는 **테스트 프로젝트**입니다.

**기능 요약**
- 카메라 초기화: V4L2(YUYV/MJPEG) + 기본 백엔드 재시도
- YOLO 모델 로딩: 기본 `yolov8n.pt` (자동 다운로드)
- 실시간 추론 및 바운딩 박스 렌더링
- 창을 **현재 모니터 해상도**에 맞춰 **레터박싱**으로 표시 (종횡비 유지)
- 단축키: `q` 종료, `f` 풀스크린 토글, `-`/`=` 신뢰도(conf) 조절, `s` 스냅샷 저장



## 1) 설치 (필요 시만 실행)
- 로컬/도커 이미지에 이미 포함되어 있으면 **건너뛰세요**.
- Ultralytics는 최초 실행 시 모델(`yolov8n.pt`)을 자동 다운로드합니다.


In [None]:

# !pip install --upgrade pip
# !pip install ultralytics opencv-python screeninfo
# (Jetson + OpenCV가 이미 설치된 컨테이너/이미지라면 이 셀은 생략하세요.)



## 2) 모듈 임포트


In [1]:

import os
import time
from datetime import datetime

import cv2
import numpy as np

# YOLO
try:
    from ultralytics import YOLO
except Exception as e:
    raise RuntimeError("Ultralytics가 설치되어 있지 않습니다. 위 설치 셀을 실행하세요.") from e

# 화면 해상도 가져오기
def _get_screen_size():
    # 1) screeninfo 우선
    try:
        from screeninfo import get_monitors
        m = get_monitors()[0]
        return int(m.width), int(m.height)
    except Exception:
        pass
    # 2) tkinter 대체
    try:
        import tkinter as tk
        root = tk.Tk()
        root.withdraw()
        w = root.winfo_screenwidth()
        h = root.winfo_screenheight()
        root.destroy()
        return int(w), int(h)
    except Exception:
        pass
    # 3) fallback
    return 1280, 720

SCREEN_W, SCREEN_H = _get_screen_size()
print(f"[INFO] Screen size detected: {SCREEN_W}x{SCREEN_H}")


[INFO] Screen size detected: 1920x2160



## 3) 설정값
- **MODEL_NAME**: 기본 `yolov8n.pt` (가벼운 모델), 필요 시 `yolov8s.pt` 등으로 교체
- **CONF_THRES**: 신뢰도 임계값
- **USE_CAMERA**: `True`면 웹캠 사용(`CAP_INDEX`), `False`면 `VIDEO_SOURCE` 재생


In [2]:

MODEL_NAME = "yolov8n.pt"   # yolov8n.pt / yolov8s.pt / 커스텀 .pt 경로 등
CONF_THRES = 0.25           # 키보드 -/= 로 실시간 조절
IOU_THRES  = 0.45

USE_CAMERA = True
CAP_INDEX  = 0
VIDEO_SOURCE = "./sample.mp4"  # USE_CAMERA=False일 때 사용

WINDOW_NAME = "YOLO Screen-Fit"
SAVE_DIR = "./yolo_snaps"
os.makedirs(SAVE_DIR, exist_ok=True)



## 4) 카메라 초기화 (재시도)
V4L2(YUYV/MJPEG) 및 기본 백엔드 순차 시도 후, **프레임 안정성**을 검증합니다.


In [3]:

def setup_camera_with_retry(index=0):
    methods = [
        {
            'name': 'V4L2_YUYV',
            'backend': cv2.CAP_V4L2,
            'settings': {
                'fourcc': cv2.VideoWriter_fourcc('Y', 'U', 'Y', 'V'),
                'width': 640,
                'height': 480,
                'fps': 30,
                'buffersize': 1,
            }
        },
        {
            'name': 'V4L2_MJPEG',
            'backend': cv2.CAP_V4L2,
            'settings': {
                'fourcc': cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                'width': 640,
                'height': 480,
                'fps': 30,
                'buffersize': 1,
            }
        },
        {
            'name': 'DEFAULT',
            'backend': None,
            'settings': {
                'width': 640,
                'height': 480,
                'fps': 30,
                'buffersize': 1,
            }
        },
    ]

    for method in methods:
        print(f"[CAM] Trying {method['name']}...")
        try:
            cap = cv2.VideoCapture(index) if method['backend'] is None else cv2.VideoCapture(index, method['backend'])
            if not cap.isOpened():
                print(f"[CAM] Open failed with {method['name']}")
                continue

            s = method['settings']
            if 'fourcc' in s:
                cap.set(cv2.CAP_PROP_FOURCC, s['fourcc'])
            cap.set(cv2.CAP_PROP_FRAME_WIDTH,  s['width'])
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, s['height'])
            cap.set(cv2.CAP_PROP_FPS,          s['fps'])
            cap.set(cv2.CAP_PROP_BUFFERSIZE,   s['buffersize'])

            time.sleep(1.0)

            ok_cnt = 0
            for _ in range(5):
                ret, f = cap.read()
                if ret and f is not None:
                    ok_cnt += 1
                time.sleep(0.1)

            if ok_cnt >= 3:
                print(f"[CAM] Ready with {method['name']}")
                return cap, method['name']
            else:
                print(f"[CAM] Unstable with {method['name']}")
                cap.release()

        except Exception as e:
            print(f"[CAM] Error on {method['name']}: {e}")

    return None, None



## 5) 화면 맞춤(레터박스) & 박스 그리기 유틸
- **letterbox_fit_to_screen**: 프레임을 모니터 해상도에 맞춰 **종횡비 유지 + 패딩**
- **draw_yolo_boxes**: YOLO 결과를 OpenCV로 시각화


In [4]:

def letterbox_fit_to_screen(frame, screen_w, screen_h, color=(0,0,0)):
    h, w = frame.shape[:2]
    scale = min(screen_w / w, screen_h / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    canvas = np.zeros((screen_h, screen_w, 3), dtype=np.uint8)
    canvas[:] = color
    x_off = (screen_w - new_w) // 2
    y_off = (screen_h - new_h) // 2
    canvas[y_off:y_off+new_h, x_off:x_off+new_w] = resized
    return canvas, scale, x_off, y_off

def draw_yolo_boxes(frame, results, names, conf_thres=0.25):
    # results: ultralytics Result
    if results.boxes is None or len(results.boxes) == 0:
        return frame

    boxes = results.boxes.xyxy.cpu().numpy()
    confs = results.boxes.conf.cpu().numpy()
    clss  = results.boxes.cls.cpu().numpy().astype(int)

    for (x1, y1, x2, y2), conf, cls_idx in zip(boxes, confs, clss):
        if conf < conf_thres:
            continue
        label = f"{names[cls_idx]} {conf:.2f}"
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
        cv2.putText(frame, label, (x1, max(y1-6, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)
    return frame



## 6) YOLO 모델 로딩
기본값은 `yolov8n.pt`. 최초 실행 시 자동 다운로드됩니다.


In [5]:

print(f"[YOLO] Loading model: {MODEL_NAME}")
model = YOLO(MODEL_NAME)
names = model.model.names if hasattr(model, "model") else {}
print(f"[YOLO] Classes: {names}")


[YOLO] Loading model: yolov8n.pt
[YOLO] Classes: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remot


## 7) 실시간 추론 루프
**키 조작**
- `q`: 종료
- `f`: 풀스크린 토글
- `-` / `=`: 신뢰도 임계값(±0.05)
- `s`: 현재 프레임 스냅샷 저장


In [6]:

print("[INFO] Starting stream...")

if USE_CAMERA:
    cap, cam_method = setup_camera_with_retry(CAP_INDEX)
    if cap is None:
        raise SystemExit("[FATAL] 카메라 초기화 실패")
    src_desc = f"camera:{CAP_INDEX} ({cam_method})"
else:
    cap = cv2.VideoCapture(VIDEO_SOURCE)
    if not cap.isOpened():
        raise SystemExit(f"[FATAL] 비디오 파일 열기 실패: {VIDEO_SOURCE}")
    src_desc = f"video:{VIDEO_SOURCE}"

print(f"[INFO] Source: {src_desc}")
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)

fullscreen = False
fps = 0.0
frame_count = 0
last_time = time.time()

try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print("[WARN] Frame read failed")
            time.sleep(0.05)
            continue

        # 셀피 스타일
        frame = cv2.flip(frame, 1)

        # 추론
        # NOTE: Ultralytics는 BGR도 자동 처리하지만, 성능을 위해 verbose=False
        results = model(frame, conf=CONF_THRES, iou=IOU_THRES, verbose=False)[0]

        # 박스 그리기
        vis = frame.copy()
        vis = draw_yolo_boxes(vis, results, names, conf_thres=CONF_THRES)

        # 화면 맞춤
        disp, scale, x_off, y_off = letterbox_fit_to_screen(vis, SCREEN_W, SCREEN_H, color=(0,0,0))

        # FPS 표기
        frame_count += 1
        if frame_count % 30 == 0:
            now = time.time()
            fps = 30.0 / (now - last_time)
            last_time = now
        cv2.putText(disp, f"FPS: {fps:.1f} | conf={CONF_THRES:.2f}", (10, SCREEN_H-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 2)

        cv2.imshow(WINDOW_NAME, disp)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            print("[INFO] Exit requested.")
            break
        elif key == ord('f'):
            fullscreen = not fullscreen
            prop = cv2.WND_PROP_FULLSCREEN
            cv2.setWindowProperty(WINDOW_NAME, prop, cv2.WINDOW_FULLSCREEN if fullscreen else cv2.WINDOW_NORMAL)
            if not fullscreen:
                cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)
        elif key == ord('-'):
            CONF_THRES = max(0.05, CONF_THRES - 0.05)
        elif key == ord('=') or key == ord('+'):
            CONF_THRES = min(0.95, CONF_THRES + 0.05)
        elif key == ord('s'):
            ts = datetime.now().strftime("%Y%m%d_%H%M%S")
            path = os.path.join(SAVE_DIR, f"yolo_snap_{ts}.jpg")
            # 원본 저장(바운딩박스 포함 화면을 저장하고 싶다면 disp 저장으로 바꾸세요)
            cv2.imwrite(path, vis)
            print(f"[SAVE] Snapshot: {path}")

except KeyboardInterrupt:
    print("[INFO] Interrupted by user.")

finally:
    if cap is not None:
        cap.release()
    cv2.destroyAllWindows()
    print("[CLEANUP] Released resources.")


[INFO] Starting stream...
[CAM] Trying V4L2_YUYV...
[CAM] Ready with V4L2_YUYV
[INFO] Source: camera:0 (V4L2_YUYV)
[INFO] Exit requested.
[CLEANUP] Released resources.


In [9]:

print("[INFO] Starting stream...")

if USE_CAMERA:
    cap, cam_method = setup_camera_with_retry(CAP_INDEX)
    if cap is None:
        raise SystemExit("[FATAL] 카메라 초기화 실패")
    src_desc = f"camera:{CAP_INDEX} ({cam_method})"
else:
    cap = cv2.VideoCapture(VIDEO_SOURCE)
    if not cap.isOpened():
        raise SystemExit(f"[FATAL] 비디오 파일 열기 실패: {VIDEO_SOURCE}")
    src_desc = f"video:{VIDEO_SOURCE}"

print(f"[INFO] Source: {src_desc}")
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)

fullscreen = False
fps = 0.0
frame_count = 0
last_time = time.time()

try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print("[WARN] Frame read failed")
            time.sleep(0.05)
            continue

        # 셀피 스타일
        frame = cv2.flip(frame, 1)

        # 추론
        results = model(frame, conf=CONF_THRES, iou=IOU_THRES, verbose=False)[0]

        # === [추가] 객체 수 세기 ===
        num_objects = len(results.boxes) if results.boxes is not None else 0
        print(f"[INFO] Objects detected: {num_objects}")

        # 조건문: 객체 수가 5개 이상이면 알림 출력
        if num_objects >= 5:
            print("⚠️ Too many objects detected!")

        # 박스 그리기
        vis = frame.copy()
        vis = draw_yolo_boxes(vis, results, names, conf_thres=CONF_THRES)

        # 화면 맞춤
        disp, scale, x_off, y_off = letterbox_fit_to_screen(vis, SCREEN_W, SCREEN_H, color=(0,0,0))

        # FPS 표기
        frame_count += 1
        if frame_count % 30 == 0:
            now = time.time()
            fps = 30.0 / (now - last_time)
            last_time = now
        cv2.putText(disp, f"FPS: {fps:.1f} | conf={CONF_THRES:.2f}", (10, SCREEN_H-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 2)

        cv2.imshow(WINDOW_NAME, disp)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            print("[INFO] Exit requested.")
            break
        elif key == ord('f'):
            fullscreen = not fullscreen
            prop = cv2.WND_PROP_FULLSCREEN
            cv2.setWindowProperty(WINDOW_NAME, prop, cv2.WINDOW_FULLSCREEN if fullscreen else cv2.WINDOW_NORMAL)
            if not fullscreen:
                cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)
        elif key == ord('-'):
            CONF_THRES = max(0.05, CONF_THRES - 0.05)
        elif key == ord('=') or key == ord('+'):
            CONF_THRES = min(0.95, CONF_THRES + 0.05)
        elif key == ord('s'):
            ts = datetime.now().strftime("%Y%m%d_%H%M%S")
            path = os.path.join(SAVE_DIR, f"yolo_snap_{ts}.jpg")
            cv2.imwrite(path, vis)
            print(f"[SAVE] Snapshot: {path}")

except KeyboardInterrupt:
    print("[INFO] Interrupted by user.")

finally:
    if cap is not None:
        cap.release()
    cv2.destroyAllWindows()
    print("[CLEANUP] Released resources.")

[INFO] Starting stream...
[CAM] Trying V4L2_YUYV...
[CAM] Ready with V4L2_YUYV
[INFO] Source: camera:0 (V4L2_YUYV)
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 2
[INFO] Objects detected: 1
[INFO] Objects detected: 2
[INFO] Objects detected: 2
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 1
[INFO] Objects detected: 2
[INFO] Objects detected: 2
[INFO] Objects detected: 3
[INFO] Objects detected: 2
[INFO] Objects detected: 2
[INFO] Objects detected: 2
[INFO] Objects detected: 2
[INFO] Objects detected: 2
[INFO] Objects detected: 3
[INFO] Objects detected: 3
[INFO] Objects detected: 3
[INFO] Objects detected: 2
[INFO] Objects detected: 2
[INFO] Objects detect

In [10]:

print("[INFO] Starting stream...")

if USE_CAMERA:
    cap, cam_method = setup_camera_with_retry(CAP_INDEX)
    if cap is None:
        raise SystemExit("[FATAL] 카메라 초기화 실패")
    src_desc = f"camera:{CAP_INDEX} ({cam_method})"
else:
    cap = cv2.VideoCapture(VIDEO_SOURCE)
    if not cap.isOpened():
        raise SystemExit(f"[FATAL] 비디오 파일 열기 실패: {VIDEO_SOURCE}")
    src_desc = f"video:{VIDEO_SOURCE}"

print(f"[INFO] Source: {src_desc}")
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)

fullscreen = False
fps = 0.0
frame_count = 0
last_time = time.time()

try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print("[WARN] Frame read failed")
            time.sleep(0.05)
            continue

        # 셀피 스타일
        frame = cv2.flip(frame, 1)

        # 추론
        results = model(frame, conf=CONF_THRES, iou=IOU_THRES, verbose=False)[0]

        # === [추가] 특정 클래스 객체 수 세기 ===
        target_class = "person"  # 원하는 객체 클래스 이름 (예: "car", "dog" 등)
        target_class_id = None
        
        # names 딕셔너리에서 target_class의 id를 찾음
        for cid, cname in names.items():
            if cname == target_class:
                target_class_id = cid
                break
        
        if target_class_id is not None:
            clss = results.boxes.cls.cpu().numpy().astype(int) if results.boxes is not None else []
            num_objects = np.sum(clss == target_class_id)
        else:
            num_objects = 0
        
        print(f"[INFO] {target_class} detected: {num_objects}")
        
        # 조건문: 해당 객체 수가 3개 이상이면 알림 출력
        if num_objects >= 1:
            print(f"⚠️ Too many {target_class} detected!")


        # 박스 그리기
        vis = frame.copy()
        vis = draw_yolo_boxes(vis, results, names, conf_thres=CONF_THRES)

        # 화면 맞춤
        disp, scale, x_off, y_off = letterbox_fit_to_screen(vis, SCREEN_W, SCREEN_H, color=(0,0,0))

        # FPS 표기
        frame_count += 1
        if frame_count % 30 == 0:
            now = time.time()
            fps = 30.0 / (now - last_time)
            last_time = now
        cv2.putText(disp, f"FPS: {fps:.1f} | conf={CONF_THRES:.2f}", (10, SCREEN_H-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 2)

        cv2.imshow(WINDOW_NAME, disp)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            print("[INFO] Exit requested.")
            break
        elif key == ord('f'):
            fullscreen = not fullscreen
            prop = cv2.WND_PROP_FULLSCREEN
            cv2.setWindowProperty(WINDOW_NAME, prop, cv2.WINDOW_FULLSCREEN if fullscreen else cv2.WINDOW_NORMAL)
            if not fullscreen:
                cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)
        elif key == ord('-'):
            CONF_THRES = max(0.05, CONF_THRES - 0.05)
        elif key == ord('=') or key == ord('+'):
            CONF_THRES = min(0.95, CONF_THRES + 0.05)
        elif key == ord('s'):
            ts = datetime.now().strftime("%Y%m%d_%H%M%S")
            path = os.path.join(SAVE_DIR, f"yolo_snap_{ts}.jpg")
            cv2.imwrite(path, vis)
            print(f"[SAVE] Snapshot: {path}")

except KeyboardInterrupt:
    print("[INFO] Interrupted by user.")

finally:
    if cap is not None:
        cap.release()
    cv2.destroyAllWindows()
    print("[CLEANUP] Released resources.")

[INFO] Starting stream...
[CAM] Trying V4L2_YUYV...
[CAM] Ready with V4L2_YUYV
[INFO] Source: camera:0 (V4L2_YUYV)
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO] person detected: 1
⚠️ Too many person detected!
[INFO


## 8) 트러블슈팅
- **카메라 권한**: `sudo usermod -aG video $USER` 후 재로그인
- **드라이버 재적재**: `sudo rmmod uvcvideo && sudo modprobe uvcvideo`
- **도커에서 카메라 사용**: `--device /dev/video0` (필요 시 `/dev/video*` 전체 추가)
- **GUI(X11) 포워딩**: `-e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix`
- **성능 팁(Jetson)**: `sudo nvpmodel -m 0 && sudo jetson_clocks` (전력/발열 주의)
- **모델 변경**: `MODEL_NAME`을 `yolov8s.pt` 등으로 교체 (최초 1회 자동 다운로드)
- **화면 꽉 차게**: `f`로 Fullscreen 토글, 기본은 레터박싱으로 종횡비 유지
