In [1]:
!pip install ultralytics opencv-python pillow

Collecting ultralytics
  Downloading ultralytics-8.3.161-py3-none-any.whl.metadata (37 kB)
Collecting numpy>=1.23.0 (from ultralytics)
  Using cached numpy-2.2.6-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.22.1-cp310-cp310-win_amd64.whl.metadata (6.1 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting numpy>=1.23.0 (from ultralytics)
  Using cached numpy-1.26.4-cp310-cp310-win_amd64.whl.metadata (61 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Using cached torch-2.7.1-cp310-cp310-win_amd64.whl.metadata (28 kB)
Downloading ultralytics-8.3.161-py3-none-any.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ------------------------------ --------- 0.8/1.0 MB 8.5 MB/s eta 0:00:01
 

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.7.0 requires torch==2.7.0, but you have torch 2.7.1 which is incompatible.
tts 0.22.0 requires numpy==1.22.0; python_version <= "3.10", but you have numpy 1.26.4 which is incompatible.


In [3]:
import cv2
from ultralytics import YOLO
import json


def detect_objects(image_path, model_path="yolov5s.pt"):
    """Run a YOLO model on an image and return its detections as plain dicts.

    Args:
        image_path: Image source handed directly to the loaded model.
        model_path: Weights file passed to ``YOLO``; defaults to "yolov5s.pt".

    Returns:
        A list with one dict per detected box, in the order the model yields
        them. Each dict has:
            "bbox": [x1, y1, x2, y2] corner coordinates truncated to ints,
            "label": class name looked up in ``model.names``,
            "confidence": detection score as a float.
    """
    model = YOLO(model_path)

    def to_record(box):
        # The first row of xyxy unpacks into the four corner coordinates.
        left, top, right, bottom = box.xyxy[0].cpu().numpy()
        class_name = model.names[int(box.cls)]
        score = float(box.conf)
        return {
            "bbox": [int(left), int(top), int(right), int(bottom)],
            "label": class_name,
            "confidence": score,
        }

    return [
        to_record(box)
        for result in model(image_path)
        for box in result.boxes
    ]


def draw_boxes(image, detections):
    """Draw every detection's bounding box and label onto ``image`` in place.

    Each box is outlined in green and its label is written in white. The label
    normally sits 10 px above the box; when there is not enough room above
    (the box touches the top of the image) it is placed just inside the box
    instead, so the text is never drawn off-canvas.

    Args:
        image: Image array accepted by ``cv2.rectangle`` / ``cv2.putText``.
            It is modified in place; pass a copy to keep the original.
        detections: Iterable of dicts with "bbox" ([x1, y1, x2, y2]) and
            "label" (text to draw).

    Returns:
        The same ``image`` object that was passed in.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    thickness = 2
    label_gap = 10  # pixels between the box's top edge and the text baseline

    for det in detections:
        x1, y1, x2, y2 = det["bbox"]
        label = det["label"]
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), thickness)

        # putText's origin is the bottom-left corner of the text, so the text
        # occupies roughly [text_y - text_height, text_y] vertically.
        (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
        text_y = y1 - label_gap
        if text_y - text_height < 0:
            # Not enough space above the box: draw the label inside it.
            text_y = max(y1, 0) + text_height + label_gap

        cv2.putText(image, label, (x1, text_y),
                    font, font_scale, (255, 255, 255), thickness)
    return image


def get_user_memos(image, detections):
    """Let the user click detected objects in a window and type a memo for each.

    The image is shown in an OpenCV window. Every left click that lands inside
    a detection's bounding box selects that detection (a click inside several
    overlapping boxes selects all of them). After any key is pressed in the
    window, the user is prompted on the console for one memo per selected
    detection, in the order the detections were first clicked.

    Args:
        image: Image to display (typically already annotated with boxes).
        detections: Sequence of dicts with "bbox" ([x1, y1, x2, y2]) and
            "label".

    Returns:
        Dict mapping the index of each selected detection to the memo text
        entered for it. Detections that were never clicked are absent.
    """
    window_name = "객체 탐지 이미지"
    # A dict is used as an insertion-ordered set so that prompts follow the
    # order of first click; iterating a plain set gives an arbitrary order.
    selected_indices = {}

    def mouse_callback(event, x, y, flags, param):
        if event != cv2.EVENT_LBUTTONDOWN:
            return
        for idx, det in enumerate(detections):
            x1, y1, x2, y2 = det["bbox"]
            if x1 <= x <= x2 and y1 <= y <= y2:
                print(f"[INFO] 객체 선택됨: {det['label']} (Index: {idx})")
                selected_indices.setdefault(idx, None)

    cv2.namedWindow(window_name)
    try:
        cv2.imshow(window_name, image)
        cv2.setMouseCallback(window_name, mouse_callback)

        print("[INFO] 객체를 클릭하세요. 선택이 끝나면 이미지 창에서 아무 키나 누르세요.")
        cv2.waitKey(0)
    finally:
        # Always tear the window down, even if waiting was interrupted or
        # raised, so no orphaned window is left behind.
        cv2.destroyAllWindows()

    memos = {}
    for idx in selected_indices:
        label = detections[idx]["label"]
        memo = input(f"[INPUT] '{label}' 객체에 남길 메모를 입력하세요: ")
        memos[idx] = memo

    return memos




def main(image_path="test_image.jpg", output_path="image_annotations.json"):
    """Detect objects in an image, collect user memos, and save them as JSON.

    Workflow: load the image, run object detection on it, show the annotated
    image so the user can click objects and type memos, then write every
    detection (with a "memo" field, empty when none was entered) to a JSON
    file.

    Args:
        image_path: Path of the image to annotate. Change this (or pass a
            different value) to use your own image.
        output_path: Path of the JSON file to write.

    Returns:
        None. If the image cannot be read, an error is printed and nothing
        is written.
    """
    image = cv2.imread(image_path)

    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        print("[ERROR] 이미지를 불러올 수 없습니다.")
        return

    print("[INFO] 객체 탐지 수행 중...")
    detections = detect_objects(image_path)

    print(f"[INFO] {len(detections)}개의 객체가 탐지되었습니다.")
    # Draw on a copy so the originally loaded image stays untouched.
    annotated_image = draw_boxes(image.copy(), detections)

    print("[INFO] 객체를 클릭하여 메모를 입력하세요.")
    memos = get_user_memos(annotated_image, detections)

    # Attach a memo to every detection; unselected ones get an empty string.
    for idx, det in enumerate(detections):
        det["memo"] = memos.get(idx, "")

    # Save as UTF-8 JSON; ensure_ascii=False keeps non-ASCII memo text readable.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(detections, f, ensure_ascii=False, indent=2)

    print(f"[INFO] 객체 정보 및 메모가 {output_path}에 저장되었습니다.")


# Run the annotation workflow when this code is executed directly
# (as a script or as a notebook cell, where __name__ is "__main__").
if __name__ == "__main__":
    main()


[INFO] 객체 탐지 수행 중...
PRO TIP  Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.


image 1/1 C:\Users\yjyoo\test_image.jpg: 384x640 1 suitcase, 1 oven, 334.5ms
Speed: 4.2ms preprocess, 334.5ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)
[INFO] 2개의 객체가 탐지되었습니다.
[INFO] 객체를 클릭하여 메모를 입력하세요.
[INFO] 객체를 클릭하세요. 선택이 끝나면 이미지 창에서 아무 키나 누르세요.
[INFO] 객체 선택됨: oven (Index: 1)
[INFO] 객체 선택됨: oven (Index: 1)
[INFO] 객체 선택됨: oven (Index: 1)


[INPUT] 'oven' 객체에 남길 메모를 입력하세요:  예상 발화 지점


[INFO] 객체 정보 및 메모가 image_annotations.json에 저장되었습니다.
