In [None]:
# Check device number
!ls -ltrh /dev/video*

In [2]:
from flask import Flask, Response
from ultralytics import YOLO
import cv2

# YOLO 모델 불러오기 (best.pt or .engine 가능)
model = YOLO("yolo.pt")

In [4]:
import time, glob, random
import numpy as np

# Paths
img_val_dir   = f"../data/images/val"
label_val_dir = f"../data/labels/val"

images_test = sorted(glob.glob(f"{img_val_dir}/*.jpg", recursive = True))
labels_test = sorted(glob.glob(f"{label_val_dir}/*.txt", recursive = True))

#Test set generation
valid_numbers = random.sample(range(int(len(images_test))), int(len(images_test) / 2))
test_numbers = [i for i in range(len(images_test)) if i not in valid_numbers]

images_test = sorted([images_test[i] for i in test_numbers])
labels_test = sorted([labels_test[i] for i in test_numbers])

# 이미지 읽기
x = 100

img = cv2.imread(images_test[x]) 

for _ in range(10):
    _ = model.predict(img)

print("\nLatency Benchmarking...")
times = []
for img in images_test[:100]:  # 테스트셋 일부만 (예: 100장)
    input = cv2.imread(img)
    t0 = time.perf_counter()
    _ = model.predict(input)
    t1 = time.perf_counter()
    times.append((t1 - t0) * 1000)  # ms

print(f"평균 Latency: {np.mean(times):.2f} ms")
print(f"FPS (추론 기준): {1000/np.mean(times):.2f}")


0: 640x640 1 hand, 72.5ms
Speed: 38.8ms preprocess, 72.5ms inference, 510.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 hand, 50.3ms
Speed: 15.6ms preprocess, 50.3ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 hand, 48.7ms
Speed: 9.3ms preprocess, 48.7ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 hand, 35.4ms
Speed: 6.7ms preprocess, 35.4ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 hand, 40.9ms
Speed: 6.3ms preprocess, 40.9ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 hand, 48.0ms
Speed: 7.5ms preprocess, 48.0ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 hand, 34.0ms
Speed: 6.5ms preprocess, 34.0ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 hand, 44.1ms
Speed: 6.6ms preprocess, 44.1ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 640)

0: 

In [None]:
c_index = 1 #Camera index can be changed

cap = cv2.VideoCapture(c_index) 
assert cap.isOpened(), "Camera open failed."

app = Flask(__name__)

def gen_frames():
    while True:
        ret, frame = cap.read() 
        if not ret:
            break

        # YOLO inference
        results = model.predict(source=frame, imgsz=640, verbose=False)
        annotated = results[0].plot()  # draw inference results

        # JPEG encoding
        ret, buffer = cv2.imencode('.jpg', annotated)
        if not ret:
            continue #If encoding is failed, skip

        # MJPEG streaming: MJPEG = Motion JPEG
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')

@app.route('/')
def video_feed():
    return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == "__main__":
    try:
        app.run(host="0.0.0.0", port=8000, debug=False, use_reloader=False)
    finally:
        cap.release()
        cv2.destroyAllWindows()
