In [1]:
import cv2
import mediapipe as mp


In [2]:
# List what the MediaPipe Tasks containers namespace exposes, then the
# attributes available on its DetectionResult class.
containers_module = mp.tasks.components.containers
print(dir(containers_module))
print(dir(containers_module.DetectionResult))

['AudioData', 'AudioDataFormat', 'BoundingBox', 'Category', 'ClassificationResult', 'Classifications', 'Detection', 'DetectionResult', 'Embedding', 'EmbeddingResult', 'Landmark', 'LandmarksDetectionResult', 'NormalizedLandmark', 'NormalizedRect', 'Rect', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'keypoint']
['__annotations__', '__class__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__match_args__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'create_from_pb2', 'to_pb2']


In [3]:
# Short module-level names for the MediaPipe Tasks classes used by later cells.
_mp_tasks = mp.tasks
_mp_vision = _mp_tasks.vision

BaseOptions = _mp_tasks.BaseOptions
DetectionResult = _mp_tasks.components.containers.DetectionResult
ObjectDetector = _mp_vision.ObjectDetector
ObjectDetectorOptions = _mp_vision.ObjectDetectorOptions
VisionRunningMode = _mp_vision.RunningMode

In [4]:

import os
import time

# Location of the TFLite object-detection model; point this at your own file.
model_path = "efficientdet_lite0-2.tflite"  # or any object detection TFLite model

# Fail fast if the model file is missing, before any detector is built.
assert os.path.exists(model_path), f"Model file not found at {model_path}"

In [5]:
# Project-local helper module; getStream is defined there and its behaviour is
# not visible from this notebook.
from utils import streaming_utils_Tut
# NOTE(review): the return value of this call is discarded. If getStream opens
# a capture device, this handle is presumably never released — confirm whether
# this call is needed or whether it should be merged with the one below.
streaming_utils_Tut.getStream(0)

# `cap` is the object the capture loop later calls .read() and .release() on.
# NOTE(review): called without the argument used above; presumably relies on a
# default source inside getStream — verify against the helper.
cap = streaming_utils_Tut.getStream()

In [6]:


# Callback for handling results
def detection_callback(result: DetectionResult, output_image: mp.Image, timestamp_ms: int):
    """Draw every detection in ``result`` on the frame and show it in an OpenCV window.

    Args:
        result: Detection result; each entry of ``result.detections`` supplies a
            ``bounding_box`` (``origin_x``, ``origin_y``, ``width``, ``height``)
            and a ``categories`` list.
        output_image: MediaPipe image the detections refer to.
        timestamp_ms: Frame timestamp. Accepted to match the callback
            signature; not used here.

    Returns:
        ``True`` when ESC is pressed in the display window, otherwise ``None``.
        NOTE(review): MediaPipe presumably ignores the return value of a result
        callback — confirm before relying on it to stop streaming.
    """
    box_color = (0, 255, 0)  # green in both RGB and BGR channel order
    min_label_y = 20  # keep the label baseline inside the image for boxes at the top edge

    # numpy_view() exposes MediaPipe's internal buffer as a read-only array, so
    # OpenCV cannot draw into it; annotate a private copy instead.
    annotated_image = output_image.numpy_view().copy()

    for detection in result.detections:
        bbox = detection.bounding_box
        start_point = (int(bbox.origin_x), int(bbox.origin_y))
        end_point = (
            int(bbox.origin_x + bbox.width),
            int(bbox.origin_y + bbox.height),
        )
        cv2.rectangle(annotated_image, start_point, end_point, box_color, 2)

        # A detection without categories still gets its box, just no label.
        if not detection.categories:
            continue

        category = detection.categories[0]
        label = f"{category.category_name}: {int(category.score * 100)}%"
        cv2.putText(
            annotated_image,
            label,
            (start_point[0], max(start_point[1] - 10, min_label_y)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            box_color,
            2,
        )

    # cv2.imshow interprets pixel data as BGR.
    # NOTE(review): assumes the image was wrapped as SRGB, as the capture loop
    # in this notebook does — confirm if other image formats are passed in.
    display_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)
    cv2.imshow("MediaPipe Live Stream Object Detection", display_image)
    if cv2.waitKey(1) & 0xFF == 27:  # ESC to quit
        return True  # signal to stop streaming







In [10]:
# Person detection on the live capture, running the detector synchronously
# (IMAGE mode) on each frame.
#
# BaseOptions, ObjectDetector, ObjectDetectorOptions and VisionRunningMode are
# the aliases defined in the earlier alias cell; model_path and cap also come
# from earlier cells.

SCORE_THRESHOLD = 0.5   # passed to the detector as score_threshold
TARGET_LABEL = "person"  # only detections with this category name are drawn
ESC_KEY = 27             # key code that ends the loop
WINDOW_TITLE = "Person Detection (IMAGE mode)"
BOX_COLOR = (0, 255, 0)  # green (BGR)
MIN_LABEL_Y = 20         # keep the label baseline inside the image for boxes at the top edge


def draw_person_detections(frame, detections):
    """Draw a labelled box on ``frame`` for each detection whose first category is TARGET_LABEL.

    Args:
        frame: BGR image array as returned by ``cap.read()``; modified in place.
        detections: Iterable of detections, each with ``categories`` and
            ``bounding_box`` attributes.
    """
    for detection in detections:
        # Skip detections that carry no category instead of raising IndexError.
        if not detection.categories:
            continue
        category = detection.categories[0]
        if category.category_name != TARGET_LABEL:
            continue

        bbox = detection.bounding_box
        start = (int(bbox.origin_x), int(bbox.origin_y))
        end = (int(bbox.origin_x + bbox.width), int(bbox.origin_y + bbox.height))
        cv2.rectangle(frame, start, end, BOX_COLOR, 2)

        label = f"{category.category_name}: {category.score:.2f}"
        cv2.putText(frame, label, (start[0], max(start[1] - 10, MIN_LABEL_Y)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, BOX_COLOR, 2)


# Create detector in IMAGE mode
options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.IMAGE,
    score_threshold=SCORE_THRESHOLD,
)

# The context manager closes the detector when the loop ends. The finally
# clause releases the camera and windows even if a frame fails mid-loop or the
# kernel is interrupted.
with ObjectDetector.create_from_options(options) as detector:
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # OpenCV delivers BGR; the MediaPipe image is declared as SRGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Detect objects (synchronously)
            result = detector.detect(mp_image)

            # Annotate the original BGR frame so imshow displays correct colours.
            draw_person_detections(frame, result.detections)
            cv2.imshow(WINDOW_TITLE, frame)

            if cv2.waitKey(1) & 0xFF == ESC_KEY:
                break
    finally:
        # NOTE(review): cap is released here, so re-running this cell requires
        # re-running the cell that creates cap first.
        cap.release()
        cv2.destroyAllWindows()


I0000 00:00:1751384258.936243 4367197 gl_context.cc:357] GL version: 2.1 (2.1 ATI-7.0.3), renderer: AMD Radeon Pro 560X OpenGL Engine


Press ESC to exit
