In [12]:
import cv2
import threading
import queue
import time
from ultralytics import YOLO
from collections import defaultdict

# --- CONFIGURATION ---
# RECOMMENDATION: Use .engine (TensorRT) or .onnx files for 2x-5x speedup
# (the speedup figure is the original author's estimate; it is not measured in this notebook).
MODEL_PATH = 'data/model/best_detect.pt'  # Detector weights; point this at the exported .engine/.onnx file after exporting
MODEL_CLASSIFY_PATH = 'data/model/best_classify.pt'  # Classifier weights; change to 'best_classify.engine' after exporting
# NOTE(review): absolute, machine-specific Windows path -- edit before running on another machine.
VIDEO_PATH = "D:\\Recordings\\2025_11_07\\20251107030005_20251107040005_3.mp4"
# Tracker settings file handed to model.track(..., tracker=TRACKER_CONFIG).
TRACKER_CONFIG = 'custom_bytetrack.yaml'

# Consecutive frames a track must be seen as "opened" before the state machine
# starts looking for the "closed" state.
N_FRAMES_OPEN_CONFIRM = 3
# Consecutive frames a track must then be seen as "closed" before it is counted
# and its crop is queued for classification.
M_FRAMES_CLOSED_CONFIRM = 4

# Width (pixels) of the preview window only; inference runs on the original frame.
# A falsy value (0 / None) disables the resize.
DISPLAY_WIDTH = 1280
# --- END OF CONFIGURATION ---

# --- Shared Resources for Threading ---
# Producer/consumer channel: the main loop puts (track_id, roi) tuples, the worker
# thread consumes them; a None item is the worker's stop sentinel.
classification_queue = queue.Queue()
# label -> number of counted bags classified with that label.
class_counts = defaultdict(int)
counts_lock = threading.Lock() # Prevents race conditions when reading/writing counts

def classifier_worker():
    """
    Background worker: classify queued bag crops and tally the predicted labels.

    Consumes ``(track_id, roi_img)`` tuples from ``classification_queue`` until the
    ``None`` sentinel arrives. Each crop is run through the model loaded from
    ``MODEL_CLASSIFY_PATH`` and the resulting label is added to ``class_counts``
    while holding ``counts_lock``.

    Every item taken from the queue -- the sentinel and crops whose inference
    fails included -- is acknowledged with ``task_done()``, so a caller may use
    ``classification_queue.join()`` without hanging. A failure on a single crop is
    reported and skipped; it does not terminate the thread (which would silently
    freeze the counters for the rest of the run).
    """
    print("Loading classification model in worker thread...")
    # Load model inside the thread or pass it in.
    # NOTE: For TensorRT/Exported models, loading here is safer for thread isolation.
    model_classify = YOLO(MODEL_CLASSIFY_PATH)

    while True:
        try:
            # Wake up once a second instead of blocking forever on an empty queue.
            item = classification_queue.get(timeout=1)
        except queue.Empty:
            continue

        track_id = None
        try:
            if item is None:  # Sentinel to kill thread
                break

            track_id, roi_img = item

            # Run inference (verbose=False avoids per-call console output)
            result = model_classify(roi_img, verbose=False)[0]

            detected_label = "Unknown"
            if result.probs is not None:
                # Classification head: take the top-1 class.
                detected_label = result.names[result.probs.top1]
            elif result.boxes is not None and len(result.boxes) > 0:
                # Detection-style output: use the class of the first box.
                top_cls = int(result.boxes.cls[0])
                detected_label = model_classify.names[top_cls]

            with counts_lock:
                class_counts[detected_label] += 1
        except Exception as exc:
            # Keep the worker alive: one bad crop must not stop all future counting.
            print(f"[classifier_worker] Track {track_id}: classification failed: {exc!r}")
        finally:
            # Runs for normal items, failures and the sentinel alike.
            classification_queue.task_done()

# Start the classification thread
t = threading.Thread(target=classifier_worker, daemon=True)
t.start()

# --- Main Tracker Setup ---
model = YOLO(MODEL_PATH)
class_names = model.names

OPEN_CLASS_NAME = 'bread-bag-opened'
CLOSED_CLASS_NAME = 'bread-bag-closed'
names_to_ids = {v: k for k, v in class_names.items()}
try:
    open_class_id = names_to_ids[OPEN_CLASS_NAME]
    closed_class_id = names_to_ids[CLOSED_CLASS_NAME]
except KeyError as e:
    # Stop the already-running worker, then abort this cell/script.
    # An exception is raised instead of calling exit(): in a notebook exit() asks
    # the kernel to shut down rather than simply stopping the cell.
    classification_queue.put(None)
    raise SystemExit(f"Error: Class {e} not found.") from None

track_states = {}
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    print(f"Error: could not open video source {VIDEO_PATH!r}.")

frame_count = 0
start_time = time.time()

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        frame_count += 1

        # 'persist=True' is required for tracking across calls.
        # 'verbose=False' suppresses per-frame console output.
        results = model.track(frame, persist=True, tracker=TRACKER_CONFIG, verbose=False)

        boxes = results[0].boxes
        current_detections = {}
        # Boxes/labels are collected here and drawn only after every crop has been
        # taken, so the classifier never sees rectangles or text painted on the frame.
        overlays = []

        if boxes.id is not None:
            # Move tensors to CPU once and convert to plain lists
            track_ids = boxes.id.int().cpu().tolist()
            cls_ids = boxes.cls.int().cpu().tolist()
            xyxys = boxes.xyxy.cpu().tolist()
            h, w = frame.shape[:2]

            for track_id, cls_id, box in zip(track_ids, cls_ids, xyxys):
                current_detections[track_id] = cls_id
                x1, y1, x2, y2 = map(int, box)

                # Red for Open, Green for Closed, light grey for anything else (BGR)
                if cls_id == open_class_id:
                    color = (0, 0, 255)
                elif cls_id == closed_class_id:
                    color = (0, 255, 0)
                else:
                    color = (200, 200, 200)
                overlays.append(((x1, y1), (x2, y2), color, f"{track_id}: {class_names[cls_id]}"))

                # --- State Machine Logic ---
                # detecting_open -> detecting_closed -> counted
                state_info = track_states.setdefault(
                    track_id, {'state': 'detecting_open', 'open_count': 0, 'closed_count': 0}
                )

                if state_info['state'] == 'detecting_open':
                    if cls_id == open_class_id:
                        state_info['open_count'] += 1
                    else:
                        state_info['open_count'] = 0

                    if state_info['open_count'] >= N_FRAMES_OPEN_CONFIRM:
                        state_info['state'] = 'detecting_closed'
                        state_info['open_count'] = 0

                elif state_info['state'] == 'detecting_closed':
                    if cls_id == closed_class_id:
                        state_info['closed_count'] += 1
                    else:
                        state_info['closed_count'] = 0

                    if state_info['closed_count'] >= M_FRAMES_CLOSED_CONFIRM:
                        state_info['state'] = 'counted'

                        # --- ASYNC CLASSIFICATION TRIGGER ---
                        # Clamp the box to the frame before slicing.
                        cx1, cy1 = max(0, x1), max(0, y1)
                        cx2, cy2 = min(w, x2), min(h, y2)

                        if cx2 > cx1 and cy2 > cy1:
                            # Copy: the worker may read the crop after this frame is gone.
                            roi = frame[cy1:cy2, cx1:cx2].copy()
                            classification_queue.put((track_id, roi))

        # All crops are taken; now it is safe to draw on the frame.
        for top_left, bottom_right, color, label in overlays:
            cv2.rectangle(frame, top_left, bottom_right, color, 2)
            cv2.putText(frame, label, (top_left[0], top_left[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        # Tracks missing from this frame lose their consecutive-frame streaks.
        # Entries are kept (not deleted) so the 'counted' state survives if the
        # tracker reports the same id again.
        for tid, info in track_states.items():
            if tid not in current_detections:
                info['open_count'] = 0
                info['closed_count'] = 0

        # --- Display Stats ---
        # Snapshot the counters under the lock; draw outside it.
        with counts_lock:
            display_counts = list(class_counts.items())

        # Average throughput since the loop started.
        fps = frame_count / max(time.time() - start_time, 1e-6)
        y_offset = 60
        cv2.putText(frame, f"FPS: {fps:.1f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 1.6, (0, 255, 255), 3)

        for cls_name, count in sorted(display_counts):
            y_offset += 70
            text = f"{cls_name}: {count}"
            cv2.putText(frame, text, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)

        # Resize only for display (processing stays on the original resolution)
        if DISPLAY_WIDTH:
            h, w = frame.shape[:2]
            scale = DISPLAY_WIDTH / w
            # Use DISPLAY_WIDTH itself: int(w * scale) can truncate to DISPLAY_WIDTH - 1.
            frame_disp = cv2.resize(frame, (DISPLAY_WIDTH, max(1, round(h * scale))))
        else:
            frame_disp = frame

        cv2.imshow("Optimized Tracker", frame_disp)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Cleanup runs on normal end, on 'q', on errors and on kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

    # Let the worker finish the crops that are still queued, then stop it.
    classification_queue.put(None)
    t.join(timeout=30)
    if t.is_alive():
        print("Warning: classifier thread is still busy; final counts may be incomplete.")

    with counts_lock:
        final_counts = dict(sorted(class_counts.items()))
    print(f"Final counts: {final_counts}")

Loading classification model in worker thread...


In [3]:
from ultralytics import YOLO
# Load the detector weights (same file the tracking cell uses as MODEL_PATH).
# NOTE(review): this rebinds the notebook-global name `model`, which the tracking
# cell also assigns.
model = YOLO('data/model/best_detect.pt')

# Mapping of class id -> class name stored in the weights; the tracking cell looks
# up 'bread-bag-opened' / 'bread-bag-closed' in this mapping.
class_dict = model.names
print(f"Detected Classes: {class_dict}")

Detected Classes: {0: 'bread-bag-closed', 1: 'bread-bag-opened'}


In [14]:
import time

# Frame-pacing experiment: measure how long a simulated frame took and idle for
# whatever is left of the frame slot.
fps = 30
if not (0 < fps <= 120):
    # Implausible rate -> fall back to a conservative default.
    fps = 20.0
frame_duration = 1.0 / fps

start_time = time.time()
print(f"startTime: {start_time}")

# Stand-in for the per-frame work (grab + process).
time.sleep(0.02)

elapsed = time.time() - start_time
print(f"elapsed: {elapsed}")

# Time remaining until the next frame should be grabbed.
sleep_time = frame_duration - elapsed
print(f"Frame Duration : {frame_duration}, Elapsed : {elapsed}, Sleep : {sleep_time}")
if sleep_time > 0:
    time.sleep(sleep_time)

startTime: 1764169259.681691
elapsed: 0.021376371383666992
Frame Duration : 0.03333333333333333, Elapsed : 0.021376371383666992, Sleep : 0.01195696194966634


In [15]:
#!/usr/bin/env python3
"""
Improved hardware/software video decode pipeline for RDK X5.
- Hardware decode via hobot_codec if available.
- Software fallback via ffmpeg (requires known resolution).

Usage:
    - Edit DEFAULT_SOURCE and DEFAULT_WIDTH / DEFAULT_HEIGHT below.
    - Run: python3 hw_decode_singlefile_fixed.py
"""

import queue
import shutil
import subprocess
import threading
import time
from typing import Generator, Optional, Tuple

import numpy as np

# ---------------------------
# USER CONFIG
# ---------------------------
# Input handed to VideoSource: a path ending in .h264/.h265 is read directly as a
# raw stream, anything else is passed to ffmpeg as its input.
DEFAULT_SOURCE = "test.mp4"  # or "rtsp://192.168.1.10/stream" or "/path/to/input.h264"
OUTPUT_WINDOW = True           # set False if running headless (skips the cv2.imshow preview)
# The software (ffmpeg) decoder needs the frame size up front: it is used both for
# ffmpeg's output size and to cut the raw byte stream into frames.
DEFAULT_WIDTH = 640            # Enter known width for file/software fallback
DEFAULT_HEIGHT = 360           # Enter known height for file/software fallback
# ---------------------------

class Decoder:
    """
    Thin wrapper: hobot_codec for hardware decode, or ffmpeg SW decode with known resolution.

    Software path
    -------------
    NAL units are written to an ffmpeg child process as an Annex-B byte stream and
    decoded BGR frames are collected from its stdout by a background reader
    thread. ffmpeg does not produce one frame per NAL unit (parameter sets yield
    no picture, and the decoder buffers input), so ``decode()`` never waits for
    output: it returns the next frame that is already available, or ``None``.
    Call ``flush()`` after the last NAL unit to obtain the frames still in flight.

    Hardware path
    -------------
    ``decode()`` forwards the bytes unchanged to ``hobot_codec`` and returns
    whatever that call returns.
    """

    # Annex-B start code put in front of bare NAL payloads for ffmpeg.
    _START_CODE = b"\x00\x00\x00\x01"

    def __init__(self, codec_type: str = "h264", prefer_hw: bool = True, width: Optional[int] = None, height: Optional[int] = None):
        """
        Args:
            codec_type: "h264" or "h265" (case-insensitive; anything without
                "264" in it is treated as H.265).
            prefer_hw: try ``hobot_codec`` first; fall back to ffmpeg on failure.
            width, height: frame size; required for the software fallback.

        Raises:
            RuntimeError: software fallback needed but ffmpeg is not on PATH.
            ValueError: software fallback needed but width/height are unusable.
        """
        self.codec_type = codec_type.lower()
        self.hw = False
        self.decoder = None
        self.width = width
        self.height = height
        self.software_proc = None
        self._frames = queue.Queue()   # decoded frames produced by the reader thread
        self._reader = None            # thread draining ffmpeg's stdout

        # Try hardware first
        if prefer_hw:
            try:
                from hobot_codec import Codec, CodecType  # type: ignore
                ctype = CodecType.H264 if "264" in self.codec_type else CodecType.H265
                self.decoder = Codec(
                    codec_type=ctype,
                    is_decoder=True,
                    input_mode="packet",
                    output_mode="array",
                    output_format="bgr"
                )
                self.hw = True
                print("[INFO] Using hardware decoder (hobot_codec).")
            except Exception as e:
                print("[WARN] hobot_codec not available or failed to init. Falling back to software decode.")
                print(f"[DEBUG] hobot_codec init error: {repr(e)}")

        if not self.hw:
            self._start_software_decoder()

    def _ffmpeg_input_format(self) -> str:
        """Name of ffmpeg's raw-bitstream demuxer matching ``codec_type``."""
        return "h264" if "264" in self.codec_type else "hevc"

    def _start_software_decoder(self):
        """Spawn ffmpeg (elementary stream in, raw BGR out) and start the reader thread."""
        ffmpeg_bin = shutil.which("ffmpeg")
        if not ffmpeg_bin:
            raise RuntimeError("ffmpeg required for SW fallback but not found in PATH.")
        if self.width is None or self.height is None:
            raise ValueError("Width and height must be set for software fallback!")
        if self.width <= 0 or self.height <= 0:
            raise ValueError("Width and height must be positive for software fallback!")
        cmd = [
            ffmpeg_bin,
            "-loglevel", "error",
            "-hide_banner",
            "-f", self._ffmpeg_input_format(),
            "-i", "pipe:0",
            "-f", "rawvideo",
            "-pix_fmt", "bgr24",
            "-s", f"{self.width}x{self.height}",
            "pipe:1"
        ]
        # stderr is inherited rather than piped: a pipe nobody reads can fill up
        # and stall ffmpeg.
        self.software_proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=0)
        self._reader = threading.Thread(
            target=self._pump_frames,
            args=(self.software_proc.stdout, self._frames, self.width, self.height),
            name="sw-decoder-reader",
            daemon=True,
        )
        self._reader.start()
        print(f"[INFO] Started ffmpeg-based SW decoder for {self.width}x{self.height}.")

    @staticmethod
    def _pump_frames(stream, frames, width, height):
        """
        Read fixed-size BGR frames from ``stream`` into the ``frames`` queue until EOF.

        A frame is assembled from as many reads as it takes (an unbuffered pipe may
        return short reads); an incomplete frame at EOF is dropped.
        """
        frame_size = width * height * 3
        if frame_size <= 0:
            return
        while True:
            raw = bytearray()
            while len(raw) < frame_size:
                try:
                    part = stream.read(frame_size - len(raw))
                except (OSError, ValueError):
                    return  # pipe closed underneath us
                if not part:
                    return  # EOF
                raw += part
            # A fresh bytearray per frame keeps the resulting array writable.
            frames.put(np.frombuffer(raw, dtype=np.uint8).reshape((height, width, 3)))

    def _with_start_code(self, nal_bytes: bytes) -> bytes:
        """Return ``nal_bytes`` in Annex-B form, adding a start code if it has none."""
        if nal_bytes.startswith(b"\x00\x00\x01") or nal_bytes.startswith(self._START_CODE):
            return bytes(nal_bytes)
        return self._START_CODE + bytes(nal_bytes)

    @staticmethod
    def _write_all(stream, data: bytes) -> None:
        """Write all of ``data``; an unbuffered stream may accept it only in pieces."""
        view = memoryview(data)
        while len(view):
            written = stream.write(view)
            if not written:
                raise OSError("short write to decoder pipe")
            view = view[written:]

    def _pop_frame(self) -> Optional[np.ndarray]:
        """Next already-decoded frame, or None if none is ready."""
        try:
            return self._frames.get_nowait()
        except queue.Empty:
            return None

    def decode(self, nal_bytes: bytes) -> Optional[np.ndarray]:
        """
        Feed one NAL unit to the decoder.

        Returns:
            A BGR frame of shape (height, width, 3) if one is available, else
            ``None``. On the software path the returned frame is the oldest frame
            ffmpeg has finished so far, not necessarily the picture carried by
            ``nal_bytes``.
        """
        if self.hw:
            try:
                frame = self.decoder.decode(nal_bytes)
                return frame
            except Exception as e:
                print(f"[ERROR] hobot_codec decode error: {e}")
                return None

        # SW fallback: hand the NAL to ffmpeg, then return whatever is ready.
        proc = self.software_proc
        if not proc or not proc.stdin:
            return None

        if nal_bytes:
            try:
                self._write_all(proc.stdin, self._with_start_code(nal_bytes))
                proc.stdin.flush()
            except (OSError, ValueError):
                # OSError covers BrokenPipeError; ValueError is a write to a closed file.
                print("[ERROR] ffmpeg SW decoder pipe closed.")
                return None

        return self._pop_frame()

    def flush(self, timeout: float = 5.0) -> list:
        """
        Signal end of input and return the frames that were still being decoded.

        Closes ffmpeg's stdin, waits up to ``timeout`` seconds for the reader
        thread to reach EOF, and returns all queued frames in order. No further
        NAL units can be decoded afterwards. Returns ``[]`` on the hardware path.
        """
        proc = self.software_proc
        if self.hw or proc is None:
            return []
        self._close_quietly(proc.stdin)
        if self._reader is not None:
            self._reader.join(timeout)
        remaining = []
        while True:
            frame = self._pop_frame()
            if frame is None:
                break
            remaining.append(frame)
        return remaining

    @staticmethod
    def _close_quietly(stream) -> None:
        """Close a pipe end, ignoring errors from an already-dead peer."""
        if stream is None:
            return
        try:
            stream.close()
        except (OSError, ValueError):
            pass

    def close(self):
        """
        Stop the software decoder (no-op if none is running). Safe to call twice.

        Each step is isolated so that a failure while closing one pipe cannot skip
        terminating and reaping the ffmpeg child.
        """
        proc = getattr(self, "software_proc", None)
        if proc is None:
            return
        self.software_proc = None

        self._close_quietly(proc.stdin)
        try:
            if proc.poll() is None:
                proc.terminate()
                try:
                    proc.wait(timeout=2)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
        except OSError:
            pass

        # The child's exit closes its end of stdout, which ends the reader thread.
        reader = getattr(self, "_reader", None)
        if reader is not None and reader is not threading.current_thread():
            reader.join(timeout=1)
        self._reader = None

        self._close_quietly(proc.stdout)
        self._close_quietly(proc.stderr)

    def __del__(self):
        # Best effort only: the object may be half-initialised or the interpreter
        # may be shutting down.
        try:
            self.close()
        except Exception:
            pass

# ---------------------------
# NAL splitting utility
# ---------------------------

def extract_nals_from_buffer(buf: bytearray, flush: bool = True) -> list:
    """
    Split an Annex-B byte stream into NAL unit payloads (start codes removed).

    Both the 3-byte (``00 00 01``) and the 4-byte (``00 00 00 01``) start code are
    recognised. Bytes before the first start code are discarded. Empty units
    (two start codes back to back, or a start code at the very end) are skipped.

    Args:
        buf: accumulated stream bytes; modified in place.
        flush: with the default ``True`` everything after the last start code is
            returned as the final unit and ``buf`` is emptied. With ``False`` only
            units terminated by a following start code are returned and ``buf`` is
            trimmed to start at the last start code, so a unit that is still
            arriving is kept for the next call (use this while streaming).

    Returns:
        List of ``bytes`` payloads in stream order. If ``buf`` contains no start
        code at all, ``[]`` is returned and ``buf`` is left untouched.
    """
    marker = b'\x00\x00\x01'
    starts = []  # (index where the start code begins, index where the payload begins)
    search_from = 0
    while True:
        pos = buf.find(marker, search_from)
        if pos < 0:
            break
        # A zero byte directly in front of the marker makes it the 4-byte form.
        code_start = pos - 1 if pos > search_from and buf[pos - 1] == 0 else pos
        payload_start = pos + len(marker)
        starts.append((code_start, payload_start))
        search_from = payload_start

    if not starts:
        return []

    nals = []
    for idx in range(len(starts) - 1):
        nal = bytes(buf[starts[idx][1]:starts[idx + 1][0]])
        if nal:
            nals.append(nal)

    last_code_start, last_payload_start = starts[-1]
    if flush:
        tail = bytes(buf[last_payload_start:])
        if tail:
            nals.append(tail)
        buf.clear()
    else:
        # Keep the unfinished unit, start code included, for the next call.
        del buf[:last_code_start]
    return nals

# ---------------------------
# VideoSource abstraction
# ---------------------------

class VideoSource:
    """
    Yields decoded BGR frames from file/RTSP.

    Raw ``.h264`` / ``.h265`` files are read directly; every other source is run
    through ``ffmpeg -c:v copy`` to obtain the elementary stream on a pipe. In
    both cases the stream is consumed in chunks, split into NAL units and handed
    to a ``Decoder``.
    """

    _CHUNK_SIZE = 4096
    _START_CODE = b'\x00\x00\x01'

    def __init__(self, source: str, codec: str = "h264", width: Optional[int] = None, height: Optional[int] = None):
        """
        Args:
            source: file path or URL.
            codec: "h264" or "h265"; forwarded to the decoder.
            width, height: frame size, forwarded to the decoder.
        """
        self.source = source
        self.codec = codec
        self.proc = None
        self.is_pipe = False
        self._buffer = bytearray()
        self.width = width
        self.height = height
        self._fileobj = None
        self.decoder = Decoder(codec_type=codec, width=width, height=height)
        try:
            self._start_input()
        except Exception:
            # Do not leave the decoder (and its child process) running when the
            # input cannot be opened.
            self.decoder.close()
            raise

    def _start_input(self):
        """Open the raw file, or start ffmpeg to remux the source to a raw stream."""
        lower = self.source.lower()
        if lower.endswith((".h264", ".h265")):
            self.is_pipe = False
            self.proc = None
            self._fileobj = open(self.source, "rb")
            print(f"[INFO] Opening raw file {self.source}")
            return

        ffmpeg_bin = shutil.which("ffmpeg")
        if not ffmpeg_bin:
            raise RuntimeError("ffmpeg not found in PATH.")

        # Raw-stream muxer must match the codec being copied.
        out_format = "h264" if "264" in self.codec.lower() else "hevc"
        cmd = [
            ffmpeg_bin,
            "-loglevel", "error",
            "-i", self.source,
            "-an",
            "-c:v", "copy",
            "-f", out_format,
            "pipe:1"
        ]

        # stderr is inherited rather than piped: a pipe nobody reads can fill up
        # and stall ffmpeg.
        self.proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=0)
        self.is_pipe = True
        print(f"[INFO] Started ffmpeg - streaming raw {self.codec} from {self.source}")

    def _take_complete_nals(self) -> list:
        """
        Remove and return the NAL units in the buffer that are known to be complete.

        A unit is complete only once the start code of the next unit has arrived,
        so everything from the last start code onwards stays buffered. Without
        this, a unit spanning two read chunks would be delivered in two pieces.
        """
        last = self._buffer.rfind(self._START_CODE)
        if last > 0 and self._buffer[last - 1] == 0:
            last -= 1  # 4-byte start code: keep its leading zero with it
        if last <= 0:
            return []
        head = self._buffer[:last]
        del self._buffer[:last]
        return extract_nals_from_buffer(head)

    def _decode_nals(self, nals):
        """Decode each NAL unit; yield ``(frame, latency_ms)`` for every frame produced."""
        for nal in nals:
            if not nal:
                continue
            t0 = time.time()
            frame = self.decoder.decode(nal)
            if frame is not None:
                yield frame, (time.time() - t0) * 1000.0

    def _drain_decoder(self):
        """Yield frames a decoder still holds, if it offers a ``flush()`` method."""
        flush = getattr(self.decoder, "flush", None)
        if callable(flush):
            for frame in flush() or ():
                yield frame, 0.0

    def _shutdown(self):
        """Release the input process/file and the decoder. Safe to call twice."""
        proc, self.proc = self.proc, None
        if proc is not None:
            try:
                if proc.poll() is None:
                    proc.kill()
                proc.wait(timeout=2)  # reap the child
            except (OSError, subprocess.TimeoutExpired):
                pass
            for stream in (proc.stdout, proc.stderr):
                if stream is not None:
                    try:
                        stream.close()
                    except OSError:
                        pass
        fileobj, self._fileobj = self._fileobj, None
        if fileobj is not None:
            fileobj.close()
        self.decoder.close()

    def frames(self) -> Generator[Tuple[np.ndarray, float], None, None]:
        """
        Generate ``(frame, latency_ms)`` tuples until the source is exhausted.

        ``latency_ms`` is the wall-clock time of the ``decode()`` call that
        returned the frame. Input, file handle and decoder are released when the
        generator finishes, is closed, or raises.
        """
        if self.is_pipe:
            stream = self.proc.stdout if self.proc is not None else None
        else:
            stream = self._fileobj
        if stream is None:
            return

        try:
            while True:
                chunk = stream.read(self._CHUNK_SIZE)
                if not chunk:
                    break
                self._buffer.extend(chunk)
                yield from self._decode_nals(self._take_complete_nals())

            # End of stream: whatever is left in the buffer is the final unit.
            yield from self._decode_nals(extract_nals_from_buffer(self._buffer))
            yield from self._drain_decoder()
        finally:
            self._shutdown()

# ---------------------------
# Example main for test/debug
# ---------------------------
if __name__ == "__main__":
    import cv2

    source = DEFAULT_SOURCE
    width = DEFAULT_WIDTH
    height = DEFAULT_HEIGHT

    src = VideoSource(source, "h264", width, height)
    frame_iter = src.frames()
    try:
        for idx, (frame, latency_ms) in enumerate(frame_iter):
            print(f"Frame {idx}: latency={latency_ms:.2f} ms shape={frame.shape}")
            if OUTPUT_WINDOW:
                cv2.imshow("Decoded", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Closing the generator runs its own cleanup even when the loop was left
        # early ('q', exception, interrupt).
        frame_iter.close()
        # Only touch the GUI when one was requested: window functions are not
        # available in headless OpenCV builds.
        if OUTPUT_WINDOW:
            cv2.destroyAllWindows()
        src.decoder.close()

[WARN] hobot_codec not available or failed to init. Falling back to software decode.
[DEBUG] hobot_codec init error: ModuleNotFoundError("No module named 'hobot_codec'")
[INFO] Started ffmpeg-based SW decoder for 640x360.
[INFO] Started ffmpeg - streaming raw h264 from test.mp4


In [None]:
#!/usr/bin/env python3
import gi
gi.require_version('Gst', '1.0')
gi.require_version('GstRtspServer', '1.0')
gi.require_version('GLib', '2.0')

from gi.repository import Gst, GstRtspServer, GLib

# Initialise GStreamer once, before any element or pipeline is created.
Gst.init(None)
# File served by CameraFactory.do_create_element (read as a module-level global).
# NOTE(review): this reuses the name VIDEO_PATH that the tracking cell also
# defines; in a shared kernel whichever cell ran last wins.
VIDEO_PATH = "/home/sunrise/BreadCounting/output2.mp4"

class CameraFactory(GstRtspServer.RTSPMediaFactory):
    """
    RTSP media factory that serves ``VIDEO_PATH`` as a 1280x720 H.264 stream.

    The media is shared between clients and is stopped when a client disconnects.
    """

    def __init__(self):
        super().__init__()
        self.set_shared(True)
        self.set_latency(200)
        self.set_clock(Gst.SystemClock.obtain())
        self.set_stop_on_disconnect(True)

    def do_create_element(self, url):
        """Build the pipeline: demux MP4 -> decode -> scale to 720p -> H.264 -> RTP payloader."""
        pipeline = (
            # Quoted so that a path containing spaces stays a single property value.
            f'filesrc location="{VIDEO_PATH}" ! qtdemux name=demux '
            'demux.video_0 ! queue ! decodebin ! videoconvert ! videoscale ! '
            'video/x-raw,width=1280,height=720 ! '
            # openh264enc takes its bitrate in bits per second: 2048000 = 2048 kbit/s.
            'openh264enc bitrate=2048000 ! h264parse ! rtph264pay name=pay0 pt=96 config-interval=1'
        )

        return Gst.parse_launch(pipeline)

def main():
    """
    Serve the test video at ``/test_cam`` and run the GLib main loop until interrupted.

    Raises:
        RuntimeError: the server could not be attached to the main context
            (``attach`` returned 0), for example because its port is unavailable.
    """
    server = GstRtspServer.RTSPServer()
    factory = CameraFactory()
    server.get_mount_points().add_factory("/test_cam", factory)

    source_id = server.attach(None)
    if not source_id:
        raise RuntimeError("Failed to attach RTSP server (is the RTSP port already in use?).")
    print("720p RTSP Camera running at rtsp://127.0.0.1:8554/test_cam")

    loop = GLib.MainLoop()
    try:
        loop.run()
    except KeyboardInterrupt:
        print("Stopping RTSP server.")
    finally:
        if loop.is_running():
            loop.quit()
        # Detach the server so it can be started again in the same process
        # (e.g. when the notebook cell is re-run).
        GLib.source_remove(source_id)

if __name__ == "__main__":
    main()
