In [3]:
from dataclasses import dataclass
from pathlib import Path
from typing import Generator, Iterable, List, Optional, Tuple, TypedDict, cast

import cv2
import cv2 as cv
import numpy as np
from cv2 import BackgroundSubtractor, BackgroundSubtractorKNN, BackgroundSubtractorMOG2
from cv2.typing import MatLike, Size
from loguru import logger
from tqdm.notebook import tqdm

In [4]:
from os import PathLike


@dataclass
class CapProps:
    width: int
    height: int
    channels: int
    fps: float
    frame_count: Optional[int] = None


def video_cap(
    src: PathLike | int,
    scale: float = 1,
) -> Tuple[Generator[MatLike, None, None], CapProps]:
    assert 0 < scale <= 1, "scale should be in (0, 1]"
    if isinstance(src, PathLike):
        cap = cv2.VideoCapture(str(src))
    else:
        cap = cv2.VideoCapture(src)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * scale)
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * scale)
    fps = float(cap.get(cv2.CAP_PROP_FPS))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    channels = int(cap.get(cv2.CAP_PROP_CHANNEL))
    props = CapProps(width=width,
                     height=height,
                     fps=fps,
                     channels=channels,
                     frame_count=frame_count)

    def gen():
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if scale != 1:
                frame = cv2.resize(frame, (width, height))
            yield frame
        cap.release()

    return gen(), props


def fourcc(*args: str) -> int:
    return cv2.VideoWriter_fourcc(*args)  # type: ignore

In [20]:
import awkward as ak

VIDEO_BG_PATH = Path("PETS09-S2L1-raw.mp4")
frames, props = video_cap(VIDEO_BG_PATH)
logger.info(f"Video properties: {props}")
is_mono = props.channels == 1
bg_writer = cv2.VideoWriter("PETS09-S2L1-bgsub.mp4",
                            fourcc(*"mp4v"),
                            props.fps, (props.width, props.height),
                            isColor=False)
writer = cv2.VideoWriter("PETS09-S2L1-detection.mp4", fourcc(*"mp4v"),
                         props.fps, (props.width, props.height))

# learningRate
# The value between 0 and 1 that indicates how fast the background model is
# learnt. Negative parameter value makes the algorithm to use some automatically
# chosen learning rate. 0 means that the background model is not updated at all,
# 1 means that the background model is completely reinitialized from the last
# frame.
subtractor = cv2.createBackgroundSubtractorMOG2(detectShadows=False)


class DetectionFeatures(TypedDict):
    x: int
    y: int
    w: int
    h: int
    area: float
    cX: int
    cY: int


# * means variable length (0 or more)
# frame count is determined by the video
# [frame detection* features]
batch_detection = ak.Array([])

try:
    for frame in tqdm(frames, total=props.frame_count):
        fgmask = subtractor.apply(frame)
        contours, _ = cv2.findContours(fgmask, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        detections = ak.Array([])
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            area = cv2.contourArea(contour)
            if area > 100:
                M = cv2.moments(contour)
                cX = int(M["m10"] / M["m00"])
                cY = int(M["m01"] / M["m00"])
                features: DetectionFeatures = {
                    "x": x,
                    "y": y,
                    "w": w,
                    "h": h,
                    "area": area,
                    "cX": cX,
                    "cY": cY
                }
                ak_features = ak.Array([features])
                detections = ak.concatenate([detections, ak_features])
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.circle(frame, (cX, cY), 5, (0, 0, 255), -1)
        batch_detection = ak.concatenate(
            [batch_detection, ak.Array([detections])])
        bg_writer.write(fgmask)
        writer.write(frame)
except KeyboardInterrupt as e:
    bg_writer.release()
    writer.release()
    raise e
finally:
    bg_writer.release()
    writer.release()

[32m2024-04-09 11:19:50.063[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mVideo properties: CapProps(width=768, height=576, channels=0, fps=7.0, frame_count=795)[0m


  0%|          | 0/795 [00:00<?, ?it/s]

In [25]:
batch_detection = cast(ak.Array, batch_detection)
batch_detection.typestr

'795 * var * {x: int64, y: int64, w: int64, h: int64, area: float64, cX: int64, cY: int64}'

In [28]:
import pyarrow as pa
import pyarrow.parquet as pq
# save the batch_detection (as akward array)
# https://github.com/scikit-hep/awkward/discussions/329
aw = ak.to_arrow_table(batch_detection)
pq.write_table(aw, "detections.parquet")

In [30]:
# try to read the parquet file
table = pq.read_table("detections.parquet")
ak.from_arrow(table).typestr

'795 * var * {x: int64, y: int64, w: int64, h: int64, area: float64, cX: int64, cY: int64}'