In [1]:
import cv2 as cv
import numpy as np

# Utilities


In [2]:
def compute_flow(frame1, frame2):
    """Compute dense Farneback optical flow between two consecutive frames.

    Inputs:
        frame1 - BGR frame at time t (converted to grayscale internally)
        frame2 - BGR frame at time t + 1 (converted to grayscale internally)
    Outputs:
        flow - result of cv.calcOpticalFlowFarneback for the preprocessed pair
    """

    def _preprocess(frame):
        # grayscale conversion followed by a light Gaussian blur
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        return cv.GaussianBlur(gray, dst=None, ksize=(3, 3), sigmaX=5)

    prev_gray = _preprocess(frame1)
    next_gray = _preprocess(frame2)

    # Farneback parameters, kept together so they are easy to tune
    farneback_params = dict(
        pyr_scale=0.7,
        levels=3,
        winsize=15,
        iterations=3,
        poly_n=7,
        poly_sigma=1.5,
        flags=0,
    )
    return cv.calcOpticalFlowFarneback(prev_gray, next_gray, None, **farneback_params)


def get_flow_viz(flow):
    """Build a colour image that visualizes an optical flow field.

    Flow direction is written to the hue channel, flow magnitude
    (min-max normalized to 0..255) to the value channel, and saturation
    is fixed at 255.

    Inputs:
        flow - flow field; flow[..., 0] and flow[..., 1] are the two components
    Outputs:
        uint8 3-channel image produced with cv.COLOR_HSV2RGB, i.e. the
        channel order is RGB (swap channels before showing it with an
        API that expects BGR)
    """
    height, width = flow.shape[0], flow.shape[1]
    magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])

    hsv_image = np.zeros((height, width, 3), dtype=np.uint8)
    # angle is in radians; halve the degree value to fit the 8-bit hue range
    hsv_image[..., 0] = angle * 180 / np.pi / 2
    hsv_image[..., 1] = 255
    hsv_image[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)

    return cv.cvtColor(hsv_image, cv.COLOR_HSV2RGB)


In [3]:
def get_motion_mask(flow_mag, motion_thresh=1, kernel=np.ones((7, 7))):
    """Turn an optical flow magnitude map into a cleaned binary motion mask.

    Inputs:
        flow_mag - (array) optical flow magnitude
        motion_thresh - magnitude above which a pixel counts as moving
        kernel - structuring element for the morphological operations
    Outputs:
        mask - uint8 binary motion mask (255 = motion, 0 = static)
    """
    is_moving = flow_mag > motion_thresh
    mask = np.uint8(is_moving) * 255

    # erode once, then open (1 iteration) and close (3 iterations)
    mask = cv.erode(mask, kernel, iterations=1)
    for morph_op, reps in ((cv.MORPH_OPEN, 1), (cv.MORPH_CLOSE, 3)):
        mask = cv.morphologyEx(mask, morph_op, kernel, iterations=reps)

    return mask

In [4]:
def get_detections(
    frame1,
    frame2,
    motion_thresh=1,
    bbox_thresh=400,
    nms_thresh=0.1,
    mask_kernel=np.ones((7, 7), dtype=np.uint8),
):
    """Main function to get moving-object detections via dense optical flow
    Inputs:
        frame1 - BGR frame at time t (grayscale conversion happens in compute_flow)
        frame2 - BGR frame at time t + 1
        motion_thresh - Minimum flow magnitude for a pixel to count as motion
        bbox_thresh - Minimum contour area for declaring a bounding box
        nms_thresh - IoU threshold for computing Non-Maximal Suppression
        mask_kernel - kernel for morphological operations on motion mask
    Outputs:
        detections - int32 array of shape (N, 4) with the bounding boxes of all
            detections in the form (xmin, ymin, xmax, ymax); shape (0, 4)
            when nothing is detected
    """
    # get optical flow
    flow = compute_flow(frame1, frame2)

    # only the magnitude is needed; the angle is discarded
    mag, _ = cv.cartToPolar(flow[..., 0], flow[..., 1])

    motion_mask = get_motion_mask(mag, motion_thresh=motion_thresh, kernel=mask_kernel)

    # get initially proposed detections from contours
    detections = get_contour_detections(motion_mask, thresh=bbox_thresh)

    if len(detections) == 0:
        # keep the (N, 4) layout even when empty so callers can index columns
        return np.zeros((0, 4), dtype=np.int32)

    # separate bboxes and scores (score = last column)
    bboxes = detections[:, :4]
    scores = detections[:, -1]

    # perform Non-Maximal Suppression on initial detections
    final_detections = non_max_suppression(bboxes, scores, threshold=nms_thresh)

    # Convert to integer coordinates
    return np.array(final_detections, dtype=np.int32)

In [5]:
def get_contour_detections(mask, thresh=400):
    """Get bounding box detections from contours in binary mask
    Inputs:
        mask - binary mask passed to cv.findContours (external contours only)
        thresh - minimum contour area; contours with area <= thresh are dropped
    Outputs:
        detections - float array of shape (N, 5) with rows
            [xmin, ymin, xmax, ymax, contour_area]; shape (0, 5) when no
            contour passes the area threshold
    """
    contours, _ = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    detections = []
    for cnt in contours:
        area = cv.contourArea(cnt)
        if area > thresh:
            x, y, w, h = cv.boundingRect(cnt)
            detections.append([x, y, x + w, y + h, area])

    if not detections:
        # explicit empty 2D array so callers can always slice columns
        return np.zeros((0, 5))

    # every row has exactly five entries, so the result is always 2D
    return np.array(detections, dtype=np.float64)

In [6]:
def remove_contained_bboxes(boxes):
    """Removes all smaller boxes that are contained within larger boxes.

    Box j counts as contained in box i when its min corner is >= the min
    corner of i and its max corner is strictly < the max corner of i.
    The result does not depend on the order of the boxes.

    Inputs:
        boxes - array of bounding boxes [[x1,y1,x2,y2]]
    Outputs:
        keep - list of indexes (ascending) of bounding boxes that are not
               entirely contained in another box
    """
    boxes = np.asarray(boxes)
    num_boxes = len(boxes)
    if num_boxes == 0:
        return []

    # ge[j, i, k] is True when coordinate k of box j >= coordinate k of box i
    ge = boxes[:, None, :] >= boxes[None, :, :]

    # contained[j, i]: box j lies inside box i (never True for j == i because
    # the max-corner comparison is strict)
    contained = ge[..., 0] & ge[..., 1] & ~ge[..., 2] & ~ge[..., 3]

    # Build the result in one pass instead of removing from a list while
    # iterating over it, which silently skipped candidate boxes.
    return [j for j in range(num_boxes) if not contained[j].any()]


def _box_iou(box_a, box_b):
    """Intersection-over-union of two [xmin, ymin, xmax, ymax] boxes (0.0 if the union is empty)."""
    inter_w = max(0, min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
    inter_h = max(0, min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
    intersection = inter_w * inter_h
    union = (
        (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
        + (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
        - intersection
    )
    # degenerate (zero-area) boxes would otherwise divide by zero
    return intersection / union if union > 0 else 0.0


def non_max_suppression(boxes, scores, threshold=1e-1):
    """
    Perform non-max suppression on a set of bounding boxes
    and corresponding scores.
    Inputs:
        boxes: array-like of bounding boxes in the format [xmin, ymin, xmax, ymax]
        scores: array-like of corresponding scores (higher = preferred)
        threshold: the IoU (intersection-over-union) threshold above which a
            lower-scored box is suppressed
    Outputs:
        boxes - non-max suppressed boxes, ordered by descending score;
            shape (0, 4) when the input is empty
    """
    boxes = np.asarray(boxes)
    if len(boxes) == 0:
        return np.zeros((0, 4), dtype=boxes.dtype)

    # Sort the boxes by score in descending order
    boxes = boxes[np.argsort(scores)[::-1]]

    # remove all contained bounding boxes and get ordered index
    order = list(remove_contained_bboxes(boxes))

    keep = []
    while order:
        i = order.pop(0)
        keep.append(i)
        # Rebuild the candidate list instead of calling order.remove() while
        # iterating over it, which skipped the element after every removal.
        order = [j for j in order if not _box_iou(boxes[i], boxes[j]) > threshold]

    return boxes[keep]

# The actual detection for the video


In [None]:
# Run the detector over the video and show the detections frame by frame.
# Press Esc, q or Q in the window to stop early.
cap = cv.VideoCapture(cv.samples.findFile("SamsungGear360.mp4"))
scale = 0.2

cv.startWindowThread()

try:
    ret, frame1 = cap.read()
    if not ret:
        # raise instead of exit(): exit() inside a notebook cell affects the
        # kernel session, and the capture would never be released
        raise RuntimeError("Error reading frame 1")

    frame1_bgr = cv.resize(frame1, None, fx=scale, fy=scale)

    while True:
        ret, frame2 = cap.read()
        if not ret:
            print("End of video")
            break

        frame2_bgr = cv.resize(frame2, None, fx=scale, fy=scale)
        detections = get_detections(
            frame1_bgr, frame2_bgr, motion_thresh=1, bbox_thresh=400, nms_thresh=0.1
        )

        # Draw on a copy: frame2_bgr becomes the reference frame of the next
        # iteration, and drawn rectangles would corrupt the optical flow.
        annotated = frame2_bgr.copy()
        for xmin, ymin, xmax, ymax in detections:
            cv.rectangle(
                annotated,
                (int(xmin), int(ymin)),
                (int(xmax), int(ymax)),
                (0, 255, 0),
                2,
            )

        cv.imshow("Detections", annotated)
        frame1_bgr = frame2_bgr

        # mask to the low byte so the key code compares reliably
        key = cv.waitKey(1) & 0xFF
        if key in [27, ord("q"), ord("Q")]:
            break
finally:
    # always release the capture and close the window, also on errors
    # and at the end of the video
    cap.release()
    cv.destroyAllWindows()

    # a few extra waitKey calls are needed for the window to actually close on macOS
    # https://stackoverflow.com/questions/6116564/destroywindow-does-not-close-window-on-mac-using-python-and-opencv
    for _ in range(4):
        cv.waitKey(1)