In [142]:
import cv2
import numpy as np

In [143]:
def resize_and_crop_image(image):
    """Downscale `image` by a power of two until its longest edge is <= 1000 px.

    The scale factor starts at 1 and doubles while the (floor-halved) longest
    edge is still above 1000, so frames that already fit are resized to their
    own size. When the module-level flag ``CROP`` is truthy, the result is
    additionally cut down to its top-left ``size x size`` square, where
    ``size`` is the shorter side of the resized frame.

    :param image: Array whose first two axes are (rows, cols).
    :return: The resized (and optionally cropped) image.
    """
    height, width = image.shape[0], image.shape[1]

    ratio = 1
    longest_edge = max(height, width)
    while longest_edge > 1000:
        ratio *= 2
        longest_edge //= 2

    # Floor division can collapse the short side of a very elongated frame to
    # 0, and cv2.resize rejects an empty target size; keep at least one pixel.
    rows = max(1, height // ratio)
    cols = max(1, width // ratio)

    # cv2.resize takes the target size as (width, height).
    image_resized = cv2.resize(image, (cols, rows))

    if CROP:
        size = min(cols, rows)
        return image_resized[:size, :size]
    return image_resized

In [144]:
def RGBRange(image):
    """Black out every pixel whose HSV value falls outside a fixed range.

    Despite the name, the range test is done in HSV space: the image is
    converted with COLOR_BGR2HSV and compared against the bounds
    [0, 48, 80] .. [20, 255, 255] (the "skin" range). The returned image has
    the same channels as `image`, with pixels outside the range set to zero.

    :param image: Image to filter; converted with COLOR_BGR2HSV.
    :return: `image` AND-ed with itself under the in-range mask.
    """
    skin_lo = np.array([0, 48, 80], dtype="uint8")
    skin_hi = np.array([20, 255, 255], dtype="uint8")

    in_range = cv2.inRange(cv2.cvtColor(image, cv2.COLOR_BGR2HSV), skin_lo, skin_hi)
    return cv2.bitwise_and(image, image, mask=in_range)

In [145]:
def morph_image(image, morph, kernel_size):
    """Erode or dilate `image` with a square rectangular structuring element.

    :param image: Image passed straight to cv2.erode / cv2.dilate.
    :param morph: 'dilate' or 'erode'.
    :param kernel_size: Side length of the square MORPH_RECT kernel.
    :return: The morphed image.
    :raises NotImplementedError: If `morph` is neither 'dilate' nor 'erode'.
    """
    # Validate first so an unsupported operation fails before any work is done.
    if morph not in ('dilate', 'erode'):
        raise NotImplementedError

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    if morph == 'dilate':
        return cv2.dilate(image, kernel)
    return cv2.erode(image, kernel)

def BinaryThreshold(image):
    """Binarise `image` at a fixed grey level, then erode and dilate it.

    The image is converted to greyscale, thresholded with THRESH_BINARY at
    145 (max value 255), then eroded and dilated with the same 6x6 kernel.

    :param image: Image to threshold; converted with COLOR_BGR2GRAY.
    :return: The thresholded, morphologically processed single-channel image.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (retval, image); only the image is needed.
    binary = cv2.threshold(grey, 145, 255, cv2.THRESH_BINARY)[1]

    # Erode first, then dilate, both with a 6x6 kernel.
    for operation in ('erode', 'dilate'):
        binary = morph_image(binary, morph=operation, kernel_size=6)
    return binary


In [146]:
def CannyEdgeDetection(img, lb=50, ub=100):
    """Run Canny edge detection on a blurred greyscale copy of `img`.

    :param img: Image to process; converted with COLOR_BGR2GRAY.
    :param lb: First (lower) threshold passed to cv2.Canny.
    :param ub: Second (upper) threshold passed to cv2.Canny.
    :return: The edge map produced by cv2.Canny.
    """
    # Greyscale, then a 5x5 Gaussian blur to suppress noise before Canny.
    smoothed = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    return cv2.Canny(smoothed, lb, ub)


def CannyEdgeMaskGeneration(img):
    """Build an edge mask: 255 where Canny (thresholds 50/100) fires, else 0.

    :param img: Image handed to CannyEdgeDetection.
    :return: Mask with the same shape and dtype as the Canny edge map.
    """
    edge_map = CannyEdgeDetection(img, 50, 100)

    # White on every detected edge pixel, black everywhere else.
    return np.where(edge_map > 0, 255, 0).astype(edge_map.dtype)


def CannyObjectMaskGeneration(img):
    """Return a filled mask of the largest closed-edge region in `img`.

    Edges are detected with Canny thresholds 10/20, closed with a 5x5
    morphological CLOSE, and the external contour with the largest area
    (assumed to be the hand) is filled with 255 on a black background.

    :param img: Image handed to CannyEdgeDetection.
    :return: uint8 mask shaped like the edge map; all zeros if no contour
             is found.
    """
    edge_map = CannyEdgeDetection(img, 10, 20)

    # Close small gaps so the outline forms a connected boundary.
    closed_edges = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))

    outlines, _ = cv2.findContours(closed_edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    mask = np.zeros(edge_map.shape, dtype=np.uint8)
    if outlines:
        # Assume the contour with the largest area is the hand and fill it.
        biggest = max(outlines, key=cv2.contourArea)
        cv2.drawContours(mask, [biggest], -1, color=255, thickness=cv2.FILLED)
    return mask


In [147]:
def get_contours(img_thresh):
    """Return the contours found in `img_thresh` (RETR_TREE, CHAIN_APPROX_SIMPLE).

    :param img_thresh: Image passed to cv2.findContours.
    :return: The contours; the hierarchy is discarded.
    """
    found, _hierarchy = cv2.findContours(img_thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    return found

def draw_contours(cv_image, contours, fill='line'):
    """Draw all `contours` in green (0, 255, 0) onto `cv_image`.

    :param cv_image: Image handed to cv2.drawContours as the drawing target.
    :param contours: Contours to draw; every one is drawn (index -1).
    :param fill: 'solid' fills the contours; any other value draws a
                 1-pixel outline.
    :return: Whatever cv2.drawContours returns for `cv_image`.
    """
    thickness = cv2.FILLED if fill == 'solid' else 1
    return cv2.drawContours(cv_image, contours, -1, (0, 255, 0), thickness)

def draw_detected(img):
    """Threshold `img`, find the contours and draw them filled onto `img`.

    :param img: Image to analyse; it is also passed to draw_contours as the
                drawing target.
    :return: The image returned by draw_contours.
    """
    return draw_contours(img, get_contours(BinaryThreshold(img)), fill='solid')

In [148]:
def get_hand_bbox_on_img(img, mask):
    """
    Takes an image and a corresponding hand mask, finds the largest contour in the mask,
    assumed to be the hand, and draws a bounding box around it on the image (in place).
    If no contour is found, the image is returned unmodified.

    A mask with a channel axis (for example the colour image returned by RGBRange)
    is first reduced to a single-channel mask that is 255 wherever any channel is
    non-zero, because cv2.findContours only accepts a single-channel 8-bit image.

    :param img: Image to draw on; modified in place
    :param mask: Mask of the hand, either 2-D or with a trailing channel axis
    :return: `img` with the bounding box drawn around the hand (if detected),
             and the bounding box coordinates (x, y, w, h) or None if no hand is detected.
    """
    if mask.ndim == 3:
        # Collapse the channel axis: a pixel counts as "set" if any channel is.
        mask = np.any(mask, axis=2).astype(np.uint8) * 255

    # Find contours in the mask
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        # Nothing detected: hand the image back untouched
        return img, None

    # Assume the largest contour is the hand
    max_contour = max(contours, key=cv2.contourArea)
    # Calculate bounding box for the largest contour
    x, y, w, h = cv2.boundingRect(max_contour)
    # Draw bounding box on the image
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return img, (x, y, w, h)

def draw_hand_bbox_from_mask(img, mask):
    """Draw the hand bounding box on `img` and, if ``SHOW`` is set, display it.

    The box is always drawn via get_hand_bbox_on_img; the window is only
    opened (and waits for a key press) when the module-level ``SHOW`` flag is
    truthy. Nothing is returned.

    :param img: Image handed to get_hand_bbox_on_img.
    :param mask: Mask handed to get_hand_bbox_on_img.
    """
    annotated, _ = get_hand_bbox_on_img(img, mask)

    if not SHOW:
        return

    cv2.imshow('Hand with Bounding Box', annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [149]:
def get_bbox_from_hv_projection(img, mask):
    """
    Given an image and its mask, this function projects the mask onto the horizontal and
    vertical axes and takes the first and last occupied column/row as the bounding box.
    The box therefore encloses ALL non-zero mask pixels, not just the largest region.
    The box is drawn on the image in place. If the mask is entirely zero, the image is
    returned unmodified.

    :param img: The image to draw on; modified in place.
    :param mask: The mask highlighting areas of interest (2-D, or with a channel axis).
    :return: `img` with the bounding box drawn around the detected area (if any),
             and the bounding box coordinates (x, y, w, h) as plain ints, or None if
             the mask is empty. `w` and `h` count pixels, so a single-pixel mask
             yields w == h == 1 (the same convention as cv2.boundingRect).
    """
    occupied = mask != 0
    if occupied.ndim == 3:
        # A pixel is occupied when any of its channels is non-zero.
        occupied = occupied.any(axis=2)

    # Columns / rows that contain at least one occupied pixel
    occupied_cols = np.flatnonzero(occupied.any(axis=0))
    occupied_rows = np.flatnonzero(occupied.any(axis=1))

    if occupied_cols.size == 0 or occupied_rows.size == 0:
        # Return the image untouched if nothing is set in the mask
        return img, None

    # Plain ints keep the returned tuple free of numpy scalar types.
    x1, x2 = int(occupied_cols[0]), int(occupied_cols[-1])
    y1, y2 = int(occupied_rows[0]), int(occupied_rows[-1])

    # x2 / y2 are inclusive indices, so the pixel extent needs the +1.
    w = x2 - x1 + 1
    h = y2 - y1 + 1

    # Draw the box through the outermost occupied pixels
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    return img, (x1, y1, w, h)

def draw_bbox_from_hv_projection(img, mask):
    """Draw the projection-based bounding box on `img`; show it if ``SHOW`` is set.

    The box is always drawn via get_bbox_from_hv_projection; the window is
    only opened (and waits for a key press) when the module-level ``SHOW``
    flag is truthy. Nothing is returned.

    :param img: Image handed to get_bbox_from_hv_projection.
    :param mask: Mask handed to get_bbox_from_hv_projection.
    """
    annotated, _ = get_bbox_from_hv_projection(img, mask)

    if not SHOW:
        return

    cv2.imshow('Image with Bounding Box', annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


In [150]:
def _frame_matching_mask(image, mask):
    """Return a fresh frame derived from `image` with the same height/width as `mask`."""
    mask_rows, mask_cols = mask.shape[:2]
    if image.shape[:2] == (mask_rows, mask_cols):
        return image.copy()

    # work() wraps every single-frame method in resize_and_crop_image, so the
    # same transform reproduces the exact frame the mask was computed from.
    candidate = resize_and_crop_image(image)
    if candidate.shape[:2] == (mask_rows, mask_cols):
        return candidate

    # Fallback for a process_func that rescales differently.
    return cv2.resize(image, (mask_cols, mask_rows))


def processSingleImage(process_func):
    """Apply a per-frame mask function to the video ``FVideo`` and save the results.

    Reads these module-level names: ``FVideo`` (input path), ``FFirst`` (where
    the first raw frame is saved), ``FOutput`` / ``FOutputBBox`` (output video
    paths), ``SHOW`` (display windows) and ``EXP`` (when truthy, only the first
    frame is processed and no videos are written).

    For every frame the mask returned by `process_func` is written to
    ``FOutput`` and a copy of the frame, brought to the mask's size and
    annotated with the hand bounding box, is written to ``FOutputBBox``.

    NOTE(review): the ``HPV`` flag only changes the name of ``FOutputBBox``;
    the box is always computed with get_hand_bbox_on_img. Confirm whether
    get_bbox_from_hv_projection was meant to be used when ``HPV`` is set.

    :param process_func: Callable taking a frame and returning a mask
                         (2-D, or an image with a channel axis).
    :raises AssertionError: If the first frame cannot be read.
    """
    # Create video capture object
    vidCap = cv2.VideoCapture(FVideo)
    out = None
    outBbox = None

    try:
        success, image = vidCap.read()
        assert success, f"Could not read the first frame of {FVideo}"

        cv2.imwrite(FFirst, image)

        mask = process_func(image)
        # Show the result
        if SHOW:
            cv2.imshow("object detection", mask)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

        # Annotate a mask-sized copy: drawing on `image` itself would leave a
        # rectangle on the first frame before it is processed again below,
        # and the box coordinates are only valid at the mask's resolution.
        draw_hand_bbox_from_mask(_frame_matching_mask(image, mask), mask)

        if not EXP:
            # Get video properties for the output video
            fps = vidCap.get(cv2.CAP_PROP_FPS)
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')

            # ndarray.shape is (rows, cols); VideoWriter wants (width, height).
            rows, cols = mask.shape[:2]
            frame_size = (cols, rows)
            out = cv2.VideoWriter(FOutput, fourcc, fps, frame_size)
            outBbox = cv2.VideoWriter(FOutputBBox, fourcc, fps, frame_size)

            while success:
                mask = process_func(image)

                # VideoWriter was opened for colour frames
                if len(mask.shape) == 2:
                    BGRImg = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                else:
                    BGRImg = mask

                # Write the processed frame
                out.write(BGRImg)

                # The annotated frame must have the writer's frame size and
                # share the mask's coordinate system.
                frame = _frame_matching_mask(image, mask)
                img_with_bbox, _ = get_hand_bbox_on_img(frame, mask)
                outBbox.write(img_with_bbox)

                success, image = vidCap.read()
    finally:
        # Release resources even if reading or processing failed part-way
        for writer in (out, outBbox):
            if writer is not None:
                writer.release()
        vidCap.release()
    

In [151]:
def find_motion_blobs(frame1, frame2, threshold=25):
    """Find contours of the regions that differ between two frames.

    The absolute difference of the frames is reduced to greyscale (if it has a
    channel axis), thresholded, dilated twice with a 5x5 kernel and passed to
    cv2.findContours (RETR_TREE, CHAIN_APPROX_SIMPLE).

    :param frame1: First frame.
    :param frame2: Second frame; passed with `frame1` to cv2.absdiff.
    :param threshold: Difference level passed to cv2.threshold (THRESH_BINARY).
    :return: The contours found in the dilated difference mask.
    """
    delta = cv2.absdiff(frame1, frame2)
    # Colour differences are collapsed to one channel; grey input is used as is.
    grey_delta = cv2.cvtColor(delta, cv2.COLOR_BGR2GRAY) if delta.ndim == 3 else delta

    # Keep only significant changes
    _, changed = cv2.threshold(grey_delta, threshold, 255, cv2.THRESH_BINARY)

    # Dilation fills small holes so blobs come out as connected regions
    grown = cv2.dilate(changed, np.ones((5, 5), np.uint8), iterations=2)

    blobs, _ = cv2.findContours(grown, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    return blobs


def Blob():
    """Detect motion between consecutive frames of ``FVideo`` and box the moving areas.

    Reads these module-level names: ``FVideo`` (input path), ``FOutput``
    (output video path), ``EXP`` (when truthy, only the first frame pair is
    processed and nothing is written) and ``SHOW`` (display each frame).

    Every frame, including the first, goes through resize_and_crop_image so
    that consecutive frames can be differenced and all written frames match
    the writer's frame size. Contours with an area below 500 are ignored.

    :raises RuntimeError: If the video cannot be opened or its first frame
                          cannot be read.
    """
    cap = cv2.VideoCapture(FVideo)
    out = None

    try:
        # Raise instead of calling exit(): inside a notebook exit() targets
        # the interpreter/kernel rather than just aborting this function.
        if not cap.isOpened():
            raise RuntimeError("Error: Could not open video.")

        ret, first_frame = cap.read()
        if not ret:
            raise RuntimeError("Error: Could not read the first frame.")

        # The first frame must be resized exactly like the later ones,
        # otherwise cv2.absdiff sees two frames of different sizes.
        prev_frame = cv2.cvtColor(resize_and_crop_image(first_frame), cv2.COLOR_BGR2GRAY)

        if not EXP:
            fps = cap.get(cv2.CAP_PROP_FPS)
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            # ndarray.shape is (rows, cols); VideoWriter wants (width, height).
            rows, cols = prev_frame.shape[:2]
            out = cv2.VideoWriter(FOutput, fourcc, fps, (cols, rows))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = resize_and_crop_image(frame)

            # Convert the current frame to grayscale
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Find motion areas
            contours = find_motion_blobs(prev_frame, gray_frame)

            # Draw bounding boxes on the colour frame
            for contour in contours:
                if cv2.contourArea(contour) < 500:
                    continue  # Ignore small areas
                box_x, box_y, box_w, box_h = cv2.boundingRect(contour)
                cv2.rectangle(frame, (box_x, box_y), (box_x + box_w, box_y + box_h), (0, 255, 0), 2)

            if out is not None:
                out.write(frame)

            if SHOW:
                cv2.imshow('Motion Detection', frame)
            prev_frame = gray_frame

            if EXP:
                # Experiment mode: stop after the first frame pair
                cv2.waitKey(0)
                break

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if something failed part-way
        if out is not None:
            out.release()
        cap.release()


In [152]:
# Mode string -> per-frame function. Each function takes one frame and returns
# a mask; work() wraps it with resize_and_crop_image and runs it through
# processSingleImage.
process_single_frame_methods = {
    "rgb": RGBRange,
    "grey": BinaryThreshold,
    "canny": CannyObjectMaskGeneration, 
    "cannyEdge": CannyEdgeMaskGeneration,
}

# Mode string -> whole-video function. These take no arguments and read the
# video themselves; work() calls them directly.
process_video_methods = {
    "blob": Blob
}


def work(mode):
    """Run the processing pipeline registered under `mode`.

    Single-frame modes (keys of ``process_single_frame_methods``) are wrapped
    so that each frame is passed through resize_and_crop_image first, then
    driven by processSingleImage. Any other mode is looked up in
    ``process_video_methods`` and called with no arguments.

    :param mode: Key of one of the two method dictionaries.
    :raises AssertionError: If `mode` is in neither dictionary.
    """
    frame_method = process_single_frame_methods.get(mode)

    if frame_method is None:
        # Not a per-frame mode: it has to be a whole-video mode.
        video_method = process_video_methods.get(mode)
        assert video_method is not None
        video_method()
        return

    def process_func(image):
        return frame_method(resize_and_crop_image(image))

    processSingleImage(process_func)


In [153]:
# Experiment mode. When False, processSingleImage and Blob write their output
# videos; when True, no video is written and Blob stops after one frame pair.
EXP = False
# When True, resize_and_crop_image cuts the resized frame to its top-left square.
CROP = False
# Selects the "_bbox_HPV" suffix for the bounding-box output filename.
HPV = True
# NOTE(review): the driver loop below reassigns `mode` for each run, so this
# value is overwritten before work() is called — confirm whether it is needed.
mode = "blob"
# When True, intermediate results are displayed with cv2.imshow.
SHOW = False

In [154]:
import os

# Input video and its working directory (created if missing)
VIDEO_NAME = "fingers"
FVideo = f'videos/{VIDEO_NAME}.mp4'
WORKDIR = f"videos/{VIDEO_NAME}/"
os.makedirs(WORKDIR, exist_ok=True)

# Run every mode in turn. The F* names are module-level on purpose:
# processSingleImage and Blob read them as globals.
for mode in ["grey", "canny", "blob"]:
    # First raw frame of the video
    FFirst = WORKDIR + "first.png"
    # Processed output video for this mode
    FOutput = WORKDIR + f"{mode}.mp4"
    # Output video with bounding boxes; the suffix records the HPV flag
    FOutputBBox = WORKDIR + (f"{mode}_bbox_HPV.mp4" if HPV else f"{mode}_bbox.mp4")

    work(mode)

In [155]:
# Close any OpenCV windows still open after the runs above.
cv2.destroyAllWindows()