In [4]:
import base64

In [20]:
import cv2
from IPython.display import clear_output, display, HTML
import time
import sys
import numpy as np

# Grab a single frame so the per-pixel model arrays can be sized to the video.
video = cv2.VideoCapture('test-video-segmentation.mp4')
ok, frame = video.read()
video.release()  # Only the first frame is needed here; the playback cells reopen the file
if not ok:
    # Fail with a clear message instead of an AttributeError on `frame.shape` below
    raise IOError("Could not read a frame from 'test-video-segmentation.mp4'")

# Global Variables:

threshhold = 20 # [10, 25] was the range in the paper (name spelled as referenced by match_pixel)
B = 9 # Inverse learning rate = 2^9; 9 LSBs in centroids are for error term
div = 2**B # Divide by this to get the actual centroid value
P0 = 0.5 # Probability threshold
NUM_CLUSTERS = 5 # Clusters kept per pixel

# H x W x 5 Array for Weights (5 clusters per pixel)
weights = np.zeros((frame.shape[0], frame.shape[1], NUM_CLUSTERS))

# H x W x 5 x 3 Array for Centroids + Errors (5 clusters per pixel, 3 colour channels)
centroid_and_errors = np.zeros((frame.shape[0], frame.shape[1], NUM_CLUSTERS, 3))

# ***BOTH WEIGHTS AND CENTROIDS ARE SORTED FROM GREATEST TO LEAST WEIGHT***

In [70]:
# Match the pixel with the highest-weighted cluster centroid within T Manhattan distance of the pixel color
# Also perform error adjustment like in Algorithm 2 in the paper
# ALSO perform cluster replacement in this function if no matching cluster is found
def match_pixel(rgb_pixel, i, j):
    """Match a pixel against the clusters stored for image position (i, j).

    The clusters of the pixel are scanned in storage order (expected to be
    sorted from greatest to least weight).  The first cluster whose centroid
    lies within ``threshhold`` Manhattan distance of ``rgb_pixel`` is the
    match, and the signed difference between the pixel and that centroid is
    added to the cluster's stored centroid-plus-error value.

    If no cluster matches, the last (lowest-weighted) cluster is replaced by
    a new cluster centred on the pixel with weight 0.01, and the weights of
    this pixel are re-normalised to sum to 1.

    Args:
        rgb_pixel: length-3 array-like holding the pixel's channel values.
        i: row index of the pixel.
        j: column index of the pixel.

    Returns:
        Index of the matching cluster, or -1 if a new cluster was created.
    """
    global centroid_and_errors
    global weights
    global div

    # Work in float so unsigned 8-bit pixel values cannot wrap or overflow.
    pixel = np.asarray(rgb_pixel, dtype=np.float64)
    # View onto this pixel's clusters; writes go straight to the global array.
    clusters = centroid_and_errors[i, j]

    # Must be sorted from greatest to least
    for cluster_idx in range(clusters.shape[0]):
        centroid = clusters[cluster_idx] // div  # Integer division to remove error term

        # Calculate the Manhattan Distance
        distance = np.sum(np.abs(centroid - pixel))

        if distance < threshhold:
            clusters[cluster_idx] += pixel - centroid  # Adjust error terms
            # NOTE(review): the weight of the matched cluster is not updated here.
            return cluster_idx

    # If a matching cluster was not found...
    # Replace lowest-weighted cluster with new cluster with centroid=pixel value and weight=0.01
    last = clusters.shape[0] - 1
    # Store the centroid scaled by div, because every read divides by div again.
    clusters[last] = pixel * div
    weights[i, j, last] = 0.01
    # Normalize cluster weights (this is the only time it needs to happen)
    weights[i, j] /= np.sum(weights[i, j])
    return -1 # ***Should this be the sorted index of the new centroid?***

def sort_clusters():
    """Re-order every pixel's clusters by descending weight, in place.

    The global ``weights`` array is sorted along its last axis from greatest
    to least, and the same per-pixel permutation is applied to the cluster
    axis of the global ``centroid_and_errors`` array so that each weight
    stays paired with its centroid.  Both arrays are modified in place.
    """
    global centroid_and_errors
    global weights
    # argsort is ascending; reversing the last axis yields descending order.
    order = np.argsort(weights, axis=-1)[..., ::-1]
    weights[...] = np.take_along_axis(weights, order, axis=-1)
    # Apply the same permutation to the centroids; the added trailing axis
    # broadcasts the cluster order across the colour channels.
    centroid_and_errors[...] = np.take_along_axis(
        centroid_and_errors, order[..., np.newaxis], axis=2
    )

def process_frame(frame):
    """Classify every pixel of ``frame`` and return the resulting mask.

    Each pixel is matched with ``match_pixel``.  For a matched cluster index
    ``Mk``, ``P`` is the summed weight of the clusters stored before it
    (indices ``0 .. Mk-1``); the pixel is marked as background when ``P`` is
    below the global threshold ``P0``.

    Args:
        frame: image array indexed as ``frame[row, col]``.

    Returns:
        Array of shape ``(rows, cols)`` holding 1 for background pixels and 0
        otherwise (including pixels for which a new cluster was created).
    """
    global weights
    global P0
    height, width = frame.shape[:2]
    output = np.zeros((height, width))

    for i in range(height):
        for j in range(width):
            Mk = match_pixel(frame[i, j], i, j)
            if Mk == -1:
                continue # Assume pixel is part of foreground if new centroid was created (is this a good idea?)
            if Mk == 0:
                output[i, j] = 1
                continue # P = 0 if the centroid of greatest weight was selected (definitely background pixel)
            # Sum the weights of every cluster ranked above the matched one.
            # The slice end is exclusive, so [:Mk] covers indices 0 .. Mk-1.
            P = np.sum(weights[i, j, :Mk])
            if P < P0: # Background pixel found
                output[i, j] = 1
    print("Done with frame")
    return output

# Per-pixel algorithm outline (notes):
# 1. If no matching cluster is found, the lowest-weighted cluster is replaced with a new cluster of low weight (0.01 in the paper).
# 2. If a new cluster was not made, update all the weights according to equation 1 and add the matching error to the accumulated error of the matched centroid.
# 3. Normalize the weights.
# 4. Calculate P, then classify the pixel.
# 5. Return the new frame with the segmentation.

In [73]:
# Preview: play the source video inline at one third of its original size.
video = cv2.VideoCapture('test-video-segmentation.mp4')

if not video.isOpened():
    print('Could not open video!')
else:
    try:
        while True:
            ret, frame = video.read()
            if not ret:
                break  # End of stream (or read failure)
            frame = cv2.resize(frame, (frame.shape[1]//3, frame.shape[0]//3))
            clear_output(wait=True)  # Clear the previous frame
            display(HTML("<img src='data:image/jpeg;base64," + base64.b64encode(cv2.imencode('.jpg', frame)[1]).decode() + "'>"))
            time.sleep(0.01)  # Adjust the sleep duration as needed
    finally:
        # Release the capture even if the cell is interrupted mid-playback.
        video.release()


In [72]:
# Run the segmentation on every frame of the video and show the mask inline.
video = cv2.VideoCapture('test-video-segmentation.mp4')

if not video.isOpened():
    print('Could not open video!')
else:
    try:
        while True:
            ret, frame = video.read()
            if not ret:
                break  # End of stream (or read failure)
            segmented_frame = process_frame(frame)
            sort_clusters()  # Keep clusters ordered by weight for the next frame
            # The mask holds 0/1 values; scale it to 0/255 in 8-bit so it is
            # visible once encoded as JPEG (otherwise it renders near-black).
            mask_u8 = (segmented_frame * 255).astype(np.uint8)
            clear_output(wait=True)  # Clear the previous frame
            display(HTML("<img src='data:image/jpeg;base64," + base64.b64encode(cv2.imencode('.jpg', mask_u8)[1]).decode() + "'>"))
            time.sleep(0.01)  # Adjust the sleep duration as needed
    finally:
        # Release the capture even if the cell is interrupted mid-run.
        video.release()


KeyboardInterrupt: 