In [None]:
pip install opencv-python

In [None]:
pip install matplotlib

In [None]:
import cv2
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [None]:
def ICV_to_grayscale(image):
    """
    Converts a 3-channel image to a single-channel grayscale image using a weighted sum of its channels.
    Channel 0 is weighted by 0.299, channel 1 by 0.587 and channel 2 by 0.114.

    NOTE(review): frames decoded by OpenCV are normally in B, G, R channel order, in which
    case 0.299 is applied to blue rather than red - confirm the intended channel order.

    Parameters:
    image -> 3D numpy array indexed as [row, column, channel]

    Returns:
    2D numpy array (weighted sum of the first three channels)
    """
    # Unpacking the shape doubles as a check that the input has exactly three axes.
    _rows, _cols, _channels = image.shape
    first, second, third = image[:, :, 0], image[:, :, 1], image[:, :, 2]
    return 0.299 * first + 0.587 * second + 0.114 * third

# Task A

In [None]:
# NOTE(review): this cell opens 'DatasetC.mpg', but the later cells reassign
# `video_path` to 'DatasetC.avi' and open a fresh capture before reading any frame.
# The handle created here is never read from or released in the visible notebook -
# confirm whether this cell is still needed.
video_path = 'DatasetC.mpg'
cap = cv2.VideoCapture(video_path)

## Utility Functions

1. Classification Threshold: I picked a classification threshold of 40. With anything lower, the frame-difference images were too noisy; with anything higher, the frame differences no longer contained enough information.
2. Frame difference: Take the pixel-by-pixel difference between two images (ideally frames from a video). We take the absolute value of the difference because images can't have negative pixel values.

In [None]:
def ICV_classification_threshold(fdif, threshold=40):
    """
    Turns a frame difference into a binary mask: pixels strictly above the threshold become 255, all others 0.

    Parameters:
    fdif -> numpy array (frame difference)
    threshold -> number (cut-off value; defaults to 40, the value chosen for this dataset)

    Returns:
    numpy array of the same shape as fdif containing only 0 and 255
    """
    return np.where(fdif > threshold, 255, 0)
        
def ICV_frame_difference(frame1, frame):
    """
    Returns the absolute pixel-by-pixel difference between two images.

    Parameters:
    frame1 -> numpy array
    frame -> numpy array (same shape as frame1, or broadcastable to it)

    Returns:
    numpy array holding |frame1 - frame|; float64 when either input has an unsigned integer dtype
    """
    first = np.asarray(frame1)
    second = np.asarray(frame)
    # Unsigned subtraction wraps around (uint8: 10 - 20 == 246), which would turn small
    # negative differences into large positive ones, so widen before subtracting.
    if first.dtype.kind == "u" or second.dtype.kind == "u":
        first = first.astype(np.float64)
        second = second.astype(np.float64)
    return np.abs(first - second)

In [None]:
def ICV_get_video_frames(video_capture):
    """
    Reads the frames of an opened video capture into a list and releases the capture.
    The number of read attempts is the frame count reported by the capture; failed reads are skipped.

    Parameters:
    video_capture -> cv2.VideoCapture("File")

    Returns:
    frames -> list of the frames that were read successfully, in order

    Raises:
    RuntimeError if the capture is not opened.
    """
    if not video_capture.isOpened():
        # Raise a catchable error instead of calling exit(), which would terminate the
        # interpreter (or request a kernel shutdown) rather than report the problem.
        raise RuntimeError("Error: Could not open video.")

    frames = []
    try:
        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Total Frames: ", total_frames)
        for _ in range(total_frames):
            ret, frame = video_capture.read()
            if ret:
                frames.append(frame)
    finally:
        # Release the capture even if reading fails part-way through.
        video_capture.release()
    return frames

# Open the video and load every readable frame into memory for the plotting cells;
# ICV_get_video_frames releases the capture before it returns.
video_path = 'DatasetC.avi'
cap = cv2.VideoCapture(video_path)
frames = ICV_get_video_frames(cap)
# Number of frames that were actually read.
print(len(frames))

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(10, 10))
selected_frame_1, selected_frame_2 = 34, 90

# (index into `frames`, subplot title), one entry per axis from left to right.
frame_panels = [
    (0, "Reference Frame"),
    (selected_frame_1, "Selected Frame 1"),
    (selected_frame_2, "Selected Frame 2"),
]
for ax, (frame_index, panel_title) in zip(axs.flat, frame_panels):
    ax.axis('off')
    # OpenCV decodes frames as BGR while imshow interprets 3-channel arrays as RGB,
    # so convert before displaying to avoid swapped red/blue channels.
    ax.imshow(cv2.cvtColor(frames[frame_index], cv2.COLOR_BGR2RGB))
    ax.set_title(panel_title)
plt.savefig("SelectedFrames5a")

In [None]:
def ICV_frame_difference_with_first_frame(video_capture):
    """
    Computes the pixel-by-pixel difference between the first frame of the video (the reference)
    and every following frame, on grayscale versions of the frames.
    Returns the list of frame differences and the same differences with the classification threshold applied.
    The capture is released before returning.

    Parameters:
    video_capture -> cv2.VideoCapture("File")

    Returns:
    fdiffs, fdiffs_with_threshold -> two lists with one 2D array per successfully read frame after the first

    Raises:
    RuntimeError if the capture is not opened or the first frame cannot be read.
    """
    if not video_capture.isOpened():
        # Raise a catchable error instead of calling exit(), which would terminate the
        # interpreter (or request a kernel shutdown) rather than report the problem.
        raise RuntimeError("Error: Could not open video.")

    f_diffs, fdiffs_with_threshold = [], []
    try:
        # Get max number of frames
        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Total Frames: ", total_frames)

        # Get first frame as reference; without it there is nothing to compare against.
        ret, frame1 = video_capture.read()
        if not ret:
            raise RuntimeError("Error: Could not read the first frame.")
        print(frame1.shape)
        fg1 = ICV_to_grayscale(frame1)

        # The reference frame has already been consumed, so start counting at 1.
        for _ in range(1, total_frames):
            ret, frame = video_capture.read()
            if not ret:
                continue
            fg = ICV_to_grayscale(frame)
            fdif = ICV_frame_difference(fg1, fg)
            f_diffs.append(fdif)
            fdiffs_with_threshold.append(ICV_classification_threshold(fdif))
    finally:
        # Release the capture even if reading fails part-way through.
        video_capture.release()
    return f_diffs, fdiffs_with_threshold


# Run Task A on a freshly opened capture (the function releases it when done).
video_path = 'DatasetC.avi'
cap = cv2.VideoCapture(video_path)
# np.array stacks the returned (differences, thresholded differences) pair into one
# array whose first axis has length 2; unpacking splits it back into two arrays.
frame_difs, frame_difs_with_threshold = np.array(ICV_frame_difference_with_first_frame(cap))
print("Done")
# Cell output: shape of the stacked frame differences.
frame_difs.shape

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(10, 10))

# One (image stack, frame number, title) entry per subplot, in row-major order.
# Index `frame number - 1` is used because the reference frame has no difference image.
difference_panels = [
    (frame_difs, selected_frame_1, "Frame Difference for Selected Frame 1"),
    (frame_difs, selected_frame_2, "Frame Difference for Selected Frame 2"),
    (frame_difs_with_threshold, selected_frame_1, "Frame Difference with Threshold for Selected Frame 1"),
    (frame_difs_with_threshold, selected_frame_2, "Frame Difference with Threshold for Selected Frame 2"),
]
for ax, (difference_stack, frame_number, panel_title) in zip(axs.flat, difference_panels):
    ax.axis('off')
    ax.imshow(difference_stack[frame_number - 1])
    ax.set_title(panel_title)

fig.suptitle("Frame Differences", fontsize=16)
plt.tight_layout()
plt.savefig("FrameDifferences5a")

# Task B

In [None]:
def ICV_frame_difference_with_previous_frame(video_capture):
    """
    Computes the pixel-by-pixel difference between each pair of consecutive frames of the video,
    on grayscale versions of the frames.
    Returns the list of frame differences and the same differences with the classification threshold applied.
    The capture is released before returning.

    Parameters:
    video_capture -> cv2.VideoCapture("File")

    Returns:
    fdiffs, fdiffs_with_threshold -> two lists; entry i compares frame i with frame i + 1

    Raises:
    RuntimeError if the capture is not opened.
    """
    if not video_capture.isOpened():
        # Raise a catchable error instead of calling exit(), which would terminate the
        # interpreter (or request a kernel shutdown) rather than report the problem.
        raise RuntimeError("Error: Could not open video.")

    gray_frames, f_diffs, fdiffs_with_threshold = [], [], []
    try:
        # Get max number of frames
        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Total Frames: ", total_frames)
        for _ in range(total_frames):
            ret, frame = video_capture.read()
            if ret:
                # Keep the grayscale version: differencing the raw colour frames would
                # produce 3-channel results instead of the intended 2D difference images.
                gray_frames.append(ICV_to_grayscale(frame))
    finally:
        # Release the capture even if reading fails part-way through.
        video_capture.release()

    # Walk over consecutive (previous, current) pairs of grayscale frames.
    for previous_gray, current_gray in zip(gray_frames, gray_frames[1:]):
        fdif = ICV_frame_difference(previous_gray, current_gray)
        f_diffs.append(fdif)
        fdiffs_with_threshold.append(ICV_classification_threshold(fdif))

    return f_diffs, fdiffs_with_threshold

# Run Task B on a freshly opened capture; `video_path` is whatever an earlier cell last assigned.
cap = cv2.VideoCapture(video_path)
# The returned pair of lists is stacked by np.array and unpacked again along its first axis.
frame_difs_consecutive, fdiffs_with_threshold_consecutive = np.array(ICV_frame_difference_with_previous_frame(cap))

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(10, 10))
selected_frame_1, selected_frame_2 = 50, 111

# (index into `frames`, subplot title), one entry per axis from left to right.
frame_panels = [
    (selected_frame_1, "Selected Frame 1"),
    (selected_frame_2, "Selected Frame 2"),
]
for ax, (frame_index, panel_title) in zip(axs.flat, frame_panels):
    ax.axis('off')
    # OpenCV decodes frames as BGR while imshow interprets 3-channel arrays as RGB,
    # so convert before displaying to avoid swapped red/blue channels.
    ax.imshow(cv2.cvtColor(frames[frame_index], cv2.COLOR_BGR2RGB))
    ax.set_title(panel_title)

plt.savefig("SelectedFrame5b.png")

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(10, 10))

# One (image stack, frame number, title) entry per subplot, in row-major order.
# Entry `frame number - 1` of each stack is the one displayed for that frame.
consecutive_panels = [
    (frame_difs_consecutive, selected_frame_1, "Frame Difference for Selected Frame 1"),
    (frame_difs_consecutive, selected_frame_2, "Frame Difference for Selected Frame 2"),
    (fdiffs_with_threshold_consecutive, selected_frame_1, "Frame Difference with Threshold for Selected Frame 1"),
    (fdiffs_with_threshold_consecutive, selected_frame_2, "Frame Difference with Threshold for Selected Frame 2"),
]
for ax, (difference_stack, frame_number, panel_title) in zip(axs.flat, consecutive_panels):
    ax.axis('off')
    ax.imshow(difference_stack[frame_number - 1])
    ax.set_title(panel_title)

fig.suptitle("Frame Differences", fontsize=16)
plt.tight_layout()
plt.savefig("FrameDifferences5b.png")

# The result frames here look distorted; this issue cropped up after refactoring all functions to start with ICV_.
# The report has the correct version for consecutive frames.

# Task C

In [None]:
# Fresh capture for the background estimation; ICV_find_background releases it when done.
cap = cv2.VideoCapture(video_path)

def ICV_find_background(video_capture, alpha=0.1):
    """
    Applies weighted temporal averaging over the grayscale frames of a video sequence to generate a
    reference frame, so that the first frame of the video no longer has to be used as the reference.
    The first frame initialises the estimate; every later frame is blended in as
    background = (1 - alpha) * background + alpha * frame.
    The capture is released before returning.

    Parameters:
    video_capture -> cv2.VideoCapture("File")
    alpha -> float (weight given to each new frame; defaults to 0.1)

    Returns:
    background -> grayscale image with the shape returned by ICV_to_grayscale

    Raises:
    RuntimeError if the capture is not opened or the first frame cannot be read.
    """
    if not video_capture.isOpened():
        # Raise a catchable error instead of calling exit(), which would terminate the
        # interpreter (or request a kernel shutdown) rather than report the problem.
        raise RuntimeError("Error: Could not open video.")

    try:
        # The first frame seeds the running average; without it there is no estimate.
        ret, frame = video_capture.read()
        if not ret:
            raise RuntimeError("Error: Could not read the first frame.")
        background = ICV_to_grayscale(frame)

        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Total Frames: ", total_frames)
        # One frame has already been consumed, so start counting at 1.
        for _ in range(1, total_frames):
            ret, c_frame = video_capture.read()
            if ret:
                c_frame = ICV_to_grayscale(c_frame)
                background = (1 - alpha) * background + alpha * c_frame
    finally:
        # Release the capture even if reading fails part-way through.
        video_capture.release()
    return background

In [None]:
# Estimate the background from the capture opened earlier and display it in grayscale.
bg = ICV_find_background(cap)
plt.imshow(bg, cmap="gray")

# Task D

In [None]:
def ICV_frame_difference_with_background(video_capture, background):
    """
    Computes the pixel-by-pixel difference between every grayscale frame of the video and the given
    background, and applies the classification threshold to each difference.
    The capture is released before returning.

    Parameters:
    video_capture -> cv2.VideoCapture("File")
    background -> grayscale reference image the frames are compared against

    Returns:
    fdiffs -> list with one thresholded difference image per successfully read frame

    Raises:
    RuntimeError if the capture is not opened.
    """
    if not video_capture.isOpened():
        # Raise a catchable error instead of calling exit(), which would terminate the
        # interpreter (or request a kernel shutdown) rather than report the problem.
        raise RuntimeError("Error: Could not open video.")

    f_difs_bg = []
    try:
        # Max frame count
        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Total Frames: ", total_frames)
        for _ in range(total_frames):
            ret, c_frame = video_capture.read()
            if not ret:
                continue
            c_frame = ICV_to_grayscale(c_frame)
            dif = ICV_frame_difference(background, c_frame)
            # Only the thresholded (binary) difference is kept.
            f_difs_bg.append(ICV_classification_threshold(dif))
    finally:
        # Release the capture even if reading fails part-way through.
        video_capture.release()
    return f_difs_bg

# Task D input: open a new capture and compare every frame against the estimated background `bg`.
cap = cv2.VideoCapture(video_path)
f_difs_bg = ICV_frame_difference_with_background(cap, bg)

In [None]:
# Show every 20th thresholded background difference, starting with the first.
for background_difference in f_difs_bg[::20]:
    plt.imshow(background_difference)
    plt.show()

In [None]:
def ICV_flood_fill(image, x, y, visited):
    """
    Marks every non-zero pixel that is 8-connected to the starting position as visited.
    The search is iterative: positions are taken from a stack, and the eight neighbours of each
    newly visited pixel are pushed until the stack is empty. Positions outside the image,
    already visited positions and zero-valued pixels are ignored.

    Parameters:
    image -> 2D numpy array (binary image searched for connected non-zero pixels)
    x -> int (starting index along the first axis of image)
    y -> int (starting index along the second axis of image)
    visited -> 2D boolean numpy array of the same shape as image; updated in place

    Returns:
    None
    """
    rows, cols = image.shape
    # All eight neighbours of a pixel, each listed exactly once.
    neighbour_offsets = [
        (-1, -1), (-1, 0), (-1, 1),
        (0, -1),           (0, 1),
        (1, -1),  (1, 0),  (1, 1),
    ]
    stack = [(x, y)]
    while stack:
        r, c = stack.pop()
        if not (0 <= r < rows and 0 <= c < cols):
            continue
        if visited[r, c] or image[r, c] == 0:
            continue
        visited[r, c] = True
        stack.extend((r + dr, c + dc) for dr, dc in neighbour_offsets)
        

def ICV_count_moving_objects(image):
    """
    Counts the objects in a binary image.
    Pixels are scanned in row-major order; every pixel equal to 255 that has not been visited yet
    starts a flood fill, which marks the pixels it reaches as visited, and adds one to the count.

    Parameters:
    image -> 2D numpy array

    Returns:
    object_count -> int
    """
    n_rows, n_cols = image.shape
    seen = np.zeros((n_rows, n_cols), dtype=bool)
    components = 0
    for r, c in np.ndindex(n_rows, n_cols):
        # Skip anything that cannot seed a new object.
        if image[r, c] != 255 or seen[r, c]:
            continue
        ICV_flood_fill(image, r, c, seen)
        components += 1
    return components

In [None]:
# Object count for every thresholded background difference, in frame order.
moving_objects_in_video = [ICV_count_moving_objects(difference_mask) for difference_mask in f_difs_bg]

In [None]:
# Derive the x positions from the data so the chart does not depend on the video having exactly 140 frames.
plt.bar(range(len(moving_objects_in_video)), moving_objects_in_video)

In [None]:
## Taken from Question 2 to apply denoising on the video frames

def ICV_convolution_filtering_grayscale(image, kernel):
    """
    Applies a kernel to a greyscale image with zero padding, returning an image of the same size.
    Each output pixel is the sum of the kernel multiplied element-wise with the neighbourhood centred
    on that pixel. The kernel is applied as given (not flipped), which equals a convolution for
    symmetric kernels such as the mean and Gaussian kernels.

    Parameters:
    image -> 2D numpy array
    kernel -> 2D numpy array (usually with shapes (3,3), (5,5) or (7,7))

    Returns:
    2D numpy array of floats (image with the filter applied to it)
    """
    image = np.asarray(image, dtype=float)
    kernel = np.asarray(kernel, dtype=float)
    rows, cols = image.shape[:2]
    k_rows, k_cols = kernel.shape[:2]

    # Zero-pad so that the kernel can be centred on every pixel, including the border.
    # The padded array is (kernel size - 1) larger along each axis.
    pad_top, pad_left = (k_rows - 1) // 2, (k_cols - 1) // 2
    padded_image = np.zeros((rows + k_rows - 1, cols + k_cols - 1))
    padded_image[pad_top:pad_top + rows, pad_left:pad_left + cols] = image

    # Accumulate one shifted copy of the padded image per kernel tap. Output pixel (i, j)
    # receives kernel[a, b] * padded_image[i + a, j + b], i.e. the window centred on (i, j).
    new_image = np.zeros((rows, cols))
    for a in range(k_rows):
        for b in range(k_cols):
            new_image += kernel[a, b] * padded_image[a:a + rows, b:b + cols]
    return new_image

In [None]:
def ICV_gaussian_blur(image):
    """
    Filters an image with the 3x3 Gaussian blur kernel and returns the result divided by the kernel sum.

    Parameters:
    image -> 2D numpy array

    Returns:
    2D numpy array (filtered image)
    """
    # The outer product of [1, 2, 1] with itself gives [[1,2,1],[2,4,2],[1,2,1]].
    weights = np.outer([1, 2, 1], [1, 2, 1])
    filtered = ICV_convolution_filtering_grayscale(image, weights)
    return filtered / np.sum(weights)

def ICV_mean_filter(image):
    """
    Filters an image with the 3x3 mean kernel and returns the result divided by the kernel sum.

    Parameters:
    image -> 2D numpy array

    Returns:
    2D numpy array (filtered image)
    """
    weights = np.full((3, 3), 1.0)
    filtered = ICV_convolution_filtering_grayscale(image, weights)
    return filtered / np.sum(weights)

In [None]:
# Denoise the first thresholded background difference with each filter and show both results.
denoised_img1 = ICV_mean_filter(f_difs_bg[0])
denoised_img2 = ICV_gaussian_blur(f_difs_bg[0])
for denoised_image in (denoised_img1, denoised_img2):
    plt.imshow(denoised_image)
    plt.show()

# Object counts for the raw frame, the mean-filtered frame and the Gaussian-blurred frame.
moc, moc1, moc2 = [
    ICV_count_moving_objects(candidate)
    for candidate in (f_difs_bg[0], denoised_img1, denoised_img2)
]
moc, moc1, moc2

In [None]:
# Object count for every thresholded background difference, without denoising.
moving_objects_in_video = [ICV_count_moving_objects(difference_mask) for difference_mask in f_difs_bg]
print("Done!")

In [None]:
# Can take up to 3-4 minutes to compute
# Each frame is denoised with the mean filter before its objects are counted.
moving_objects_in_video1 = [
    ICV_count_moving_objects(ICV_mean_filter(difference_mask))
    for difference_mask in f_difs_bg
]
print("Done!")

In [None]:
# Can take up to 3-4 minutes to compute
# Each frame is denoised with the Gaussian blur before its objects are counted.
moving_objects_in_video2 = [
    ICV_count_moving_objects(ICV_gaussian_blur(difference_mask))
    for difference_mask in f_difs_bg
]
print("Done!")

In [None]:
# Plot raw object counts for each frame; x positions follow the data length rather than a fixed 140.
plt.bar(range(len(moving_objects_in_video)), moving_objects_in_video)

In [None]:
# Plot object counts for each frame after denoising with the mean filter;
# x positions follow the data length rather than a fixed 140.
plt.bar(range(len(moving_objects_in_video1)), moving_objects_in_video1)

In [None]:
# Plot object counts for each frame after denoising with the Gaussian blur filter;
# x positions follow the data length rather than a fixed 140.
plt.bar(range(len(moving_objects_in_video2)), moving_objects_in_video2)

In [None]:
# Sanity check: the three count lists should cover the same number of frames.
tuple(
    len(object_counts)
    for object_counts in (moving_objects_in_video, moving_objects_in_video1, moving_objects_in_video2)
)