In [83]:
import cv2 as cv
import os
import numpy as np
from matplotlib import pyplot as plt

In [84]:
# Sample clip used for quick manual testing
VIDEO_PATH = "sample_videos/cool_dog.mp4"

# Crop margins in pixels, removed from each edge of a (resized) frame
TOP_CROP, BOTTOM_CROP = 100, 10
LEFT_CROP, RIGHT_CROP = 0, 50

# Size in pixels that extracted frames are resized to
FRAME_WIDTH, FRAME_HEIGHT = 576, 324

# MSE value a frame has to exceed to be kept by pick_frames_sliding
MSE_THRESHOLD = 40

# Sampling interval passed to the extract functions, in milliseconds
INTERVAL = 200

# Percentage of frames skipped at the start and at the end of a video
FRAME_EXCLUSION_FIRST = 60
FRAME_EXCLUSION_LAST = 25

# Folder the videos are read from and folder the saved frames are written to
DATASET_PATH = "dataset_raw/People"
OUTPUT_PATH = "dataset/Empty"

In [85]:
def crop_frame(frame, top_crop, bottom_crop, left_crop, right_crop):
    """Trim a margin of pixels from each edge of a frame.

    Args:
        frame: Array indexed as [row, column, ...].
        top_crop: Rows removed from the top edge.
        bottom_crop: Rows removed from the bottom edge.
        left_crop: Columns removed from the left edge.
        right_crop: Columns removed from the right edge.

    Returns:
        The sliced region of ``frame`` that remains after removing the margins.
    """
    height, width = frame.shape[0], frame.shape[1]
    kept_rows = slice(top_crop, height - bottom_crop)
    kept_cols = slice(left_crop, width - right_crop)
    return frame[kept_rows, kept_cols]


def extract_frames(video_path, interval, target_height, target_width):
    """Sample resized frames from the middle part of a video.

    The first FRAME_EXCLUSION_FIRST percent and the last FRAME_EXCLUSION_LAST
    percent of the video's frames are skipped (both are module-level
    constants). Inside the remaining window one frame is taken every
    ``interval`` milliseconds. The first and the last sampled frame are
    dropped from the result.

    Args:
        video_path: Path of the video file to open.
        interval: Distance between two sampled frames, in milliseconds.
        target_height: Height in pixels of the returned frames.
        target_width: Width in pixels of the returned frames.

    Returns:
        List of resized frames; may be empty if the video cannot be read or
        the window yields fewer than three samples.

    Raises:
        AssertionError: If more than 500 frames would be sampled.
    """
    video = cv.VideoCapture(video_path)
    try:
        total_frames = video.get(cv.CAP_PROP_FRAME_COUNT)

        # Frame window that survives the start/end exclusion
        first_frame = (total_frames * FRAME_EXCLUSION_FIRST) // 100
        last_frame = (total_frames * (100 - FRAME_EXCLUSION_LAST)) // 100
        video.set(cv.CAP_PROP_POS_FRAMES, first_frame)

        # Timestamp of the window start. Every later seek is relative to it;
        # seeking to an absolute `i * interval` would jump back to the start
        # of the video and undo the exclusion of the first frames.
        fps = video.get(cv.CAP_PROP_FPS)
        if fps > 0:
            start_msec = first_frame / fps * 1000
        else:
            # Frame rate not reported: fall back to the position after the seek
            start_msec = video.get(cv.CAP_PROP_POS_MSEC)

        frames = []
        samples_taken = 0
        success, frame = video.read()
        while success and video.get(cv.CAP_PROP_POS_FRAMES) < last_frame:
            # Save current frame
            frames.append(cv.resize(frame, (target_width, target_height)))
            samples_taken += 1

            # Exit the loop if the video is unrealistically long
            assert samples_taken <= 500, "Maximum number of frames exceeded"

            # Jump <interval> milliseconds forward from the previous sample
            video.set(cv.CAP_PROP_POS_MSEC, start_msec + samples_taken * interval)
            success, frame = video.read()
    finally:
        # Always free the decoder, even if resizing or the assertion fails
        video.release()

    return frames[1:-1]


def extract_empty_frames(video_path, interval, target_height, target_width):
    """Return a resized frame from the start and from the end of a video.

    These frames are the ones most likely to show an empty scene. The second
    frame is read at index ``total_frames - 2``, i.e. the second-to-last
    frame (presumably because the very last frame may fail to decode - TODO
    confirm).

    Args:
        video_path: Path of the video file to open.
        interval: Unused; accepted so the signature matches extract_frames.
        target_height: Height in pixels of the returned frames.
        target_width: Width in pixels of the returned frames.

    Returns:
        List ``[first_frame, end_frame]`` of resized frames.

    Raises:
        OSError: If either frame cannot be read from the video.
    """
    video = cv.VideoCapture(video_path)
    try:
        total_frames = video.get(cv.CAP_PROP_FRAME_COUNT)

        # First frame of the video
        success, start_frame = video.read()
        if not success:
            raise OSError(f"Could not read the first frame of '{video_path}'")

        # Skip close to the end; clamp so very short videos do not seek below 0
        video.set(cv.CAP_PROP_POS_FRAMES, max(total_frames - 2, 0))
        success, end_frame = video.read()
        if not success:
            raise OSError(f"Could not read the final frame of '{video_path}'")
    finally:
        # Always free the decoder, even when a read fails
        video.release()

    size = (target_width, target_height)
    return [cv.resize(start_frame, size), cv.resize(end_frame, size)]

In [86]:
def mse(frame1, frame2):
    """Compute the mean squared error between two equally shaped frames.

    Both inputs are widened to int32 first so that subtracting and squaring
    uint8 pixel values cannot wrap around.

    Raises:
        AssertionError: If the two frames differ in shape.
    """
    widened_a = frame1.astype(np.int32)
    widened_b = frame2.astype(np.int32)

    assert widened_a.shape == widened_b.shape, "Shapes do not match"

    difference = widened_a - widened_b
    return np.mean(np.square(difference))

In [87]:
def pick_frames_sliding(frames, threshold, max_frames=20):
    """Pick frames that differ enough from the previously picked one.

    The first frame is the initial reference. A frame is picked whenever its
    MSE against the reference exceeds ``threshold``; the picked frame then
    becomes the new reference.

    Args:
        frames: Sequence of equally shaped frames.
        threshold: MSE a frame has to exceed to be picked.
        max_frames: Upper bound on the number of returned frames. When more
            frames were picked, ``max_frames`` of them are kept, spread evenly
            from the first to the last picked frame.

    Returns:
        List of picked frames, at most ``max_frames`` long.

    Raises:
        ValueError: If ``max_frames`` is smaller than 1.
        AssertionError: If no frame was picked (including empty ``frames``).
    """
    if max_frames < 1:
        raise ValueError("max_frames must be at least 1")

    picked_frames = []
    reference_frame = None
    for frame in frames:
        if reference_frame is None:
            # Set first frame as reference frame
            reference_frame = frame
        if mse(frame, reference_frame) > threshold:
            # Save the current frame and overwrite ref. frame
            reference_frame = frame
            picked_frames.append(frame)

    # Throw an error if no frames were picked
    assert len(picked_frames) > 0, "0 frames picked from the video"

    # Too many frames: keep max_frames evenly spaced ones. A plain
    # `[::len // max_frames]` slice would keep everything for counts
    # between max_frames + 1 and 2 * max_frames - 1.
    picked_count = len(picked_frames)
    if picked_count > max_frames:
        if max_frames == 1:
            picked_frames = picked_frames[:1]
        else:
            picked_frames = [
                picked_frames[slot * (picked_count - 1) // (max_frames - 1)]
                for slot in range(max_frames)
            ]

    return picked_frames


In [88]:
def process_video(video_path, output_path, extract_func, pick_func):
    """Extract, crop, pick and save frames of a single video.

    Args:
        video_path: Path of the video file.
        output_path: Folder the PNG files are written to; created if missing.
        extract_func: Callable ``(video_path, interval, height, width)``
            returning a list of frames.
        pick_func: Callable ``(frames, threshold)`` returning the frames to
            save.

    Raises:
        OSError: If a frame cannot be written to ``output_path``.
    """
    # Extract frames and crop each of them with the module-level margins
    frames = [
        crop_frame(frame, TOP_CROP, BOTTOM_CROP, LEFT_CROP, RIGHT_CROP)
        for frame in extract_func(video_path, INTERVAL, FRAME_HEIGHT, FRAME_WIDTH)
    ]
    picked_frames = pick_func(frames, MSE_THRESHOLD)

    # Video name without folder and extension (works for any extension length)
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    # cv.imwrite reports failure through its return value instead of raising,
    # so make sure the folder exists and check every write
    os.makedirs(output_path, exist_ok=True)
    for index, frame in enumerate(picked_frames):
        target = f'{output_path}/{video_name}_frame{index}.png'
        if not cv.imwrite(target, frame):
            raise OSError(f"Failed to write frame to '{target}'")
    

In [89]:
def identity(x, *_ignored_args, **_ignored_kwargs):
    """Return ``x`` unchanged; any further arguments are accepted and ignored."""
    unchanged = x
    return unchanged

def process_folder(folder_path, output_path, start_index = 0,
                   extract_func=None, pick_func=None):
    """Process every video file of a folder and save the picked frames.

    Args:
        folder_path: Folder containing the video files.
        output_path: Folder the frames are written to.
        start_index: Index (in sorted file order) of the first video to
            process; lets an interrupted run be resumed.
        extract_func: Frame extraction callable handed to process_video;
            defaults to extract_empty_frames.
        pick_func: Frame picking callable handed to process_video; defaults
            to identity.
    """
    if extract_func is None:
        extract_func = extract_empty_frames
    if pick_func is None:
        pick_func = identity

    # os.listdir returns entries in arbitrary order: sort them so start_index
    # always refers to the same video, and skip sub-folders
    video_paths = [
        f"{folder_path}/{entry}"
        for entry in sorted(os.listdir(folder_path))
        if os.path.isfile(f"{folder_path}/{entry}")
    ]
    total = len(video_paths)

    # Process each video
    for index in range(start_index, total):
        process_video(video_paths[index], output_path,
                      extract_func=extract_func,
                      pick_func=pick_func)
        done = index + 1
        print(f"{done}/{total} { round(done / total * 100, 2) }%") # Print progress

In [90]:
# Run the folder pipeline on DATASET_PATH and write the saved frames to OUTPUT_PATH.
# process_folder prints one progress line per processed video.
process_folder(DATASET_PATH, OUTPUT_PATH)

1/84 1.19%
2/84 2.38%
3/84 3.57%
4/84 4.76%
5/84 5.95%
6/84 7.14%
7/84 8.33%
8/84 9.52%
9/84 10.71%
10/84 11.9%
11/84 13.1%
12/84 14.29%
13/84 15.48%
14/84 16.67%
15/84 17.86%
16/84 19.05%
17/84 20.24%
18/84 21.43%
19/84 22.62%
20/84 23.81%
21/84 25.0%
22/84 26.19%
23/84 27.38%
24/84 28.57%
25/84 29.76%
26/84 30.95%
27/84 32.14%
28/84 33.33%
29/84 34.52%
30/84 35.71%
31/84 36.9%
32/84 38.1%
33/84 39.29%
34/84 40.48%
35/84 41.67%
36/84 42.86%
37/84 44.05%
38/84 45.24%
39/84 46.43%
40/84 47.62%
41/84 48.81%
42/84 50.0%
43/84 51.19%
44/84 52.38%
45/84 53.57%
46/84 54.76%
47/84 55.95%
48/84 57.14%
49/84 58.33%
50/84 59.52%
51/84 60.71%
52/84 61.9%
53/84 63.1%
54/84 64.29%
55/84 65.48%
56/84 66.67%
57/84 67.86%
58/84 69.05%
59/84 70.24%
60/84 71.43%
61/84 72.62%
62/84 73.81%
63/84 75.0%
64/84 76.19%
65/84 77.38%
66/84 78.57%
67/84 79.76%
68/84 80.95%
69/84 82.14%
70/84 83.33%
71/84 84.52%
72/84 85.71%
73/84 86.9%
74/84 88.1%
75/84 89.29%
76/84 90.48%
77/84 91.67%
78/84 92.86%
79/84 94.05%
8