In [320]:
import cv2 as cv
import os
import numpy as np
from matplotlib import pyplot as plt

In [321]:
# Folder holding the source videos, and folder the picked frames are written to
DATASET_PATH = "dataset/Foxes"
OUTPUT_PATH = 'training_dataset'

# Single sample video, kept for test purposes
VIDEO_PATH = "sample_videos/cool_dog.mp4"

# Size (in pixels) each extracted frame is resized to
FRAME_WIDTH, FRAME_HEIGHT = 576, 324

# Pixels trimmed from each edge of a frame
TOP_CROP, BOTTOM_CROP = 100, 10
LEFT_CROP, RIGHT_CROP = 0, 50

# Time step, in milliseconds, between frames sampled by extract_frames
INTERVAL = 200

# Percentage of frames ignored at the start and at the end of each video
FRAME_EXCLUSION_FIRST = 30
FRAME_EXCLUSION_LAST = 40

# MSE value a frame must exceed in pick_frames_sliding to be kept
MSE_THRESHOLD = 20

In [322]:
def crop_frame(frame, top_crop, bottom_crop, left_crop, right_crop):
    """Return `frame` with the given number of pixels trimmed from each edge.

    `frame` is indexed as (rows, columns, ...): `top_crop`/`bottom_crop` trim
    rows and `left_crop`/`right_crop` trim columns.
    """
    height, width = frame.shape[0], frame.shape[1]
    kept_rows = slice(top_crop, height - bottom_crop)
    kept_cols = slice(left_crop, width - right_crop)
    return frame[kept_rows, kept_cols]


def extract_frames(video_path, interval, target_height, target_width):
    """Sample resized frames from the middle portion of a video.

    One frame is taken every `interval` milliseconds, starting at the frame
    that lies FRAME_EXCLUSION_FIRST percent into the video and stopping
    before the last FRAME_EXCLUSION_LAST percent of it.

    Args:
        video_path: path of the video file to open.
        interval: time step between sampled frames, in milliseconds.
        target_height: height each frame is resized to.
        target_width: width each frame is resized to.

    Returns:
        List of resized frames; empty if no frame could be read.

    Raises:
        AssertionError: if more than 500 frames would be sampled.
    """
    # Upper bound on sampled frames; guards against unrealistically long videos
    max_reads = 500

    video = cv.VideoCapture(video_path)
    try:
        total_frames = video.get(cv.CAP_PROP_FRAME_COUNT)

        # Frame indices bounding the portion of the video that is kept
        first_frame = (total_frames * FRAME_EXCLUSION_FIRST) // 100
        last_frame = (total_frames * (100 - FRAME_EXCLUSION_LAST)) // 100

        # Seeks below are expressed in milliseconds, so the start frame has to
        # be converted to a timestamp; otherwise sampling restarts at 0 ms and
        # the leading frames are not excluded at all.
        fps = video.get(cv.CAP_PROP_FPS)
        if fps > 0:
            start_msec = first_frame * 1000.0 / fps
        else:
            # No usable frame rate reported: seek by frame index and ask the
            # capture for the matching timestamp instead.
            video.set(cv.CAP_PROP_POS_FRAMES, first_frame)
            start_msec = video.get(cv.CAP_PROP_POS_MSEC)

        frames = []
        i = 0
        while True:
            # AssertionError is kept as the exception type for compatibility
            if i >= max_reads:
                raise AssertionError("Maximum number of frames exceeded")

            # Jump to the i-th sample position (<interval> milliseconds apart)
            video.set(cv.CAP_PROP_POS_MSEC, start_msec + i * interval)
            success, frame = video.read()

            # Stop at the end of the stream or once the excluded tail is reached
            if not success or video.get(cv.CAP_PROP_POS_FRAMES) >= last_frame:
                break

            frames.append(cv.resize(frame, (target_width, target_height)))
            i += 1

        return frames
    finally:
        # Always free the decoder / file handle, even if an error occurred
        video.release()

In [323]:
def mse(frame1, frame2):
    """Return the mean squared error between two frames of the same shape.

    Raises AssertionError if the shapes differ.
    """
    # Widen to int32 before subtracting so uint8 pixel values cannot wrap around
    wide1, wide2 = frame1.astype(np.int32), frame2.astype(np.int32)

    assert wide1.shape == wide2.shape, "Shapes do not match"

    diff = wide1 - wide2
    return np.mean(diff * diff)

In [324]:
def pick_frames_sliding(frames, threshold, max_frames=20):
    """Pick the frames that differ enough from a sliding reference frame.

    The first frame is the initial reference. Every frame whose MSE with the
    current reference exceeds `threshold` is picked and becomes the new
    reference. If more than `max_frames` frames are picked, they are thinned
    to exactly `max_frames` evenly spaced ones.

    Args:
        frames: sequence of frames, all of the same shape.
        threshold: MSE value a frame must exceed to be picked.
        max_frames: upper bound on the number of returned frames.

    Returns:
        List of picked frames, in their original order.

    Raises:
        ValueError: if `max_frames` is smaller than 1.
        AssertionError: if `frames` is empty or no frame was picked.
    """
    if max_frames < 1:
        raise ValueError("max_frames must be at least 1")

    # An empty input cannot yield any frame; fail with the same error as the
    # "nothing picked" case rather than an IndexError on frames[0].
    if len(frames) == 0:
        raise AssertionError("0 frames picked from the video")

    # Set first frame as reference frame
    reference_frame = frames[0]

    picked_frames = []
    for frame in frames:
        if mse(frame, reference_frame) > threshold:
            # Save the current frame and overwrite ref. frame
            reference_frame = frame
            picked_frames.append(frame)

    # Throw an error if no frames were picked
    if len(picked_frames) == 0:
        raise AssertionError("0 frames picked from the video")

    # Thin to evenly spaced frames; a floor-based stride would keep up to
    # 2 * max_frames - 1 of them.
    count = len(picked_frames)
    if count > max_frames:
        picked_frames = [picked_frames[(k * count) // max_frames]
                         for k in range(max_frames)]

    return picked_frames


In [325]:
def process_video(video_path, output_path):
    """Pick frames from a video and save them as PNG files.

    Frames are extracted, cropped and filtered with the module-level settings,
    then written to `output_path` as `<video name>_frame<i>.png`.

    Args:
        video_path: path of the video file to process.
        output_path: folder the frames are written to; created if missing.

    Raises:
        OSError: if a frame could not be written.
    """
    # Choose frames with moving object
    frames = extract_frames(video_path, INTERVAL, FRAME_HEIGHT, FRAME_WIDTH)
    cropped_frames = [
        crop_frame(frame, TOP_CROP, BOTTOM_CROP, LEFT_CROP, RIGHT_CROP)
        for frame in frames
    ]
    picked_frames = pick_frames_sliding(cropped_frames, MSE_THRESHOLD)

    # File name without folder and extension, whatever the extension length
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    # cv.imwrite does not create missing folders
    os.makedirs(output_path, exist_ok=True)

    # Save frames
    for i, frame in enumerate(picked_frames):
        frame_path = f'{output_path}/{video_name}_frame{i}.png'
        # imwrite reports failure through its return value, not an exception
        if not cv.imwrite(frame_path, frame):
            raise OSError(f"Could not write frame to {frame_path}")
    

In [326]:
def process_folder(folder_path, output_path, start_index = 0):
    """Pick frames from each video in a folder and save them.

    Videos are processed in sorted file-name order so that `start_index`
    refers to the same video on every run (os.listdir returns entries in
    arbitrary order), which makes resuming an interrupted run reliable.

    Args:
        folder_path: folder containing the videos.
        output_path: folder the picked frames are written to.
        start_index: position in the sorted listing to start from.
    """
    video_paths = [f"{folder_path}/{video}"
                   for video in sorted(os.listdir(folder_path))]
    total = len(video_paths)

    for index in range(start_index, total):
        process_video(video_paths[index], output_path)

        # Report progress as "<processed>/<total> <percent>%"
        done = index + 1
        print(f"{done}/{total} { round(done / total * 100, 2) }%")

In [327]:
# Process the dataset folder, starting from the video at index 134
process_folder(folder_path=DATASET_PATH, output_path=OUTPUT_PATH, start_index=134)

135/294 45.92%
136/294 46.26%
137/294 46.6%
138/294 46.94%
139/294 47.28%
140/294 47.62%
141/294 47.96%
142/294 48.3%
143/294 48.64%
144/294 48.98%
145/294 49.32%
146/294 49.66%
147/294 50.0%
148/294 50.34%
149/294 50.68%
150/294 51.02%
151/294 51.36%
152/294 51.7%
153/294 52.04%
154/294 52.38%
155/294 52.72%
156/294 53.06%
157/294 53.4%
158/294 53.74%
159/294 54.08%
160/294 54.42%
161/294 54.76%
162/294 55.1%
163/294 55.44%
164/294 55.78%
165/294 56.12%
166/294 56.46%
167/294 56.8%
168/294 57.14%
169/294 57.48%
170/294 57.82%
171/294 58.16%
172/294 58.5%
173/294 58.84%
174/294 59.18%
175/294 59.52%
176/294 59.86%
177/294 60.2%
178/294 60.54%
179/294 60.88%
180/294 61.22%
181/294 61.56%
182/294 61.9%
183/294 62.24%
184/294 62.59%
185/294 62.93%
186/294 63.27%
187/294 63.61%
188/294 63.95%
189/294 64.29%
190/294 64.63%
191/294 64.97%
192/294 65.31%
193/294 65.65%
194/294 65.99%
195/294 66.33%
196/294 66.67%
197/294 67.01%
198/294 67.35%
199/294 67.69%
200/294 68.03%
201/294 68.37%
202/2