In [None]:
import os 
import cv2
import numpy
import typing

In [None]:
# Directory (relative to the working directory) whose entries are listed by
# load_videos(); the resulting paths are later split into training and
# validation subsets.
TRAIN_VIDEOS_PATH = "experiments/current_experiment/data/videos/"

# Selecting the number of frames for the training and validation sets

In [None]:
# Number of frames requested for the training and the validation set.
TRAINING_SET_LENGTH = 2_000
VALIDATION_SET_LENGTH = 1_000

# Video Processing methods

In [None]:
def load_videos():
    """
    Collect the paths of the video files stored in TRAIN_VIDEOS_PATH.

    Only regular files are returned (sub-directories are skipped) and the
    result is sorted by file name: os.listdir() yields entries in arbitrary
    order, which would make any later split of the paths non-reproducible.

    Returns:
        - list of paths, each one being TRAIN_VIDEOS_PATH joined with
        the name of a file found in that directory
    """
    paths = []
    for file_video in sorted(os.listdir(TRAIN_VIDEOS_PATH)):
        full_path = os.path.join(TRAIN_VIDEOS_PATH, file_video)
        # a directory cannot be opened as a video, so do not report it
        if os.path.isfile(full_path):
            paths.append(full_path)
    return paths

def _pick_frame_indexes(total_frames: int, number_of_imgs: int) -> typing.List[int]:
    """Draw distinct frame indexes from [0, total_frames) and return them sorted."""
    amount = min(number_of_imgs, total_frames)
    if amount <= 0:
        return []
    # replace=False guarantees every index is selected at most once
    picked = numpy.random.choice(total_frames, size=amount, replace=False)
    return sorted(int(idx) for idx in picked)


def _extract_frames_from_video(video_path: str, number_of_imgs: int) -> list:
    """Read up to `number_of_imgs` randomly chosen frames from a single video file."""
    video = cv2.VideoCapture()
    try:
        # check the result of open() before querying any property
        if not video.open(filename=video_path):
            raise RuntimeError("Failed to open video, file may be invalid")

        total_frames = int(video.get(propId=cv2.CAP_PROP_FRAME_COUNT))
        wanted_idxs = _pick_frame_indexes(total_frames, number_of_imgs)

        output_frames = []
        curr_idx = 0
        # The video is decoded sequentially; since the wanted indexes are
        # sorted, the next index to match is always the one at position
        # len(output_frames).
        while len(output_frames) < len(wanted_idxs):
            extracted, curr_frame = video.read()
            if not extracted:
                raise RuntimeError("Failed to extract video frame, file may be damaged")
            if curr_idx == wanted_idxs[len(output_frames)]:
                output_frames.append(curr_frame)
            curr_idx = curr_idx + 1
        return output_frames
    finally:
        # release the capture even when opening or decoding fails
        video.release()


def extract_quality_frames(
    video_url: typing.Union[str, typing.Sequence[str]],
    number_of_imgs: int
):
    """
    Function for extracting frames from a video file
    (or from a collection of video files). Randomly selects
    distinct frame indexes within the range of available frames
    and then reads the video sequentially, keeping the frames
    at the selected positions.

    Random indexes are drawn from numpy's global random state,
    so results are reproducible only if that state is seeded
    by the caller.

    Args:
        - video_url - path to a video file, or a sequence
        (list, tuple, numpy array) of such paths. When several
        paths are given, `number_of_imgs` is divided between them
        as evenly as possible.
        - number_of_imgs - total number of images to extract.
        A video containing fewer frames than requested from it
        contributes only the frames it has.
    Returns:
        - list of image frames, ordered by video and then by
        position inside the video
    Raises:
        - RuntimeError - if a video cannot be opened or
        a frame cannot be read
    """
    if isinstance(video_url, (str, os.PathLike)):
        sources = [video_url]
    else:
        # convert first: truth-testing a numpy array directly is ambiguous
        sources = list(video_url)

    if number_of_imgs <= 0 or not sources:
        return []

    # the first `extra` videos provide one frame more than the others
    base, extra = divmod(number_of_imgs, len(sources))

    output_frames = []
    for position, source in enumerate(sources):
        quota = base + (1 if position < extra else 0)
        if quota == 0:
            continue
        # str() turns numpy string scalars into plain Python strings
        source_path = str(os.fspath(source))
        output_frames.extend(_extract_frames_from_video(source_path, quota))
    return output_frames

# Loading videos

In [None]:
# Keep the discovered paths in a numpy array so that they can later be
# selected with arrays of positions.
discovered_videos = load_videos()
video_paths = numpy.asarray(discovered_videos)

# Splitting videos into subsets of data for training and validation sets

In [None]:
from sklearn.model_selection import train_test_split

# Split the positions of the videos (not the paths themselves) at random;
# test_size=0.4 sets the proportion assigned to the validation part.
all_indexes = numpy.arange(len(video_paths))
train_indexes, val_indexes = train_test_split(all_indexes, test_size=0.4)

# Select the paths that belong to each part.
train_videos, validation_videos = video_paths[train_indexes], video_paths[val_indexes]

# Extracting video frames for building the training and validation sets

In [None]:
# Request TRAINING_SET_LENGTH frames for the training videos.
train_video_frames = extract_quality_frames(
    video_url=train_videos,
    number_of_imgs=TRAINING_SET_LENGTH,
)
# Request VALIDATION_SET_LENGTH frames for the validation videos.
validation_video_frames = extract_quality_frames(
    video_url=validation_videos,
    number_of_imgs=VALIDATION_SET_LENGTH,
)

# Defining directories for storing training and validation data

In [None]:
# os.mkdir() accepts no `exists_ok` argument and returns None, so the path
# strings are kept in their own variables and the directories are created
# with os.makedirs(), which also creates missing parent directories and
# tolerates directories that already exist.
train_path = "experiments/current_experiment/data/train_data/"
validation_path = "experiments/current_experiment/data/validation_data/"

os.makedirs(train_path, exist_ok=True)
os.makedirs(validation_path, exist_ok=True)

# Saving training and validation sets to the corresponding directories

In [None]:
# Write every training frame into train_path as img<position>.png.
for idx, img in enumerate(train_video_frames):
    file_name = "img%s.png" % str(idx)
    full_path = os.path.join(train_path, file_name)
    cv2.imwrite(filename=full_path, img=img)

In [None]:
# Write every validation frame into validation_path as img<position>.png.
# The frames must come from validation_video_frames: iterating over
# train_video_frames here would fill the validation directory with
# copies of the training images.
for idx, img in enumerate(validation_video_frames):
    full_path = os.path.join(
        validation_path,
        "img%s.png" % str(idx)
    )
    cv2.imwrite(filename=full_path, img=img)