# In [1]:
import os
from hashlib import sha1

import cv2


def split_video_into_frames(path_video, path_dir_output="frames", saving_method="hash"):
    """ Split a video file into the frames composing it.

    Each frame is written as a JPEG file into ``path_dir_output``, which is created if it does not exist yet.
    If no frame can be read from ``path_video``, no frame file is written.

    :param path_video: The path to the video from which the frames are going to be extracted.
    :param path_dir_output: The output directory of the frames.
    :param saving_method: The method used to extract the frames. Can be 'hash' or 'iterative'. 'iterative' will
    extract and save all frames, ordered by their appearance in the video, as ``frame_<index>.jpg``. 'hash' will
    extract a frame, hash it with SHA-1, and save it as ``<hash>.jpg`` only if that hash has not already been seen
    during this call. This is less disk-intensive.
    :raises ValueError: If ``saving_method`` is neither 'hash' nor 'iterative'.

    TODO: parameterize the hashing algorithm used, add more hashing algorithms.
    """
    # Reject an unsupported method up front, before opening the video or touching the filesystem.
    if saving_method not in ("hash", "iterative"):
        raise ValueError("Unknown saving method : {}".format(saving_method))

    # cv2.imwrite will not create a missing directory, so make sure it exists first.
    os.makedirs(path_dir_output, exist_ok=True)

    vidcap = cv2.VideoCapture(path_video)
    try:
        success, image = vidcap.read()

        if saving_method == "hash":
            # Hashes of the frames already written during this call.
            hash_values = set()
            while success:
                hash_value = sha1(image.data).hexdigest()

                if hash_value not in hash_values:
                    hash_values.add(hash_value)
                    cv2.imwrite(os.path.join(path_dir_output, hash_value + ".jpg"), image)
                success, image = vidcap.read()
        else:  # saving_method == "iterative"
            i_frame = 0
            while success:
                cv2.imwrite(os.path.join(path_dir_output, "frame_%d.jpg" % i_frame), image)
                success, image = vidcap.read()
                i_frame += 1
    finally:
        # Always free the capture handle, even if hashing or writing a frame raised.
        vidcap.release()

        
# Example run: extract the frames of a sample clip, leaving the output
# directory and saving method at their default values.
split_video_into_frames(path_video="nothing_of_interest.mp4")