In [17]:
import os
import shutil
from tqdm import tqdm
from cv2 import VideoCapture, CAP_PROP_FPS, imwrite, destroyAllWindows
from constants import PATHS_FILE, SCORES_FILE, FRAMES_PATH, CLIPS_PATH

In [2]:
def add_to_file(path: str, new_content: str):
    """
        Append text to the file provided by the path. When the file already has
        content, the new text is written on a new line.
        path : str
            The path of where the file is located (the file is created if missing)
        new_content : str
            The new text content to append to the file
    """
    # Decide on the separator before opening: nothing has been written yet, so
    # the on-disk size reflects only the pre-existing content.
    needs_newline = os.path.exists(path) and os.path.getsize(path) > 0
    with open(path, "a") as f:
        if needs_newline:
            f.write("\n")
        f.write(new_content)
    # No explicit close needed: leaving the with-block already closed the file.

In [10]:
def remove_file_contents():
    """
        Empty the paths file and the scores file, leaving both in place with
        zero length
    """
    for target in (PATHS_FILE, SCORES_FILE):
        # Opening in write mode truncates the file (or creates it when missing).
        open(target, "w").close()

In [15]:
def delete_all_frames():
    """
        Remove all the frames of clips located in the frames folders, i.e. every
        sub-directory of the "0" and "1" folders under FRAMES_PATH.
        Loose files and symlinks inside those folders are left untouched:
        shutil.rmtree raises on anything that is not a real directory, which
        would otherwise abort the whole clean-up.
    """
    for boolean in range(2):
        path = os.path.join(FRAMES_PATH, str(boolean))
        for entry in os.listdir(path):
            target = os.path.join(path, entry)
            # Only real directories are clip frame folders.
            if os.path.isdir(target) and not os.path.islink(target):
                shutil.rmtree(target)

In [5]:
def get_frame_rate(path: str):
    """
        Get the frame rate per second of a video given its path
        path : str
            The path of where the video is located
        Returns the value OpenCV reports for CAP_PROP_FPS, truncated to an int.
        NOTE(review): this is presumably 0 when the video could not be opened
        -- confirm against the OpenCV documentation.
    """
    cap = VideoCapture(path)
    try:
        return int(cap.get(CAP_PROP_FPS))
    finally:
        # Always free the underlying video handle, even if the query fails.
        cap.release()

In [24]:
def extract_frames(path: str, boolean: str, max_frames: int = 5):
    """
        Extract frames from the video located at path, and put them into a new
        folder (named after the video file) inside the frames folder of the
        given class. One frame is saved roughly every half second of video,
        up to max_frames frames. The new folder is appended to PATHS_FILE and
        the class to SCORES_FILE.
        path : str
            The path of where the video is located
        boolean : str
            The classification of the video; it is used as the sub-folder name
            under FRAMES_PATH (load_data passes "0" or "1")
        max_frames : int
            The maximum number of frames to save (default 5)
    """
    cap = VideoCapture(path)
    try:
        # Read the rate from the capture that is already open instead of
        # opening the same video a second time.
        frame_rate = int(cap.get(CAP_PROP_FPS))
        # A reported rate of 0 or 1 would make the step 0 and the modulo below
        # raise ZeroDivisionError, so fall back to saving every frame.
        step = max(1, frame_rate // 2)

        # File name without directory or extension, whatever the separator or
        # extension length.
        clip_name = os.path.splitext(os.path.basename(path))[0]
        new_folder = os.path.join(FRAMES_PATH, boolean, clip_name)
        os.mkdir(new_folder)
        # NOTE(review): the folder and index entries are recorded even when the
        # video cannot be opened (unchanged behavior) -- confirm that downstream
        # code tolerates empty frame folders.
        add_to_file(PATHS_FILE, new_folder)
        add_to_file(SCORES_FILE, boolean)

        i, frames = 0, 0
        while cap.isOpened() and frames < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            if i % step == 0:
                # The saved file is named after the frame's index in the video.
                imwrite(os.path.join(new_folder, f"{i}.jpg"), frame)
                frames += 1
            i += 1
    finally:
        # Release the video handle even if folder creation or a write failed.
        cap.release()
    destroyAllWindows()

In [25]:
def load_data():
    """
        Run the data loader: clear the index files and all previously extracted
        frames, then extract frames for every clip found in each category and
        class folder under CLIPS_PATH.
    """
    remove_file_contents()
    delete_all_frames()
    clip_categories = ["education", "politics", "sports"]
    for category in clip_categories:
        for boolean in range(2):
            path = os.path.join(CLIPS_PATH, category, str(boolean))
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # generated index files reproducible between runs.
            candidates = [
                os.path.join(path, filename)
                for filename in sorted(os.listdir(path))
            ]
            # Only regular files can be clips; skip nested directories.
            clips = [clip for clip in candidates if os.path.isfile(clip)]
            for clip in tqdm(clips, desc=f"{category}/{boolean}"):
                extract_frames(clip, str(boolean))

In [26]:
# Rebuild the dataset from scratch: load_data() first empties the index files
# and deletes the existing frame folders, then re-extracts frames from every clip.
load_data()

 88%|████████▊ | 273/312 [00:19<00:03,  9.85it/s]OpenCV: Couldn't read video stream from file "/Users/kelvinyip7/Desktop/Code/CS490/data/clips/education/0/KhB7ctAG7vE_1413_1408.mp4"
OpenCV: Couldn't read video stream from file "/Users/kelvinyip7/Desktop/Code/CS490/data/clips/education/0/KhB7ctAG7vE_1413_1408.mp4"
100%|██████████| 312/312 [00:22<00:00, 13.86it/s]
100%|██████████| 214/214 [00:45<00:00,  4.68it/s]
100%|██████████| 683/683 [03:34<00:00,  3.19it/s]
 18%|█▊        | 146/801 [00:40<02:54,  3.75it/s][h264 @ 0x7fe127d271c0] mmco: unref short failure
 43%|████▎     | 348/801 [01:37<02:16,  3.31it/s][h264 @ 0x7fe1278f87c0] mmco: unref short failure
 63%|██████▎   | 508/801 [02:24<01:11,  4.08it/s][h264 @ 0x7fe123215080] mmco: unref short failure
[h264 @ 0x7fe123215080] mmco: unref short failure
100%|██████████| 801/801 [03:50<00:00,  3.47it/s]
100%|██████████| 94/94 [00:24<00:00,  3.81it/s]
100%|██████████| 142/142 [00:39<00:00,  3.58it/s]
