In [None]:
!pip install yt-dlp opencv-python

In [19]:
import subprocess
import cv2
import os

# Root directory for extracted frames; process_video() creates one
# "video_<index>" subfolder beneath it per processed video.
# NOTE(review): the path looks like a Colab Google Drive mount -- confirm the
# drive is mounted before running the extraction cells.
base_folder = (
    "/content/drive/MyDrive/hornet_project/data/top-down/hornets/"
)

### Defining videos

In [20]:
# Each entry describes one clip to sample frames from:
#   url      - address handed to yt-dlp
#   min_time - first timestamp to sample (seconds)
#   max_time - last timestamp to sample (seconds); process_video() clamps it
#              to the video's duration, so 999 effectively means "to the end"
# The same URL may appear more than once to sample several separate windows.
videos = [
    {"url": "https://www.youtube.com/watch?v=pyi2M1GZ6qc", "min_time": 0, "max_time": 999},
    {"url": "https://www.youtube.com/shorts/U2LbM_gsRGw", "min_time": 0, "max_time": 4},
    {"url": "https://www.youtube.com/watch?v=5UbrYp_pgKc", "min_time": 0, "max_time": 5},
    {"url": "https://www.youtube.com/watch?v=5UbrYp_pgKc", "min_time": 7, "max_time": 999},
]


### Extract frames

In [21]:
def process_video(entry, index, interval=0.5):
    """
    Download one video with yt-dlp and save still frames from it as JPEGs.

    Frames are sampled every `interval` seconds, starting at
    entry["min_time"] and stopping at entry["max_time"] or at the end of the
    video, whichever comes first. They are written to
    <base_folder>/video_<index>/frame_00000.jpg, frame_00001.jpg, ...

    entry    = one dict from the videos list, with keys "url", "min_time"
               and "max_time" (times in seconds)
    index    = numeric index (1, 2, 3...) used for the downloaded file name
               and the output folder name
    interval = frame extraction interval (seconds); must be > 0

    Raises:
        ValueError: if interval is not positive.
        subprocess.CalledProcessError: if yt-dlp exits with an error.
        RuntimeError: if OpenCV cannot open the downloaded file.
        OSError: if a frame cannot be written to the output folder.
    """
    # A non-positive step would never advance past max_time (infinite loop).
    if interval <= 0:
        raise ValueError(f"interval must be positive, got {interval!r}")

    url       = entry["url"]
    min_time  = entry["min_time"]
    max_time  = entry["max_time"]

    print(f"\n=== Processing Video {index} ===")
    print("URL:", url)
    print(f"Time range: {min_time}s → {max_time}s")

    # ------------------------------
    # A. Download video
    # ------------------------------
    video_file = f"video_{index}.mp4"

    # check=True: stop here if the download fails instead of carrying on with
    # a missing (or stale) file.
    subprocess.run(
        [
            "yt-dlp",
            "-f", "mp4",
            "-o", video_file,
            url,
        ],
        check=True,
    )

    # ------------------------------
    # B. Extract frames
    # ------------------------------
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open downloaded video: {video_file}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)

        # Prevent times beyond video length. If the container reports no
        # usable fps / frame count, fall back to max_time and let a failed
        # read end the loop (avoids dividing by zero).
        if fps > 0 and frame_count > 0:
            max_t = min(max_time, frame_count / fps)
        else:
            max_t = max_time

        # Output folder for this video
        out_folder = os.path.join(base_folder, f"video_{index}")
        os.makedirs(out_folder, exist_ok=True)

        frame_num = 0
        while True:
            # Derive the timestamp from the frame counter rather than
            # accumulating `t += interval`, so rounding error cannot build up
            # and skip the sample that falls exactly on max_t.
            t = min_time + frame_num * interval
            if t > max_t:
                break

            cap.set(cv2.CAP_PROP_POS_MSEC, t * 1000)
            success, frame = cap.read()
            if not success:
                break

            filename = os.path.join(out_folder, f"frame_{frame_num:05d}.jpg")
            # imwrite reports failure through its return value; do not claim
            # "Saved" for a frame that was never written.
            if not cv2.imwrite(filename, frame):
                raise OSError(f"Could not write frame to {filename}")
            print("Saved:", filename)

            frame_num += 1
    finally:
        # Always free the capture handle, even when extraction fails midway.
        cap.release()

    print(f"Finished video {index}")

In [None]:
# Example usage: extract a frame every 0.1 s from every entry in `videos`.
# Output folders are numbered from 1 in list order (video_1, video_2, ...),
# so entries added to `videos` are picked up without editing this cell.
for video_index, video_entry in enumerate(videos, start=1):
    process_video(video_entry, index=video_index, interval=0.1)