In [1]:
# Scratch cell: only prints a fixed greeting.
print("hi")

hi


In [2]:
import yt_dlp

def get_video_info(youtube_url: str) -> dict:
    """
    Resolve a YouTube URL to a direct video stream URL and its duration.

    Only metadata is fetched (``download=False``); the video itself is not
    downloaded.

    Args:
        youtube_url: The standard URL of the YouTube video.

    Returns:
        A dictionary with 'stream_url' (the direct stream URL) and
        'duration' (whole seconds, truncated with ``int``).

    Raises:
        ValueError: If the extracted metadata carries no direct stream URL
            or no duration, so the result could not be used for seeking.
    """
    ydl_opts = {
        # Ask for the best single-file MP4 so the result has one direct 'url'.
        'format': 'best[ext=mp4]',
        # A watch URL that also names a playlist should resolve to the single
        # video rather than being expanded into the playlist.
        'noplaylist': True,
    }

    # The 'with' statement releases yt-dlp's resources once metadata is read.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_url, download=False) or {}

    stream_url = info.get('url')
    if not stream_url:
        raise ValueError(f"No direct stream URL found for {youtube_url!r}.")

    raw_duration = info.get('duration')
    if raw_duration is None:
        # Without a duration the frame-sampling loop has no end point.
        raise ValueError(f"No duration reported for {youtube_url!r}.")
    duration = int(raw_duration)

    print(f"✅ Successfully found stream URL for a video of {duration} seconds.")

    return {
        'stream_url': stream_url,
        'duration': duration
    }

In [3]:
import os
import subprocess

def extract_frames(stream_url: str, duration: int, interval: int = 10,
                   output_dir: str = "video_frames_output"):
    """
    Uses FFmpeg to extract one frame from a video stream every `interval` seconds.

    One FFmpeg process is started per sampled timestamp, each seeking with
    `-ss` before reading the input. Extraction stops at the first failure.

    Args:
        stream_url: The direct video stream URL from get_video_info.
        duration: The video's duration in seconds from get_video_info.
        interval: Seconds between sampled frames (default 10).
        output_dir: Directory that receives the JPEG files; created if missing.

    Raises:
        ValueError: If `interval` is not a positive number of seconds.
    """
    if interval <= 0:
        raise ValueError("interval must be a positive number of seconds")

    os.makedirs(output_dir, exist_ok=True)
    print(f"Directory '{output_dir}' created. Starting frame extraction...")

    # Loop from the start to the end of the video, in `interval`-second steps.
    for i in range(0, duration, interval):
        # Format the timestamp into HH:MM:SS for FFmpeg
        timestamp = f"{i//3600:02d}:{(i%3600)//60:02d}:{i%60:02d}"
        output_filename = f"{output_dir}/frame_at_{i}_sec.jpg"

        command = [
            'ffmpeg',
            '-ss', timestamp,      # Seek to the timestamp before opening the input
            '-i', stream_url,      # The input is the direct stream URL
            '-vframes', '1',       # Extract exactly one frame
            '-q:v', '2',           # JPEG quality scale: lower value = higher quality
            '-y',                  # Overwrite the file if it already exists
            output_filename
        ]

        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except FileNotFoundError:
            # Raised by subprocess when the 'ffmpeg' executable cannot be launched.
            print("❌ FFmpeg executable not found. Install FFmpeg and make sure it is on PATH.")
            break
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to extract frame at {timestamp}.")
            print(f"FFmpeg Error: {e.stderr}")
            break  # Stop if there's an error
        print(f"📸 Successfully extracted: {output_filename}")
    else:
        # Reached only when the loop was never interrupted by a failure.
        print("\n🎉 Frame extraction complete!")
        return

    print("\n⚠️ Frame extraction stopped early because of the error above.")

In [7]:
# Video whose frames are sampled; passed to get_video_info in the next cell.
my_youtube_url = "https://www.youtube.com/watch?v=t9mRf2S5vDI" # Example video

In [8]:
# Look up the direct stream URL and duration for the example video.
# This contacts YouTube (see the [youtube] log lines in the output) but does not download the video.
video_info = get_video_info(my_youtube_url)

[youtube] Extracting URL: https://www.youtube.com/watch?v=t9mRf2S5vDI
[youtube] t9mRf2S5vDI: Downloading webpage
[youtube] t9mRf2S5vDI: Downloading tv client config
[youtube] t9mRf2S5vDI: Downloading tv player API JSON
[youtube] t9mRf2S5vDI: Downloading tv simply player API JSON
✅ Successfully found stream URL for a video of 1776 seconds.


In [9]:
# Sample one frame per 10-second step over the full duration.
# A separate FFmpeg process runs for every step, so this cell is slow on long videos.
extract_frames(video_info['stream_url'], video_info['duration'])

Directory 'video_frames_output' created. Starting frame extraction...
📸 Successfully extracted: video_frames_output/frame_at_0_sec.jpg
📸 Successfully extracted: video_frames_output/frame_at_10_sec.jpg
📸 Successfully extracted: video_frames_output/frame_at_20_sec.jpg


KeyboardInterrupt: 

In [5]:
import os
import subprocess
import yt_dlp

In [2]:
def get_video_info(youtube_url: str) -> dict:
    """
    Takes a YouTube URL and returns a dictionary containing the direct
    video stream URL and the video's duration in seconds.

    Only metadata is fetched (``download=False``).

    Args:
        youtube_url: The standard URL of the YouTube video.

    Returns:
        A dictionary with 'stream_url' and 'duration' (whole seconds).

    Raises:
        ValueError: If the metadata has no direct stream URL or no duration.
    """
    # 'noplaylist' keeps a watch URL that also names a playlist resolving to
    # the single video, so the result carries one direct 'url'.
    ydl_opts = {'format': 'best[ext=mp4]', 'noplaylist': True}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_url, download=False) or {}

    stream_url, raw_duration = info.get('url'), info.get('duration')
    if not stream_url:
        raise ValueError(f"No direct stream URL found for {youtube_url!r}.")
    if raw_duration is None:
        # Without a duration the frame-sampling loop has no end point.
        raise ValueError(f"No duration reported for {youtube_url!r}.")

    duration = int(raw_duration)
    print(f"✅ Successfully found stream URL for a video of {duration} seconds.")
    return {'stream_url': stream_url, 'duration': duration}

In [9]:
# Same example URL as the earlier assignment of my_youtube_url; set again for the cells that follow.
my_youtube_url = "https://www.youtube.com/watch?v=t9mRf2S5vDI" # Example video


In [10]:
# Fetch the stream URL and duration for the example video (contacts YouTube).
# NOTE(review): this is the same lookup that other cells store in `video_info`;
# `data` is the less descriptive name for it — consider using one name throughout.
data=get_video_info(my_youtube_url)

[youtube] Extracting URL: https://www.youtube.com/watch?v=t9mRf2S5vDI
[youtube] t9mRf2S5vDI: Downloading webpage
[youtube] t9mRf2S5vDI: Downloading tv client config
[youtube] t9mRf2S5vDI: Downloading tv player API JSON
[youtube] t9mRf2S5vDI: Downloading tv simply player API JSON
✅ Successfully found stream URL for a video of 1776 seconds.


In [11]:
# Display the lookup result.
# NOTE(review): the rendered output echoes the signed stream URL, whose query string
# includes the requesting IP (`ip=`) and an expiry (`expire=`) — clear this output before sharing.
data

{'stream_url': 'https://rr1---sn-ci5gup-itq6.googlevideo.com/videoplayback?expire=1756050612&ei=VOCqaN6DKPWd4t4P0PGWoA4&ip=2401%3A4900%3A3671%3Aee25%3Ae0f5%3Aee34%3Acdf0%3A9b55&id=o-AKXfDLlaHlm4ASRW2ugwrfokzaST76LDyhpwaGGiEEMe&itag=18&source=youtube&requiressl=yes&xpc=EgVo2aDSNQ%3D%3D&met=1756029012%2C&mh=cJ&mm=31%2C29&mn=sn-ci5gup-itq6%2Csn-ci5gup-h55e&ms=au%2Crdu&mv=m&mvi=1&pcm2cms=yes&pl=45&rms=au%2Cau&initcwndbps=365000&bui=AY1jyLNaIjzbteclS0q5IxQvX-v5rD71nZXPnzcl9Z9YxfVFY-DRvSfAcV6MV7U852yg5RYXOsbTXwt0&spc=l3OVKZI4b4Mgkppn6MInP2RQq1RP6zAduTh-zIr4X_k9AxVC-dAbfIfQQJjDouK8ikbonDRz&vprv=1&svpuc=1&mime=video%2Fmp4&ns=KMlJbkaKJsuEAclbID0rrMUQ&rqh=1&cnr=14&ratebypass=yes&dur=1776.442&lmt=1688740674731572&mt=1756028466&fvip=11&lmw=1&fexp=51557447%2C51565116%2C51565682%2C51580970&c=TVHTML5&sefc=1&txp=6219224&n=RmgXigaq20WStw&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cxpc%2Cbui%2Cspc%2Cvprv%2Csvpuc%2Cmime%2Cns%2Crqh%2Ccnr%2Cratebypass%2Cdur%2Clmt&sig=AJfQdSswRgIhAM2kDMju3D

In [None]:
def extract_frames_fast(stream_url: str, interval: int = 10,
                        output_dir: str = "video_frames_output_fast"):
    """
    Uses a single FFmpeg command to extract one frame every `interval` seconds.

    The whole stream is read once and the `fps` filter picks the frames, so
    only one FFmpeg process is started regardless of video length.

    Args:
        stream_url: The direct video stream URL.
        interval: Seconds between sampled frames (default 10).
        output_dir: Directory that receives the JPEG files; created if missing.

    Raises:
        ValueError: If `interval` is not a positive number of seconds.
    """
    if interval <= 0:
        raise ValueError("interval must be a positive number of seconds")

    os.makedirs(output_dir, exist_ok=True)
    print(f"Directory '{output_dir}' created. Starting FAST frame extraction...")

    command = [
        'ffmpeg',
        '-y',                                 # Overwrite frames left by an earlier run so the cell can be re-run
        '-i', stream_url,                     # Input from the direct stream URL
        '-vf', f'fps=1/{interval}',           # The filter to apply: 1 frame every `interval` seconds
        f'{output_dir}/frame_%04d.jpg'        # The output file pattern (FFmpeg fills in the counter)
    ]

    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        # Raised by subprocess when the 'ffmpeg' executable cannot be launched.
        print("❌ FFmpeg executable not found. Install FFmpeg and make sure it is on PATH.")
    except subprocess.CalledProcessError as e:
        print("❌ FFmpeg command failed.")
        print(f"Error: {e.stderr}")
    else:
        print("\n🎉 All frames extracted successfully!")

# Driver cell: resolve the example video, then run the single-pass extraction.
if __name__ == "__main__":
    my_youtube_url = "https://www.youtube.com/watch?v=t9mRf2S5vDI"
    video_info = get_video_info(my_youtube_url)
    # Skip extraction when the lookup produced nothing usable.
    if video_info:
        extract_frames_fast(video_info['stream_url'])

[youtube] Extracting URL: https://www.youtube.com/watch?v=t9mRf2S5vDI
[youtube] t9mRf2S5vDI: Downloading webpage


[youtube] t9mRf2S5vDI: Downloading tv client config
[youtube] t9mRf2S5vDI: Downloading tv player API JSON
[youtube] t9mRf2S5vDI: Downloading tv simply player API JSON
✅ Successfully found stream URL for a video of 1776 seconds.
Directory 'video_frames_output_fast' created. Starting FAST frame extraction...

🎉 All frames extracted successfully!


In [None]:
import re
import subprocess

def extract_frames_by_scene_change(stream_url: str, threshold: float = 0.2,
                                   output_dir: str = "video_frames_scene_detect"):
    """
    Extracts frames only when a significant visual change (scene change) is detected.

    Works in two passes: an analysis pass that lets FFmpeg's `select` filter
    pick frames whose scene score exceeds `threshold` and logs them through
    `showinfo`, then an extraction pass that grabs one JPEG per logged
    timestamp. A frame that fails to extract is reported and skipped.

    Args:
        stream_url: The direct video stream URL.
        threshold: Scene-score cut-off passed to `gt(scene, ...)` (default 0.2).
        output_dir: Directory that receives the JPEG files; created if missing.
    """
    print("🔬 Analyzing video for scene changes... this may take a moment.")

    # 1. ANALYSIS PASS: decode to the null muxer; only the log output matters.
    scenedet_command = [
        'ffmpeg',
        '-i', stream_url,
        '-vf', f"select='gt(scene,{threshold})',showinfo",
        '-f', 'null', '-'
    ]

    try:
        # The showinfo log lines are read from stderr.
        result = subprocess.run(scenedet_command, capture_output=True, text=True)
    except FileNotFoundError:
        # Raised by subprocess when the 'ffmpeg' executable cannot be launched.
        print("❌ FFmpeg executable not found. Install FFmpeg and make sure it is on PATH.")
        return

    if result.returncode != 0:
        # Without this check a failed run would be reported as "0 scene changes".
        print("❌ Scene analysis failed.")
        print(f"FFmpeg Error: {result.stderr}")
        return

    # Find all timestamps in the output using regular expressions
    timestamps = re.findall(r"pts_time:([\d.]+)", result.stderr)

    print(f"✅ Analysis complete. Found {len(timestamps)} significant scene changes.")

    # 2. EXTRACTION PASS: Loop through timestamps and grab each frame
    os.makedirs(output_dir, exist_ok=True)

    extracted = 0
    for index, ts in enumerate(timestamps, start=1):
        output_filename = f"{output_dir}/scene_{index:04d}.jpg"
        extract_command = [
            'ffmpeg',
            '-ss', ts,             # Seek to the detected timestamp
            '-i', stream_url,
            '-vframes', '1',       # Extract exactly one frame
            '-q:v', '2',           # JPEG quality scale: lower value = higher quality
            '-y',                  # Overwrite the file if it already exists
            output_filename
        ]
        outcome = subprocess.run(extract_command, capture_output=True, text=True)
        if outcome.returncode != 0:
            # Best effort: report the miss and keep going with the next scene.
            print(f"❌ Failed to extract {output_filename} at {float(ts):.2f} seconds")
            continue
        extracted += 1
        print(f"📸 Extracted {output_filename} at {float(ts):.2f} seconds")

    if extracted == len(timestamps):
        print("\n🎉 Scene-based frame extraction complete!")
    else:
        print(f"\n⚠️ Scene-based frame extraction finished: {extracted} of {len(timestamps)} frames extracted.")

# --- How to use it ---
# if __name__ == "__main__":
#     video_info = get_video_info("YOUR_YOUTUBE_URL")
#     if video_info:
#         extract_frames_by_scene_change(video_info['stream_url'])

In [None]:
# Driver cell: resolve the example video, then extract one frame per detected scene change.
if __name__ == "__main__":
    my_youtube_url = "https://www.youtube.com/watch?v=t9mRf2S5vDI"
    video_info = get_video_info(my_youtube_url)
    # Skip extraction when the lookup produced nothing usable.
    if video_info:
        extract_frames_by_scene_change(video_info['stream_url'])

[youtube] Extracting URL: https://www.youtube.com/watch?v=t9mRf2S5vDI


[youtube] t9mRf2S5vDI: Downloading webpage
[youtube] t9mRf2S5vDI: Downloading tv client config
[youtube] t9mRf2S5vDI: Downloading tv player API JSON
[youtube] t9mRf2S5vDI: Downloading tv simply player API JSON
✅ Successfully found stream URL for a video of 1776 seconds.
🔬 Analyzing video for scene changes... this may take a moment.
✅ Analysis complete. Found 16 significant scene changes.
📸 Extracted video_frames_scene_detect/scene_0001.jpg at 659.13 seconds
📸 Extracted video_frames_scene_detect/scene_0002.jpg at 673.81 seconds
📸 Extracted video_frames_scene_detect/scene_0003.jpg at 674.81 seconds
📸 Extracted video_frames_scene_detect/scene_0004.jpg at 676.91 seconds
📸 Extracted video_frames_scene_detect/scene_0005.jpg at 698.56 seconds
📸 Extracted video_frames_scene_detect/scene_0006.jpg at 915.95 seconds
📸 Extracted video_frames_scene_detect/scene_0007.jpg at 1116.38 seconds
📸 Extracted video_frames_scene_detect/scene_0008.jpg at 1118.62 seconds
📸 Extracted video_frames_scene_detect/s