In [1]:
!pip install yt_dlp

Collecting yt_dlp
  Downloading yt_dlp-2025.5.22-py3-none-any.whl.metadata (174 kB)
Downloading yt_dlp-2025.5.22-py3-none-any.whl (3.3 MB)
   ---------------------------------------- 0.0/3.3 MB ? eta -:--:--
   ---------------------------------------- 3.3/3.3 MB 27.5 MB/s eta 0:00:00
Installing collected packages: yt_dlp
Successfully installed yt_dlp-2025.5.22


In [8]:

import yt_dlp
import os
import sys
from pathlib import Path

In [9]:
url = "https://www.youtube.com/watch?v=8vXe48xQSoo"
output_path="./downloads"
quality="best"

In [None]:
# Create output directory if it doesn't exist
Path(output_path).mkdir(parents=True, exist_ok=True)

In [7]:
ydl_opts = {
    'listformats': True,
    'quiet': False,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.extract_info(url, download=False)

[youtube] Extracting URL: https://www.youtube.com/watch?v=8vXe48xQSoo
[youtube] 8vXe48xQSoo: Downloading webpage
[youtube] 8vXe48xQSoo: Downloading tv client config
[youtube] 8vXe48xQSoo: Downloading player 59b252b9-main
[youtube] 8vXe48xQSoo: Downloading tv player API JSON
[youtube] 8vXe48xQSoo: Downloading ios player API JSON




[youtube] 8vXe48xQSoo: Downloading m3u8 information
[info] Available formats for 8vXe48xQSoo:
ID  EXT   RESOLUTION FPS CH |   FILESIZE   TBR PROTO | VCODEC          VBR ACODEC     ASR MORE INFO
------------------------------------------------------------------------------------------------------------
sb3 mhtml 48x27        0    |                  mhtml | images                             storyboard
sb2 mhtml 80x45        0    |                  mhtml | images                             storyboard
sb1 mhtml 160x90       0    |                  mhtml | images                             storyboard
sb0 mhtml 320x180      0    |                  mhtml | images                             storyboard
233 mp4   audio only        |                  m3u8  | audio only          unknown        [en] Default, low
234 mp4   audio only        |                  m3u8  | audio only          unknown        [en] Default, high
602 mp4   256x144     15    | ~366.83MiB  187k m3u8  | vp09.00.10.08  187k v

# Download Video

In [None]:
# Configure yt-dlp options with better format selection
ydl_opts = {
    'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
    'format': 'best[height<=1080]/best',  # Prefer combined formats, fallback to best
    'noplaylist': True,
    'writeinfojson': False,
    'writesubtitles': False,
    'writeautomaticsub': False,
    'ignoreerrors': False,
}

# Override format if user specified
if quality != "best":
    if quality == "worst":
        ydl_opts['format'] = 'worst'
    elif quality.endswith('p'):
        # For specific resolution like 720p, 480p
        height = quality[:-1]
        ydl_opts['format'] = f'best[height<={height}]/best'
    else:
        ydl_opts['format'] = quality

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    # Get video info first
    info = ydl.extract_info(url, download=False)
    title = info.get('title', 'Unknown')
    duration = info.get('duration', 'Unknown')
    uploader = info.get('uploader', 'Unknown')
    
    print(f"Title: {title}")
    print(f"Duration: {duration} seconds")
    print(f"Uploader: {uploader}")
    
    # Check file size estimate
    formats = info.get('formats', [])
    if formats:
        selected_format = None
        for fmt in formats:
            if fmt.get('vcodec') != 'none' and fmt.get('acodec') != 'none':
                selected_format = fmt
                break
        
        if selected_format and selected_format.get('filesize'):
            size_mb = selected_format['filesize'] / (1024 * 1024)
            print(f"Estimated size: {size_mb:.1f} MB")
    
    # Download the video
    print(f"\nStarting download...")
    ydl.extract_info(url, download=True)
    print(f"Download completed.")
    

[youtube] Extracting URL: https://www.youtube.com/watch?v=8vXe48xQSoo
[youtube] 8vXe48xQSoo: Downloading webpage
[youtube] 8vXe48xQSoo: Downloading tv client config
[youtube] 8vXe48xQSoo: Downloading tv player API JSON
[youtube] 8vXe48xQSoo: Downloading ios player API JSON
[youtube] 8vXe48xQSoo: Downloading m3u8 information
Title: Critical Role plays Daggerheart | Live One-Shot | Open Beta
Duration: 16437 seconds
Uploader: Critical Role
Estimated size: 703.0 MB

Starting download...
[youtube] Extracting URL: https://www.youtube.com/watch?v=8vXe48xQSoo
[youtube] 8vXe48xQSoo: Downloading webpage
[youtube] 8vXe48xQSoo: Downloading tv client config
[youtube] 8vXe48xQSoo: Downloading tv player API JSON
[youtube] 8vXe48xQSoo: Downloading ios player API JSON
[youtube] 8vXe48xQSoo: Downloading m3u8 information
[info] 8vXe48xQSoo: Downloading 1 format(s): 18
[download] Destination: downloads\Critical Role plays Daggerheart ｜ Live One-Shot ｜ Open Beta.mp4
[download] 100% of  703.02MiB in 00:00:4

# Download Audio Only

In [10]:
ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    'noplaylist': True,
}

try:
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        print("Downloading audio...")
        ydl.download([url])
        print("✅ Audio download completed!")
except Exception as e:
    print(f"❌ Error downloading audio: {str(e)}")

Downloading audio...
[youtube] Extracting URL: https://www.youtube.com/watch?v=8vXe48xQSoo
[youtube] 8vXe48xQSoo: Downloading webpage
[youtube] 8vXe48xQSoo: Downloading tv client config
[youtube] 8vXe48xQSoo: Downloading tv player API JSON
[youtube] 8vXe48xQSoo: Downloading ios player API JSON
[youtube] 8vXe48xQSoo: Downloading m3u8 information
[info] 8vXe48xQSoo: Downloading 1 format(s): 251
[download] Destination: downloads\Critical Role plays Daggerheart ｜ Live One-Shot ｜ Open Beta.webm
[download] 100% of  215.53MiB in 00:00:11 at 18.74MiB/s    
[ExtractAudio] Destination: downloads\Critical Role plays Daggerheart ｜ Live One-Shot ｜ Open Beta.mp3
Deleting original file downloads\Critical Role plays Daggerheart ｜ Live One-Shot ｜ Open Beta.webm (pass -k to keep)
✅ Audio download completed!


# Download Audio only Mono WAV 16khz

In [12]:
sample_rate = 16000  # Set desired sample rate for WAV conversion

Path(output_path).mkdir(parents=True, exist_ok=True)

ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': None,
    }],
    'postprocessor_args': [
        '-ac', '1',  # Convert to mono
        '-ar', str(sample_rate),  # Set sample rate  
        '-acodec', 'pcm_s16le',  # 16-bit PCM
    ],
    'noplaylist': True,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(url, download=False)
    print(f"Title: {info.get('title', 'Unknown')}")
    print(f"Converting to mono WAV at {sample_rate} Hz...")
    ydl.download([url])
    print("✅ Download completed!")

[youtube] Extracting URL: https://www.youtube.com/watch?v=8vXe48xQSoo
[youtube] 8vXe48xQSoo: Downloading webpage
[youtube] 8vXe48xQSoo: Downloading tv client config
[youtube] 8vXe48xQSoo: Downloading tv player API JSON
[youtube] 8vXe48xQSoo: Downloading ios player API JSON
[youtube] 8vXe48xQSoo: Downloading m3u8 information
Title: Critical Role plays Daggerheart | Live One-Shot | Open Beta
Converting to mono WAV at 16000 Hz...
[youtube] Extracting URL: https://www.youtube.com/watch?v=8vXe48xQSoo
[youtube] 8vXe48xQSoo: Downloading webpage
[youtube] 8vXe48xQSoo: Downloading tv client config
[youtube] 8vXe48xQSoo: Downloading tv player API JSON
[youtube] 8vXe48xQSoo: Downloading ios player API JSON
[youtube] 8vXe48xQSoo: Downloading m3u8 information
[info] 8vXe48xQSoo: Downloading 1 format(s): 251
[download] Destination: downloads\Critical Role plays Daggerheart ｜ Live One-Shot ｜ Open Beta.webm
[download] 100% of  215.53MiB in 00:00:12 at 17.45MiB/s    
[ExtractAudio] Destination: downloa

# Chunk the audio into 30 second segments

In [11]:
!pip install pydub

Collecting pydub
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [14]:
# Chunk the audio into 30 second segments naively
from pydub import AudioSegment
from pydub.playback import play
import os

def chunk_audio(input_file, chunk_length=30):

    print(f"Chunking audio file: {input_file}")

    audio = AudioSegment.from_file(input_file)
    duration_ms = len(audio)
    chunk_length_ms = chunk_length * 1000

    print(f"Audio duration: {duration_ms / 1000:.2f} seconds")

    chunks = []
    for i in range(0, duration_ms, chunk_length_ms):
        print(f"Processing chunk from {i / 1000:.2f} to {(i + chunk_length_ms) / 1000:.2f} seconds")
        chunk = audio[i:i + chunk_length_ms]
        chunks.append(chunk)

    # Save chunks to files
    print(f"Saving {len(chunks)} chunks...")
    base_name = os.path.splitext(input_file)[0]
    for i, chunk in enumerate(chunks):
        print(f"Saving chunk {i}...")
        chunk.export(f"{base_name}_chunk_{i}_{chunk_length}_seconds.wav", format="wav")
        print(f"Chunk {i} saved as {base_name}_chunk_{i}.wav")

In [15]:
chunk_audio("downloads/Critical Role plays Daggerheart ｜ Live One-Shot ｜ Open Beta.wav", chunk_length=300)

Chunking audio file: downloads/Critical Role plays Daggerheart ｜ Live One-Shot ｜ Open Beta.wav
Audio duration: 16437.43 seconds
Processing chunk from 0.00 to 300.00 seconds
Processing chunk from 300.00 to 600.00 seconds
Processing chunk from 600.00 to 900.00 seconds
Processing chunk from 900.00 to 1200.00 seconds
Processing chunk from 1200.00 to 1500.00 seconds
Processing chunk from 1500.00 to 1800.00 seconds
Processing chunk from 1800.00 to 2100.00 seconds
Processing chunk from 2100.00 to 2400.00 seconds
Processing chunk from 2400.00 to 2700.00 seconds
Processing chunk from 2700.00 to 3000.00 seconds
Processing chunk from 3000.00 to 3300.00 seconds
Processing chunk from 3300.00 to 3600.00 seconds
Processing chunk from 3600.00 to 3900.00 seconds
Processing chunk from 3900.00 to 4200.00 seconds
Processing chunk from 4200.00 to 4500.00 seconds
Processing chunk from 4500.00 to 4800.00 seconds
Processing chunk from 4800.00 to 5100.00 seconds
Processing chunk from 5100.00 to 5400.00 seconds