<a href="https://colab.research.google.com/github/Shakir-ahmed1/colab_notebooks/blob/main/dataset_collector/yt_audiobooks_downloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import re
import yt_dlp
import subprocess

def _safe_dir_name(title, fallback: str = 'unknown_playlist') -> str:
    """Return *title* reduced to a single, safe path component.

    Path separators and control characters are replaced with ``_`` so a
    playlist title can never create nested directories or escape the base
    folder. Empty / dot-only results fall back to *fallback*.
    """
    if not title:
        return fallback
    cleaned = re.sub(r'[/\\\x00-\x1f]', '_', str(title)).strip(' .')
    return cleaned or fallback


def download_and_convert_to_audio(youtube_url: str) -> None:
    """
    Download a YouTube video or playlist and convert it to 16 kHz WAV audio.

    A URL containing ``list=`` is treated as a playlist and its audio is stored
    in ``/content/audiobooks/<playlist title>``; any other URL is stored in
    ``/content/audiobooks/single_videos``. The YouTube video ID is appended to
    every filename so that videos with identical titles do not collide.

    When the ``google.colab`` module is importable, the function first tries to
    upgrade ``yt-dlp`` via pip and install ``ffmpeg`` via apt-get; outside Colab
    (or if installation fails) it only prints a reminder and carries on.

    Download errors are reported on stdout rather than raised.

    Args:
        youtube_url: The URL of the YouTube video or playlist.
    """

    try:
        # Importing google.colab doubles as the "are we in Colab?" probe.
        import google.colab  # noqa: F401
        print("Running in Google Colab environment. Dependencies will be installed.")
        # Install necessary packages
        subprocess.run(["pip", "install", "--upgrade", "yt-dlp"], check=True, capture_output=True)
        subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=True, capture_output=True)
        print("Dependencies installed successfully.")
    except (ImportError, subprocess.CalledProcessError):
        print("Not running in Google Colab or dependency installation failed. Please ensure yt-dlp and ffmpeg are installed.")

    is_playlist = 'list=' in youtube_url

    base_dir = '/content/audiobooks'
    playlist_title = 'unknown_playlist'

    if is_playlist:
        try:
            # Flat extraction fetches playlist metadata without resolving every entry.
            with yt_dlp.YoutubeDL({'ignoreerrors': True, 'extract_flat': True, 'quiet': True}) as ydl:
                info = ydl.extract_info(youtube_url, download=False)
            # The title may be missing or None; only accept a usable value.
            if info and info.get('title'):
                playlist_title = info['title']
        except Exception as e:
            print(f"Could not extract playlist title. Defaulting to '{playlist_title}'. Error: {e}")

        # Titles are arbitrary user text: strip path separators before using
        # one as a directory name.
        output_dir = os.path.join(base_dir, _safe_dir_name(playlist_title))
    else:
        output_dir = os.path.join(base_dir, 'single_videos')

    os.makedirs(output_dir, exist_ok=True)

    # Appending the unique video ID to the filename to prevent overwriting.
    # The directory part is literal text, so any '%' in it must be doubled or
    # yt-dlp would try to interpret it as an output-template field.
    output_template = os.path.join(
        output_dir.replace('%', '%%'),
        '%(title)s [%(id)s].%(ext)s',
    )

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        # Resample the extracted audio to 16 kHz.
        'postprocessor_args': [
            '-ar', '16000'
        ],
        'outtmpl': output_template,
        'ignoreerrors': True,
        # 'overwrites'/'noprogress' are the option names yt-dlp documents; the
        # previous 'no_overwrites'/'progress' keys were not recognised.
        'overwrites': False,
        'quiet': False,
        'noprogress': False,
        'logtostderr': False,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print(f"\nStarting download for: {youtube_url}")
            # With ignoreerrors=True failures do not raise; they surface only
            # through a non-zero return code.
            retcode = ydl.download([youtube_url])
            if retcode:
                print("\n⚠️ Finished, but some items could not be downloaded or converted (see messages above).")
            else:
                print("\n✅ Download and conversion complete!")
            print(f"🎧 Audio files are saved in: {output_dir}")

    except yt_dlp.utils.DownloadError as e:
        print(f"\n❌ An error occurred during download: {e}")
    except Exception as e:
        print(f"\n❌ An unexpected error occurred: {e}")

if __name__ == '__main__':
    # Strip surrounding whitespace so a pasted URL with a trailing space/newline
    # still works and a whitespace-only entry is treated as "no URL".
    youtube_link = input("Enter the YouTube video or playlist URL: ").strip()
    if youtube_link:
        download_and_convert_to_audio(youtube_link)
    else:
        print("No URL provided. Exiting.")

Enter the YouTube video or playlist URL: https://www.youtube.com/watch?v=-4wsmnbUpCI&list=PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz
Running in Google Colab environment. Dependencies will be installed.
Dependencies installed successfully.

Starting download for: https://www.youtube.com/watch?v=-4wsmnbUpCI&list=PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz
[youtube:tab] Extracting URL: https://www.youtube.com/watch?v=-4wsmnbUpCI&list=PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz
[youtube:tab] Downloading playlist PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz - add --no-playlist to download just the video -4wsmnbUpCI
[youtube:tab] PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz: Downloading webpage
[youtube:tab] Extracting URL: https://www.youtube.com/playlist?list=PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz
[youtube:tab] PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz: Downloading webpage




[youtube:tab] PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz: Redownloading playlist API JSON with unavailable videos
[download] Downloading playlist: Book Audio እቲ ኣዝዩ ሕጒስ ሰብ ኣብ ዓለም
[youtube:tab] PL-suF1GLbqeIYBc2wTcQnx16s27YRGfoz page 1: Downloading API JSON
[youtube:tab] Playlist Book Audio እቲ ኣዝዩ ሕጒስ ሰብ ኣብ ዓለም: Downloading 8 items of 8
[download] Downloading item 1 of 8
[youtube] Extracting URL: https://www.youtube.com/watch?v=-4wsmnbUpCI
[youtube] -4wsmnbUpCI: Downloading webpage
[youtube] -4wsmnbUpCI: Downloading tv client config
[youtube] -4wsmnbUpCI: Downloading tv player API JSON
[youtube] -4wsmnbUpCI: Downloading ios player API JSON
[youtube] -4wsmnbUpCI: Downloading m3u8 information
[info] -4wsmnbUpCI: Downloading 1 format(s): 251
[download] Destination: /content/audiobooks/Book Audio እቲ ኣዝዩ ሕጒስ ሰብ ኣብ ዓለም/ምሉእ ትረኻ መጽሓፍ እቲ ኣዝዩ ሕጒስ ሰብ ኣብ ዓለም  ｜  eti azyu hgus seb ab alem ｜ Tigrinya Audio Book  #AseyMedia [-4wsmnbUpCI].webm
[download] 100% of  234.07MiB in 00:00:34 at 6.72MiB/s   
[ExtractA