In [1]:
# Install necessary packages
!pip install yt-dlp ffmpeg-python

Collecting yt-dlp
  Downloading yt_dlp-2025.4.30-py3-none-any.whl.metadata (173 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.3/173.3 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading yt_dlp-2025.4.30-py3-none-any.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m37.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: yt-dlp, ffmpeg-python
Successfully installed ffmpeg-python-0.2.0 yt-dlp-2025.4.30


In [None]:


import os
import subprocess
import re
from google.colab import files
import yt_dlp
import shutil
import time
import tempfile

class YouTubeDownloader:
    """Download YouTube media with yt-dlp and optionally re-encode it with FFmpeg.

    Attributes:
        available_formats (dict): Maps a menu key ('1'-'5') to a dict holding a
            human-readable 'name' and the yt-dlp 'format' selector string.
    """

    # Menu key that selects the audio-only (MP3) download path.
    AUDIO_ONLY_OPTION = '5'
    # Upper bound, in UTF-8 bytes, for a sanitized title. Counting bytes rather
    # than characters leaves room for the format suffix and extension that
    # download_video appends, even for multi-byte (non-ASCII) titles.
    MAX_FILENAME_BYTES = 200

    def __init__(self):
        """Initialize the YouTube downloader with default settings"""
        self.install_dependencies()
        self.available_formats = {
            '1': {'name': 'Best Quality (MP4)', 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'},
            '2': {'name': 'HD 1080p (MP4)', 'format': 'bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/best[height<=1080][ext=mp4]/best[height<=1080]'},
            '3': {'name': 'HD 720p (MP4)', 'format': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best[height<=720]'},
            '4': {'name': 'SD 480p (MP4)', 'format': 'bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]/best[height<=480][ext=mp4]/best[height<=480]'},
            '5': {'name': 'Audio Only (MP3)', 'format': 'bestaudio[ext=m4a]/bestaudio'}
        }

    def install_dependencies(self):
        """Make sure the ffmpeg binary is available, installing it via apt-get if needed.

        Failures are reported as a warning instead of being raised, so the
        object can still be constructed on systems without apt-get.
        """
        print("Checking and installing dependencies...")

        # Skip the (slow, root-requiring) apt-get calls when ffmpeg is already on PATH.
        if shutil.which("ffmpeg"):
            print("ffmpeg is already available; skipping installation.\n")
            return

        try:
            subprocess.run(["apt-get", "update", "-qq"], check=True)
            subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=True)
            print("Dependencies installed successfully.\n")
        except (subprocess.SubprocessError, OSError) as e:
            # OSError covers a missing apt-get executable (FileNotFoundError),
            # which subprocess.run raises outside the SubprocessError hierarchy.
            print(f"Warning: Could not install dependencies: {e}")

    def sanitize_filename(self, filename):
        """Replace characters that are unsafe in filenames and cap the length.

        Args:
            filename (str): Raw name, typically a video title.

        Returns:
            str: The name with each of \\ / * ? : " < > | replaced by "_". If the
            result is longer than MAX_FILENAME_BYTES UTF-8 bytes it is cut
            short and terminated with "...".
        """
        sanitized = re.sub(r'[\\/*?:"<>|]', "_", filename)

        # Filesystems limit names in bytes, so measure the encoded form.
        # "surrogatepass" keeps odd titles with lone surrogates from raising here.
        encoded = sanitized.encode("utf-8", errors="surrogatepass")
        if len(encoded) > self.MAX_FILENAME_BYTES:
            ellipsis = "..."
            keep = self.MAX_FILENAME_BYTES - len(ellipsis)
            # errors="ignore" drops a multi-byte character split by the cut.
            sanitized = encoded[:keep].decode("utf-8", errors="ignore") + ellipsis
        return sanitized

    def get_video_info(self, url):
        """Fetch and print video metadata without downloading the media.

        Args:
            url (str): The YouTube video URL.

        Returns:
            dict | None: The info dict returned by yt-dlp, or None if the
            lookup raised an exception (the error is printed).
        """
        print(f"Fetching video information for: {url}")
        try:
            with yt_dlp.YoutubeDL({'quiet': True, 'noplaylist': True}) as ydl:
                info = ydl.extract_info(url, download=False)
                print("\n--- Video Information ---")
                print(f"Title: {info.get('title', 'Unknown')}")
                print(f"Duration: {info.get('duration', 'Unknown')} seconds")
                print(f"Channel: {info.get('uploader', 'Unknown')}")
                print(f"Upload Date: {info.get('upload_date', 'Unknown')}")
                print(f"View Count: {info.get('view_count', 'Unknown')}")
                print("------------------------\n")
                return info
        except Exception as e:
            print(f"Error fetching video information: {str(e)}")
            return None

    def display_format_options(self):
        """Print the menu of download formats defined in available_formats."""
        print("\n=== Available Download Options ===")
        for key, value in self.available_formats.items():
            print(f"{key}: {value['name']}")
        print("=================================\n")

    def _locate_download(self, directory, expected_path):
        """Return the file produced in `directory`, preferring `expected_path`.

        Falls back to the largest non-partial file in the directory when the
        predicted path does not exist; returns None if nothing usable is found.
        """
        if os.path.isfile(expected_path):
            return expected_path

        candidates = []
        for name in os.listdir(directory):
            # Skip yt-dlp's in-progress / bookkeeping files.
            if name.endswith(('.part', '.ytdl')):
                continue
            path = os.path.join(directory, name)
            if os.path.isfile(path):
                candidates.append(path)

        if not candidates:
            return None
        return max(candidates, key=os.path.getsize)

    def download_video(self, url, format_option='1', output_path=None):
        """
        Download a video with the selected format option

        The media is first downloaded into a private temporary directory and
        then moved to its final, title-based name; the temporary directory is
        always removed afterwards.

        Args:
            url (str): The YouTube video URL
            format_option (str): Key of selected format from available_formats;
                an unknown key falls back to '1'
            output_path (str): Directory to save the video (created if missing);
                defaults to the current working directory

        Returns:
            str: Path to the downloaded file, or None if any step failed
                (the error is printed)
        """
        # Create a unique temporary directory for this download
        temp_dir = tempfile.mkdtemp(prefix="yt_download_")

        try:
            # Get format configuration
            if format_option not in self.available_formats:
                print("Invalid format option. Using default (Best Quality).")
                format_option = '1'

            format_config = self.available_formats[format_option]
            print(f"Selected format: {format_config['name']}")

            # Set the output path
            final_output_path = output_path if output_path else os.getcwd()
            os.makedirs(final_output_path, exist_ok=True)

            # Get video info first for filename preparation
            info = self.get_video_info(url)
            if not info:
                return None

            # `or` also covers a title key that is present but None/empty.
            video_title = self.sanitize_filename(info.get('title') or 'video')

            is_audio_only = format_option == self.AUDIO_ONLY_OPTION

            # Configure yt-dlp options. The temporary file is named after the
            # video id: the title is only needed for the final name, and ids
            # avoid over-long or awkward temporary paths.
            ydl_opts = {
                'format': format_config['format'],
                'outtmpl': os.path.join(temp_dir, '%(id)s.%(ext)s'),
                'quiet': False,
                'no_warnings': False,
                'ignoreerrors': False,
                'noplaylist': True,
                # No post-processing by default; the audio-only path adds MP3
                # extraction below.
                'postprocessors': [],
            }

            # For audio-only downloads, add conversion to MP3
            if is_audio_only:
                ydl_opts['postprocessors'] = [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }]

            # Download with the configured options
            print("\nDownloading media...")
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                downloaded_file = ydl.prepare_filename(info)

            # The audio post-processor replaces the downloaded file with an .mp3
            if is_audio_only:
                downloaded_file = os.path.splitext(downloaded_file)[0] + ".mp3"

            # The predicted name can differ from what was actually written
            # (e.g. a different container after merging), so verify it.
            downloaded_file = self._locate_download(temp_dir, downloaded_file)
            if downloaded_file is None:
                print("Download error: no output file was produced.")
                return None

            # Keep the real container extension so the name matches the content
            # when a format fallback produced something other than MP4/MP3.
            default_extension = ".mp3" if is_audio_only else ".mp4"
            extension = os.path.splitext(downloaded_file)[1] or default_extension
            format_label = format_config['name'].replace(' ', '_')
            final_filename = f"{video_title}_{format_label}{extension}"
            final_file_path = os.path.join(final_output_path, final_filename)

            # Move rather than copy: the temporary directory is deleted anyway,
            # and a move avoids holding two copies of a large file on disk.
            print(f"\nFinalizing download: {final_file_path}")
            shutil.move(downloaded_file, final_file_path)

            print(f"Download complete! Saved to: {final_file_path}")
            return final_file_path

        except Exception as e:
            print(f"Download error: {str(e)}")
            return None
        finally:
            # Clean up temporary directory
            try:
                shutil.rmtree(temp_dir)
            except Exception as e:
                print(f"Warning: Could not clean up temporary files: {e}")

    def compress_video(self, input_file, compression_quality=23, output_file=None):
        """
        Compress a video file by re-encoding it with FFmpeg (libx264 + AAC)

        Args:
            input_file (str): Path to the input video file
            compression_quality (int): CRF value (18-28), lower is better quality
            output_file (str): Path for the output compressed video; defaults to
                "<input name>_compressed.mp4" next to the input file

        Returns:
            str: Path to the compressed video file, or None if the input is
                missing or the encode failed (the error is printed)
        """
        try:
            if not os.path.exists(input_file):
                print(f"Error: Input file '{input_file}' not found.")
                return None

            # Generate output filename if not provided
            if output_file is None:
                file_dir = os.path.dirname(input_file)
                file_base = os.path.splitext(os.path.basename(input_file))[0]
                output_file = os.path.join(file_dir, f"{file_base}_compressed.mp4")

            print(f"\nCompressing video: {input_file}")
            print(f"Output file: {output_file}")
            print(f"Using CRF value: {compression_quality} (lower = better quality, higher = smaller file)")

            # Measure the input before anything is written, so the report is
            # still correct if output_file points at the input itself.
            original_size = os.path.getsize(input_file) / (1024 * 1024)  # MB

            # Encode into a private temporary directory so a failed run never
            # leaves a partial file at output_file. The context manager removes
            # the directory even when FFmpeg fails.
            with tempfile.TemporaryDirectory(prefix="compression_") as temp_dir:
                temp_file = os.path.join(temp_dir, "intermediate.mp4")

                # -map_metadata 0 copies the input's global metadata
                # -movflags +faststart places the index at the start of the file
                compression_command = [
                    "ffmpeg", "-i", input_file,
                    "-c:v", "libx264", "-crf", str(compression_quality),
                    "-preset", "medium",
                    "-c:a", "aac", "-b:a", "128k",
                    "-map_metadata", "0",
                    "-movflags", "+faststart",
                    "-y", temp_file
                ]

                print("Running compression (this may take a while)...")
                subprocess.run(compression_command, check=True)

                # Publish the finished encode at its final destination
                shutil.move(temp_file, output_file)

            # Verify output and calculate size reduction
            if not os.path.exists(output_file):
                print("Compression failed: Output file not found")
                return None

            compressed_size = os.path.getsize(output_file) / (1024 * 1024)  # MB
            print("Compression complete!")
            print(f"Original size: {original_size:.2f} MB")
            print(f"Compressed size: {compressed_size:.2f} MB")
            # Guard the ratio against an empty input file.
            if original_size > 0:
                print(f"Size reduction: {(1 - compressed_size/original_size) * 100:.2f}%")
            return output_file

        except subprocess.SubprocessError as e:
            print(f"FFmpeg error: {str(e)}")
            return None
        except Exception as e:
            print(f"Compression error: {str(e)}")
            return None

def main():
    """Run the interactive download workflow.

    Prompts for a video URL, a download format and optional compression
    settings, downloads the media with YouTubeDownloader, optionally
    re-encodes it, and finally hands the resulting file to the browser via
    google.colab's files.download.
    """
    # Welcome message
    print("Enhanced YouTube Video Downloader & Compressor")
    print("============================================")

    # Initialize downloader
    downloader = YouTubeDownloader()

    # Get YouTube URL; surrounding whitespace from copy/paste is dropped
    video_url = input("Enter YouTube video URL: ").strip()
    if not video_url:
        print("No URL entered. Exiting.")
        return

    # Display format options and get user choice
    downloader.display_format_options()
    # Strip so that an answer such as " 1" is not rejected as invalid
    format_choice = input("Select download format (1-5) or press Enter for Best Quality: ").strip()
    if not format_choice:
        format_choice = '1'
    is_audio_only = format_choice == '5'

    # Ask if user wants compression
    compress_choice = input("Do you want to compress the video after download? (y/n): ").strip().lower()
    apply_compression = compress_choice.startswith('y')

    # Compression re-encodes video, so it is never applied to the audio-only
    # format; say so up front instead of asking for a CRF that would be ignored.
    if apply_compression and is_audio_only:
        print("Compression is skipped for audio-only downloads.")
        apply_compression = False

    compression_quality = 23  # Default
    if apply_compression:
        quality_input = input("Enter compression quality (18-28, lower=better quality, default=23) or press Enter for default: ").strip()
        if quality_input:
            try:
                compression_quality = int(quality_input)
            except ValueError:
                print("Invalid input. Using default quality (23).")
                compression_quality = 23
            else:
                # Values outside the recommended range are allowed, with a warning
                if compression_quality < 18 or compression_quality > 28:
                    print(f"Warning: {compression_quality} is outside recommended range (18-28). Using it anyway.")

    # Download video
    print("\nStarting download process...")
    downloaded_file = downloader.download_video(video_url, format_choice)

    if not downloaded_file or not os.path.exists(downloaded_file):
        print("Download failed. Exiting.")
        return

    final_file = downloaded_file

    # Apply compression if requested; on failure keep the uncompressed download
    if apply_compression:
        print("\nStarting compression process...")
        compressed_file = downloader.compress_video(downloaded_file, compression_quality)
        if compressed_file and os.path.exists(compressed_file):
            final_file = compressed_file
        else:
            print("Compression failed. Using original downloaded file.")

    # Download the file to user's computer
    if final_file and os.path.exists(final_file):
        print("\nPreparing file for download to your computer...")
        try:
            files.download(final_file)
            print(f"File is ready for download: {os.path.basename(final_file)}")
        except Exception as e:
            # The file is still on the runtime's disk; tell the user where
            print(f"Error preparing download: {str(e)}")
            print(f"The file is available at: {final_file}")
    else:
        print("Processing failed. No file to download.")

# Entry point: start the interactive workflow only when this code runs as the
# main module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Enhanced YouTube Video Downloader & Compressor
Checking and installing dependencies...
Dependencies installed successfully.

Enter YouTube video URL: https://youtu.be/Rq5LyXG8m2U?si=jL50SD0-QuVtOHkv

=== Available Download Options ===
1: Best Quality (MP4)
2: HD 1080p (MP4)
3: HD 720p (MP4)
4: SD 480p (MP4)
5: Audio Only (MP3)

Select download format (1-5) or press Enter for Best Quality: 1
Do you want to compress the video after download? (y/n): y
Enter compression quality (18-28, lower=better quality, default=23) or press Enter for default: 23

Starting download process...
Selected format: Best Quality (MP4)
Fetching video information for: https://youtu.be/Rq5LyXG8m2U?si=jL50SD0-QuVtOHkv

--- Video Information ---
Title: Superman | Superman Day - Behind The Scenes Look
Duration: 137 seconds
Channel: DC
Upload Date: 20250418
View Count: 1975821
------------------------


Downloading media...
[youtube] Extracting URL: https://youtu.be/Rq5LyXG8m2U?si=jL50SD0-QuVtOHkv
[youtube] Rq5LyXG8m