# 🎬 YouTube Vocal Separator

Extract vocals from YouTube videos using AI-powered audio separation.

This notebook will:
1. Download video and audio from YouTube
2. Separate vocals using audio-separator
3. Merge vocals back with the original video

---

---

⚠️ **Note for Google Colab Users:**

YouTube may block downloads from Colab due to bot detection. If you encounter download errors:
- Try running this notebook **locally** (works reliably)
- Or use the [HF Spaces web interface](https://huggingface.co/spaces/melnema/youtube-music-remover) with file upload

---

In [None]:
# === QUICK PARAMETERS ===
# In Colab the "# @param" annotations below are shown as a form on the right;
# elsewhere, edit the assigned values directly in this cell.

# @title Configuration
USE_UPLOAD = False # @param {type:"boolean"}
VIDEO_INPUT = "" # @param {type:"string"}
MODEL = "UVR-MDX-NET-Inst_HQ_3.onnx" # @param ["UVR-MDX-NET-Inst_HQ_3.onnx", "Kim_Vocal_2.onnx", "UVR_MDXNET_KARA_2.onnx"]
BATCH_SIZE = 4 # @param {type:"slider", min:1, max:8, step:1}
OUTPUT_DIR = "./output" # @param {type:"string"}
DEBUG = False # @param {type:"boolean"}

# Resolve UPLOADED_FILE: the key of the first uploaded entry, or None when
# uploads are disabled, google.colab is not importable, or nothing was uploaded.
if not USE_UPLOAD:
    UPLOADED_FILE = None
else:
    try:
        from google.colab import files
        print("üì§ Upload your video file:")
        uploaded = files.upload()
        if not uploaded:
            print("‚ùå No file uploaded")
            UPLOADED_FILE = None
        else:
            # Only the first entry of the upload result is used.
            UPLOADED_FILE = next(iter(uploaded))
            print(f"‚úÖ Uploaded: {UPLOADED_FILE}")
    except ImportError:
        print("‚ö†Ô∏è File upload only works in Google Colab")
        UPLOADED_FILE = None

## 📦 Setup & Dependencies

First, let's install the required packages if they're not already available.

In [None]:
import sys
import subprocess
import shutil

def check_command(cmd):
    """Report whether *cmd* resolves to an executable via ``shutil.which``."""
    resolved = shutil.which(cmd)
    return resolved is not None

# Check FFmpeg
# FFmpeg is not installed by this cell: when the executable is not found,
# per-platform install hints are printed and the cell stops via SystemExit.
if not check_command('ffmpeg'):
    print("‚ùå FFmpeg not found!")
    print("Please install FFmpeg:")
    print("  - Windows: winget install ffmpeg")
    print("  - Linux: sudo apt install ffmpeg")
    print("  - Mac: brew install ffmpeg")
    raise SystemExit("FFmpeg is required to run this notebook")
else:
    print("‚úì FFmpeg found")

# Check and install dependencies
# Each tool is installed only when its command-line entry point is missing.
# The "!" lines are IPython/Colab shell escapes, so this cell is not plain Python.
if not check_command('yt-dlp'):
    print("Installing yt-dlp...")
    !pip install -q yt-dlp

if not check_command('audio-separator'):
    print("Installing audio-separator...")
    !pip install -q audio-separator[gpu]

# NOTE(review): printed unconditionally; the installs above are not re-checked.
print("‚úì All dependencies installed!")

## 🎨 Helper Functions

Define color formatting and utility functions.

In [None]:
import os
import re
import time
from pathlib import Path
from typing import Optional
from IPython.display import display, HTML, clear_output

# ANSI Colors for terminal output
class Color:
    """ANSI escape sequences used to style terminal output."""

    # Foreground colors
    GREEN = '\x1b[32m'
    RED = '\x1b[31m'
    CYAN = '\x1b[36m'
    YELLOW = '\x1b[33m'
    GRAY = '\x1b[90m'
    # Text attributes
    BOLD = '\x1b[1m'
    RESET = '\x1b[0m'

def colored(text: str, color: str) -> str:
    """Wrap *text* in the given ANSI *color* prefix followed by ``Color.RESET``."""
    return "{}{}{}".format(color, text, Color.RESET)

def format_time(ms: int) -> str:
    """Render a millisecond duration as a short human-readable label.

    Below one second the raw millisecond value is shown (``"250ms"``), below
    one minute the seconds with one decimal (``"12.3s"``), and otherwise whole
    minutes and seconds (``"2m 5s"``).
    """
    total_seconds = ms / 1000.0

    if total_seconds < 1:
        return f"{ms}ms"

    if total_seconds < 60:
        return f"{total_seconds:.1f}s"

    # Both parts are truncated, not rounded.
    whole_minutes = int(total_seconds / 60)
    leftover_seconds = int(total_seconds % 60)
    return f"{whole_minutes}m {leftover_seconds}s"

def extract_video_id(input_str: str) -> str:
    """Extract the 11-character YouTube video ID from a URL or a bare ID.

    Recognized forms (after stripping surrounding whitespace):
      * a bare 11-character ID
      * ``youtu.be/<id>`` short links
      * any URL carrying ``v=<id>`` as a query parameter, in any position
      * ``/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>`` paths

    Args:
        input_str: URL or video ID as typed or pasted by the user.

    Returns:
        The video ID when one is recognized; otherwise the stripped input
        unchanged, so callers can still pass it on as-is.
    """
    candidate = input_str.strip()
    id_chars = r'[a-zA-Z0-9_-]{11}'

    # fullmatch avoids the "$ matches before a trailing newline" pitfall.
    if re.fullmatch(id_chars, candidate):
        return candidate

    patterns = (
        rf'youtu\.be/({id_chars})',
        # "v" may follow other query parameters (e.g. ?feature=share&v=...).
        rf'[?&]v=({id_chars})',
        rf'/(?:shorts|embed|live)/({id_chars})',
    )

    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)

    return candidate

def run_command(*cmd, cwd=None, show_output=False):
    """Run an external command and report whether it exited with status 0.

    Args:
        *cmd: Program name followed by its arguments (no shell is involved).
        cwd: Optional working directory for the child process.
        show_output: When false, the child's stdout and stderr are discarded;
            when true, they are inherited from this process.

    Returns:
        True if the process ran and returned exit code 0. False on a non-zero
        exit code, or when launching it raised (the error is printed in red).
    """
    silence = {} if show_output else {
        "stdout": subprocess.DEVNULL,
        "stderr": subprocess.DEVNULL,
    }
    try:
        completed = subprocess.run(cmd, cwd=cwd, check=False, **silence)
    except Exception as e:
        print(colored(f"Command failed: {e}", Color.RED))
        return False
    return completed.returncode == 0

def run_output(*cmd):
    """Run an external command and return its stripped standard output.

    Returns:
        The captured stdout with surrounding whitespace removed when the
        command exits with status 0; None when it exits non-zero or cannot
        be started.
    """
    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
    except Exception:
        return None

    if completed.returncode != 0:
        return None
    return completed.stdout.strip()

# Confirmation that this cell ran to the end, i.e. the helpers above are defined.
print("‚úì Helper functions loaded")

## ⚙️ Configuration

Review the settings chosen in the parameters cell at the top of the notebook:

In [None]:
# Normalize whatever was entered in VIDEO_INPUT (URL or bare ID) to a video ID.
VIDEO_ID = extract_video_id(VIDEO_INPUT)

# Echo the effective settings, one per line.
print(
    f"Video ID: {VIDEO_ID}",
    f"Model: {MODEL}",
    f"Batch Size: {BATCH_SIZE}",
    f"Output Dir: {OUTPUT_DIR}",
    sep="\n",
)

## 📥 Step 1: Get Video & Audio

Download from YouTube or use uploaded file.

In [None]:
# Step 1: obtain `video_file` and `audio_file`, either from an uploaded file
# (audio is extracted with ffmpeg) or by downloading both streams with yt-dlp.
import shutil

url = f"https://www.youtube.com/watch?v={VIDEO_ID}"
out_dir = Path(OUTPUT_DIR)
out_dir.mkdir(parents=True, exist_ok=True)
temp_dir = out_dir / "temp"
# Start from an empty scratch directory: files left behind by an interrupted
# run would otherwise be candidates in the suffix-based lookups below.
shutil.rmtree(temp_dir, ignore_errors=True)
temp_dir.mkdir(exist_ok=True)

if USE_UPLOAD and UPLOADED_FILE:
    # Use uploaded file
    print(colored("=> Using uploaded file...", Color.CYAN))
    start_time = time.time()

    uploaded_path = Path(UPLOADED_FILE)

    # Extract audio from uploaded video as 16-bit PCM, 44.1 kHz, stereo.
    audio_file = temp_dir / "audio.wav"
    success = run_command(
        "ffmpeg",
        "-y",  # overwrite without prompting; output is discarded unless DEBUG
        "-i", str(uploaded_path),
        "-vn",
        "-acodec", "pcm_s16le",
        "-ar", "44100",
        "-ac", "2",
        str(audio_file),
        show_output=DEBUG
    )

    if not success:
        raise RuntimeError("Audio extraction failed")

    video_file = uploaded_path
    elapsed = int((time.time() - start_time) * 1000)
    print(colored(f"   [OK] {format_time(elapsed)}", Color.GREEN))
    print(colored(f"   Video: {video_file.name}", Color.GRAY))
    print(colored(f"   Audio: {audio_file.name}", Color.GRAY))
else:
    # Download from YouTube
    if not VIDEO_ID:
        # Fail early with an actionable message instead of a generic
        # "Download failed" from a URL without a video ID.
        raise RuntimeError(
            "No video specified: set VIDEO_INPUT, or enable USE_UPLOAD and upload a file"
        )

    print(colored("=> Downloading video and audio...", Color.CYAN))
    start_time = time.time()

    # "bv*,ba" asks yt-dlp for two separate downloads (best video, best
    # audio); each is saved under its format id.
    success = run_command(
        "yt-dlp",
        "-f", "bv*,ba",
        "-o", f"{temp_dir}/%(format_id)s.%(ext)s",
        url,
        show_output=DEBUG
    )

    elapsed = int((time.time() - start_time) * 1000)

    if not success:
        raise RuntimeError("Download failed")

    print(colored(f"   [OK] {format_time(elapsed)}", Color.GREEN))

    # Find downloaded files. ".webm" can be either stream, so the largest
    # video-like file is taken as the video and the audio is picked from the rest.
    video_files = [f for f in temp_dir.iterdir() if f.suffix in ['.mp4', '.webm', '.mkv']]
    video_file = max(video_files, key=lambda f: f.stat().st_size) if video_files else None

    audio_files = [f for f in temp_dir.iterdir()
                  if f != video_file and f.suffix in ['.opus', '.m4a', '.webm', '.mp3']]
    audio_file = audio_files[0] if audio_files else None

    if not (video_file and audio_file):
        raise RuntimeError("Video or audio file not found")

    print(colored(f"   Video: {video_file.name}", Color.GRAY))
    print(colored(f"   Audio: {audio_file.name}", Color.GRAY))

## 🎤 Step 2: Separate Vocals

Use AI to extract vocals from the audio track.

In [None]:
# Step 2: run audio-separator on the audio track and locate the vocals stem.
print(colored("=> Separating vocals (this may take a while)...", Color.CYAN))
start_time = time.time()

separator_args = [
    "audio-separator",
    str(audio_file),
    "--model_filename", MODEL,
    "--mdx_batch_size", str(BATCH_SIZE),
    "--output_dir", str(temp_dir),
    "--output_format", "WAV",
]
success = run_command(*separator_args, show_output=DEBUG)

elapsed = int((time.time() - start_time) * 1000)

if not success:
    raise Exception("Separation failed")

print(colored(f"   [OK] {format_time(elapsed)}", Color.GREEN))

# Find vocals file: the first .wav in the scratch directory whose name
# contains "vocals" (case-insensitive).
vocals_file = next(
    (f for f in temp_dir.iterdir()
     if 'vocals' in f.name.lower() and f.suffix == '.wav'),
    None,
)

if not vocals_file:
    raise Exception("Vocals file not found")

print(colored(f"   Vocals: {vocals_file.name}", Color.GRAY))

## 📝 Step 3: Get Video Title

Fetch the video title for a nice filename.

In [None]:
# Step 3: choose a file-name-safe title and derive the final output path.
print(colored("=> Getting video title...", Color.CYAN))
start_time = time.time()

if USE_UPLOAD and UPLOADED_FILE:
    # An uploaded file has no YouTube page to query; name the output after
    # the upload instead of issuing a lookup that cannot succeed.
    raw_title = Path(UPLOADED_FILE).stem
else:
    raw_title = run_output("yt-dlp", "--print", "title", url)

# Fall back to the video ID, then to a fixed name, so the output file never
# ends up with an empty stem.
raw_title = raw_title or VIDEO_ID or "output"

# Replace characters that are invalid in file names (path separators and the
# Windows-reserved set); applied to the fallback as well.
title = re.sub(r'[\\/:*?"<>|]', '_', raw_title).strip() or "output"

elapsed = int((time.time() - start_time) * 1000)
print(colored(f"   [OK] {format_time(elapsed)}", Color.GREEN))
print(colored(f"   Title: {title}", Color.GRAY))

final_output = out_dir / f"{title}-vocals-only.mp4"

## 🎬 Step 4: Merge Video with Vocals

Combine the original video with the extracted vocals.

In [None]:
# Step 4: mux the original video stream with the separated vocals track.
print(colored("=> Merging video with vocals...", Color.CYAN))
start_time = time.time()

merge_args = [
    "ffmpeg",
    "-i", str(video_file),    # input 0: original video
    "-i", str(vocals_file),   # input 1: separated vocals
    "-c:v", "copy",           # keep the video stream as-is (no re-encode)
    "-c:a", "aac",
    "-b:a", "192k",
    "-map", "0:v:0",          # video from input 0
    "-map", "1:a:0",          # audio from input 1
    "-shortest",
    "-y",
    str(final_output),
]
success = run_command(*merge_args, show_output=DEBUG)

elapsed = int((time.time() - start_time) * 1000)

if not success:
    raise Exception("Merge failed")

print(colored(f"   [OK] {format_time(elapsed)}", Color.GREEN))

## 🧹 Step 5: Cleanup

Remove temporary files.

In [None]:
# Step 5: remove the scratch directory. Cleanup is best effort: the merged
# video already exists, so a failure here is reported as a warning instead of
# aborting the notebook before the result is shown.
print(colored("=> Cleaning up...", Color.CYAN))
start_time = time.time()

import shutil

cleanup_error = None
try:
    shutil.rmtree(temp_dir)
except FileNotFoundError:
    # Already gone (e.g. this cell was run twice) — nothing left to remove.
    pass
except OSError as e:
    cleanup_error = e

elapsed = int((time.time() - start_time) * 1000)
if cleanup_error is None:
    print(colored(f"   [OK] {format_time(elapsed)}", Color.GREEN))
else:
    print(colored(f"   [WARN] Could not remove {temp_dir}: {cleanup_error}", Color.YELLOW))

## ✅ Success!

Your vocals-only video is ready!

In [None]:
# Final report: announce success and show where the merged file was written.
banner_style = Color.GREEN + Color.BOLD
print(colored("\n[SUCCESS]", banner_style))
print(colored(f"Output: {final_output.absolute()}", Color.YELLOW))

# Display video player in Jupyter (best effort: any failure becomes a notice).
try:
    from IPython.display import Video
    player = Video(str(final_output))
    display(player)
except Exception as preview_error:
    print(f"\nVideo preview not available: {preview_error}")

# Auto-download in Colab
try:
    from google.colab import files
    print(f"\nüì• Downloading {final_output.name}...")
    files.download(str(final_output))
    print("‚úÖ Download started! Check your browser's downloads.")
except ImportError:
    # google.colab is not importable here, so there is no auto-download.
    pass
except Exception as download_error:
    print(f"\n‚ö†Ô∏è Could not auto-download: {download_error}")
    print(f"You can manually download from: {final_output.absolute()}")

---

## 🌐 Web Interface (Optional)

Run a Gradio web interface for easier interaction:

In [None]:
# Install gradio if not already installed
try:
    import gradio as gr
except ImportError:
    !pip install -q gradio
    import gradio as gr

def gradio_process(video_input, model, batch_size):
    """Download a YouTube video, keep only its vocals, and return the result.

    Gradio callback for the "Process" button. Each pipeline step (download,
    separation, merge) is checked, so a failure is reported by name. Every
    call works in its own scratch directory, which is removed afterwards
    whether or not the call succeeded.

    Args:
        video_input: YouTube URL or bare video ID entered by the user.
        model: Model file name passed to audio-separator.
        batch_size: Value for audio-separator's ``--mdx_batch_size`` option.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is the path
        of the merged MP4 as a string, or ``None`` when any step failed, in
        which case the status message carries the error text.
    """
    import shutil
    import tempfile
    import time

    video_exts = ('.mp4', '.webm', '.mkv')
    audio_exts = ('.opus', '.m4a', '.webm', '.mp3')

    temp_dir = None
    try:
        if not video_input or not video_input.strip():
            raise ValueError("Please enter a YouTube URL or video ID")

        video_id = extract_video_id(video_input.strip())
        url = f"https://www.youtube.com/watch?v={video_id}"

        out_dir = Path(OUTPUT_DIR)
        out_dir.mkdir(parents=True, exist_ok=True)
        # A private scratch directory per call: overlapping requests and
        # leftovers from failed runs cannot be mistaken for this download.
        temp_dir = Path(tempfile.mkdtemp(prefix="temp-", dir=out_dir))

        # Download
        if not run_command("yt-dlp", "-f", "bv*,ba", "-o", f"{temp_dir}/%(format_id)s.%(ext)s", url):
            raise RuntimeError("Download failed")

        # ".webm" can be either stream: the largest video-like file is the
        # video, and the audio is picked from the remaining files.
        video_file = max(
            (f for f in temp_dir.iterdir() if f.suffix in video_exts),
            key=lambda f: f.stat().st_size,
            default=None,
        )
        audio_file = next(
            (f for f in temp_dir.iterdir() if f != video_file and f.suffix in audio_exts),
            None,
        )
        if video_file is None or audio_file is None:
            raise RuntimeError("Video or audio file not found")

        # Separate
        separated = run_command(
            "audio-separator", str(audio_file),
            "--model_filename", model,
            # int() guards against the slider delivering a float such as 4.0.
            "--mdx_batch_size", str(int(batch_size)),
            "--output_dir", str(temp_dir),
            "--output_format", "WAV",
        )
        if not separated:
            raise RuntimeError("Separation failed")

        vocals_file = next(
            (f for f in temp_dir.iterdir() if 'vocals' in f.name.lower() and f.suffix == '.wav'),
            None,
        )
        if vocals_file is None:
            raise RuntimeError("Vocals file not found")

        # Get title (falls back to the video ID; unsafe file-name characters replaced)
        title = run_output("yt-dlp", "--print", "title", url) or video_id
        title = re.sub(r'[\\/:*?"<>|]', '_', title).strip() or "output"
        final_output = out_dir / f"{title}-vocals-only.mp4"

        # Merge
        merged = run_command(
            "ffmpeg", "-i", str(video_file), "-i", str(vocals_file),
            "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
            "-map", "0:v:0", "-map", "1:a:0", "-shortest", "-y",
            str(final_output),
        )
        if not merged:
            raise RuntimeError("Merge failed")

        return str(final_output), f"‚úÖ Success! Saved to: {final_output.name}"
    except Exception as e:
        return None, f"‚ùå Error: {str(e)}"
    finally:
        # Cleanup with retry: removal can fail transiently while file handles
        # are still being released, so pause and try again a few times.
        if temp_dir is not None:
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    shutil.rmtree(temp_dir)
                    break
                except FileNotFoundError:
                    break
                except OSError as e:
                    if attempt < max_retries - 1:
                        time.sleep(1)
                    else:
                        print(f"Warning: Could not clean up temp files: {e}")

# Create interface
with gr.Blocks(title="YouTube Music Remover") as demo:
    gr.Markdown("# üé¨ YouTube Music Remover")
    gr.Markdown("Strip background music from YouTube videos")
    
    with gr.Row():
        with gr.Column():
            video_input = gr.Textbox(label="YouTube URL or Video ID", placeholder="Enter URL or video ID")
            model = gr.Dropdown(
                choices=["UVR-MDX-NET-Inst_HQ_3.onnx", "Kim_Vocal_2.onnx", "UVR_MDXNET_KARA_2.onnx"],
                value="UVR-MDX-NET-Inst_HQ_3.onnx",
                label="Model"
            )
            batch_size = gr.Slider(1, 8, 4, step=1, label="Batch Size")
            btn = gr.Button("Process", variant="primary")
        
        with gr.Column():
            output_video = gr.Video(label="Output")
            status = gr.Textbox(label="Status", lines=2)
    
    btn.click(fn=gradio_process, inputs=[video_input, model, batch_size], outputs=[output_video, status])

# Launch with share=True for public URL
demo.launch(share=True)