# In [None]:
#@title <h1>🎙️ Voice Extractor Pro - Advanced Audio Processing Tool</h1> { display-mode: "form" }
#@markdown <p style="text-align: center; color: #666; font-size: 14px;">Extract clean voice segments from multi-speaker recordings with AI</p>

import os
import sys
import json
import time
import subprocess
import shutil
import pandas as pd
import zipfile
from pathlib import Path
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, HTML, Audio, Javascript, clear_output
from google.colab import files, drive
import warnings
import threading
warnings.filterwarnings('ignore')

# === MOUNT DRIVE IMMEDIATELY ===
# The mount is skipped when the mount point already exists, so re-running the
# cell does not call drive.mount() a second time.
print("🚀 Loading Voice Extractor Pro...")
print("📁 Connecting to Google Drive...")
_drive_mount_point = '/content/drive'
if not Path(_drive_mount_point).exists():
    drive.mount(_drive_mount_point)
print("✅ Google Drive connected!")
print("=" * 50)

# === IMMEDIATE GUI SETUP ===
def inject_custom_css():
    """Inject the stylesheet used by the Voice Extractor UI.

    Displays a single ``<style>`` element through IPython's ``display``. The
    rules define the CSS classes referenced by the UI markup in this file
    (``section-header``, ``card``, ``tab`` / ``tab-container``,
    ``progress-step``, ``tooltip`` and the error / success / warning message
    boxes) and override several widget and page defaults with ``!important``
    to force a dark theme.
    """
    css = """
    <style>
    /* Modern UI Theme */
    .section-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 15px 20px;
        border-radius: 10px;
        margin: 20px 0 15px 0;
        font-weight: 600;
        font-size: 18px;
    }
    
    .card {
        background: #2d3748;
        border-radius: 10px;
        padding: 20px;
        margin: 15px 0;
        box-shadow: 0 4px 6px rgba(0,0,0,0.3);
        border: 1px solid #4a5568;
        color: white;
    }
    
    .file-input-group {
        display: flex;
        gap: 10px;
        align-items: center;
        margin: 10px 0;
    }
    
    .file-input-group input {
        flex: 1;
        padding: 8px 12px;
        border: 1px solid #4a5568;
        border-radius: 6px;
        font-family: monospace;
        font-size: 13px;
        background: #1a202c;
        color: white;
    }
    
    .btn-primary {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        padding: 12px 30px;
        border-radius: 8px;
        font-weight: 600;
        cursor: pointer;
        transition: all 0.3s ease;
    }
    
    .btn-primary:hover:not(:disabled) {
        transform: translateY(-2px);
        box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
    }
    
    .btn-primary:disabled {
        opacity: 0.6;
        cursor: not-allowed;
    }
    
    .error-message {
        background: #feb2b2;
        color: #742a2a;
        border-left: 4px solid #f56565;
        padding: 15px;
        border-radius: 6px;
        margin: 10px 0;
    }
    
    .success-message {
        background: #9ae6b4;
        color: #22543d;
        border-left: 4px solid #38b2ac;
        padding: 15px;
        border-radius: 6px;
        margin: 10px 0;
    }
    
    .warning-message {
        background: #fbd38d;
        color: #744210;
        border-left: 4px solid #ed8936;
        padding: 15px;
        border-radius: 6px;
        margin: 10px 0;
    }
    
    .progress-step {
        display: flex;
        align-items: center;
        padding: 12px;
        margin: 5px 0;
        border-radius: 8px;
        background: #2d3748;
        color: white;
    }
    
    .progress-step.active {
        background: #3182ce;
        border-left: 4px solid #667eea;
    }
    
    .progress-step.completed {
        background: #38a169;
        border-left: 4px solid #38b2ac;
    }
    
    .tab-container {
        display: flex;
        gap: 10px;
        margin-bottom: 20px;
        border-bottom: 2px solid #4a5568;
    }
    
    .tab {
        padding: 10px 20px;
        cursor: pointer;
        border-bottom: 3px solid transparent;
        transition: all 0.3s ease;
        font-weight: 500;
        color: white;
    }
    
    .tab:hover {
        color: #667eea;
    }
    
    .tab.active {
        color: #667eea;
        border-bottom-color: #667eea;
    }
    
    .tooltip {
        position: relative;
        display: inline-block;
        margin-left: 5px;
        color: #a0aec0;
        cursor: help;
    }
    
    .tooltip .tooltiptext {
        visibility: hidden;
        width: 250px;
        background-color: #1a202c;
        color: #fff;
        text-align: left;
        border-radius: 6px;
        padding: 10px;
        position: absolute;
        z-index: 1;
        bottom: 125%;
        left: 50%;
        margin-left: -125px;
        opacity: 0;
        transition: opacity 0.3s;
        font-size: 12px;
        border: 1px solid #4a5568;
    }
    
    .tooltip:hover .tooltiptext {
        visibility: visible;
        opacity: 1;
    }
    
    /* Hide Jupyter default styles and white backgrounds */
    .jupyter-widgets-view { 
        margin: 0 !important; 
        background: transparent !important;
    }
    .widget-label { display: none !important; }
    .widget-box { background: transparent !important; }
    .widget-vbox { background: transparent !important; }
    .widget-hbox { background: transparent !important; }
    
    /* Dark theme for all elements */
    body { background-color: #1a202c !important; color: white !important; }
    .cell { background: transparent !important; }
    </style>
    """
    # Emit the stylesheet as HTML cell output.
    display(HTML(css))

# === AUDIO CONVERSION UTILITY ===
def convert_to_wav(input_path, output_path=None):
    """Convert an audio file to 16-bit PCM WAV at 44.1 kHz using ffmpeg.

    Args:
        input_path: Path of the source audio file.
        output_path: Destination path. Defaults to ``input_path`` with its
            suffix replaced by ``.wav``.

    Returns:
        ``output_path`` as given, or the derived default path as a string.

    Raises:
        Exception: With a message starting ``"Audio conversion failed: "``
            when the output would be the input file itself, when ffmpeg
            cannot be started, or when ffmpeg exits with a non-zero status.
            The underlying error is chained as ``__cause__``.
    """
    if output_path is None:
        output_path = str(Path(input_path).with_suffix('.wav'))

    try:
        # ffmpeg cannot convert a file onto itself; fail early with a clear
        # message instead of relying on ffmpeg's own error output.
        if os.path.abspath(input_path) == os.path.abspath(output_path):
            raise ValueError(
                f"input and output are the same file: {output_path}"
            )

        cmd = [
            'ffmpeg',
            '-nostdin',            # never wait on stdin when run from a kernel
            '-hide_banner',        # keep the captured error text short
            '-loglevel', 'error',
            '-i', str(input_path),
            '-acodec', 'pcm_s16le',
            '-ar', '44100',
            '-y', str(output_path),
        ]
        # errors='replace' keeps undecodable bytes in ffmpeg's stderr from
        # raising while the output is captured as text.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors='replace'
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"FFmpeg conversion failed: {result.stderr.strip()}"
            )
        return output_path
    except Exception as e:
        # Callers only rely on a generic Exception; chain the cause so the
        # original traceback stays available.
        raise Exception(f"Audio conversion failed: {str(e)}") from e

# === BACKGROUND SETUP CLASS ===
class BackgroundSetup:
    """Install Voice Extractor and its dependencies on a background thread.

    Progress is reported through ``ui_callback(event_type, data)`` where
    ``event_type`` is ``"setup_progress"`` (data: status text),
    ``"setup_complete"`` (data: ``None``) or ``"setup_error"`` (data: error
    text). The same information is mirrored on the ``setup_progress``,
    ``setup_complete`` and ``setup_error`` attributes.
    """

    REPO_URL = 'https://github.com/ReisCook/Voice_Extractor.git'
    REPO_DIR = 'Voice_Extractor'
    # Written only after every step succeeded. The clone directory alone is
    # not proof of a finished install: a failed pip step leaves it behind.
    SETUP_MARKER = os.path.join(REPO_DIR, '.setup_complete')
    ESSENTIAL_PACKAGES = ('huggingface_hub', 'ipywidgets', 'pandas')

    def __init__(self, ui_callback):
        """Store the callback and initialise the status attributes."""
        self.ui_callback = ui_callback
        self.setup_complete = False
        self.setup_error = None
        self.setup_progress = "Starting..."

    def run_setup(self):
        """Run setup in background thread"""
        thread = threading.Thread(target=self._setup_worker, daemon=True)
        thread.start()

    def _report_progress(self, text):
        """Record ``text`` as the current status and forward it to the UI."""
        self.setup_progress = text
        self.ui_callback("setup_progress", text)

    def _finish(self, text):
        """Mark setup as complete and notify the UI."""
        self.setup_progress = text
        self.setup_complete = True
        self.ui_callback("setup_complete", None)

    @staticmethod
    def _run_checked(cmd, what):
        """Run ``cmd`` and raise ``RuntimeError`` with its output on failure."""
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors='replace'
        )
        if result.returncode != 0:
            detail = (result.stderr or result.stdout or '').strip()
            # The last few lines usually carry the actual error.
            tail = '\n'.join(detail.splitlines()[-5:])
            raise RuntimeError(
                f"{what} failed (exit code {result.returncode}): {tail}"
            )

    def _setup_worker(self):
        """Background setup worker.

        Clones the repository when missing, installs the essential packages
        and the repository requirements, then writes ``SETUP_MARKER``. Any
        failure is reported through the ``"setup_error"`` callback instead
        of being raised.
        """
        try:
            self._report_progress("Checking existing installation...")

            # Quick check if already set up
            if os.path.exists(self.SETUP_MARKER):
                self._finish("✅ Already installed!")
                return

            # Clone repo (skipped when a previous run already cloned it)
            if not os.path.exists(self.REPO_DIR):
                self._report_progress("📥 Cloning repository...")
                self._run_checked(
                    ['git', 'clone', '-q', self.REPO_URL, self.REPO_DIR],
                    "Cloning the repository",
                )

            # Install essential packages first
            self._report_progress("📦 Installing essential packages...")
            self._run_checked(
                [sys.executable, '-m', 'pip', 'install', '-q',
                 *self.ESSENTIAL_PACKAGES],
                "Installing essential packages",
            )

            # Install main requirements
            self._report_progress("📦 Installing Voice Extractor dependencies...")
            self._run_checked(
                [sys.executable, '-m', 'pip', 'install', '-q', '-r',
                 os.path.join(self.REPO_DIR, 'requirements.txt')],
                "Installing Voice Extractor dependencies",
            )

            try:
                with open(self.SETUP_MARKER, 'w', encoding='utf-8') as marker:
                    marker.write('ok\n')
            except OSError:
                # Best effort: without the marker the next run only repeats
                # the pip steps.
                pass

            self._finish("✅ Setup complete!")

        except Exception as e:
            self.setup_error = str(e)
            self.ui_callback("setup_error", self.setup_error)

# === FAST-LOADING UI CLASS ===
class VoiceExtractorUI:
    """Simplified UI controller for Voice Extractor"""
    
    def __init__(self):
        """Initialise state, render the UI, then start the background setup.

        The UI is built before ``run_setup()`` is called because the setup
        callback writes into widgets that ``setup_ui()`` creates.
        """
        # Run / setup state
        self.is_processing = False
        self.process = None
        self.start_time = None
        self.setup_complete = False
        self.setup_error = None

        # User-provided values (filled in by authentication / validation)
        self.hf_token = None
        self.input_audio = None
        self.reference_audio = None
        self.target_name = None
        self.output_dir = "/content/drive/MyDrive/VoiceExtractor_Results"

        # Converted WAV files by role, removed again after a run
        self.converted_files = {}

        # Background installer reporting back through the setup callback
        self.bg_setup = BackgroundSetup(self._handle_setup_callback)

        self.setup_ui()
        self.bg_setup.run_setup()
        
    def _handle_setup_callback(self, event_type, data):
        """Handle callbacks from background setup"""
        if event_type == "setup_progress":
            with self.setup_status_output:
                clear_output()
                display(HTML(f'<div style="color: #667eea; font-weight: 500;">{data}</div>'))
        elif event_type == "setup_complete":
            self.setup_complete = True
            with self.setup_status_output:
                clear_output()
                display(HTML('<div style="color: #38a169; font-weight: 500;">✅ Setup complete! You can now use all features.</div>'))
            self._validate_inputs()
        elif event_type == "setup_error":
            self.setup_error = data
            with self.setup_status_output:
                clear_output()
                display(HTML(f'<div style="color: #f56565;">❌ Setup failed: {data}</div>'))
    
    def setup_ui(self):
        """Assemble and display the complete interface.

        Injects the stylesheet, then stacks the banner, the setup status
        panel, the tab area and the (initially hidden) progress panel in one
        vertical container and displays it.
        """
        inject_custom_css()

        banner_markup = """
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 
                    color: white; border-radius: 15px; margin-bottom: 20px;">
            <h1 style="margin: 0; font-size: 36px;">🎙️ Voice Extractor Pro</h1>
            <p style="margin: 10px 0 0 0; opacity: 0.9;">Extract clean voice segments from multi-speaker recordings</p>
        </div>
        """
        banner = widgets.HTML(banner_markup)

        # Status area the background installer writes into
        self.setup_status_output = widgets.Output()
        status_title = widgets.HTML('<div class="section-header">🚀 System Setup</div>')
        status_hint = widgets.HTML('<p style="color: white;">Setting up Voice Extractor in the background...</p>')
        status_panel = widgets.VBox([status_title, status_hint, self.setup_status_output])

        # Input widgets must exist before the tabs that embed them
        self.create_main_components()
        self.tab_widget = self._create_tabs()
        self.progress_panel = self._create_progress_panel()

        self.main_container = widgets.VBox(
            [banner, status_panel, self.tab_widget, self.progress_panel]
        )
        display(self.main_container)
    
    def create_main_components(self):
        """Create every input, settings and action widget used by the tabs.

        The widgets are stored as attributes and laid out later by the
        ``_create_*_tab`` methods. All four fields checked by
        ``_validate_inputs`` (input audio, reference audio, speaker name and
        output directory) trigger re-validation when their value changes.
        """
        def revalidate(change):
            # The change payload is not needed; validation re-reads all fields.
            self._validate_inputs()

        # Authentication components
        self.hf_token_input = widgets.Password(
            placeholder='hf_...',
            layout=widgets.Layout(width='400px')
        )

        self.auth_btn = widgets.Button(
            description='🔐 Authenticate',
            button_style='primary',
            layout=widgets.Layout(width='150px')
        )
        self.auth_btn.on_click(self._authenticate)

        self.auth_status = widgets.HTML()

        # File inputs
        self.input_audio_text = widgets.Text(
            placeholder='/content/drive/MyDrive/audio.mp3 (MP3, WAV, M4A, FLAC supported)',
            description='',
            layout=widgets.Layout(width='100%')
        )
        self.input_audio_text.observe(revalidate, names='value')

        self.reference_audio_text = widgets.Text(
            placeholder='/content/drive/MyDrive/reference.wav (MP3, WAV, M4A, FLAC supported)',
            description='',
            layout=widgets.Layout(width='100%')
        )
        self.reference_audio_text.observe(revalidate, names='value')

        self.target_name_input = widgets.Text(
            placeholder='e.g., JohnDoe',
            description='',
            layout=widgets.Layout(width='300px')
        )
        self.target_name_input.observe(revalidate, names='value')

        self.output_dir_text = widgets.Text(
            value='/content/drive/MyDrive/VoiceExtractor_Results',
            description='',
            layout=widgets.Layout(width='100%')
        )
        # Without this observer, editing or clearing the output directory
        # would leave the validation message, the start button state and
        # self.output_dir stale until another field changed.
        self.output_dir_text.observe(revalidate, names='value')

        # Settings components
        self.sample_rate = widgets.Dropdown(
            options=[16000, 22050, 24000, 44100, 48000],
            value=24000,
            layout=widgets.Layout(width='200px')
        )

        self.whisper_model = widgets.Dropdown(
            options=['tiny', 'base', 'small', 'medium', 'large', 'large-v2', 'large-v3'],
            value='large-v3',
            layout=widgets.Layout(width='200px')
        )

        self.language = widgets.Text(
            value='en',
            placeholder='en, es, fr, etc.',
            layout=widgets.Layout(width='100px')
        )

        self.skip_bandit = widgets.Checkbox(
            value=True,
            description='Skip Bandit-v2 Vocal Separation (recommended for Colab)',
            indent=False
        )

        self.min_duration = widgets.FloatSlider(
            value=1.0, min=0.5, max=10.0, step=0.1,
            description='',
            layout=widgets.Layout(width='300px')
        )

        self.verification_threshold = widgets.FloatSlider(
            value=0.7, min=0.0, max=1.0, step=0.01,
            description='',
            layout=widgets.Layout(width='300px')
        )

        self.dry_run = widgets.Checkbox(value=False, description='Dry run (60s only)', indent=False)
        self.debug = widgets.Checkbox(value=False, description='Debug logging', indent=False)

        # Start button; stays disabled until validation passes
        self.start_btn = widgets.Button(
            description='🚀 Start Extraction',
            button_style='success',
            layout=widgets.Layout(width='200px', height='40px'),
            disabled=True
        )
        self.start_btn.on_click(self._start_extraction)

        self.validation_msg = widgets.HTML()

        # Results components
        self.results_output = widgets.Output()
    
    def _create_tabs(self):
        """Build the tab bar and the three tab panels.

        The tab bar is plain HTML whose ``onclick`` handlers call a
        ``showTab`` JavaScript function. That function updates the active
        tab styling and invokes the ``switch_tab`` kernel callback, which is
        registered here and handled by ``_switch_tab``.

        Returns:
            A ``VBox`` holding the tab bar and the stacked panels, with only
            the setup panel visible.
        """
        tab_bar_markup = """
            <div class="tab-container">
                <div class="tab active" onclick="showTab('setup')">🚀 Setup</div>
                <div class="tab" onclick="showTab('settings')">⚙️ Settings</div>
                <div class="tab" onclick="showTab('results')">📊 Results</div>
            </div>
            """
        switcher_script = """
        window.showTab = function(tabName) {
            // Update tab buttons
            document.querySelectorAll('.tab').forEach(tab => {
                tab.classList.remove('active');
                if (tab.textContent.toLowerCase().includes(tabName.toLowerCase()) || 
                    (tabName === 'setup' && tab.textContent.includes('Setup'))) {
                    tab.classList.add('active');
                }
            });
            
            // Update visibility via Python callback
            google.colab.kernel.invokeFunction('switch_tab', [tabName], {});
        }
        """

        # Clickable tab bar
        tab_bar = widgets.Output()
        with tab_bar:
            display(HTML(tab_bar_markup))

        # Panels, stacked in one container
        self.setup_tab = self._create_setup_tab()
        self.settings_tab = self._create_settings_tab()
        self.results_tab = self._create_results_tab()
        self.tab_contents = widgets.VBox(
            [self.setup_tab, self.settings_tab, self.results_tab]
        )

        # Only the setup panel is visible at first
        for hidden_panel in (self.settings_tab, self.results_tab):
            hidden_panel.layout.display = 'none'

        # Browser-side switcher, then the kernel-side callback it invokes
        display(Javascript(switcher_script))
        from google.colab import output as colab_output
        colab_output.register_callback('switch_tab', self._switch_tab)

        return widgets.VBox([tab_bar, self.tab_contents])
    
    def _switch_tab(self, tab_name):
        """Switch between tabs"""
        # Hide all tabs
        self.setup_tab.layout.display = 'none'
        self.settings_tab.layout.display = 'none'
        self.results_tab.layout.display = 'none'
        
        # Show selected tab
        if tab_name == 'setup':
            self.setup_tab.layout.display = 'block'
        elif tab_name == 'settings':
            self.settings_tab.layout.display = 'block'
        elif tab_name == 'results':
            self.results_tab.layout.display = 'block'
    
    def _create_setup_tab(self):
        """Lay out the setup tab.

        Groups the authentication, input file, speaker name and output
        location widgets into cards under section headers, followed by the
        centred start button and the validation message.
        """
        def as_card(*children):
            # A VBox carrying the 'card' CSS class.
            box = widgets.VBox(list(children))
            box.add_class('card')
            return box

        auth_card = as_card(
            widgets.HTML('<p>Enter your HuggingFace token to access required models:</p>'),
            widgets.HBox([self.hf_token_input, self.auth_btn]),
            self.auth_status,
            widgets.HTML('<p style="font-size: 12px; color: #a0aec0; margin-top: 10px;">Need a token? <a href="https://huggingface.co/settings/tokens" target="_blank" style="color: #667eea;">Create one here</a></p>'),
        )

        input_audio_card = as_card(
            widgets.HTML('<h4>🎵 Audio File to Process</h4>'),
            widgets.HTML('<p style="font-size: 12px; color: #a0aec0; margin-bottom: 10px;">Enter the full path to your audio file (MP3, WAV, M4A, FLAC supported - will auto-convert to WAV)</p>'),
            self.input_audio_text,
        )

        reference_audio_card = as_card(
            widgets.HTML('<h4>🎤 Reference Audio</h4>'),
            widgets.HTML('<p style="font-size: 12px; color: #a0aec0; margin-bottom: 10px;">5-30 seconds of clean audio with ONLY the target speaker (MP3, WAV, M4A, FLAC supported)</p>'),
            self.reference_audio_text,
        )

        target_name_card = as_card(
            widgets.HTML('<h4>Speaker Name</h4>'),
            self.target_name_input,
        )

        output_dir_card = as_card(self.output_dir_text)

        start_row = widgets.HBox(
            [self.start_btn],
            layout=widgets.Layout(justify_content='center'),
        )
        start_area = widgets.VBox(
            [start_row, self.validation_msg],
            layout=widgets.Layout(margin='20px 0 0 0'),
        )

        return widgets.VBox([
            widgets.HTML('<div class="section-header">🔐 Authentication</div>'),
            auth_card,
            widgets.HTML('<div class="section-header">📂 Input Files</div>'),
            input_audio_card,
            reference_audio_card,
            widgets.HTML('<div class="section-header">👤 Target Information</div>'),
            target_name_card,
            widgets.HTML('<div class="section-header">💾 Output Location</div>'),
            output_dir_card,
            start_area,
        ])
   
    def _create_settings_tab(self):
        """Lay out the settings tab.

        Three cards under section headers: audio settings (sample rate,
        transcription model, language), performance (Bandit-v2 toggle with a
        memory warning) and advanced options (duration and verification
        sliders plus the dry-run and debug checkboxes).
        """
        def as_card(*children):
            # A VBox carrying the 'card' CSS class.
            box = widgets.VBox(list(children))
            box.add_class('card')
            return box

        def titled(title_markup, control):
            # A bold caption stacked above its control.
            return widgets.VBox([widgets.HTML(title_markup), control])

        audio_settings_card = as_card(
            widgets.HBox([
                titled('<b>Output Sample Rate</b>', self.sample_rate),
                titled('<b>Transcription Model</b>', self.whisper_model),
                titled('<b>Language</b>', self.language),
            ])
        )

        performance_card = as_card(
            self.skip_bandit,
            widgets.HTML('<div class="warning-message" style="margin-top: 10px;">⚠️ Bandit-v2 uses significant memory. Keep this enabled for Colab.</div>'),
        )

        duration_row = self._create_param_row('Min Duration (s)', self.min_duration, 'Minimum segment length to keep')
        threshold_row = self._create_param_row('Verification Threshold', self.verification_threshold, 'Speaker verification confidence (0.7 = 70%)')
        toggles = widgets.VBox(
            [self.dry_run, self.debug],
            layout=widgets.Layout(margin='15px 0 0 0'),
        )
        advanced_options_card = as_card(duration_row, threshold_row, toggles)

        return widgets.VBox([
            widgets.HTML('<div class="section-header">🎛️ Audio Settings</div>'),
            audio_settings_card,
            widgets.HTML('<div class="section-header">🚀 Performance</div>'),
            performance_card,
            widgets.HTML('<div class="section-header">🎯 Advanced Options</div>'),
            advanced_options_card,
        ])
   
    def _create_results_tab(self):
        """Return the results tab: a section header above ``results_output``."""
        heading = widgets.HTML('<div class="section-header">📊 Processing Results</div>')
        return widgets.VBox([heading, self.results_output])
   
    def _create_param_row(self, label, widget, tooltip):
        """Return one settings row: bold label, control, hover tooltip.

        ``label`` and ``tooltip`` are inserted into the markup as-is.
        """
        caption = widgets.HTML(f'<div style="width: 200px;"><b>{label}</b></div>')
        hint = widgets.HTML(f'<span class="tooltip">ℹ️<span class="tooltiptext">{tooltip}</span></span>')
        return widgets.HBox([caption, widget, hint])
   
    def _create_progress_panel(self):
        """Create the progress and log panel, hidden until a run starts.

        Stores the two ``Output`` widgets as ``self.progress_output`` and
        ``self.log_output`` (a fixed-height, scrolling area) and returns the
        enclosing ``VBox``.
        """
        self.progress_output = widgets.Output()
        self.log_output = widgets.Output(
            layout=dict(height='300px', overflow_y='scroll', width='100%')
        )

        log_card = widgets.VBox([self.log_output])
        log_card.add_class('card')

        progress_heading = widgets.HTML('<div class="section-header">📈 Progress</div>')
        log_heading = widgets.HTML('<div class="section-header">📜 Processing Log</div>')
        panel = widgets.VBox(
            [progress_heading, self.progress_output, log_heading, log_card]
        )

        # Revealed by _start_extraction
        panel.layout.display = 'none'
        return panel
   
    def _validate_inputs(self):
        """Validate all required inputs"""
        self.input_audio = self.input_audio_text.value.strip()
        self.reference_audio = self.reference_audio_text.value.strip()
        self.target_name = self.target_name_input.value.strip()
        self.output_dir = self.output_dir_text.value.strip()
        
        errors = []
        warnings = []
        
        # Check if setup is complete
        if not self.setup_complete:
            errors.append("System setup in progress...")
        
        if not self.hf_token:
            errors.append("HuggingFace authentication required")
        if not self.input_audio:
            errors.append("Input audio file path required")
        elif not Path(self.input_audio).exists():
            warnings.append("Input audio file not found (check path)")
        if not self.reference_audio:
            errors.append("Reference audio file path required")
        elif not Path(self.reference_audio).exists():
            warnings.append("Reference audio file not found (check path)")
        if not self.target_name:
            errors.append("Target speaker name required")
        if not self.output_dir:
            errors.append("Output directory required")
        
        # Check file extensions and note conversion
        if self.input_audio and Path(self.input_audio).exists():
            ext = Path(self.input_audio).suffix.lower()
            if ext in ['.mp3', '.m4a', '.flac', '.ogg']:
                warnings.append(f"Input file will be auto-converted from {ext.upper()} to WAV")
            elif ext not in ['.wav']:
                warnings.append(f"Input file format '{ext}' may not be supported")
        
        if self.reference_audio and Path(self.reference_audio).exists():
            ext = Path(self.reference_audio).suffix.lower()
            if ext in ['.mp3', '.m4a', '.flac', '.ogg']:
                warnings.append(f"Reference file will be auto-converted from {ext.upper()} to WAV")
            elif ext not in ['.wav']:
                warnings.append(f"Reference file format '{ext}' may not be supported")
        
        # Update UI
        if errors:
            self.validation_msg.value = f'<div class="error-message">❌ {" • ".join(errors)}</div>'
            self.start_btn.disabled = True
        elif warnings:
            self.validation_msg.value = f'<div class="warning-message">⚠️ {" • ".join(warnings)}</div>'
            self.start_btn.disabled = False
        else:
            self.validation_msg.value = '<div class="success-message">✅ Ready to process!</div>'
            self.start_btn.disabled = False
   
    def _authenticate(self, b):
        """Handle HuggingFace authentication"""
        token = self.hf_token_input.value.strip()
        if not token:
            self.auth_status.value = '<p style="color: #f56565;">Please enter a token</p>'
            return
        
        # Check if setup is complete before authenticating
        if not self.setup_complete:
            self.auth_status.value = '<p style="color: #ed8936;">⏳ Please wait for setup to complete...</p>'
            return
        
        try:
            # Import here to avoid errors if not yet installed
            from huggingface_hub import login
            login(token=token, add_to_git_credential=False)
            self.hf_token = token
            self.auth_status.value = '<p style="color: #38a169;">✅ Authentication successful!</p>'
            self._validate_inputs()
        except Exception as e:
            self.auth_status.value = f'<p style="color: #f56565;">❌ Authentication failed: {str(e)}</p>'
   
    def _prepare_audio_files(self):
        """Convert audio files to WAV if needed"""
        converted_input = self.input_audio
        converted_reference = self.reference_audio
        
        # Check and convert input audio
        input_ext = Path(self.input_audio).suffix.lower()
        if input_ext in ['.mp3', '.m4a', '.flac', '.ogg']:
            with self.log_output:
                print(f"🔄 Converting input audio from {input_ext.upper()} to WAV...")
            
            converted_input = str(Path(self.input_audio).with_suffix('.wav'))
            convert_to_wav(self.input_audio, converted_input)
            self.converted_files['input'] = converted_input
            
            with self.log_output:
                print(f"✅ Input audio converted to: {converted_input}")
        
        # Check and convert reference audio
        ref_ext = Path(self.reference_audio).suffix.lower()
        if ref_ext in ['.mp3', '.m4a', '.flac', '.ogg']:
            with self.log_output:
                print(f"🔄 Converting reference audio from {ref_ext.upper()} to WAV...")
            
            converted_reference = str(Path(self.reference_audio).with_suffix('.wav'))
            convert_to_wav(self.reference_audio, converted_reference)
            self.converted_files['reference'] = converted_reference
            
            with self.log_output:
                print(f"✅ Reference audio converted to: {converted_reference}")
        
        return converted_input, converted_reference
   
    def _cleanup_converted_files(self):
        """Clean up temporarily converted files"""
        for file_type, file_path in self.converted_files.items():
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
                    with self.log_output:
                        print(f"🗑️ Cleaned up temporary {file_type} file: {file_path}")
            except Exception as e:
                with self.log_output:
                    print(f"⚠️ Could not clean up {file_path}: {str(e)}")
        
        self.converted_files.clear()
   
    def _update_progress(self, stage, status='active', message=''):
        """Redraw the pipeline progress list in ``self.progress_output``.

        Args:
            stage: Identifier of the current stage (first element of the
                tuples in ``stages`` below). Earlier stages are drawn as
                completed and later ones as pending. An unknown identifier
                draws every stage as pending instead of raising.
            status: ``'active'``, ``'completed'`` or ``'error'`` for the
                current stage; any other value draws it as pending.
            message: Detail text shown under the current stage for the
                ``'active'`` and ``'error'`` states. It is HTML-escaped, so
                error text containing ``<`` or ``&`` is shown literally.
        """
        from html import escape

        stages = [
            ('setup', '🔧', 'Setup & Initialization'),
            ('conversion', '🔄', 'Audio Conversion'),
            ('bandit', '🎵', 'Vocal Separation'),
            ('diarization', '👥', 'Speaker Diarization'),
            ('overlap', '🔍', 'Overlap Detection'),
            ('identification', '🎯', 'Speaker Identification'),
            ('extraction', '✂️', 'Segment Extraction'),
            ('verification', '✅', 'Speaker Verification'),
            ('transcription', '📝', 'Transcription'),
            ('finalization', '📦', 'Finalization')
        ]

        # Resolve the current position once; -1 means "unknown stage".
        stage_ids = [stage_id for stage_id, _, _ in stages]
        current = stage_ids.index(stage) if stage in stage_ids else -1
        detail = escape(str(message))

        parts = ['<div style="background: #2d3748; padding: 20px; border-radius: 10px; color: white;">']

        for position, (_, icon, stage_name) in enumerate(stages):
            is_current = position == current
            if is_current and status == 'active':
                parts.append(f'''
                <div class="progress-step active">
                    <div style="margin-right: 15px;">{icon}</div>
                    <div>
                        <div><b>{stage_name}</b></div>
                        <div style="font-size: 12px; color: #a0aec0;">{detail}</div>
                    </div>
                </div>
                ''')
            elif is_current and status == 'completed':
                parts.append(f'''
                <div class="progress-step completed">
                    <div style="margin-right: 15px;">✅</div>
                    <div>
                        <div><b>{stage_name}</b></div>
                        <div style="font-size: 12px; color: #a0aec0;">Completed</div>
                    </div>
                </div>
                ''')
            elif is_current and status == 'error':
                parts.append(f'''
                <div class="progress-step" style="background: #feb2b2; border-left: 4px solid #f56565; color: #742a2a;">
                    <div style="margin-right: 15px;">❌</div>
                    <div>
                        <div><b>{stage_name}</b></div>
                        <div style="font-size: 12px;">{detail}</div>
                    </div>
                </div>
                ''')
            elif position < current:
                parts.append(f'''
                <div class="progress-step completed">
                    <div style="margin-right: 15px;">✅</div>
                    <div><b>{stage_name}</b></div>
                </div>
                ''')
            else:
                # Pending stage; also the fallback for an unrecognised status
                # so the current row never disappears from the list.
                parts.append(f'''
                <div class="progress-step">
                    <div style="margin-right: 15px; opacity: 0.3;">{icon}</div>
                    <div style="opacity: 0.5;"><b>{stage_name}</b></div>
                </div>
                ''')

        # Add elapsed time
        if self.start_time:
            minutes, seconds = divmod(time.time() - self.start_time, 60)
            parts.append(
                f'<div style="text-align: center; margin-top: 20px; color: #a0aec0;">Elapsed time: {int(minutes)}m {int(seconds)}s</div>'
            )

        parts.append('</div>')

        with self.progress_output:
            clear_output()
            display(HTML(''.join(parts)))
   
    def _start_extraction(self, b):
        """Run one extraction and restore the UI afterwards.

        Does nothing while a run is already in progress. Otherwise the start
        button is switched to its "processing" state, the progress panel is
        shown, the log and results outputs are cleared and
        ``_run_extraction`` is called. Any exception is reported through
        ``_handle_error``. Whether the run succeeded or not, the button is
        restored, the inputs are re-validated and converted files are
        cleaned up.

        Args:
            b: Argument supplied by the caller (presumably the clicked
                button widget); not used.
        """
        if self.is_processing:
            return

        self.is_processing = True
        try:
            # All preparation lives inside the ``try`` so that a failure
            # while updating the UI cannot leave ``is_processing`` stuck on,
            # which would turn every later call into a no-op.
            self.start_time = time.time()
            self.start_btn.disabled = True
            self.start_btn.description = '🔄 Processing...'

            # Show progress panel
            self.progress_panel.layout.display = 'block'

            # Clear previous outputs
            with self.log_output:
                clear_output()
            with self.results_output:
                clear_output()

            self._run_extraction()
        except Exception as e:
            # Some exceptions stringify to '' (e.g. a bare StopIteration);
            # fall back to repr so the user never sees an empty message.
            self._handle_error(str(e) or repr(e))
        finally:
            self.is_processing = False
            self.start_btn.disabled = False
            self.start_btn.description = '🚀 Start Extraction'
            try:
                self._validate_inputs()
            finally:
                # Clean up converted files even if re-validation raises.
                self._cleanup_converted_files()
   
    def _run_extraction(self):
        """Run the Voice Extractor script and mirror its progress in the UI.

        Steps:
          1. Obtain the input/reference audio paths from
             ``_prepare_audio_files``.
          2. Build the ``Voice_Extractor/run_extractor.py`` command line from
             the current settings.
          3. Launch the script, print every output line inside the
             ``self.log_output`` context and advance the progress display
             whenever a line contains one of the known stage markers.
          4. On exit code 0 show the results; otherwise call
             ``_handle_error`` with the exit code and the last non-empty
             output line, so the handler's keyword-based hints can match the
             real failure reason.

        The child process is stored in ``self.process``. If monitoring is
        interrupted by an exception the child is terminated rather than left
        running in the background.
        """
        # Setup and conversion
        self._update_progress('setup', 'active', 'Initializing Voice Extractor...')

        # Prepare audio files (convert if needed)
        self._update_progress('conversion', 'active', 'Preparing audio files...')
        input_audio_path, reference_audio_path = self._prepare_audio_files()
        self._update_progress('conversion', 'completed')

        # Build command - simplified defaults
        cmd = [
            # Use the interpreter that runs this notebook so the child sees
            # the same installed packages; fall back to PATH lookup only if
            # the interpreter path is unavailable.
            sys.executable or 'python', 'Voice_Extractor/run_extractor.py',
            '--input-audio', input_audio_path,
            '--reference-audio', reference_audio_path,
            '--target-name', self.target_name,
            '--output-base-dir', self.output_dir,
            '--token', self.hf_token,
            '--output-sr', str(self.sample_rate.value),
            '--whisper-model', self.whisper_model.value,
            '--language', self.language.value,
            '--min-duration', str(self.min_duration.value),
            '--verification-threshold', str(self.verification_threshold.value),
            '--merge-gap', '0.25',
            '--concat-silence', '0.25',
            '--diar-model', 'pyannote/speaker-diarization-3.1'
        ]

        # Add flags
        if self.skip_bandit.value:
            cmd.append('--skip-bandit')
        cmd.append('--skip-rejected-transcripts')  # Default on for simplicity
        if self.dry_run.value:
            cmd.append('--dry-run')
        if self.debug.value:
            cmd.append('--debug')

        # Substring found in the script's output -> progress stage id
        stage_map = {
            'Reference Audio Preparation': 'setup',
            'Vocal Separation': 'bandit',
            'Speaker Diarization': 'diarization',
            'Overlapped Speech Detection': 'overlap',
            'Identifying Target Speaker': 'identification',
            'Slice & Verify': 'extraction',
            'Speaker Verification': 'verification',
            'Transcribing': 'transcription',
            'Concatenating': 'finalization'
        }

        # Run process
        self._update_progress('setup', 'completed')

        self.process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave errors with normal output
            text=True,
            bufsize=1  # line-buffered so progress updates arrive promptly
        )

        current_stage = 'bandit'
        last_line = ''  # last non-empty output line, used in the failure message
        try:
            # Monitor output
            with self.log_output:
                for line in self.process.stdout:
                    print(line, end='')

                    stripped = line.strip()
                    if stripped:
                        last_line = stripped

                    # Update progress based on output
                    for key, stage in stage_map.items():
                        if key in line:
                            if current_stage != stage:
                                self._update_progress(current_stage, 'completed')
                            current_stage = stage
                            self._update_progress(stage, 'active', stripped)
                            break

            exit_code = self.process.wait()
        finally:
            # Reached with the child still alive only when monitoring was
            # interrupted; stop it so it does not keep running unattended.
            if self.process.poll() is None:
                self.process.terminate()
                try:
                    self.process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    self.process.kill()
                    self.process.wait()
            self.process.stdout.close()

        if exit_code == 0:
            self._update_progress(current_stage, 'completed')
            self._update_progress('finalization', 'completed')
            self._show_results()
        else:
            error_msg = f"Process failed with exit code {exit_code}"
            if last_line:
                # Typically the exception line of the child's traceback.
                error_msg += f": {last_line}"
            self._handle_error(error_msg)
   
    def _show_results(self):
        """Render the outcome of a finished run inside the results output.

        After switching to the results tab, the run folder
        ``<output_dir>/<target_name>_<input stem>_extracted`` is looked up.
        If it is missing, an error banner is shown and nothing else happens.
        Otherwise the following are displayed, each only when present:
        a success banner, WAV counts for the verified and rejected segment
        folders, the first concatenated WAV, the first ten rows of the first
        transcript CSV, and finally the download package.
        """
        # Switch to results tab
        display(Javascript("showTab('results')"))

        with self.results_output:
            clear_output()

            # Locate the folder written for this run
            run_dir = Path(self.output_dir) / f"{self.target_name}_{Path(self.input_audio).stem}_extracted"
            if not run_dir.exists():
                display(HTML('<div class="error-message">Output directory not found</div>'))
                return

            display(HTML('<div class="success-message">✅ Extraction completed successfully!</div>'))

            # Statistics card: one bullet per segment folder that exists
            segment_folders = (
                ('✅ Verified segments',
                 run_dir / f"target_segments_solo/{self.target_name}_solo_verified"),
                ('❌ Rejected segments',
                 run_dir / f"target_segments_solo/{self.target_name}_solo_rejected_for_review"),
            )
            bullets = [
                f'<li>{label}: {len(list(folder.glob("*.wav")))}</li>'
                for label, folder in segment_folders
                if folder.exists()
            ]
            display(HTML(
                '<div class="card"><h3>📊 Extraction Statistics</h3><ul>'
                + ''.join(bullets)
                + '</ul></div>'
            ))

            # Audio player for the first concatenated WAV, if any
            audio_dir = run_dir / "concatenated_audio_solo_verified"
            wav_files = list(audio_dir.glob("*.wav")) if audio_dir.exists() else []
            if wav_files:
                display(HTML('<div class="card"><h3>🎵 Concatenated Audio</h3>'))
                display(Audio(str(wav_files[0])))
                display(HTML('</div>'))

            # Preview of the first transcript CSV, if any
            csv_dir = run_dir / "transcripts_solo_verified_whisper"
            transcripts = list(csv_dir.glob("*.csv")) if csv_dir.exists() else []
            if transcripts:
                table = pd.read_csv(transcripts[0])
                display(HTML('<div class="card"><h3>📝 Transcript Sample</h3>'))
                display(table.head(10))
                display(HTML(f'<p>Total segments: {len(table)}</p></div>'))

            # Create download package
            self._create_download_package(run_dir)
   
    def _create_download_package(self, output_path):
        """Zip a run folder and show a button that downloads the archive.

        The archive is written to ``self.output_dir`` with a timestamped name
        and contains every regular file below ``output_path`` whose name does
        not start with ``__``. Entries are stored relative to the parent of
        ``output_path``, so the run folder name is kept inside the archive.

        Args:
            output_path: ``Path`` of the run folder to package.
        """
        display(HTML('<div class="card"><h3>📦 Download Package</h3>'))

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        archive = Path(self.output_dir) / f"{self.target_name}_voice_extract_{timestamp}.zip"
        archive_root = output_path.parent

        # Create ZIP
        with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as bundle:
            for entry in output_path.rglob('*'):
                # Skip directories and dunder-prefixed files
                if not entry.is_file() or entry.name.startswith('__'):
                    continue
                bundle.write(entry, entry.relative_to(archive_root))

        # Download button
        trigger = widgets.Button(
            description='⬇️ Download Results',
            button_style='success',
            layout=widgets.Layout(width='200px')
        )
        trigger.on_click(lambda _clicked: files.download(str(archive)))

        display(trigger)
        size_mb = archive.stat().st_size / 1024 / 1024
        display(HTML(f'<p>Package size: {size_mb:.1f} MB</p>'))
        display(HTML('</div>'))
   
    def _handle_error(self, error_msg):
        """Report a failed extraction in the progress display and results tab.

        The message is matched against a few known substrings to choose a
        block of troubleshooting hints (memory, authentication, audio
        conversion). Messages that match none of them are shown as-is, but
        HTML-escaped so that text such as ``<module>`` or ``<class ...>`` is
        rendered literally instead of being swallowed as markup.

        Args:
            error_msg: Text describing the failure.
        """
        from html import escape  # local import: only this method needs it

        self._update_progress('error', 'error', error_msg)

        with self.results_output:
            clear_output()

            error_html = '<div class="error-message"><h3>❌ Extraction Failed</h3>'

            # Check for common errors
            if "CUDA out of memory" in error_msg or (
                "Bandit" in error_msg and "memory" in error_msg
            ):
                error_html += '''
                <p><b>Memory Error Detected</b></p>
                <p>The vocal separation step ran out of memory. This is common in Colab.</p>
                <h4>Solutions:</h4>
                <ul>
                    <li>Enable "Skip Bandit-v2 Vocal Separation" in Settings tab</li>
                    <li>Use a shorter audio file</li>
                    <li>Restart runtime and try again</li>
                </ul>
                '''
            elif "HuggingFace" in error_msg or "token" in error_msg:
                error_html += '''
                <p><b>Authentication Error</b></p>
                <p>There was an issue with HuggingFace authentication.</p>
                <h4>Solutions:</h4>
                <ul>
                    <li>Verify your token is correct</li>
                    <li>Ensure you've accepted model terms on HuggingFace</li>
                    <li>Check your internet connection</li>
                </ul>
                '''
            elif "ffmpeg" in error_msg.lower() or "conversion" in error_msg.lower():
                error_html += '''
                <p><b>Audio Conversion Error</b></p>
                <p>There was an issue converting your audio file.</p>
                <h4>Solutions:</h4>
                <ul>
                    <li>Check that your audio file is not corrupted</li>
                    <li>Try converting to WAV manually before processing</li>
                    <li>Ensure the file path is correct</li>
                </ul>
                '''
            else:
                # Arbitrary exception/process text: escape before embedding.
                error_html += f'<p>{escape(error_msg)}</p>'

            error_html += '</div>'
            display(HTML(error_html))

        # Switch to results tab to show error
        display(Javascript("showTab('results')"))

# === IMMEDIATE EXECUTION ===
# Build the interface as soon as this cell runs and keep the instance in the
# module-level name `ui`.
# NOTE(review): the widgets are presumably rendered from inside
# VoiceExtractorUI.__init__ - confirm against the constructor.
ui = VoiceExtractorUI()

# Voice Extractor - Usage Instructions

This notebook provides a graphical interface for the [Voice Extractor](https://github.com/ReisCook/Voice_Extractor) tool, which identifies, isolates, and transcribes clean solo segments of a target speaker from multi-speaker audio recordings.

## How to Use

1. **Authentication**: Enter your Hugging Face User Access Token. This is required to access the PyAnnote models.
2. **Input Files**:
   - Enter the full path to the audio file you want to process (MP3, WAV, M4A, FLAC, or OGG; non-WAV files are automatically converted to WAV)
   - Enter the path to a clean reference recording that contains ONLY your target speaker (5-30 seconds)
   - Enter a name for your target speaker
   - Choose an output directory for results
3. **Settings**: Configure sample rate, transcription model, and other options in the Settings tab
4. **Start Processing**: Click the "Start Extraction" button when all required fields are filled

## Important Notes

- You need to accept the terms of use for the following PyAnnote models on Hugging Face:
  - [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1)
  - [pyannote/overlapped-speech-detection](https://huggingface.co/pyannote/overlapped-speech-detection)
  - [pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
  - [pyannote/segmentation](https://huggingface.co/pyannote/segmentation)
- You'll need a Hugging Face access token, which you can create at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
- **Audio Format Support**: The tool now supports MP3, WAV, M4A, FLAC, and OGG files. Non-WAV files will be automatically converted to WAV format before processing.
- For optimal results, provide a clean reference recording that contains only the target speaker's voice
- The "Dry Run" option is helpful for testing, as it processes only the first 60 seconds
- GPU acceleration is automatically used when available
- **Colab Users**: Select T4 GPU runtime for 10-20x faster processing (Runtime → Change runtime type → T4)
- **Google Drive**: The tool automatically connects to your Google Drive at startup for easy file access
- **Need Help?** If you encounter any issues or have questions, feel free to contact me at: reiscook@gmail.com

For more detailed documentation, visit the [Voice Extractor GitHub repository](https://github.com/ReisCook/Voice_Extractor).