In [None]:
#@title <h1>🎙️ Voice Extractor Pro - Advanced Audio Processing Tool</h1> { display-mode: "form" }
#@markdown <p style="text-align: center; color: #666; font-size: 14px;">Extract clean voice segments from multi-speaker recordings with AI</p>

import os
import sys
import json
import time
import subprocess
import shutil
import pandas as pd
import zipfile
from pathlib import Path
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, HTML, Audio, Javascript, clear_output
from google.colab import files, drive
from huggingface_hub import login
import warnings
warnings.filterwarnings('ignore')

# === SETUP AND INSTALLATION ===
def setup_environment():
    """Install Voice Extractor and its dependencies, then mount Google Drive.

    The repository is cloned only when no ``Voice_Extractor`` path exists in
    the working directory. ``fastai`` is then uninstalled on a best-effort
    basis, the repository requirements and the packages this notebook uses
    are installed with ``pip``, and Google Drive is (re)mounted at
    ``/content/drive``.

    A failed ``pip install`` does not abort setup, but it is reported so the
    problem is visible here instead of surfacing later as an import error.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
    """
    def pip(*args, report_failure=True):
        """Run ``python -m pip <args>`` with its output captured."""
        result = subprocess.run(
            [sys.executable, '-m', 'pip', *args],
            capture_output=True,
            text=True,
            errors='replace',
        )
        if report_failure and result.returncode != 0:
            # Output is captured, so surface the tail of stderr explicitly.
            tail = '\n'.join(result.stderr.strip().splitlines()[-5:])
            print(f"⚠️ pip {args[0]} failed (exit code {result.returncode}):\n{tail}")
        return result

    if not os.path.exists('Voice_Extractor'):
        print("📦 Installing Voice Extractor...")
        subprocess.run(['git', 'clone', '-q', 'https://github.com/ReisCook/Voice_Extractor.git'], check=True)

    print("📚 Installing dependencies...")
    # Best effort: the outcome of the uninstall is deliberately ignored.
    pip('uninstall', '-y', 'fastai', report_failure=False)
    pip('install', '-q', '-r', 'Voice_Extractor/requirements.txt')
    pip('install', '-q', 'ipywidgets', 'pandas', 'matplotlib', 'huggingface_hub', 'datasets')

    print("🔗 Mounting Google Drive...")
    drive.mount('/content/drive', force_remount=True)
    print("✅ Setup complete!")

# === STYLING ===
def inject_custom_css() -> None:
    """Inject the stylesheet used by the Voice Extractor interface.

    Displays a single ``<style>`` element in the current cell output. It
    defines the classes referenced by the HTML fragments built elsewhere in
    this file (``section-header``, ``card``, ``progress-step``,
    ``status-indicator``, ``error-message``, ``success-message``,
    ``tab-container``/``tab``, ``tooltip``/``tooltiptext``), the ``pulse``
    and ``spin`` keyframe animations, and two overrides of default widget
    styling: ``.jupyter-widgets-view`` gets a zero margin and every
    ``.widget-label`` element is hidden.
    """
    # The whole stylesheet is one literal; it is emitted verbatim below.
    css = """
    <style>
    /* Modern UI Theme */
    .widget-container {
        background: #f8f9fa;
        border-radius: 12px;
        padding: 20px;
        margin: 10px 0;
        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
    }
    
    .section-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 15px 20px;
        border-radius: 10px;
        margin: 20px 0 15px 0;
        font-weight: 600;
        font-size: 18px;
        display: flex;
        align-items: center;
        gap: 10px;
    }
    
    .card {
        background: white;
        border-radius: 10px;
        padding: 20px;
        margin: 15px 0;
        box-shadow: 0 1px 3px rgba(0,0,0,0.12);
        transition: all 0.3s ease;
    }
    
    .card:hover {
        box-shadow: 0 4px 15px rgba(0,0,0,0.15);
        transform: translateY(-2px);
    }
    
    .file-drop-zone {
        border: 2px dashed #667eea;
        border-radius: 10px;
        padding: 30px;
        text-align: center;
        background: #f8f9ff;
        transition: all 0.3s ease;
        cursor: pointer;
    }
    
    .file-drop-zone:hover {
        background: #e9ecff;
        border-color: #5a67d8;
    }
    
    .file-drop-zone.drag-over {
        background: #e9ecff;
        border-color: #5a67d8;
        transform: scale(1.02);
    }
    
    .progress-step {
        display: flex;
        align-items: center;
        padding: 15px;
        margin: 5px 0;
        border-radius: 8px;
        background: #f8f9fa;
        transition: all 0.3s ease;
    }
    
    .progress-step.active {
        background: #e9ecff;
        border-left: 4px solid #667eea;
    }
    
    .progress-step.completed {
        background: #e6fffa;
        border-left: 4px solid #38b2ac;
    }
    
    .status-indicator {
        width: 30px;
        height: 30px;
        border-radius: 50%;
        display: flex;
        align-items: center;
        justify-content: center;
        margin-right: 15px;
        font-size: 16px;
    }
    
    .btn-primary {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        padding: 12px 30px;
        border-radius: 8px;
        font-weight: 600;
        cursor: pointer;
        transition: all 0.3s ease;
    }
    
    .btn-primary:hover:not(:disabled) {
        transform: translateY(-2px);
        box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
    }
    
    .btn-primary:disabled {
        opacity: 0.6;
        cursor: not-allowed;
    }
    
    .error-message {
        background: #fee;
        border-left: 4px solid #f56565;
        padding: 15px;
        border-radius: 6px;
        margin: 10px 0;
    }
    
    .success-message {
        background: #e6fffa;
        border-left: 4px solid #38b2ac;
        padding: 15px;
        border-radius: 6px;
        margin: 10px 0;
    }
    
    .tab-container {
        display: flex;
        gap: 10px;
        margin-bottom: 20px;
        border-bottom: 2px solid #e2e8f0;
    }
    
    .tab {
        padding: 10px 20px;
        cursor: pointer;
        border-bottom: 3px solid transparent;
        transition: all 0.3s ease;
        font-weight: 500;
    }
    
    .tab:hover {
        color: #667eea;
    }
    
    .tab.active {
        color: #667eea;
        border-bottom-color: #667eea;
    }
    
    .tooltip {
        position: relative;
        display: inline-block;
        margin-left: 5px;
    }
    
    .tooltip .tooltiptext {
        visibility: hidden;
        width: 250px;
        background-color: #555;
        color: #fff;
        text-align: left;
        border-radius: 6px;
        padding: 10px;
        position: absolute;
        z-index: 1;
        bottom: 125%;
        left: 50%;
        margin-left: -125px;
        opacity: 0;
        transition: opacity 0.3s;
        font-size: 12px;
    }
    
    .tooltip:hover .tooltiptext {
        visibility: visible;
        opacity: 1;
    }
    
    /* Animations */
    @keyframes pulse {
        0% { opacity: 1; }
        50% { opacity: 0.5; }
        100% { opacity: 1; }
    }
    
    .processing {
        animation: pulse 2s infinite;
    }
    
    @keyframes spin {
        from { transform: rotate(0deg); }
        to { transform: rotate(360deg); }
    }
    
    .spinner {
        animation: spin 1s linear infinite;
    }
    
    /* Hide Jupyter default styles */
    .jupyter-widgets-view { margin: 0 !important; }
    .widget-label { display: none !important; }
    </style>
    """
    # Rendering the HTML object adds the <style> element to the cell output.
    display(HTML(css))

# === FILE BROWSER ===
class GoogleDriveFileBrowser:
    """Interactive browser for picking a file under the mounted Google Drive.

    ``create_browser`` returns a widget showing the current directory, three
    shortcut buttons, a listing (directories first, then files) and
    Up / Select / Refresh buttons. Choosing a directory in the listing enters
    it; choosing a file and pressing Select stores its path in
    ``selected_file`` and passes it to the ``on_select`` callback, if any.
    """

    def __init__(self, start_path="/content/drive/MyDrive"):
        """Start browsing at *start_path*; no widgets are created yet."""
        self.current_path = Path(start_path)
        self.selected_file = None
        self.file_type_filter = None
        # Defined up front so _select_file never hits a missing attribute.
        self.on_select_callback = None
        # True while the listing is being replaced programmatically, so the
        # selection observer can tell that apart from a user click.
        self._refreshing = False

    def create_browser(self, file_types=None, on_select=None):
        """Create the file browser widget.

        Args:
            file_types: Optional iterable of file suffixes (e.g. ``'.wav'``);
                when given, only files with a matching suffix are listed.
                Directories are always listed.
            on_select: Optional callable invoked with the chosen file path
                (a string) when the user presses Select on a file.

        Returns:
            widgets.VBox: The assembled browser.
        """
        self.file_type_filter = file_types
        self.on_select_callback = on_select

        # Path display
        self.path_label = widgets.HTML(value=self._path_label_html())

        # File list
        self.file_list = widgets.Select(
            options=[],
            rows=10,
            layout=widgets.Layout(width='100%')
        )
        self.file_list.observe(self._on_file_select, names='value')

        # Navigation buttons
        self.up_btn = widgets.Button(
            description='⬆️ Up',
            layout=widgets.Layout(width='80px')
        )
        self.up_btn.on_click(self._go_up)

        self.select_btn = widgets.Button(
            description='✅ Select',
            button_style='success',
            layout=widgets.Layout(width='100px')
        )
        self.select_btn.on_click(self._select_file)

        self.refresh_btn = widgets.Button(
            description='🔄 Refresh',
            layout=widgets.Layout(width='100px')
        )
        self.refresh_btn.on_click(lambda b: self._update_file_list())

        # Quick access buttons
        self.quick_access = widgets.HBox([
            self._create_quick_button("🏠 MyDrive", "/content/drive/MyDrive"),
            self._create_quick_button("🎵 Audio", "/content/drive/MyDrive/Audio"),
            self._create_quick_button("📊 Datasets", "/content/drive/MyDrive/Datasets"),
        ])

        nav_box = widgets.HBox([self.up_btn, self.select_btn, self.refresh_btn])

        self._update_file_list()

        return widgets.VBox([
            self.path_label,
            self.quick_access,
            self.file_list,
            nav_box
        ])

    def _path_label_html(self):
        """Return the HTML banner showing the current directory."""
        return (
            f'<div style="font-family: monospace; background: #f0f0f0; padding: 8px; '
            f'border-radius: 4px; margin-bottom: 10px;">📁 {self.current_path}</div>'
        )

    def _create_quick_button(self, text, path):
        """Create a shortcut button that jumps to *path* when clicked."""
        btn = widgets.Button(description=text, layout=widgets.Layout(width='auto'))
        btn.on_click(lambda b: self._navigate_to(path))
        return btn

    def _navigate_to(self, path):
        """Switch to *path* if it exists; otherwise do nothing."""
        target = Path(path)
        if target.exists():
            self.current_path = target
            self._update_file_list()

    def _matches_filter(self, path):
        """Return True if *path* passes the suffix filter (or none is set)."""
        if not self.file_type_filter:
            return True
        suffix = path.suffix.lower()
        return any(suffix == ext.lower() for ext in self.file_type_filter)

    def _update_file_list(self):
        """Refresh the listing and the path banner for the current directory.

        Directories come first, then files that pass the suffix filter, each
        group in sorted order. A directory that cannot be read produces a
        single ``Error: ...`` entry instead of raising.
        """
        try:
            # One scan of the directory serves both groups.
            entries = sorted(self.current_path.iterdir())
            folders = [('📁 ' + e.name, str(e)) for e in entries if e.is_dir()]
            documents = [
                ('📄 ' + e.name, str(e))
                for e in entries
                if e.is_file() and self._matches_filter(e)
            ]
            options = (folders + documents) or [('(empty folder)', '')]
        except OSError as e:
            options = [(f'Error: {e}', '')]

        # Assigning ``options`` makes the Select widget choose the first entry,
        # which notifies the value observer. Directories are listed first, so
        # without this guard every refresh would descend into the first
        # sub-folder. Clearing the index afterwards leaves nothing
        # pre-selected, so a click on any entry registers as a change.
        self._refreshing = True
        try:
            self.file_list.options = options
            self.file_list.index = None
        finally:
            self._refreshing = False

        self.path_label.value = self._path_label_html()

    def _on_file_select(self, change):
        """Enter the chosen entry when the user picks a directory."""
        if self._refreshing:
            return
        chosen = change['new']
        if not chosen:
            return
        path = Path(chosen)
        if path.is_dir():
            self.current_path = path
            self._update_file_list()

    def _go_up(self, b):
        """Go to the parent directory unless already at the root."""
        parent = self.current_path.parent
        if parent != self.current_path:
            self.current_path = parent
            self._update_file_list()

    def _select_file(self, b):
        """Confirm the highlighted entry if it is a file."""
        chosen = self.file_list.value
        if not chosen:
            return
        path = Path(chosen)
        if path.is_file():
            self.selected_file = str(path)
            if self.on_select_callback:
                self.on_select_callback(self.selected_file)

# === MAIN UI CLASS ===
class VoiceExtractorUI:
    """Main UI controller for Voice Extractor"""
    
    def __init__(self):
        """Set the session state to its defaults, then build and show the UI."""
        # Values the user supplies through the Setup tab.
        self.hf_token = self.input_audio = self.reference_audio = self.target_name = None
        self.output_dir = "/content/drive/MyDrive/VoiceExtractor_Results"

        # Run-time bookkeeping for an extraction; nothing is running yet.
        self.process = self.start_time = None
        self.is_processing = False

        # Building the interface also displays it.
        self.setup_ui()
        
    def setup_ui(self):
        """Build the complete interface and display it in the cell output.

        Injects the shared stylesheet, then stacks the page header, the
        tabbed settings area and the progress panel in a single ``VBox``
        stored on ``self.main_container``.
        """
        inject_custom_css()

        # Header. This has to be an ipywidgets HTML widget: container widgets
        # accept only Widget instances as children, so an
        # IPython.display.HTML object would be rejected by the VBox below.
        header = widgets.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 
                    color: white; border-radius: 15px; margin-bottom: 30px;">
            <h1 style="margin: 0; font-size: 36px;">🎙️ Voice Extractor Pro</h1>
            <p style="margin: 10px 0 0 0; opacity: 0.9;">Extract clean voice segments from multi-speaker recordings</p>
        </div>
        """)

        # Create tabs
        self.tab_widget = self._create_tabs()

        # Progress panel
        self.progress_panel = self._create_progress_panel()

        # Main layout
        self.main_container = widgets.VBox([
            header,
            self.tab_widget,
            self.progress_panel
        ])

        display(self.main_container)
    
    def _create_tabs(self):
        """Assemble the tab strip, the four tab panes and their switching glue.

        The strip is plain HTML whose ``onclick`` handlers call the
        ``showTab`` JavaScript function defined here; that function
        highlights the clicked tab and invokes the ``switch_tab`` kernel
        callback, which is bound to ``self._switch_tab``.

        Returns:
            widgets.VBox: The strip followed by the container of panes.
        """
        # Clickable strip, rendered inside an Output widget.
        tab_strip = widgets.Output()
        with tab_strip:
            display(HTML("""
            <div class="tab-container">
                <div class="tab active" onclick="showTab('setup')">🚀 Setup</div>
                <div class="tab" onclick="showTab('basic')">⚙️ Basic Settings</div>
                <div class="tab" onclick="showTab('advanced')">🔧 Advanced</div>
                <div class="tab" onclick="showTab('results')">📊 Results</div>
            </div>
            """))

        # Build every pane up front; visibility is toggled later.
        self.setup_tab = self._create_setup_tab()
        self.basic_tab = self._create_basic_tab()
        self.advanced_tab = self._create_advanced_tab()
        self.results_tab = self._create_results_tab()

        panes = [self.setup_tab, self.basic_tab, self.advanced_tab, self.results_tab]
        self.tab_contents = widgets.VBox(panes)

        # Only the first pane (Setup) starts out visible.
        for hidden_pane in panes[1:]:
            hidden_pane.layout.display = 'none'

        # Browser-side half of the tab switch.
        display(Javascript("""
        window.showTab = function(tabName) {
            // Update tab buttons
            document.querySelectorAll('.tab').forEach(tab => {
                tab.classList.remove('active');
                if (tab.textContent.toLowerCase().includes(tabName.toLowerCase()) || 
                    (tabName === 'setup' && tab.textContent.includes('Setup'))) {
                    tab.classList.add('active');
                }
            });
            
            // Update visibility via Python callback
            google.colab.kernel.invokeFunction('switch_tab', [tabName], {});
        }
        """))

        # Kernel-side half: expose _switch_tab under the name the script calls.
        from google.colab import output as colab_output
        colab_output.register_callback('switch_tab', self._switch_tab)

        return widgets.VBox([tab_strip, self.tab_contents])
    
    def _switch_tab(self, tab_name):
        """Switch between tabs"""
        # Hide all tabs
        self.setup_tab.layout.display = 'none'
        self.basic_tab.layout.display = 'none'
        self.advanced_tab.layout.display = 'none'
        self.results_tab.layout.display = 'none'
        
        # Show selected tab
        if tab_name == 'setup':
            self.setup_tab.layout.display = 'block'
        elif tab_name == 'basic':
            self.basic_tab.layout.display = 'block'
        elif tab_name == 'advanced':
            self.advanced_tab.layout.display = 'block'
        elif tab_name == 'results':
            self.results_tab.layout.display = 'block'
    
    def _create_setup_tab(self):
        """Create the Setup tab: authentication, input files, target, output.

        Creates and stores on ``self`` the token field and Authenticate
        button, the input and reference path fields with their Browse/Upload
        buttons, the speaker-name field, the output-directory field and the
        two ``Output`` areas that host a file browser on demand. The Start
        button section is appended at the bottom.

        Returns:
            widgets.VBox: The assembled tab content.
        """
        def card(*children):
            """Group *children* in a box carrying the ``card`` CSS class."""
            # An opening <div> in one HTML widget and its closing tag in
            # another cannot enclose the widgets in between; a real container
            # with the class applied can.
            box = widgets.VBox(list(children))
            box.add_class('card')
            return box

        # HuggingFace token input
        self.hf_token_input = widgets.Password(
            placeholder='hf_...',
            layout=widgets.Layout(width='400px')
        )

        self.auth_btn = widgets.Button(
            description='🔐 Authenticate',
            button_style='primary',
            layout=widgets.Layout(width='150px')
        )
        self.auth_btn.on_click(self._authenticate)

        self.auth_status = widgets.HTML()

        # Input file selection
        self.input_audio_text = widgets.Text(
            placeholder='/content/drive/MyDrive/audio.mp3',
            layout=widgets.Layout(width='100%')
        )

        self.input_browse_btn = widgets.Button(
            description='📁 Browse',
            layout=widgets.Layout(width='100px')
        )
        self.input_browse_btn.on_click(self._browse_input_audio)

        # Reference file selection
        self.reference_audio_text = widgets.Text(
            placeholder='/content/drive/MyDrive/reference.wav',
            layout=widgets.Layout(width='100%')
        )

        self.reference_browse_btn = widgets.Button(
            description='📁 Browse',
            layout=widgets.Layout(width='100px')
        )
        self.reference_browse_btn.on_click(self._browse_reference_audio)

        self.reference_upload_btn = widgets.Button(
            description='📤 Upload',
            layout=widgets.Layout(width='100px')
        )
        self.reference_upload_btn.on_click(self._upload_reference)

        # Target name
        self.target_name_input = widgets.Text(
            placeholder='e.g., JohnDoe',
            layout=widgets.Layout(width='300px')
        )

        # Output directory
        self.output_dir_text = widgets.Text(
            value='/content/drive/MyDrive/VoiceExtractor_Results',
            layout=widgets.Layout(width='100%')
        )

        # File browsers (empty until a Browse button is pressed)
        self.input_browser_output = widgets.Output()
        self.reference_browser_output = widgets.Output()

        # Creates self.start_btn and self.validation_msg, which
        # _validate_inputs updates.
        start_section = self._create_start_button()

        # Re-validate whenever a field is edited by hand. Validation otherwise
        # runs only after authenticating, browsing or uploading, so a value
        # typed afterwards would leave a stale error and a disabled Start
        # button.
        for field in (
            self.input_audio_text,
            self.reference_audio_text,
            self.target_name_input,
            self.output_dir_text,
        ):
            field.observe(lambda change: self._validate_inputs(), names='value')

        # Every child is an ipywidgets widget: containers reject
        # IPython.display.HTML objects.
        setup_content = widgets.VBox([
            widgets.HTML('<div class="section-header">🔐 Authentication</div>'),
            card(
                widgets.HTML('<p>Enter your HuggingFace token to access required models:</p>'),
                widgets.HBox([self.hf_token_input, self.auth_btn]),
                self.auth_status,
                widgets.HTML('<p style="font-size: 12px; color: #666; margin-top: 10px;">Need a token? <a href="https://huggingface.co/settings/tokens" target="_blank">Create one here</a></p>'),
            ),

            widgets.HTML('<div class="section-header">📂 Input Files</div>'),
            card(
                widgets.HTML('<h4>🎵 Audio File to Process</h4>'),
                widgets.HBox([self.input_audio_text, self.input_browse_btn]),
                self.input_browser_output,
            ),
            card(
                widgets.HTML('<h4>🎤 Reference Audio (target speaker only)</h4>'),
                widgets.HBox([self.reference_audio_text, self.reference_browse_btn, self.reference_upload_btn]),
                self.reference_browser_output,
                widgets.HTML('<p style="font-size: 12px; color: #666;">5-30 seconds of clean audio with ONLY the target speaker</p>'),
            ),

            widgets.HTML('<div class="section-header">👤 Target Information</div>'),
            card(
                widgets.HTML('<h4>Speaker Name</h4>'),
                self.target_name_input,
            ),

            widgets.HTML('<div class="section-header">💾 Output Location</div>'),
            card(self.output_dir_text),

            start_section
        ])

        return setup_content
    
    def _create_basic_tab(self):
        """Create the Basic Settings tab.

        Creates and stores on ``self`` the output sample-rate dropdown
        (default 24000), the transcription-model dropdown (default
        ``'large-v3'``), the language field (default ``'en'``) and the
        "Skip Bandit-v2" checkbox (checked by default).

        Returns:
            widgets.VBox: The assembled tab content.
        """
        def card(*children):
            """Group *children* in a box carrying the ``card`` CSS class."""
            # A real container is needed: an opening <div> in one HTML widget
            # cannot enclose sibling widgets.
            box = widgets.VBox(list(children))
            box.add_class('card')
            return box

        # Sample rate
        self.sample_rate = widgets.Dropdown(
            options=[16000, 22050, 24000, 44100, 48000],
            value=24000,
            layout=widgets.Layout(width='200px')
        )

        # Whisper model
        self.whisper_model = widgets.Dropdown(
            options=['tiny', 'base', 'small', 'medium', 'large', 'large-v2', 'large-v3'],
            value='large-v3',
            layout=widgets.Layout(width='200px')
        )

        # Language
        self.language = widgets.Text(
            value='en',
            placeholder='en, es, fr, etc.',
            layout=widgets.Layout(width='100px')
        )

        # Skip Bandit
        self.skip_bandit = widgets.Checkbox(
            value=True,
            description='Skip Bandit-v2 Vocal Separation',
            indent=False
        )

        # Every child is an ipywidgets widget: containers reject
        # IPython.display.HTML objects.
        basic_content = widgets.VBox([
            widgets.HTML('<div class="section-header">🎛️ Audio Settings</div>'),
            card(
                widgets.HBox([
                    widgets.VBox([
                        widgets.HTML('<b>Output Sample Rate</b>'),
                        self.sample_rate
                    ]),
                    widgets.VBox([
                        widgets.HTML('<b>Transcription Model</b>'),
                        self.whisper_model
                    ]),
                    widgets.VBox([
                        widgets.HTML('<b>Language</b>'),
                        self.language
                    ])
                ]),
            ),

            widgets.HTML('<div class="section-header">🚀 Performance</div>'),
            card(
                self.skip_bandit,
                widgets.HTML('<p style="color: orange; font-size: 12px;">⚠️ Bandit-v2 uses significant memory. Enable skip for Colab or if audio is pre-separated.</p>'),
            )
        ])

        return basic_content
    
    def _create_advanced_tab(self):
        """Create the Advanced tab.

        Creates and stores on ``self`` four sliders (minimum duration, merge
        gap, verification threshold, concatenation silence), the diarization
        model dropdown and five processing-option checkboxes.

        Returns:
            widgets.VBox: The assembled tab content.
        """
        def card(*children):
            """Group *children* in a box carrying the ``card`` CSS class."""
            # A real container is needed: an opening <div> in one HTML widget
            # cannot enclose sibling widgets.
            box = widgets.VBox(list(children))
            box.add_class('card')
            return box

        # Segment parameters
        self.min_duration = widgets.FloatSlider(
            value=1.0, min=0.5, max=10.0, step=0.1,
            description='',
            layout=widgets.Layout(width='300px')
        )

        self.merge_gap = widgets.FloatSlider(
            value=0.25, min=0.0, max=2.0, step=0.05,
            description='',
            layout=widgets.Layout(width='300px')
        )

        self.verification_threshold = widgets.FloatSlider(
            value=0.7, min=0.0, max=1.0, step=0.01,
            description='',
            layout=widgets.Layout(width='300px')
        )

        self.concat_silence = widgets.FloatSlider(
            value=0.25, min=0.0, max=5.0, step=0.1,
            description='',
            layout=widgets.Layout(width='300px')
        )

        # Model options
        self.diar_model = widgets.Dropdown(
            options=['pyannote/speaker-diarization-3.1'],
            value='pyannote/speaker-diarization-3.1',
            layout=widgets.Layout(width='350px')
        )

        # Processing options
        self.disable_speechbrain = widgets.Checkbox(value=False, description='Disable SpeechBrain', indent=False)
        self.skip_rejected = widgets.Checkbox(value=True, description='Skip rejected transcripts', indent=False)
        self.dry_run = widgets.Checkbox(value=False, description='Dry run (60s only)', indent=False)
        self.debug = widgets.Checkbox(value=False, description='Debug logging', indent=False)
        self.keep_temp = widgets.Checkbox(value=False, description='Keep temp files', indent=False)

        # Every child is an ipywidgets widget: containers reject
        # IPython.display.HTML objects.
        advanced_content = widgets.VBox([
            widgets.HTML('<div class="section-header">🎯 Segment Parameters</div>'),
            card(
                self._create_param_row('Min Duration (s)', self.min_duration, 'Minimum segment length to keep'),
                self._create_param_row('Merge Gap (s)', self.merge_gap, 'Max gap between segments to merge'),
                self._create_param_row('Verification Threshold', self.verification_threshold, 'Speaker verification confidence'),
                self._create_param_row('Concat Silence (s)', self.concat_silence, 'Silence between concatenated segments'),
            ),

            widgets.HTML('<div class="section-header">🤖 Model Configuration</div>'),
            card(
                widgets.HTML('<b>Diarization Model</b>'),
                self.diar_model,
            ),

            widgets.HTML('<div class="section-header">⚙️ Processing Options</div>'),
            card(
                self.disable_speechbrain,
                self.skip_rejected,
                self.dry_run,
                self.debug,
                self.keep_temp,
            )
        ])

        return advanced_content
    
    def _create_results_tab(self):
        """Create the Results tab: a header above an ``Output`` area.

        The area is stored on ``self.results_output`` and starts empty.

        Returns:
            widgets.VBox: The assembled tab content.
        """
        self.results_output = widgets.Output()

        # The header is an ipywidgets HTML widget: containers reject
        # IPython.display.HTML objects.
        results_content = widgets.VBox([
            widgets.HTML('<div class="section-header">📊 Processing Results</div>'),
            self.results_output
        ])

        return results_content
    
    def _create_param_row(self, label, widget, tooltip):
        """Lay out one setting as a bold caption, its control and a hover hint.

        Args:
            label: Caption text, rendered bold in a 150px-wide cell.
            widget: The control placed after the caption.
            tooltip: Text shown when hovering over the info icon.

        Returns:
            widgets.HBox: Caption, control and hint side by side.
        """
        caption = widgets.HTML(f'<div style="width: 150px;"><b>{label}</b></div>')
        hint = widgets.HTML(f'''
                <div class="tooltip">ℹ️
                    <span class="tooltiptext">{tooltip}</span>
                </div>
            ''')
        return widgets.HBox([caption, widget, hint])
    
    def _create_progress_panel(self):
        """Create the progress panel: a stage display above a scrolling log.

        Stores the two ``Output`` areas on ``self.progress_output`` and
        ``self.log_output``. The panel is returned hidden
        (``layout.display = 'none'``).

        Returns:
            widgets.VBox: The assembled, initially hidden panel.
        """
        self.progress_output = widgets.Output()
        self.log_output = widgets.Output(layout={'height': '300px', 'overflow_y': 'scroll'})

        # Wrap the log in a real container with the ``card`` class: an opening
        # <div> in one HTML widget cannot enclose a sibling widget.
        log_card = widgets.VBox([self.log_output])
        log_card.add_class('card')

        # Headers are ipywidgets HTML widgets: containers reject
        # IPython.display.HTML objects.
        panel = widgets.VBox([
            widgets.HTML('<div class="section-header">📈 Progress</div>'),
            self.progress_output,
            widgets.HTML('<div class="section-header">📜 Processing Log</div>'),
            log_card
        ])

        # Initially hidden
        panel.layout.display = 'none'
        return panel
    
    def _create_start_button(self):
        """Build the centred Start button with the validation message below it.

        Stores the button on ``self.start_btn`` (wired to
        ``_start_extraction``) and the message area on
        ``self.validation_msg``.

        Returns:
            widgets.VBox: The button row followed by the message area.
        """
        button = widgets.Button(
            description='🚀 Start Extraction',
            button_style='success',
            layout=widgets.Layout(width='200px', height='40px'),
        )
        button.on_click(self._start_extraction)
        self.start_btn = button

        # Empty until the first validation pass writes to it.
        self.validation_msg = widgets.HTML()

        centred_row = widgets.HBox(
            [self.start_btn],
            layout=widgets.Layout(justify_content='center'),
        )
        return widgets.VBox([centred_row, self.validation_msg])
    
    def _authenticate(self, b):
        """Handle HuggingFace authentication"""
        token = self.hf_token_input.value.strip()
        if not token:
            self.auth_status.value = '<p style="color: red;">Please enter a token</p>'
            return
        
        try:
            login(token=token, add_to_git_credential=False)
            self.hf_token = token
            self.auth_status.value = '<p style="color: green;">✅ Authentication successful!</p>'
            self._validate_inputs()
        except Exception as e:
            self.auth_status.value = f'<p style="color: red;">❌ Authentication failed: {str(e)}</p>'
    
    def _browse_input_audio(self, b):
        """Open a Drive file browser for choosing the audio file to process.

        The browser is rendered into ``self.input_browser_output`` and lists
        only .wav, .mp3, .m4a, .flac and .ogg files. Once a file is chosen its
        path is written to the input field, the form is re-validated and the
        browser is removed again.

        Args:
            b: The clicked button (unused).
        """
        def use_selection(path):
            # Record the choice, re-check the form, then dismiss the browser.
            self.input_audio_text.value = path
            self.input_audio = path
            self._validate_inputs()
            with self.input_browser_output:
                clear_output()

        with self.input_browser_output:
            clear_output()
            picker = GoogleDriveFileBrowser()
            display(picker.create_browser(
                file_types=['.wav', '.mp3', '.m4a', '.flac', '.ogg'],
                on_select=use_selection,
            ))
    
    def _browse_reference_audio(self, b):
        """Open a Drive file browser for choosing the reference audio file.

        The browser is rendered into ``self.reference_browser_output`` and
        lists only .wav, .mp3, .m4a, .flac and .ogg files. Once a file is
        chosen its path is written to the reference field, the form is
        re-validated and the browser is removed again.

        Args:
            b: The clicked button (unused).
        """
        def use_selection(path):
            # Record the choice, re-check the form, then dismiss the browser.
            self.reference_audio_text.value = path
            self.reference_audio = path
            self._validate_inputs()
            with self.reference_browser_output:
                clear_output()

        with self.reference_browser_output:
            clear_output()
            picker = GoogleDriveFileBrowser()
            display(picker.create_browser(
                file_types=['.wav', '.mp3', '.m4a', '.flac', '.ogg'],
                on_select=use_selection,
            ))
    
    def _upload_reference(self, b):
        """Upload a reference clip from the user's machine and save it to Drive.

        Only the first uploaded file is used. It is written under
        ``/content/drive/MyDrive/VoiceExtractor_Uploads/`` (created if
        missing), set as the reference audio, and the form is re-validated.
        Nothing happens if the upload returns no files.

        Args:
            b: The clicked button (unused).
        """
        uploaded = files.upload()
        if not uploaded:
            return

        # First uploaded name; its value holds the file's bytes.
        filename = next(iter(uploaded))
        save_path = f"/content/drive/MyDrive/VoiceExtractor_Uploads/{filename}"
        os.makedirs(os.path.dirname(save_path), exist_ok=True)

        with open(save_path, 'wb') as destination:
            destination.write(uploaded[filename])

        self.reference_audio_text.value = save_path
        self.reference_audio = save_path
        self._validate_inputs()

        # Confirm the upload where the reference browser would appear.
        with self.reference_browser_output:
            clear_output()
            display(HTML(f'<p style="color: green;">✅ Uploaded: {filename}</p>'))
    
    def _validate_inputs(self):
        """Validate all required inputs"""
        self.input_audio = self.input_audio_text.value.strip()
        self.reference_audio = self.reference_audio_text.value.strip()
        self.target_name = self.target_name_input.value.strip()
        self.output_dir = self.output_dir_text.value.strip()
        
        errors = []
        if not self.hf_token:
            errors.append("HuggingFace authentication required")
        if not self.input_audio:
            errors.append("Input audio file required")
        elif not Path(self.input_audio).exists():
            errors.append("Input audio file not found")
        if not self.reference_audio:
            errors.append("Reference audio file required")
        elif not Path(self.reference_audio).exists():
            errors.append("Reference audio file not found")
        if not self.target_name:
            errors.append("Target speaker name required")
        if not self.output_dir:
            errors.append("Output directory required")
        
        if errors:
            self.validation_msg.value = f'<div class="error-message">❌ {", ".join(errors)}</div>'
            self.start_btn.disabled = True
        else:
            self.validation_msg.value = '<div class="success-message">✅ All inputs valid!</div>'
            self.start_btn.disabled = False
    
    def _update_progress(self, stage, status='active', message=''):
        """Redraw the stage list in the progress area.

        Stages before *stage* are drawn as completed, stages after it as
        pending, and *stage* itself according to *status*. When
        ``self.start_time`` is set, the elapsed time is appended.

        Args:
            stage: Id of the current stage (``'setup'``, ``'bandit'``,
                ``'diarization'``, ``'overlap'``, ``'identification'``,
                ``'extraction'``, ``'verification'``, ``'transcription'`` or
                ``'finalization'``). An unrecognised id draws every stage as
                pending instead of raising.
            status: ``'active'``, ``'completed'`` or ``'error'``. Any other
                value omits the current stage's row.
            message: Detail text shown under the stage name for the
                ``'active'`` and ``'error'`` states. It is inserted into the
                HTML as-is.
        """
        stages = [
            ('setup', '🔧', 'Setup & Initialization'),
            ('bandit', '🎵', 'Vocal Separation'),
            ('diarization', '👥', 'Speaker Diarization'),
            ('overlap', '🔍', 'Overlap Detection'),
            ('identification', '🎯', 'Speaker Identification'),
            ('extraction', '✂️', 'Segment Extraction'),
            ('verification', '✅', 'Speaker Verification'),
            ('transcription', '📝', 'Transcription'),
            ('finalization', '📦', 'Finalization')
        ]

        # Locate the current stage once. The default of -1 keeps an unknown
        # id from raising StopIteration and makes every row compare as
        # "after current", i.e. pending.
        current = next(
            (position for position, entry in enumerate(stages) if entry[0] == stage),
            -1,
        )

        with self.progress_output:
            clear_output()
            rows = ['<div style="background: white; padding: 20px; border-radius: 10px;">']

            for position, (stage_id, icon, stage_name) in enumerate(stages):
                if position == current:
                    if status == 'active':
                        rows.append(f'''
                        <div class="progress-step active">
                            <div class="status-indicator spinner">{icon}</div>
                            <div>
                                <div><b>{stage_name}</b></div>
                                <div style="font-size: 12px; color: #666;">{message}</div>
                            </div>
                        </div>
                        ''')
                    elif status == 'completed':
                        rows.append(f'''
                        <div class="progress-step completed">
                            <div class="status-indicator">✅</div>
                            <div>
                                <div><b>{stage_name}</b></div>
                                <div style="font-size: 12px; color: #666;">Completed</div>
                            </div>
                        </div>
                        ''')
                    elif status == 'error':
                        rows.append(f'''
                        <div class="progress-step" style="background: #fee; border-left: 4px solid #f56565;">
                            <div class="status-indicator">❌</div>
                            <div>
                                <div><b>{stage_name}</b></div>
                                <div style="font-size: 12px; color: #f56565;">{message}</div>
                            </div>
                        </div>
                        ''')
                elif position < current:
                    # Earlier stages are shown as done.
                    rows.append(f'''
                    <div class="progress-step completed">
                        <div class="status-indicator">✅</div>
                        <div><b>{stage_name}</b></div>
                    </div>
                    ''')
                else:
                    # Later stages are shown dimmed.
                    rows.append(f'''
                    <div class="progress-step">
                        <div class="status-indicator" style="background: #e2e8f0;">{icon}</div>
                        <div style="opacity: 0.6;"><b>{stage_name}</b></div>
                    </div>
                    ''')

            # Add elapsed time
            if self.start_time:
                elapsed = time.time() - self.start_time
                rows.append(f'<div style="text-align: center; margin-top: 20px; color: #666;">Elapsed time: {int(elapsed//60)}m {int(elapsed%60)}s</div>')

            rows.append('</div>')
            display(HTML(''.join(rows)))
    
    def _start_extraction(self, b):
        """Button callback that runs one extraction from start to finish.

        Does nothing if a run is already in progress. Otherwise it locks the
        start button, reveals the progress panel, clears the log and results
        outputs, and calls ``_run_extraction``. Any exception is reported
        through ``_handle_error``; the button and validation state are restored
        afterwards in every case.

        Args:
            b: The clicked button, as passed by the widget callback (unused).
        """
        # Guard against re-entry while a run is active.
        if self.is_processing:
            return

        self.is_processing = True
        self.start_time = time.time()

        # Lock the button and reveal the progress panel.
        self.start_btn.disabled = True
        self.start_btn.description = '🔄 Processing...'
        self.progress_panel.layout.display = 'block'

        # Wipe whatever the previous run left in the log and results areas.
        for panel in (self.log_output, self.results_output):
            with panel:
                clear_output()

        try:
            self._run_extraction()
        except Exception as exc:
            self._handle_error(str(exc))
        finally:
            # Restore the idle state, then let validation decide whether the
            # button may stay enabled.
            self.is_processing = False
            self.start_btn.disabled = False
            self.start_btn.description = '🚀 Start Extraction'
            self._validate_inputs()
    
    def _run_extraction(self):
        """Run the Voice Extractor CLI and mirror its output into the UI.

        Builds the ``run_extractor.py`` command line from the widget values,
        starts it as a subprocess, echoes every output line into
        ``self.log_output`` and advances the progress panel whenever a line
        contains one of the marker strings in ``stage_map``. The stage reached
        so far is kept in ``self._current_stage``.

        On exit code 0 the results are shown via ``_show_results``; otherwise
        ``_handle_error`` receives the exit code followed by the last few
        non-blank output lines, so the actual cause is part of the message.

        The child process is killed if monitoring stops early (an exception or
        an interrupt), so it is never left running in the background.
        """
        # Reset first so a failure while building the command is not
        # attributed to a stage left over from a previous run.
        self._current_stage = 'setup'

        # Build command. sys.executable runs the CLI in the same interpreter
        # that setup_environment() installed the requirements into.
        # NOTE(review): the token is passed as a command-line argument and is
        # therefore visible in the process list; confirm whether
        # run_extractor.py can take it from the environment instead.
        cmd = [
            sys.executable, 'Voice_Extractor/run_extractor.py',
            '--input-audio', self.input_audio,
            '--reference-audio', self.reference_audio,
            '--target-name', self.target_name,
            '--output-base-dir', self.output_dir,
            '--token', self.hf_token,
            '--output-sr', str(self.sample_rate.value),
            '--whisper-model', self.whisper_model.value,
            '--language', self.language.value,
            '--min-duration', str(self.min_duration.value),
            '--merge-gap', str(self.merge_gap.value),
            '--verification-threshold', str(self.verification_threshold.value),
            '--concat-silence', str(self.concat_silence.value),
            '--diar-model', self.diar_model.value
        ]

        # Add flags, in the same order as the checkboxes are listed here.
        optional_flags = [
            (self.skip_bandit, '--skip-bandit'),
            (self.disable_speechbrain, '--disable-speechbrain'),
            (self.skip_rejected, '--skip-rejected-transcripts'),
            (self.dry_run, '--dry-run'),
            (self.debug, '--debug'),
            (self.keep_temp, '--keep-temp-files'),
        ]
        cmd.extend(flag for toggle, flag in optional_flags if toggle.value)

        # Marker substrings in the CLI output -> progress stage identifiers.
        stage_map = {
            'Reference Audio Preparation': 'setup',
            'Vocal Separation': 'bandit',
            'Speaker Diarization': 'diarization',
            'Overlapped Speech Detection': 'overlap',
            'Identifying Target Speaker': 'identification',
            'Slice & Verify': 'extraction',
            'Speaker Verification': 'verification',
            'Transcribing': 'transcription',
            'Concatenating': 'finalization'
        }

        # Number of trailing output lines attached to a failure report.
        tail_size = 10
        recent_lines = []

        # Run process
        self._update_progress('setup', 'active', 'Initializing Voice Extractor...')

        self.process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1
        )

        try:
            # Monitor output
            with self.log_output:
                for line in self.process.stdout:
                    print(line, end='')

                    if line.strip():
                        recent_lines.append(line.rstrip())
                        del recent_lines[:-tail_size]

                    # Update progress based on output
                    for key, stage in stage_map.items():
                        if key in line:
                            if self._current_stage != stage:
                                self._update_progress(self._current_stage, 'completed')
                            self._current_stage = stage
                            self._update_progress(stage, 'active', line.strip())
                            break

            exit_code = self.process.wait()
        finally:
            # Reached with the child still alive only when monitoring was cut
            # short; do not leave it running unattended.
            if self.process.poll() is None:
                self.process.kill()
                self.process.wait()
            self.process.stdout.close()

        if exit_code == 0:
            self._update_progress(self._current_stage, 'completed')
            self._update_progress('finalization', 'completed')
            self._show_results()
        else:
            detail = f"Process failed with exit code {exit_code}"
            if recent_lines:
                detail += "\n" + "\n".join(recent_lines)
            self._handle_error(detail)
    
    def _show_results(self):
        """Display extraction results in ``self.results_output``.

        Looks for the run folder
        ``<output_dir>/<target_name>_<input stem>_extracted`` and, if it
        exists, shows segment counts, the first concatenated audio file, the
        first ten rows of the first transcript CSV, and finally the download
        package. "First" means first by sorted file name, so the choice is
        stable between runs.

        A transcript CSV that cannot be read or parsed is reported inline
        instead of raising, so it does not turn a finished run into a failure.
        """
        from html import escape  # local import: used only for the inline warning

        # Switch to results tab
        display(Javascript("showTab('results')"))

        with self.results_output:
            clear_output()

            # Find output directory
            run_name = f"{self.target_name}_{Path(self.input_audio).stem}_extracted"
            output_path = Path(self.output_dir) / run_name

            if not output_path.exists():
                display(HTML('<div class="error-message">Output directory not found</div>'))
                return

            display(HTML('<div class="success-message">✅ Extraction completed successfully!</div>'))

            # Show statistics: one list item per segment folder that exists.
            segment_dirs = [
                (output_path / f"target_segments_solo/{self.target_name}_solo_verified",
                 '✅ Verified segments'),
                (output_path / f"target_segments_solo/{self.target_name}_solo_rejected_for_review",
                 '❌ Rejected segments'),
            ]
            stats_html = '<div class="card"><h3>📊 Extraction Statistics</h3><ul>'
            for folder, label in segment_dirs:
                if folder.exists():
                    count = sum(1 for _ in folder.glob("*.wav"))
                    stats_html += f'<li>{label}: {count}</li>'
            stats_html += '</ul></div>'
            display(HTML(stats_html))

            # Show concatenated audio
            concat_dir = output_path / "concatenated_audio_solo_verified"
            if concat_dir.exists():
                # glob() order is unspecified; sort so the same file is picked
                # every time.
                concat_files = sorted(concat_dir.glob("*.wav"))
                if concat_files:
                    display(HTML('<div class="card"><h3>🎵 Concatenated Audio</h3>'))
                    display(Audio(str(concat_files[0])))
                    display(HTML('</div>'))

            # Show transcript sample
            transcript_dir = output_path / "transcripts_solo_verified_whisper"
            if transcript_dir.exists():
                csv_files = sorted(transcript_dir.glob("*.csv"))
                if csv_files:
                    try:
                        df = pd.read_csv(csv_files[0])
                    except (OSError, ValueError) as exc:
                        # pandas' EmptyDataError and ParserError derive from
                        # ValueError; OSError covers unreadable files.
                        display(HTML(
                            '<div class="error-message">⚠️ Could not read transcript '
                            f'{escape(csv_files[0].name)}: {escape(str(exc))}</div>'
                        ))
                    else:
                        display(HTML('<div class="card"><h3>📝 Transcript Sample</h3>'))
                        display(df.head(10))
                        display(HTML(f'<p>Total segments: {len(df)}</p></div>'))

            # Create download package
            self._create_download_package(output_path)
    
    def _create_download_package(self, output_path):
        """Zip the run folder and show a button that downloads the archive.

        The archive is written to ``self.output_dir`` with a timestamped name.
        Entries are stored relative to the parent of ``output_path``, so the
        run folder name is the top-level directory inside the ZIP. Files whose
        name starts with ``__`` are left out.

        Args:
            output_path: ``Path`` of the run folder to package.
        """
        display(HTML('<div class="card"><h3>📦 Download Package</h3>'))

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        archive_path = Path(self.output_dir) / f"{self.target_name}_voice_extract_{timestamp}.zip"
        archive_root = output_path.parent

        # Write every regular file below the run folder into the archive.
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
            for entry in output_path.rglob('*'):
                if entry.name.startswith('__') or not entry.is_file():
                    continue
                archive.write(entry, entry.relative_to(archive_root))

        def _trigger_download(_button):
            # Hands the finished archive to Colab's browser download helper.
            files.download(str(archive_path))

        button = widgets.Button(
            description='⬇️ Download Results',
            button_style='success',
            layout=widgets.Layout(width='200px')
        )
        button.on_click(_trigger_download)
        display(button)

        size_mb = archive_path.stat().st_size / 1024 / 1024
        display(HTML(f'<p>Package size: {size_mb:.1f} MB</p>'))
        display(HTML('</div>'))
    
    def _handle_error(self, error_msg):
        """Show a failed extraction in the progress panel and the results area.

        The progress panel marks the stage recorded in ``self._current_stage``
        as failed. When no stage has been recorded, a plain error banner is
        written to ``self.progress_output`` instead, because the progress
        renderer only knows real stage identifiers.

        The results area shows troubleshooting advice for errors that look like
        memory or authentication problems (plain substring checks on the
        message). The original message is always included, HTML-escaped, so a
        mistaken classification never hides the real error.

        Args:
            error_msg: Human-readable description of the failure.
        """
        from html import escape  # local import: only needed for error display

        text = str(error_msg)
        safe_text = escape(text)

        failed_stage = getattr(self, '_current_stage', None)
        if failed_stage:
            self._update_progress(failed_stage, 'error', text)
        else:
            with self.progress_output:
                clear_output()
                display(HTML(f'<div class="error-message">❌ {safe_text}</div>'))

        # Check for common errors. Parentheses spell out the grouping that
        # operator precedence already applied.
        is_memory_error = (
            "CUDA out of memory" in text
            or ("Bandit" in text and "memory" in text)
        )
        is_auth_error = "HuggingFace" in text or "token" in text

        if is_memory_error:
            guidance = '''
                <p><b>Memory Error Detected</b></p>
                <p>The vocal separation step ran out of memory. This is common in Colab.</p>
                <h4>Solutions:</h4>
                <ul>
                    <li>Enable "Skip Bandit-v2 Vocal Separation" in Basic Settings</li>
                    <li>Use a shorter audio file</li>
                    <li>Restart runtime and try again</li>
                </ul>
                '''
        elif is_auth_error:
            guidance = '''
                <p><b>Authentication Error</b></p>
                <p>There was an issue with HuggingFace authentication.</p>
                <h4>Solutions:</h4>
                <ul>
                    <li>Verify your token is correct</li>
                    <li>Ensure you've accepted model terms on HuggingFace</li>
                    <li>Check your internet connection</li>
                </ul>
                '''
        else:
            guidance = None

        sections = ['<div class="error-message"><h3>❌ Extraction Failed</h3>']
        if guidance is None:
            # pre-wrap keeps the line breaks of a multi-line message.
            sections.append(f'<p style="white-space: pre-wrap;">{safe_text}</p>')
        else:
            sections.append(guidance)
            sections.append(
                '<details><summary>Error details</summary>'
                f'<pre style="white-space: pre-wrap;">{safe_text}</pre></details>'
            )
        sections.append('</div>')

        with self.results_output:
            clear_output()
            display(HTML(''.join(sections)))

# === MAIN EXECUTION ===
# Start-up banner, followed by a 50-character rule.
print("🚀 Initializing Voice Extractor Pro...", "=" * 50, sep="\n")

# Clone the tool if needed, install its dependencies and mount Google Drive.
setup_environment()

# The leading newline sets this banner apart from the installer output.
print("\n✅ Setup complete! Loading interface...", "=" * 50, sep="\n")

# Create the UI object; the name `ui` stays at module level so it remains
# reachable from the notebook afterwards.
ui = VoiceExtractorUI()

# Short usage guide rendered as an HTML card.
display(HTML("""
<div style="margin-top: 30px; padding: 20px; background: #f0f7ff; border-radius: 10px; border-left: 4px solid #4299e1;">
    <h3>📖 Quick Start Guide</h3>
    <ol>
        <li><b>Authenticate:</b> Enter your HuggingFace token and click Authenticate</li>
        <li><b>Select Files:</b> Browse or enter paths for your audio files</li>
        <li><b>Configure:</b> Adjust settings in Basic and Advanced tabs as needed</li>
        <li><b>Process:</b> Click "Start Extraction" and monitor progress</li>
        <li><b>Download:</b> Get your results from the Results tab</li>
    </ol>
    <p><b>Need help?</b> Contact: reiscook@gmail.com</p>
</div>
"""))