In [None]:
# Cell 1: Mount Google Drive & Install Dependencies
# The UI cell below uses /content/drive/MyDrive/... paths in its placeholders,
# so Drive has to be mounted at /content/drive before that cell is used.
from google.colab import drive
drive.mount('/content/drive')

# Clone repo and install
# The extractor is later launched as Voice_Extractor/run_extractor.py, relative
# to the directory this clone is made in.
# NOTE(review): re-running this cell will presumably make `git clone` complain
# that the Voice_Extractor directory already exists — confirm whether that is
# acceptable or whether the clone should be skipped when the folder is present.
!git clone -q https://github.com/ReisCook/Voice_Extractor.git
!pip install -q -r Voice_Extractor/requirements.txt ipywidgets pandas matplotlib huggingface_hub datasets


In [None]:
import ipywidgets as widgets
import os
import subprocess
import shutil
import time
import json
import glob
import pandas as pd
from IPython.display import display, Audio, clear_output, HTML
from google.colab import files, output
from huggingface_hub import login, HfApi
from pathlib import Path

# Inject the stylesheet used by the widgets below. The class names defined here
# (input-row, section-header, subsection-header, tooltip-*, status-*,
# log-area-container, start-button) are the ones attached later via add_class()
# or embedded in the HTML strings built by this cell.
display(HTML("""
<style>
    /* General Styles */
    .widget-label { min-width: 180px !important; font-weight: bold; display: flex; align-items: center;}
    .widget-text input[type="text"], .widget-text input[type="password"], .widget-text textarea { width: calc(100% - 16px); box-sizing: border-box; }
    .widget-dropdown select { min-width: 200px; }
    .widget-button { margin: 5px 0; }
    .full-width-widget > .widget-label { width: 100% !important; } /* Forcing label to take full width for some widgets */

    /* Section Styling */
    .section-header { font-size: 1.3em; font-weight: bold; color: #1A73E8; margin-top: 25px; margin-bottom: 15px; padding-bottom: 5px; border-bottom: 2px solid #1A73E8;}
    .subsection-header { font-size: 1.1em; font-weight: bold; color: #34A853; margin-top: 20px; margin-bottom: 10px;}

    /* File Picker & Input Rows */
    .input-row { display: flex; align-items: center; margin-bottom: 8px; width: 100%; }
    .input-row > .widget-label { flex-basis: 200px; flex-shrink: 0; margin-right: 10px; }
    .input-row > .widget-text, .input-row > .widget-dropdown { flex-grow: 1; }
    .input-row .widget-button { margin-left: 10px; min-width: 90px; }
    
    /* Tooltips */
    .tooltip-container { position: relative; display: inline-block; margin-left: 5px; }
    .tooltip-icon { color: #5f6368; cursor: help; }
    .tooltip-text {
        visibility: hidden; width: 280px; background-color: #333; color: #fff; text-align: left;
        border-radius: 6px; padding: 8px; position: absolute; z-index: 100;
        bottom: 130%; left: 50%; margin-left: -140px; opacity: 0; transition: opacity 0.3s;
        font-size: 0.9em; line-height: 1.4;
    }
    .tooltip-container:hover .tooltip-text { visibility: visible; opacity: 1; }

    /* Log Area & Status */
    .log-area-container { border: 1px solid #ccc; height: 400px; overflow-y: scroll; padding: 10px; background-color: #f9f9f9; margin-top: 15px; font-family: monospace; font-size: 0.9em; white-space: pre-wrap; }
    .status-message-container { margin-top: 15px; padding: 10px; border-radius: 5px; font-weight: bold; text-align: center; }
    .status-ready { background-color: #e0e0e0; color: #333; }
    .status-processing { background-color: #fff3cd; color: #856404; border: 1px solid #ffeeba; } /* Bootstrap warning yellow */
    .status-success { background-color: #d4edda; color: #155724; border: 1px solid #c3e6cb; } /* Bootstrap success green */
    .status-error { background-color: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; } /* Bootstrap danger red */

    /* Accordion Styling */
    .widget-accordion .accordion-header { background-color: #f0f0f0; padding: 8px; margin-top: 5px; border: 1px solid #ddd; }
    .widget-accordion .widget-box { padding: 10px; border: 1px solid #ddd; border-top: none; }
    
    /* Button Styling */
    .start-button { font-size: 1.1em; padding: 10px 15px; }
    .widget-radio-buttons label { margin-right: 15px; }
    .disabled-widget-note { color: #777; font-style: italic; font-size: 0.9em; margin-left: 5px;}

</style>
<!-- Font Awesome for tooltips -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
"""))

# Helper Functions
def create_tooltip_html(tooltip_text):
    """Build the HTML snippet for a hover tooltip.

    The snippet is a ``tooltip-container`` div holding a Font Awesome info
    icon followed by a ``tooltip-text`` span. ``tooltip_text`` is inserted
    into the span as-is (no escaping), so it may itself contain markup.
    """
    info_icon = '<i class="fas fa-info-circle tooltip-icon"></i>'
    bubble = f'<span class="tooltip-text">{tooltip_text}</span>'
    return '<div class="tooltip-container">' + info_icon + bubble + '</div>'

def create_file_input_row(description_text, placeholder_text, tooltip_content, initial_value="", is_folder=False):
    """Create an input row with label, text input, browse button, and tooltip.

    Args:
        description_text: Text shown in the label at the start of the row.
        placeholder_text: Placeholder shown in the empty text field.
        tooltip_content: Tooltip body, passed to ``create_tooltip_html``.
        initial_value: Initial content of the text field.
        is_folder: When True the Browse button only pre-fills the Google Drive
            prefix and asks the user to finish the path by hand. When False it
            opens the Colab upload dialog and fills in the uploaded file's path.

    Returns:
        A ``(row, text_widget)`` tuple: ``row`` is a VBox holding the input row
        and a small output area for messages; ``text_widget`` is the Text
        widget whose ``value`` carries the chosen path.
    """
    label = widgets.Label(description_text)
    text_widget = widgets.Text(value=initial_value, placeholder=placeholder_text)
    button = widgets.Button(description="Browse", icon="folder-open" if is_folder else "file-audio")
    tooltip = widgets.HTML(create_tooltip_html(tooltip_content))

    output_widget = widgets.Output()

    def on_button_click(b):
        with output_widget:
            clear_output()
            if is_folder:
                text_widget.value = "/content/drive/MyDrive/"
                print("Please manually complete the Google Drive folder path. Example: `/content/drive/MyDrive/my_audio_folder`.")
                return

            # google.colab.files offers upload()/download()/view() but no
            # pick_file(); the previous call raised AttributeError, which the
            # old `except TypeError` did not catch. upload() stores the chosen
            # file(s) in the current working directory and returns a dict
            # keyed by file name.
            try:
                uploaded = files.upload()
            except Exception as e:  # UI callback boundary: report, never crash
                print(f"File selection failed ({e}). Please type the file path manually.")
                return

            if not uploaded:
                print("No file selected.")
                return

            chosen_name = next(iter(uploaded))
            if Path(chosen_name).suffix.lower() not in ('.wav', '.mp3', '.m4a', '.flac'):
                print(f"Warning: '{chosen_name}' is not a .wav/.mp3/.m4a/.flac file.")
            # Store an absolute path so the value stays valid regardless of
            # the working directory of whatever consumes it later.
            text_widget.value = os.path.abspath(chosen_name)

    button.on_click(on_button_click)

    hbox = widgets.HBox([label, text_widget, button, tooltip])
    hbox.add_class("input-row")

    # Return combined widget for use in layout
    return widgets.VBox([hbox, output_widget]), text_widget

def validate_inputs(*args):
    """Enable the start button only when every required field is filled.

    Accepts and ignores any positional arguments so it can be used directly
    as a widget observer. The required fields are the HF token, audio
    directory, reference file, target name and output directory; when
    "push to HF" is ticked the dataset repo name is required as well.
    Also refreshes the red/green validation hint.
    """
    mandatory_fields = (
        hf_token_widget,
        audio_dir_text,
        reference_file_text,
        target_name_text,
        output_base_dir_text,
    )
    blank_fields = [field for field in mandatory_fields if not field.value.strip()]
    form_complete = not blank_fields

    # The dataset repo name only matters when pushing to the Hub is requested.
    if form_complete and push_to_hf_cb.value:
        form_complete = bool(hf_dataset_repo_text.value.strip())

    start_btn.disabled = not form_complete

    validation_status_text.value = (
        "<span style='color: green;'>All required fields are filled.</span>"
        if form_complete
        else "<span style='color: red;'>Please fill all required fields marked with *.</span>"
    )

# Authentication & Main Inputs
header_auth = widgets.HTML("<div class='section-header'>Authentication & Setup</div>")
# The leading "*" in the description marks the field as required; it is one of
# the fields checked by validate_inputs().
hf_token_widget = widgets.Password(
    placeholder="hf_...", 
    description="*HF Token:",
)
hf_token_tooltip = widgets.HTML(create_tooltip_html(
    "Required. Your Hugging Face User Access Token (e.g., hf_xxx). Needs 'read' access for PyAnnote models. Get from hf.co/settings/tokens."
))

# Pair the field with its tooltip and tag the row so the "input-row" CSS applies.
auth_box = widgets.HBox([hf_token_widget, hf_token_tooltip])
auth_box.add_class("input-row")

# Input files
# Each create_file_input_row() call returns (row_widget, text_widget): the row
# goes into the layout, the text widget is what validation and the run read.
header_input = widgets.HTML("<div class='section-header'>Input Files & Target Name</div>")
audio_dir_row, audio_dir_text = create_file_input_row(
    "*Audio Directory:", 
    "/content/drive/MyDrive/your_audio_folder", 
    "Path to the folder containing your input audio file(s) on Google Drive. This Colab version will process the first compatible audio found in this folder.",
    is_folder=True
)

reference_file_row, reference_file_text = create_file_input_row(
    "*Reference Audio:", 
    "/content/drive/MyDrive/your_reference.wav", 
    "Path to a clean reference audio (5-30s) of ONLY the target speaker on Google Drive.",
    is_folder=False
)

# Target name is a plain text field, so it is wrapped with its tooltip by hand
# instead of going through create_file_input_row().
target_name_text = widgets.Text(
    placeholder="e.g., JohnDoe", 
    description="*Target Name:"
)
target_name_tooltip = widgets.HTML(create_tooltip_html(
    "A name for the target speaker (e.g., 'JohnDoe'). Used in output file/folder names."
))
target_name_box = widgets.HBox([target_name_text, target_name_tooltip])
target_name_box.add_class("input-row")

output_base_dir_row, output_base_dir_text = create_file_input_row(
    "*Output Directory:", 
    "/content/drive/MyDrive/VoiceExtractor_Runs", 
    "Base GDrive folder where a new subfolder for this run's outputs will be created (e.g., `/content/drive/MyDrive/VoiceExtractor_Runs`).",
    is_folder=True
)

# Processing Options (Main)
# Every option below follows the same pattern: the control, an HTML tooltip,
# and an HBox tagged "input-row" that holds both. The control values are read
# by run_extraction() when building the command line.
header_processing = widgets.HTML("<div class='section-header'>Basic Processing Options</div>")

# Forwarded as --output-sr.
output_sr_dd = widgets.Dropdown(
    options=[16000, 22050, 24000, 44100, 48000],
    value=44100,
    description="Output Sample Rate:"
)
output_sr_tooltip = widgets.HTML(create_tooltip_html(
    "Sample rate (Hz) for final output segments. 24000Hz is common for some ASR/TTS models."
))
output_sr_box = widgets.HBox([output_sr_dd, output_sr_tooltip])
output_sr_box.add_class("input-row")

# Forwarded as --whisper-model.
whisper_model_dd = widgets.Dropdown(
    options=['tiny.en', 'tiny', 'base.en', 'base', 'small.en', 'small', 'medium.en', 'medium', 'large-v1', 'large-v2', 'large-v3'],
    value='base.en',
    description="Whisper Model:"
)
whisper_model_tooltip = widgets.HTML(create_tooltip_html(
    "OpenAI Whisper model for transcription. '.en' models are English-only. Larger models are more accurate but slower."
))
whisper_model_box = widgets.HBox([whisper_model_dd, whisper_model_tooltip])
whisper_model_box.add_class("input-row")

# Forwarded as --language.
language_text = widgets.Text(
    value='en',
    description="Language Code:"
)
language_tooltip = widgets.HTML(create_tooltip_html(
    "Language code for Whisper (e.g., 'en', 'es', 'auto' for detection). Specifying is often more reliable."
))
language_box = widgets.HBox([language_text, language_tooltip])
language_box.add_class("input-row")

# Advanced Options (Accordion)
# The four sliders below end up in the "Segment Parameters" accordion page;
# their values are stringified and passed to the extractor script by
# run_extraction().
header_advanced = widgets.HTML("<div class='subsection-header'>Advanced Settings</div>")

# Forwarded as --min-duration.
min_duration_slider = widgets.FloatSlider(
    value=1.0, 
    min=0.5, 
    max=10.0, 
    step=0.1,
    description="Min Segment Duration:"
)
min_duration_tooltip = widgets.HTML(create_tooltip_html(
    "Minimum duration (seconds) for a solo speaker segment to be kept."
))
min_duration_box = widgets.HBox([min_duration_slider, min_duration_tooltip])
min_duration_box.add_class("input-row")

# Forwarded as --merge-gap.
merge_gap_slider = widgets.FloatSlider(
    value=0.25, 
    min=0.0, 
    max=2.0, 
    step=0.05,
    description="Merge Gap:"
)
merge_gap_tooltip = widgets.HTML(create_tooltip_html(
    "Maximum gap (seconds) to merge adjacent solo segments of the target speaker."
))
merge_gap_box = widgets.HBox([merge_gap_slider, merge_gap_tooltip])
merge_gap_box.add_class("input-row")

# Forwarded as --verification-threshold.
verification_threshold_slider = widgets.FloatSlider(
    value=0.69, 
    min=0.0, 
    max=1.0, 
    step=0.01,
    description="Verification Threshold:"
)
verification_threshold_tooltip = widgets.HTML(create_tooltip_html(
    "Speaker verification score (0-1). Higher is stricter. Segments below this are 'rejected'."
))
verification_threshold_box = widgets.HBox([verification_threshold_slider, verification_threshold_tooltip])
verification_threshold_box.add_class("input-row")

# Forwarded as --concat-silence.
concat_silence_slider = widgets.FloatSlider(
    value=0.5, 
    min=0.0, 
    max=5.0, 
    step=0.1,
    description="Concatenation Silence:"
)
concat_silence_tooltip = widgets.HTML(create_tooltip_html(
    "Duration of silence (seconds) inserted between concatenated verified segments."
))
concat_silence_box = widgets.HBox([concat_silence_slider, concat_silence_tooltip])
concat_silence_box.add_class("input-row")

# Boolean switches: each one, when ticked, makes run_extraction() append the
# corresponding flag to the extractor command line.

# Adds --skip-demucs.
skip_demucs_cb = widgets.Checkbox(
    value=False,
    description="Skip Demucs Vocal Separation"
)
skip_demucs_tooltip = widgets.HTML(create_tooltip_html(
    "Check if audio is already clean/vocals-only to save time."
))
skip_demucs_box = widgets.HBox([skip_demucs_cb, skip_demucs_tooltip])
skip_demucs_box.add_class("input-row")

# Adds --disable-speechbrain.
disable_speechbrain_cb = widgets.Checkbox(
    value=False,
    description="Disable SpeechBrain Verification"
)
disable_speechbrain_tooltip = widgets.HTML(create_tooltip_html(
    "Rely only on Resemblyzer for verification (faster, potentially less accurate)."
))
disable_speechbrain_box = widgets.HBox([disable_speechbrain_cb, disable_speechbrain_tooltip])
disable_speechbrain_box.add_class("input-row")

# Adds --skip-rejected-transcripts.
skip_rejected_transcripts_cb = widgets.Checkbox(
    value=False,
    description="Skip Transcribing Rejected Segments"
)
skip_rejected_transcripts_tooltip = widgets.HTML(create_tooltip_html(
    "Saves time if you don't need transcripts for segments that failed verification or were too short."
))
skip_rejected_transcripts_box = widgets.HBox([skip_rejected_transcripts_cb, skip_rejected_transcripts_tooltip])
skip_rejected_transcripts_box.add_class("input-row")

# Model selection dropdowns shown on the "Model Options" accordion page.

# Forwarded as --diar-model.
diar_model_dd = widgets.Dropdown(
    options=["pyannote/speaker-diarization-3.1", "pyannote/speaker-diarization-3.0"],
    value="pyannote/speaker-diarization-3.1",
    description="Diarization Model:"
)
diar_model_tooltip = widgets.HTML(create_tooltip_html(
    "PyAnnote model for speaker diarization. Ensure you have accepted terms on Hugging Face."
))
diar_model_box = widgets.HBox([diar_model_dd, diar_model_tooltip])
diar_model_box.add_class("input-row")

# Forwarded as --osd-model.
osd_model_dd = widgets.Dropdown(
    options=["pyannote/segmentation-3.0", "pyannote/overlapped-speech-detection"],
    value="pyannote/segmentation-3.0",
    description="OSD Model:"
)
osd_model_tooltip = widgets.HTML(create_tooltip_html(
    "PyAnnote model for Overlapped Speech Detection. 'segmentation-3.0' is often used as a base. Ensure you have accepted terms."
))
osd_model_box = widgets.HBox([osd_model_dd, osd_model_tooltip])
osd_model_box.add_class("input-row")

# Debug & File Management
# Checkboxes shown on the "Debug & Temp Files" accordion page; each maps to a
# flag appended by run_extraction() when ticked.

# Adds --dry-run.
dry_run_cb = widgets.Checkbox(
    value=False,
    description="Dry Run (Process first 60s only)"
)
dry_run_tooltip = widgets.HTML(create_tooltip_html(
    "Limits processing to the first minute of audio for quick testing."
))
dry_run_box = widgets.HBox([dry_run_cb, dry_run_tooltip])
dry_run_box.add_class("input-row")

# Adds --debug.
debug_log_cb = widgets.Checkbox(
    value=False,
    description="Enable Verbose Debug Logging"
)
debug_log_tooltip = widgets.HTML(create_tooltip_html(
    "Outputs detailed logs for troubleshooting."
))
debug_log_box = widgets.HBox([debug_log_cb, debug_log_tooltip])
debug_log_box.add_class("input-row")

# Adds --keep-temp-files.
keep_temp_files_cb = widgets.Checkbox(
    value=False,
    description="Keep Temporary Processing Files"
)
keep_temp_files_tooltip = widgets.HTML(create_tooltip_html(
    "Retains the `__tmp_processing` directory inside the run's output folder for inspection."
))
keep_temp_files_box = widgets.HBox([keep_temp_files_cb, keep_temp_files_tooltip])
keep_temp_files_box.add_class("input-row")

# Output Handling & Export
header_output = widgets.HTML("<div class='section-header'>Output Handling & Export</div>")

# run_extraction() decides whether to trigger a browser download by checking
# whether the selected label contains the substring "Download to Computer",
# so the wording of these options is part of the program logic.
output_method_radio = widgets.RadioButtons(
    options=[
        "Save ZIP to GDrive & Download to Computer", 
        "Download ZIP to Computer (No GDrive save of .zip)", 
        "Save ZIP to GDrive Only"
    ],
    value="Save ZIP to GDrive & Download to Computer",
    description="Output Methods:"
)

# Ticking this enables the HF dataset fields (via toggle_hf_dataset_fields)
# and makes the dataset repo name a required input in validate_inputs().
push_to_hf_cb = widgets.Checkbox(
    value=False,
    description="Push Final Dataset to Hugging Face Hub"
)

def toggle_hf_dataset_fields(change):
    """Observer for the "push to HF" checkbox.

    ``change['new']`` is the checkbox's new value: the dataset repo field and
    the privacy checkbox are editable only while it is truthy. Validation is
    re-run afterwards because the repo name becomes required when pushing.
    """
    fields_locked = not change['new']
    for hf_control in (hf_dataset_repo_text, hf_dataset_private_cb):
        hf_control.disabled = fields_locked
    validate_inputs()

# Enable/disable the HF dataset fields whenever the push checkbox changes.
# toggle_hf_dataset_fields() also re-runs validate_inputs(), so the checkbox
# needs no separate validation observer.
push_to_hf_cb.observe(toggle_hf_dataset_fields, names='value')

# Both HF dataset controls start disabled because the push checkbox starts
# unticked.
hf_dataset_repo_text = widgets.Text(
    placeholder="your_username/dataset_name",
    description="HF Dataset Repo:",
    disabled=True
)
# The tooltip used to claim the repo is always created private, which
# contradicted the "Make HF Dataset Private" checkbox below.
hf_dataset_repo_tooltip = widgets.HTML(create_tooltip_html(
    "HF Hub repository name. Visibility follows the 'Make HF Dataset Private' checkbox. Example: 'MyOrg/MyTargetSpeakerDataset'."
))
hf_dataset_repo_box = widgets.HBox([hf_dataset_repo_text, hf_dataset_repo_tooltip])
hf_dataset_repo_box.add_class("input-row")

hf_dataset_private_cb = widgets.Checkbox(
    value=True,
    description="Make HF Dataset Private",
    disabled=True
)

# Execution Control & Feedback
validation_status_text = widgets.HTML(value="")
# Starts disabled; validate_inputs() enables it once the required fields are
# filled in.
start_btn = widgets.Button(
    description="🚀 Start Extraction",
    button_style='success',
    icon='play',
    disabled=True,
    layout={'width': '250px', 'height': '40px'}
)
start_btn.add_class("start-button")

overall_status_html = widgets.HTML(
    value="<div class='status-message-container status-ready'>Status: Ready. Configure and click Start.</div>"
)

# Scrollable area that receives the extractor's streamed output.
log_output_widget = widgets.Output(
    layout={'border': '1px solid #dedede', 'height': '400px', 'overflow_y': 'scroll', 'padding': '10px', 'margin_top':'10px'}
)
log_output_widget.add_class("log-area-container")

# Receives the audio preview and transcript sample after a successful run.
results_output_widget = widgets.Output(
    layout={'margin_top':'10px'}
)

# Set up observers for input validation
# validate_inputs accepts *args, so it can be registered directly without a
# lambda wrapper.
for w in [hf_token_widget, audio_dir_text, reference_file_text, target_name_text, output_base_dir_text]:
    w.observe(validate_inputs, names='value')
hf_dataset_repo_text.observe(validate_inputs, names='value')
# Main execution function
def _restore_start_button():
    """Put the start button back into its idle, clickable state."""
    start_btn.disabled = False
    start_btn.description = "🚀 Start Extraction"
    start_btn.icon = "play"


def _run_extraction_pipeline():
    """Authenticate, run the extractor script, then package and preview results.

    Returns early (after updating the status banner and log) when
    authentication, input discovery or the extractor process itself fails.
    """
    # Imported under an alias: a plain `from datasets import Audio` inside this
    # function would make `Audio` a local name and break the IPython audio
    # player used in the results preview.
    token = hf_token_widget.value.strip()

    # Hugging Face Login
    try:
        with log_output_widget:
            print(f"Authenticating with Hugging Face using token starting with: {token[:4]}...")
            login(token=token, add_to_git_credential=False)
            print("✅ Authentication successful")
    except Exception as e:
        overall_status_html.value = "<div class='status-message-container status-error'>Error: Hugging Face authentication failed.</div>"
        with log_output_widget:
            print(f"❌ Authentication Error: {str(e)}")
        return

    # Input audio file discovery
    try:
        audio_dir = Path(audio_dir_text.value.strip())
        if not audio_dir.is_dir():
            raise FileNotFoundError(f"Audio directory not found: {audio_dir}")

        audio_files = []
        for ext in ['.wav', '.mp3', '.m4a', '.flac']:
            # glob() order is arbitrary; sort so "the first file" is stable.
            audio_files.extend(sorted(audio_dir.glob(f"*{ext}")))

        if not audio_files:
            raise FileNotFoundError(f"No audio files found in {audio_dir}")

        input_audio_file = audio_files[0]
        with log_output_widget:
            print(f"Found audio file: {input_audio_file}")
    except OSError as e:
        overall_status_html.value = "<div class='status-message-container status-error'>Error: Could not find audio files.</div>"
        with log_output_widget:
            print(f"❌ Error: {str(e)}")
        return

    target_name = target_name_text.value.strip()
    output_base_dir = output_base_dir_text.value.strip()

    # Construct the command as an argument list. It is executed without a
    # shell, so paths and names need no quoting and user input cannot be
    # interpreted as shell syntax.
    cmd_list = [
        "python", "Voice_Extractor/run_extractor.py",
        "--input-audio", str(input_audio_file),
        "--reference-audio", reference_file_text.value.strip(),
        "--target-name", target_name,
        "--output-base-dir", output_base_dir,
        "--token", token,
        "--output-sr", str(output_sr_dd.value),
        "--whisper-model", whisper_model_dd.value,
        "--language", language_text.value,
        "--min-duration", str(min_duration_slider.value),
        "--merge-gap", str(merge_gap_slider.value),
        "--verification-threshold", str(verification_threshold_slider.value),
        "--concat-silence", str(concat_silence_slider.value),
        "--diar-model", diar_model_dd.value,
        "--osd-model", osd_model_dd.value
    ]

    # Add boolean flags
    optional_flags = [
        (skip_demucs_cb, "--skip-demucs"),
        (disable_speechbrain_cb, "--disable-speechbrain"),
        (skip_rejected_transcripts_cb, "--skip-rejected-transcripts"),
        (dry_run_cb, "--dry-run"),
        (debug_log_cb, "--debug"),
        (keep_temp_files_cb, "--keep-temp-files"),
    ]
    cmd_list.extend(flag for checkbox, flag in optional_flags if checkbox.value)

    # Update UI before starting process
    overall_status_html.value = "<div class='status-message-container status-processing'>Status: Running Voice Extractor script...</div>"

    # Never echo the access token into the visible log.
    logged_cmd = " ".join("***" if arg == token else arg for arg in cmd_list)

    # Execute script, streaming its combined stdout/stderr into the log area.
    with log_output_widget:
        print(f"Executing command: {logged_cmd}\n--- LOG START ---")
        with subprocess.Popen(cmd_list, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              text=True, bufsize=1) as process:
            for line in process.stdout:
                print(line, end='')
        # Leaving the Popen context waits for the process and closes the pipe.
        exit_code = process.returncode

    if exit_code != 0:
        overall_status_html.value = f"<div class='status-message-container status-error'>Error: Voice extraction failed with exit code {exit_code}.</div>"
        with log_output_widget:
            print(f"\n❌ Process failed with exit code: {exit_code}")
        return

    # Post-execution processing
    overall_status_html.value = "<div class='status-message-container status-success'>Status: Voice extraction completed successfully!</div>"

    # Determine the run output directory
    # NOTE(review): this mirrors the folder naming the extractor script is
    # expected to use — confirm against run_extractor.py.
    safe_target_name = target_name.replace(' ', '_')
    run_output_dir_name = f"{safe_target_name}_{input_audio_file.stem}_SOLO_Split"
    actual_run_output_dir = Path(output_base_dir) / run_output_dir_name

    # Create ZIP archive. For the "No GDrive save" option the archive goes to
    # a local temporary directory instead of the (Drive) output folder.
    output_method = output_method_radio.value
    if "No GDrive save" in output_method:
        import tempfile
        zip_parent_dir = Path(tempfile.mkdtemp(prefix="voice_extractor_zip_"))
    else:
        zip_parent_dir = actual_run_output_dir.parent
    base_name_for_zip = zip_parent_dir / f"{safe_target_name}_dataset"
    zip_file_path = f"{base_name_for_zip}.zip"

    zip_created = False
    with log_output_widget:
        print(f"\nCreating ZIP archive of results: {zip_file_path}")
        try:
            shutil.make_archive(str(base_name_for_zip), 'zip', root_dir=actual_run_output_dir.parent, base_dir=actual_run_output_dir.name)
            zip_created = True
            print(f"✅ ZIP created successfully: {zip_file_path}")
        except Exception as e:
            print(f"❌ Error creating ZIP: {str(e)}")

    # Handle output based on selected method
    if "Download to Computer" in output_method:
        with log_output_widget:
            if zip_created:
                print("\nPreparing to download ZIP file...")
                try:
                    files.download(zip_file_path)
                    print("✅ Download initiated. Check your browser downloads.")
                except Exception as e:
                    print(f"❌ Download error: {str(e)}")
            else:
                print("\n❌ Download skipped because the ZIP archive could not be created.")

    # Handle Hugging Face push if selected
    if push_to_hf_cb.value:
        with log_output_widget:
            print(f"\nPreparing to push dataset to Hugging Face: {hf_dataset_repo_text.value}")
            try:
                from datasets import load_dataset, Audio as HFAudio

                # Find the verified transcripts CSV
                verified_csvs = sorted(actual_run_output_dir.glob("transcripts_solo_verified/*.csv"))
                if not verified_csvs:
                    raise FileNotFoundError(f"No verified transcript CSV found in {actual_run_output_dir}")
                verified_csv_path = verified_csvs[0]

                print(f"Loading dataset from {verified_csv_path}")
                ds = load_dataset('csv', data_files={'train': str(verified_csv_path)},
                                  data_dir=str(actual_run_output_dir))

                # Cast audio column to Audio type
                # NOTE(review): assumes the CSV has a 'filename' column with
                # audio paths — confirm against the extractor's output.
                ds = ds.cast_column('filename', HFAudio())

                print(f"Pushing dataset to Hugging Face Hub: {hf_dataset_repo_text.value}")
                ds.push_to_hub(
                    hf_dataset_repo_text.value,
                    private=hf_dataset_private_cb.value,
                    token=token,
                    embed_external_files=True
                )
                print(f"✅ Dataset pushed successfully to https://huggingface.co/datasets/{hf_dataset_repo_text.value}")
            except Exception as e:
                print(f"❌ Hugging Face push error: {str(e)}")

    # Display results preview
    with results_output_widget:
        print("## Extraction Results Summary\n")

        # Try to display the concatenated audio file
        try:
            concat_file = sorted(actual_run_output_dir.glob("concatenated_audio_solo/*.wav"))[0]
            print(f"### Concatenated audio: {concat_file.name}\n")
            display(Audio(str(concat_file)))
        except (IndexError, FileNotFoundError):
            print("### No concatenated audio file found\n")

        # Display transcript table
        try:
            transcript_csv = sorted(actual_run_output_dir.glob("transcripts_solo_verified/*.csv"))[0]
            df = pd.read_csv(transcript_csv)
            print(f"\n### Transcript sample (from {transcript_csv.name}):\n")
            display(df.head())
            print(f"\nTotal segments: {len(df)}")
        except (IndexError, FileNotFoundError):
            print("\n### No transcript CSV found")


def run_extraction(button_click_event):
    """Button callback: execute the Voice Extractor script and report results.

    Clears the log and results areas, switches the start button to a
    "processing" state, runs the pipeline and always restores the button
    afterwards, even when an unexpected error occurs.

    Args:
        button_click_event: The widget passed by ``Button.on_click``; unused.
    """
    log_output_widget.clear_output()
    results_output_widget.clear_output()

    # Update UI
    start_btn.disabled = True
    start_btn.description = "🔄 Processing..."
    start_btn.icon = "spinner"
    overall_status_html.value = "<div class='status-message-container status-processing'>Status: Initializing... Authenticating with Hugging Face...</div>"

    try:
        _run_extraction_pipeline()
    except Exception as e:  # callback boundary: surface the error in the UI
        overall_status_html.value = "<div class='status-message-container status-error'>Error: Unexpected failure during extraction.</div>"
        with log_output_widget:
            print(f"\n❌ Unexpected error: {str(e)}")
    finally:
        # Re-enable UI no matter how the run ended.
        _restore_start_button()

# Attach the execution function to the start button
start_btn.on_click(run_extraction)

# Create the advanced options accordion
# One page per group of settings; the page order must match the title order
# used below.
advanced_options = widgets.Accordion(
    children=[
        widgets.VBox([
            min_duration_box,
            merge_gap_box,
            verification_threshold_box,
            concat_silence_box,
            skip_demucs_box,
            disable_speechbrain_box,
            skip_rejected_transcripts_box
        ]),
        widgets.VBox([
            diar_model_box,
            osd_model_box
        ]),
        widgets.VBox([
            dry_run_box,
            debug_log_box,
            keep_temp_files_box
        ])
    ]
)
# The `titles=` constructor argument only exists in ipywidgets 8; on 7.x it is
# not applied and the pages show up untitled. set_title() is available on both
# major versions, so use it instead.
for _page_index, _page_title in enumerate(('Segment Parameters', 'Model Options', 'Debug & Temp Files')):
    advanced_options.set_title(_page_index, _page_title)

# Layout
# Stack every section top to bottom in the order the user is expected to fill
# it in: authentication, inputs, basic options, advanced options, output
# handling, then the start button with its status, log and results areas.
main_layout = widgets.VBox([
    widgets.HTML("<h1 style='text-align:center; color:#1A73E8;'>Voice Extractor - Google Colab Interface</h1>"),
    widgets.HTML("<p style='text-align:center;'>Extract solo voice segments of a target speaker from multi-speaker recordings</p>"),
    
    header_auth,
    auth_box,
    
    header_input,
    audio_dir_row,
    reference_file_row,
    target_name_box,
    output_base_dir_row,
    
    header_processing,
    output_sr_box,
    whisper_model_box,
    language_box,
    
    header_advanced,
    advanced_options,
    
    header_output,
    output_method_radio,
    widgets.VBox([push_to_hf_cb, hf_dataset_repo_box, hf_dataset_private_cb]),
    
    widgets.HBox([start_btn, validation_status_text]),
    overall_status_html,
    widgets.HTML("<div class='section-header'>Processing Log</div>"),
    log_output_widget,
    widgets.HTML("<div class='section-header'>Results</div>"),
    results_output_widget
])

# Call validate_inputs once to set initial state
# (with the fields still empty this leaves the start button disabled and shows
# the red "fill all required fields" hint).
validate_inputs()

# Display the layout
display(main_layout)

# Voice Extractor - Usage Instructions

This notebook provides a graphical interface for the [Voice Extractor](https://github.com/ReisCook/Voice_Extractor) tool, which identifies, isolates, and transcribes clean solo segments of a target speaker from multi-speaker audio recordings.

## How to Use

1. **Authentication**: Enter your Hugging Face User Access Token. This is required to access PyAnnote models.
2. **Input Files**:
   - Specify the folder containing your audio (only the first compatible audio file found in it will be processed)
   - Select a clean reference audio of ONLY your target speaker (5-30 seconds)
   - Enter a name for your target speaker
   - Choose an output directory for results
3. **Processing Options**: Configure sample rate, transcription model, and other settings
4. **Advanced Options**: Fine-tune segment parameters, model selection, and debug settings
5. **Output Handling**: Choose how to save results and optionally push to Hugging Face
6. **Start Processing**: Click the "Start Extraction" button when all required fields are filled

## Important Notes

- You need to accept the terms of use for the following PyAnnote models on Hugging Face:
  - [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1)
  - [pyannote/overlapped-speech-detection](https://huggingface.co/pyannote/overlapped-speech-detection)
  - [pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
- For optimal results, provide a clean reference audio with only the target speaker's voice
- The "Dry Run" option is helpful for testing as it processes only the first 60 seconds
- GPU acceleration is automatically used when available

For more detailed documentation, visit the [Voice Extractor GitHub repository](https://github.com/ReisCook/Voice_Extractor).
