## **Summary - Complete Cell List**

| Cell | Name | Purpose |
|------|------|---------|
| **1** | Installation & Setup | Install packages, load voices |
| **2** | Core TTS Engine | Main engine with all features |
| **3** | Main TTS Interface | 4-tab UI (Simple/Advanced/Batch/Multi-Voice) |
| **4** | Voice Explorer | Browse and test 300+ voices |
| **5** | Audiobook Creator | Create audiobooks from chapters |
| **6** | Audio Editor | Edit audio with effects |
| **7** | SSML Editor | Advanced speech control |
| **8** | History & Export | View history, manage files |
| **9** | Quick Functions | Easy API for quick access |
| **10** | Dashboard | Statistics and overview |

**Run cells 1-3 for basic functionality, then any additional cells for extra features!**

In [None]:
#@title 🚀 **CELL 1: Complete Installation & Setup**
#@markdown Run this first! Installs everything needed.

import subprocess
import sys

# Banner so the installation log is easy to find in the cell output.
print("="*60)
print("üöÄ ULTIMATE TTS NOTEBOOK - INSTALLATION")
print("="*60)

# Python packages installed one at a time so that a single failing package
# is reported without aborting the remaining installs.
packages = [
    "edge-tts",
    "gTTS",
    "pyttsx3",
    "unidecode",
    "num2words",
    "inflect",
    "pydub",
    "nest-asyncio",
    "ipywidgets",
    "phonetics",
    "langdetect",
    "clean-text",
]

print("\nüì¶ Installing packages...")
for pkg in packages:
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg])
    except (subprocess.CalledProcessError, OSError):
        # Only installer failures are treated as "optional package missing";
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        print(f"  ‚úó {pkg} (optional)")
    else:
        print(f"  ‚úì {pkg}")

# System dependencies (best effort: exit codes are ignored via check=False).
print("\nüîß Installing system dependencies...")
for apt_command in (
    ["apt-get", "update", "-qq"],
    ["apt-get", "install", "-y", "ffmpeg", "espeak-ng"],
):
    try:
        subprocess.run(apt_command, check=False, capture_output=True)
    except OSError:
        # apt-get itself is not present on this system; keep the step
        # best-effort instead of crashing the whole cell.
        print("  apt-get not available, skipping system dependencies")
        break

# Imports
print("\nüìö Importing libraries...")
import os
import re
import json
import time
import hashlib
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Optional, Tuple

import numpy as np
import pandas as pd
import edge_tts
import asyncio
import nest_asyncio
from gtts import gTTS
import pyttsx3
from unidecode import unidecode
from num2words import num2words
import inflect
from pydub import AudioSegment
from pydub.effects import normalize, speedup

import ipywidgets as widgets
from IPython.display import display, Audio, HTML, clear_output, FileLink

# Patch asyncio via nest_asyncio (the "async fix") before any
# run_until_complete() call is made from a notebook cell.
nest_asyncio.apply()

# Workspace layout: a single root folder with one sub-folder per artifact kind.
BASE_DIR = Path("/content/tts_studio")
AUDIO_DIR = BASE_DIR.joinpath("audio")
BATCH_DIR = BASE_DIR.joinpath("batch")
AUDIOBOOK_DIR = BASE_DIR.joinpath("audiobooks")
EXPORT_DIR = BASE_DIR.joinpath("exports")
HISTORY_DIR = BASE_DIR.joinpath("history")
TEMP_DIR = BASE_DIR.joinpath("temp")
VOICES_DIR = BASE_DIR.joinpath("voices")

# Create every folder up front; already-existing folders are left untouched.
for d in (
    BASE_DIR,
    AUDIO_DIR,
    BATCH_DIR,
    AUDIOBOOK_DIR,
    EXPORT_DIR,
    HISTORY_DIR,
    TEMP_DIR,
    VOICES_DIR,
):
    d.mkdir(parents=True, exist_ok=True)

print(f"\nüìÅ Created directories in: {BASE_DIR}")

# Fetch the Edge TTS voice catalogue once; later cells read EDGE_VOICES.
print("\nüé§ Loading voices...")


async def load_voices():
    """Return the voice list reported by ``edge_tts.list_voices()``."""
    voices = await edge_tts.list_voices()
    return voices


EDGE_VOICES = asyncio.get_event_loop().run_until_complete(load_voices())
print(f"‚úÖ Loaded {len(EDGE_VOICES)} Edge TTS voices!")

# Index the voices twice: by full locale ("en-US") and by the language part
# of the locale ("en"), i.e. everything before the first hyphen.
VOICES_BY_LOCALE = {}
VOICES_BY_LANGUAGE = {}
for v in EDGE_VOICES:
    locale = v['Locale']
    lang = locale.split('-')[0]
    VOICES_BY_LOCALE.setdefault(locale, []).append(v)
    VOICES_BY_LANGUAGE.setdefault(lang, []).append(v)

print(f"‚úÖ Organized into {len(VOICES_BY_LOCALE)} locales, {len(VOICES_BY_LANGUAGE)} languages")

print("\n" + "="*60, "‚úÖ INSTALLATION COMPLETE!", "="*60, sep="\n")

In [None]:
#@title 🔧 **CELL 2: Core TTS Engine**
#@markdown The powerful engine that powers everything

import pickle

class TTSHistory:
    """Manages TTS generation history.

    Entries are plain dicts kept in ``self.history`` (oldest first) and
    persisted as a JSON list in ``self.history_file``.
    """

    def __init__(self, history_file=None):
        """Load the history from ``history_file``.

        Args:
            history_file: Path (``str`` or ``Path``) of the JSON file.
                Defaults to ``HISTORY_DIR / "history.json"``.
        """
        self.history_file = Path(history_file or HISTORY_DIR / "history.json")
        self.history = self._load()

    def _load(self):
        """Return the stored entries, or ``[]`` if none can be read."""
        try:
            with open(self.history_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            return []
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            # A damaged file must not prevent the engine from starting.
            print(f"Warning: could not read history file {self.history_file}: {exc}")
            return []
        # Anything other than a list cannot be appended to or sliced.
        return data if isinstance(data, list) else []

    def save(self):
        """Write the most recent 1000 entries to ``self.history_file``."""
        self.history_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.history_file, 'w', encoding='utf-8') as f:
            json.dump(self.history[-1000:], f)  # Keep last 1000

    def add(self, entry):
        """Stamp ``entry`` with a timestamp and id, store it and save.

        The passed dict is modified in place (``'timestamp'`` and ``'id'``
        keys are set).

        Returns:
            The 8-character id derived from the entry text and timestamp.
        """
        entry['timestamp'] = datetime.now().isoformat()
        digest_source = f"{entry.get('text', '')}{entry['timestamp']}"
        entry['id'] = hashlib.md5(digest_source.encode()).hexdigest()[:8]
        self.history.append(entry)
        self.save()
        return entry['id']

    def get_recent(self, n=10):
        """Return the last ``n`` entries (oldest first).

        ``n <= 0`` yields an empty list; a plain ``history[-n:]`` would
        return the *entire* history for ``n == 0``.
        """
        if n <= 0:
            return []
        return self.history[-n:]

    def search(self, query):
        """Return entries whose ``'text'`` contains ``query`` (case-insensitive)."""
        needle = query.lower()
        return [h for h in self.history if needle in (h.get('text') or '').lower()]

    def clear(self):
        """Remove every entry and persist the empty history."""
        self.history = []
        self.save()


class TTSFavorites:
    """Manages favorite voices and settings.

    ``self.favorites`` is a dict with two lists, ``'voices'`` and
    ``'presets'``, persisted as JSON in ``self.favorites_file``.
    """

    def __init__(self, favorites_file=None):
        """Load favorites from ``favorites_file``.

        Args:
            favorites_file: Path (``str`` or ``Path``) of the JSON file.
                Defaults to ``HISTORY_DIR / "favorites.json"``.
        """
        self.favorites_file = Path(favorites_file or HISTORY_DIR / "favorites.json")
        self.favorites = self._load()

    def _load(self):
        """Return the stored favorites, always containing both list keys."""
        try:
            with open(self.favorites_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            data = {}
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            # A damaged file must not prevent the engine from starting.
            print(f"Warning: could not read favorites file {self.favorites_file}: {exc}")
            data = {}

        if not isinstance(data, dict):
            data = {}
        # Guarantee the two list keys the other methods index directly.
        for key in ('voices', 'presets'):
            if not isinstance(data.get(key), list):
                data[key] = []
        return data

    def save(self):
        """Write the favorites dict to ``self.favorites_file``."""
        self.favorites_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.favorites_file, 'w', encoding='utf-8') as f:
            json.dump(self.favorites, f)

    def add_voice(self, voice_id, name=None):
        """Add ``voice_id`` to the favorite voices unless it is already there.

        Args:
            voice_id: Identifier of the voice.
            name: Display name; falls back to ``voice_id``.
        """
        if any(v['id'] == voice_id for v in self.favorites['voices']):
            return
        self.favorites['voices'].append({
            'id': voice_id,
            'name': name or voice_id,
            'added': datetime.now().isoformat()
        })
        self.save()

    def remove_voice(self, voice_id):
        """Drop every favorite voice whose id equals ``voice_id`` and save."""
        self.favorites['voices'] = [v for v in self.favorites['voices'] if v['id'] != voice_id]
        self.save()

    def add_preset(self, name, settings):
        """Append a named settings preset and save."""
        self.favorites['presets'].append({
            'name': name,
            'settings': settings,
            'added': datetime.now().isoformat()
        })
        self.save()

    def get_presets(self):
        """Return the list of stored presets."""
        return self.favorites['presets']


class TextPreprocessor:
    """Advanced text preprocessing for TTS.

    Rewrites abbreviations, ordinals, clock times, numbers, symbols and
    emoticons as words, applies user-defined pronunciations and tidies
    whitespace/punctuation. All word output is English.
    """

    def __init__(self, language='en'):
        """Build the replacement tables.

        Args:
            language: Stored on the instance as ``self.language``.
        """
        self.language = language
        self.inflect_engine = inflect.engine()

        # Keys are lower-case; matching is case-insensitive.
        self.abbreviations = {
            "mr.": "mister", "mrs.": "missus", "ms.": "miss",
            "dr.": "doctor", "prof.": "professor", "sr.": "senior",
            "jr.": "junior", "vs.": "versus", "etc.": "etcetera",
            "e.g.": "for example", "i.e.": "that is",
            "st.": "street", "ave.": "avenue", "blvd.": "boulevard",
            "apt.": "apartment", "govt.": "government",
            "dept.": "department", "univ.": "university",
            "corp.": "corporation", "inc.": "incorporated",
            "ltd.": "limited", "co.": "company",
            "jan.": "january", "feb.": "february", "mar.": "march",
            "apr.": "april", "jun.": "june", "jul.": "july",
            "aug.": "august", "sep.": "september", "sept.": "september",
            "oct.": "october", "nov.": "november", "dec.": "december",
            "mon.": "monday", "tue.": "tuesday", "wed.": "wednesday",
            "thu.": "thursday", "fri.": "friday", "sat.": "saturday",
            "sun.": "sunday",
        }

        # Non-ASCII symbols are written as escapes (degree, euro, pound, yen,
        # copyright, registered, trademark) so the keys stay correct even if
        # the source file is re-encoded.
        self.symbols = {
            "&": " and ", "@": " at ", "#": " hashtag ",
            "%": " percent ", "+": " plus ", "=": " equals ",
            "\u00b0": " degrees ", "$": " dollars ", "\u20ac": " euros ",
            "\u00a3": " pounds ", "\u00a5": " yen ", "\u00a9": " copyright ",
            "\u00ae": " registered ", "\u2122": " trademark ",
        }

        self.emoticons = {
            ":)": " smile ", ":(": " sad ", ":D": " happy ",
            ";)": " wink ", "<3": " heart ", ":P": " tongue ",
            ":/": " uncertain ", ":O": " surprised ",
            "XD": " laughing ", "^^": " happy ",
        }

        self.custom_pronunciations = {}

    def add_pronunciation(self, word, pronunciation):
        """Add custom pronunciation (``word`` is stored lower-cased)."""
        self.custom_pronunciations[word.lower()] = pronunciation

    def expand_abbreviations(self, text):
        """Replace known abbreviations with their spoken form.

        An abbreviation only matches when it is not preceded by a word
        character, so "co." no longer fires inside "disco." and "st." no
        longer fires inside "first.". Whole words that coincide with an
        abbreviation at the end of a sentence ("sun.", "sat.") are still
        expanded.
        """
        for abbr, full in self.abbreviations.items():
            pattern = r'(?<!\w)' + re.escape(abbr)
            # A callable replacement inserts ``full`` literally.
            text = re.sub(pattern, lambda _m, full=full: full, text, flags=re.IGNORECASE)
        return text

    def convert_numbers(self, text):
        """Spell out integers and decimals ("3.14" -> "three point one four")."""
        def replace_num(match):
            num_str = match.group()
            try:
                if '.' in num_str:
                    whole_part, fraction = num_str.split('.')
                    whole = num2words(int(whole_part)) if whole_part else "zero"
                    digits = ' '.join(num2words(int(d)) for d in fraction)
                    return f"{whole} point {digits}"
                return num2words(int(num_str))
            except Exception:
                # Best effort: keep the digits if the conversion fails.
                return num_str

        # The fractional part needs at least one digit, so a sentence-ending
        # "3." is read as the integer 3 rather than "three point".
        return re.sub(r'\b\d+(?:\.\d+)?\b', replace_num, text)

    def convert_ordinals(self, text):
        """Spell out ordinals such as "1st" or "22nd" ("first", "twenty-second").

        ``inflect``'s ``ordinal()`` maps 1 back to "1st", which left the text
        unchanged; ``num2words(..., to='ordinal')`` produces the words.
        """
        def replace_ordinal(match):
            try:
                return num2words(int(match.group(1)), to='ordinal')
            except Exception:
                # Best effort: keep the original token if conversion fails.
                return match.group(0)

        return re.sub(r'\b(\d+)(st|nd|rd|th)\b', replace_ordinal, text, flags=re.IGNORECASE)

    def convert_time(self, text):
        """Spell out clock times such as "3:05 PM" ("three oh five PM")."""
        def time_to_words(match):
            hour = int(match.group(1))
            minute = int(match.group(2))
            period = match.group(3) or ""

            hour_word = num2words(hour)
            if minute == 0:
                minute_word = "o'clock"
            elif minute < 10:
                minute_word = f"oh {num2words(minute)}"
            else:
                minute_word = num2words(minute)

            return f"{hour_word} {minute_word} {period}".strip()

        # Whitespace is consumed only together with an AM/PM marker, so the
        # space after a bare "3:30" survives; the trailing \b stops "am"
        # from matching the start of a word such as "amazing".
        pattern = r'\b(\d{1,2}):(\d{2})(?!\d)(?:\s*(AM|PM|am|pm)\b)?'
        return re.sub(pattern, time_to_words, text)

    def replace_symbols(self, text):
        """Replace every symbol in ``self.symbols`` with its word."""
        for symbol, word in self.symbols.items():
            text = text.replace(symbol, word)
        return text

    def replace_emoticons(self, text):
        """Replace every emoticon in ``self.emoticons`` with its word."""
        for emoticon, word in self.emoticons.items():
            text = text.replace(emoticon, word)
        return text

    def apply_custom_pronunciations(self, text):
        """Replace whole-word matches of custom words (case-insensitive).

        Look-arounds are used instead of ``\\b`` so that entries that start
        or end with punctuation (e.g. "c++") can match, and the replacement
        is inserted literally (backslashes are not regex templates).
        """
        for word, pronunciation in self.custom_pronunciations.items():
            pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
            text = re.sub(
                pattern,
                lambda _m, pronunciation=pronunciation: pronunciation,
                text,
                flags=re.IGNORECASE,
            )
        return text

    def clean_text(self, text):
        """Collapse whitespace and repeated ``.!?``; add a space after punctuation."""
        # Remove extra whitespace
        text = ' '.join(text.split())
        # Remove multiple punctuation
        text = re.sub(r'([.!?])\1+', r'\1', text)
        # Add space after punctuation if missing
        text = re.sub(r'([.!?,])([A-Za-z])', r'\1 \2', text)
        return text.strip()

    def process(self, text, options=None):
        """Run the enabled preprocessing steps and return the cleaned text.

        Args:
            text: Input text.
            options: Optional dict of booleans, all defaulting to ``True``:
                ``expand_abbreviations``, ``convert_ordinals``,
                ``convert_time``, ``replace_emoticons``, ``convert_numbers``,
                ``replace_symbols``, ``custom_pronunciations``.

        Returns:
            The processed text; ``clean_text`` is always applied last.
        """
        options = options or {}

        if options.get('expand_abbreviations', True):
            text = self.expand_abbreviations(text)

        if options.get('convert_ordinals', True):
            text = self.convert_ordinals(text)

        if options.get('convert_time', True):
            text = self.convert_time(text)

        # Emoticons run before number conversion: otherwise "<3" has already
        # become "<three" and the heart emoticon can never match.
        if options.get('replace_emoticons', True):
            text = self.replace_emoticons(text)

        if options.get('convert_numbers', True):
            text = self.convert_numbers(text)

        if options.get('replace_symbols', True):
            text = self.replace_symbols(text)

        if options.get('custom_pronunciations', True):
            text = self.apply_custom_pronunciations(text)

        return self.clean_text(text)


class AudioProcessor:
    """Audio processing and effects built on pydub ``AudioSegment`` objects."""

    def __init__(self):
        self.temp_dir = TEMP_DIR

    def load_audio(self, file_path):
        """Read ``file_path`` and return it as an ``AudioSegment``."""
        segment = AudioSegment.from_file(file_path)
        return segment

    def save_audio(self, audio, file_path, format='mp3'):
        """Export ``audio`` to ``file_path`` in ``format``; return the path."""
        audio.export(file_path, format=format)
        return file_path

    def change_speed(self, audio, speed=1.0):
        """Return ``audio`` played ``speed`` times as fast.

        ``speed == 1.0`` returns the input untouched, values above 1.0 go
        through pydub's ``speedup`` and every other value re-labels the raw
        samples with a scaled frame rate before converting back to the
        original frame rate.
        """
        if speed == 1.0:
            return audio
        if speed > 1.0:
            return speedup(audio, speed, 150, 25)

        # Slow down by changing frame rate
        scaled_rate = int(audio.frame_rate * speed)
        relabelled = audio._spawn(audio.raw_data, overrides={'frame_rate': scaled_rate})
        return relabelled.set_frame_rate(audio.frame_rate)

    def change_volume(self, audio, volume_db=0):
        """Return ``audio`` with ``volume_db`` added to it."""
        louder = audio + volume_db
        return louder

    def normalize_audio(self, audio):
        """Return ``audio`` passed through pydub's ``normalize``."""
        return normalize(audio)

    def fade_in(self, audio, duration_ms=500):
        """Return ``audio`` with a fade-in of ``duration_ms``."""
        return audio.fade_in(duration_ms)

    def fade_out(self, audio, duration_ms=500):
        """Return ``audio`` with a fade-out of ``duration_ms``."""
        return audio.fade_out(duration_ms)

    def add_silence(self, audio, start_ms=0, end_ms=0):
        """Pad ``audio`` with silence before (``start_ms``) and after (``end_ms``)."""
        if start_ms > 0:
            audio = AudioSegment.silent(duration=start_ms) + audio
        if end_ms > 0:
            audio = audio + AudioSegment.silent(duration=end_ms)
        return audio

    def trim_silence(self, audio, silence_thresh=-50, chunk_size=10):
        """Cut leading and trailing chunks whose dBFS is not above ``silence_thresh``.

        The audio is scanned in ``chunk_size`` steps from the front and from
        the back; if no chunk exceeds the threshold in a direction, that
        side is left untrimmed.
        """
        def is_loud(piece):
            return piece.dBFS > silence_thresh

        # First loud chunk scanning forwards (default: keep from the start).
        head = next(
            (pos for pos in range(0, len(audio), chunk_size)
             if is_loud(audio[pos:pos + chunk_size])),
            0,
        )
        # First loud chunk scanning backwards (default: keep to the end).
        tail = next(
            (pos for pos in range(len(audio), 0, -chunk_size)
             if is_loud(audio[pos - chunk_size:pos])),
            len(audio),
        )
        return audio[head:tail]

    def concatenate(self, audio_list, gap_ms=500):
        """Join the segments in ``audio_list`` with ``gap_ms`` of silence between them."""
        if not audio_list:
            return AudioSegment.empty()

        first, *remaining = audio_list
        gap = AudioSegment.silent(duration=gap_ms)

        combined = first
        for clip in remaining:
            combined = combined + gap + clip
        return combined

    def get_info(self, audio):
        """Return duration, channel, sample-width, frame-rate and dBFS info."""
        length_ms = len(audio)
        info = {
            'duration_ms': length_ms,
            'duration_s': length_ms / 1000,
        }
        for attribute in ('channels', 'sample_width', 'frame_rate', 'dBFS'):
            info[attribute] = getattr(audio, attribute)
        return info

    def convert_format(self, input_path, output_path, output_format):
        """Load ``input_path`` and re-export it to ``output_path`` as ``output_format``."""
        loaded = self.load_audio(input_path)
        return self.save_audio(loaded, output_path, output_format)


class TTSEngine:
    """Main TTS Engine with all capabilities.

    Wraps three back ends (Edge TTS, Google TTS, pyttsx3 "system" TTS) behind
    ``synthesize`` and adds batch, SSML, multi-voice and audiobook helpers.
    """

    # File suffixes that are re-exported in their own format after effects;
    # every other suffix is exported as MP3.
    _EXPORT_FORMATS = {'.wav': 'wav', '.ogg': 'ogg', '.flac': 'flac'}

    def __init__(self):
        self.preprocessor = TextPreprocessor()
        self.audio_processor = AudioProcessor()
        self.history = TTSHistory()
        self.favorites = TTSFavorites()

        self.default_settings = {
            'engine': 'edge',
            'voice': 'en-US-JennyNeural',
            'language': 'en',
            'speed': 1.0,
            'pitch': 1.0,
            'volume': 1.0,
            'preprocess': True,
        }

    # ========== HELPERS ==========
    @staticmethod
    def _signed_offset(value, scale, unit):
        """Format a multiplier as a signed offset string, e.g. 1.2 -> "+20%".

        ``round`` is used because ``int((1.2 - 1) * 100)`` truncates the
        floating-point result 19.999... to 19.
        """
        delta = round((value - 1) * scale)
        return f"{delta:+d}{unit}"

    @staticmethod
    def _unique_path(directory, prefix, suffix):
        """Return a not-yet-existing ``<prefix>_<unix seconds><suffix>`` path.

        A numeric counter is appended when the plain name already exists, so
        several files generated within the same second do not overwrite each
        other.
        """
        stamp = int(time.time())
        candidate = Path(directory) / f"{prefix}_{stamp}{suffix}"
        counter = 1
        while candidate.exists():
            candidate = Path(directory) / f"{prefix}_{stamp}_{counter}{suffix}"
            counter += 1
        return str(candidate)

    @staticmethod
    def _safe_name(name):
        """Turn ``name`` into a single safe path component.

        Spaces, path separators, other characters that are invalid in file
        names on common platforms and control characters become "_"; names
        that would be empty or refer to a directory themselves become
        "untitled".
        """
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name).replace(' ', '_')
        if cleaned in ('', '.', '..'):
            return 'untitled'
        return cleaned

    @classmethod
    def _export_format(cls, path):
        """Return the export format matching ``path``'s suffix (default MP3)."""
        return cls._EXPORT_FORMATS.get(Path(str(path)).suffix.lower(), 'mp3')

    @staticmethod
    def _effect_duration(value, default=500):
        """Return the fade length in ms; a bare ``True`` means ``default``."""
        return default if value is True else value

    def _apply_effects(self, path, effects):
        """Apply the requested effects to the file at ``path`` in place."""
        processor = self.audio_processor
        audio = processor.load_audio(path)

        if effects.get('normalize'):
            audio = processor.normalize_audio(audio)
        if effects.get('fade_in'):
            audio = processor.fade_in(audio, self._effect_duration(effects['fade_in']))
        if effects.get('fade_out'):
            audio = processor.fade_out(audio, self._effect_duration(effects['fade_out']))
        if effects.get('trim_silence'):
            audio = processor.trim_silence(audio)
        if effects.get('silence_start'):
            audio = processor.add_silence(audio, start_ms=effects['silence_start'])
        if effects.get('silence_end'):
            audio = processor.add_silence(audio, end_ms=effects['silence_end'])

        # Keep the container consistent with the file name: the system
        # engine writes .wav files, which must not be overwritten with MP3.
        processor.save_audio(audio, path, self._export_format(path))

    # ========== EDGE TTS ==========
    async def _edge_tts_async(self, text, voice, rate, pitch, volume, output_file):
        """Async Edge TTS generation.

        ``rate`` and ``volume`` are multipliers converted to signed percent
        strings; ``pitch`` is a multiplier converted to a signed Hz offset
        (50 Hz per 1.0 of multiplier).
        """
        rate_str = self._signed_offset(rate, 100, "%")
        pitch_str = self._signed_offset(pitch, 50, "Hz")
        volume_str = self._signed_offset(volume, 100, "%")

        communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str, volume=volume_str)
        await communicate.save(output_file)
        return output_file

    def edge_tts(self, text, voice="en-US-JennyNeural", speed=1.0, pitch=1.0, volume=1.0, output_file=None):
        """Generate with Edge TTS and return the output path."""
        output_file = output_file or self._unique_path(AUDIO_DIR, "edge", ".mp3")

        loop = asyncio.get_event_loop()
        loop.run_until_complete(
            self._edge_tts_async(text, voice, speed, pitch, volume, output_file)
        )

        return output_file

    # ========== GOOGLE TTS ==========
    def google_tts(self, text, language="en", slow=False, output_file=None):
        """Generate with Google TTS and return the output path."""
        output_file = output_file or self._unique_path(AUDIO_DIR, "gtts", ".mp3")

        tts = gTTS(text=text, lang=language, slow=slow)
        tts.save(output_file)

        return output_file

    # ========== SYSTEM TTS ==========
    def system_tts(self, text, rate=150, volume=1.0, output_file=None):
        """Generate with system TTS (pyttsx3) and return the output path."""
        output_file = output_file or self._unique_path(AUDIO_DIR, "system", ".wav")

        engine = pyttsx3.init()
        engine.setProperty('rate', rate)
        engine.setProperty('volume', volume)
        engine.save_to_file(text, output_file)
        engine.runAndWait()

        return output_file

    # ========== MAIN SYNTHESIS ==========
    def synthesize(self, text, engine='edge', voice=None, language='en',
                   speed=1.0, pitch=1.0, volume=1.0, preprocess=True,
                   effects=None, output_file=None, save_history=True):
        """
        Main synthesis function

        Args:
            text: Text to synthesize
            engine: 'edge', 'google', or 'system'
            voice: Voice ID (for edge)
            language: Language code
            speed: Speed (0.5 to 2.0)
            pitch: Pitch (0.5 to 1.5)
            volume: Volume (0 to 2.0)
            preprocess: Apply text preprocessing
            effects: Dict of audio effects. Recognised keys: 'normalize',
                'trim_silence' (truthy flags), 'fade_in' / 'fade_out'
                (milliseconds, or True for 500 ms), 'silence_start' /
                'silence_end' (milliseconds)
            output_file: Output path
            save_history: Save to history

        Returns:
            Path to generated audio file

        Raises:
            ValueError: If ``engine`` is not one of the supported names.
        """
        start_time = time.time()

        # Preprocess text
        original_text = text
        if preprocess:
            text = self.preprocessor.process(text)

        # Generate audio based on engine
        if engine == 'edge':
            voice = voice or 'en-US-JennyNeural'
            output = self.edge_tts(text, voice, speed, pitch, volume, output_file)
        elif engine == 'google':
            # gTTS only has a normal/slow switch, so low speeds map to slow.
            slow = speed < 0.8
            output = self.google_tts(text, language, slow=slow, output_file=output_file)
        elif engine == 'system':
            # 150 is the base rate that is scaled by ``speed``.
            output = self.system_tts(text, int(150 * speed), volume, output_file)
        else:
            raise ValueError(f"Unknown engine: {engine}")

        # Apply audio effects
        if effects:
            self._apply_effects(output, effects)

        # Wall-clock time spent generating (not the audio length).
        duration = time.time() - start_time

        # Save to history
        if save_history:
            self.history.add({
                'text': original_text[:500],
                'engine': engine,
                'voice': voice,
                'language': language,
                'speed': speed,
                'pitch': pitch,
                'volume': volume,
                'output': output,
                'duration': round(duration, 2)
            })

        return output

    # ========== BATCH PROCESSING ==========
    def batch_synthesize(self, texts, progress_callback=None, **kwargs):
        """Process multiple texts.

        Args:
            texts: Sequence of texts; each is passed to ``synthesize``.
            progress_callback: Optional ``callback(done, total)``.
            **kwargs: Forwarded unchanged to every ``synthesize`` call.

        Returns:
            List of output paths, one per text.
        """
        outputs = []
        total = len(texts)

        for done, text in enumerate(texts, start=1):
            outputs.append(self.synthesize(text, **kwargs))

            if progress_callback:
                progress_callback(done, total)

        return outputs

    # ========== SSML SYNTHESIS ==========
    async def _edge_ssml_async(self, ssml, output_file):
        """Generate from SSML with the fixed voice en-US-JennyNeural."""
        # NOTE(review): the markup is passed as Communicate's text argument;
        # whether the installed edge_tts version interprets it as SSML or
        # speaks it literally should be confirmed.
        communicate = edge_tts.Communicate(ssml, "en-US-JennyNeural")
        await communicate.save(output_file)
        return output_file

    def synthesize_ssml(self, ssml, output_file=None):
        """Generate from SSML markup and return the output path."""
        output_file = output_file or self._unique_path(AUDIO_DIR, "ssml", ".mp3")

        loop = asyncio.get_event_loop()
        loop.run_until_complete(self._edge_ssml_async(ssml, output_file))

        return output_file

    # ========== MULTI-VOICE ==========
    def multi_voice_synthesize(self, segments, gap_ms=500, output_file=None):
        """
        Synthesize with multiple voices

        Args:
            segments: List of {'text': str, 'voice': str, 'speed': float, ...}
            gap_ms: Gap between segments in ms
            output_file: Output path

        Returns:
            Path to the combined audio file
        """
        audio_segments = []

        for seg in segments:
            temp_file = self._unique_path(TEMP_DIR, "segment", ".mp3")
            try:
                self.synthesize(
                    seg.get('text', ''),
                    voice=seg.get('voice', 'en-US-JennyNeural'),
                    speed=seg.get('speed', 1.0),
                    pitch=seg.get('pitch', 1.0),
                    output_file=temp_file,
                    save_history=False
                )
                audio_segments.append(self.audio_processor.load_audio(temp_file))
            finally:
                # The segment has been loaded (or generation failed); the
                # intermediate file is no longer needed.
                leftover = Path(temp_file)
                if leftover.exists():
                    leftover.unlink()

        # Concatenate
        final_audio = self.audio_processor.concatenate(audio_segments, gap_ms)

        output_file = output_file or self._unique_path(AUDIO_DIR, "multi", ".mp3")
        self.audio_processor.save_audio(final_audio, output_file)

        return output_file

    # ========== AUDIOBOOK ==========
    def create_audiobook(self, chapters, title="Audiobook", voice="en-US-JennyNeural",
                         speed=1.0, chapter_gap_ms=2000, progress_callback=None):
        """
        Create audiobook from chapters

        Args:
            chapters: Dict of chapter_name: chapter_text
            title: Audiobook title
            voice: Voice to use
            speed: Speech speed
            chapter_gap_ms: Gap between chapters
            progress_callback: Progress callback function, called as
                ``callback(done, total, chapter_name)``

        Returns:
            Dict with 'chapter_files', 'full_file' and 'metadata'
        """
        # Titles and chapter names become path components, so characters
        # such as "/" must not reach the file system unchanged.
        safe_title = self._safe_name(title)
        audiobook_dir = AUDIOBOOK_DIR / safe_title
        audiobook_dir.mkdir(parents=True, exist_ok=True)

        chapter_files = []
        total = len(chapters)

        for i, (chapter_name, chapter_text) in enumerate(chapters.items()):
            # Generate chapter audio
            safe_chapter = self._safe_name(chapter_name[:30])
            chapter_file = str(audiobook_dir / f"chapter_{i+1:03d}_{safe_chapter}.mp3")

            # Add chapter title announcement
            full_text = f"Chapter {i+1}: {chapter_name}. {chapter_text}"

            self.synthesize(
                full_text,
                voice=voice,
                speed=speed,
                output_file=chapter_file,
                save_history=False
            )

            chapter_files.append(chapter_file)

            if progress_callback:
                progress_callback(i + 1, total, chapter_name)

        # Create full audiobook
        audio_segments = [self.audio_processor.load_audio(f) for f in chapter_files]
        full_audiobook = self.audio_processor.concatenate(audio_segments, chapter_gap_ms)

        full_audiobook_file = str(audiobook_dir / f"{safe_title}_full.mp3")
        self.audio_processor.save_audio(full_audiobook, full_audiobook_file)

        # Save metadata
        metadata = {
            'title': title,
            'chapters': list(chapters.keys()),
            'voice': voice,
            'speed': speed,
            'created': datetime.now().isoformat(),
            'chapter_files': chapter_files,
            'full_file': full_audiobook_file
        }

        with open(audiobook_dir / 'metadata.json', 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)

        return {
            'chapter_files': chapter_files,
            'full_file': full_audiobook_file,
            'metadata': metadata
        }


# Create the shared engine instance that the interface cells call into.
tts = TTSEngine()

# Status banner followed by a capability summary.
print("="*60, "‚úÖ TTS ENGINE INITIALIZED!", "="*60, sep="\n")
print(f"""
üìä Capabilities:
  ‚Ä¢ Edge TTS: {len(EDGE_VOICES)} voices
  ‚Ä¢ Google TTS: 50+ languages
  ‚Ä¢ System TTS: Offline mode

üéõÔ∏è Features:
  ‚Ä¢ Text preprocessing
  ‚Ä¢ Audio effects
  ‚Ä¢ Batch processing
  ‚Ä¢ Multi-voice synthesis
  ‚Ä¢ Audiobook creation
  ‚Ä¢ History tracking
  ‚Ä¢ Favorites system
""")

In [None]:
#@title 🎛️ **CELL 3: Main TTS Interface**
#@markdown The main interface for text-to-speech

# ============ UI COMPONENTS ============

# Title
# Static header banner for the interface.
title_html = widgets.HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
    <h1 style="color: white; margin: 0; font-size: 2em;">üé§ Ultimate TTS Studio</h1>
    <p style="color: #ddd; margin: 5px 0 0 0;">Professional Text-to-Speech System</p>
</div>
""")

# Tab container
# Each tab's root widget is appended to this list as it is built.
tab_children = []

# ========== TAB 1: SIMPLE MODE ==========
# Text to speak, pre-filled with a short demo sentence.
simple_text = widgets.Textarea(
    value='Hello! Welcome to the Ultimate TTS Studio. Enter your text here and click generate to hear it spoken.',
    placeholder='Enter your text here...',
    layout=widgets.Layout(width='100%', height='200px')
)

# Fixed shortlist of English voices as (label, Edge voice ID) pairs.
simple_voice_dropdown = widgets.Dropdown(
    options=[
        ('üë© Jenny (US Female)', 'en-US-JennyNeural'),
        ('üë® Guy (US Male)', 'en-US-GuyNeural'),
        ('üë© Aria (US Female)', 'en-US-AriaNeural'),
        ('üë® Davis (US Male)', 'en-US-DavisNeural'),
        ('üë© Sonia (UK Female)', 'en-GB-SoniaNeural'),
        ('üë® Ryan (UK Male)', 'en-GB-RyanNeural'),
        ('üë© Natasha (AU Female)', 'en-AU-NatashaNeural'),
        ('üë® William (AU Male)', 'en-AU-WilliamNeural'),
    ],
    value='en-US-JennyNeural',
    description='Voice:',
    layout=widgets.Layout(width='100%')
)

# Speed multiplier passed to tts.synthesize (1.0 = unchanged).
simple_speed = widgets.FloatSlider(
    value=1.0, min=0.5, max=2.0, step=0.1,
    description='Speed:',
    layout=widgets.Layout(width='100%')
)

simple_generate = widgets.Button(
    description='üéôÔ∏è Generate Speech',
    button_style='success',
    layout=widgets.Layout(width='100%', height='50px')
)

# Output area for the audio player / file path, and a one-line status label.
simple_output = widgets.Output()
simple_status = widgets.HTML("<p style='color: gray;'>Ready...</p>")

def on_simple_generate(b):
    """Click handler for the simple tab: synthesize the text and play it.

    Uses the Edge engine with the selected voice and speed. Progress and
    errors are shown in ``simple_status``; the audio player and saved path
    are rendered inside ``simple_output``.

    Args:
        b: The clicked button (unused).
    """
    # Local import: only needed to render exception text safely as HTML.
    from html import escape

    with simple_output:
        clear_output()
        simple_status.value = "<p style='color: blue;'>üîÑ Generating...</p>"

        try:
            output = tts.synthesize(
                simple_text.value,
                engine='edge',
                voice=simple_voice_dropdown.value,
                speed=simple_speed.value
            )

            simple_status.value = "<p style='color: green;'>‚úÖ Done!</p>"
            display(Audio(output, autoplay=True))
            print(f"üìÅ Saved: {output}")

        except Exception as e:
            # Exception messages may contain "<", ">" or "&"; escape them so
            # the status widget shows the message instead of parsing it.
            simple_status.value = f"<p style='color: red;'>‚ùå Error: {escape(str(e))}</p>"

simple_generate.on_click(on_simple_generate)

# Vertical layout of the simple tab, top to bottom: heading, text box, voice
# picker, speed slider, generate button, status line and output area.
simple_tab = widgets.VBox([
    widgets.HTML("<h3>üìù Simple Mode</h3><p>Quick and easy text-to-speech</p>"),
    simple_text,
    widgets.HBox([simple_voice_dropdown]),
    simple_speed,
    simple_generate,
    simple_status,
    simple_output
], layout=widgets.Layout(padding='15px'))

tab_children.append(simple_tab)

# ========== TAB 2: ADVANCED MODE ==========
# Text to speak in advanced mode.
adv_text = widgets.Textarea(
    value='Enter your text here for advanced synthesis with full control over all parameters.',
    layout=widgets.Layout(width='100%', height='150px')
)

# Back-end selector; the values are the engine names tts.synthesize accepts.
adv_engine = widgets.Dropdown(
    options=[
        ('üéØ Edge TTS (Best Quality)', 'edge'),
        ('üåê Google TTS (Reliable)', 'google'),
        ('üíª System TTS (Offline)', 'system')
    ],
    value='edge',
    description='Engine:',
    layout=widgets.Layout(width='100%')
)

# Language dropdown
# (label, locale) pairs; the locale is used as the VOICES_BY_LOCALE key.
language_codes = [
    ('üá∫üá∏ English (US)', 'en-US'), ('üá¨üáß English (UK)', 'en-GB'),
    ('üá¶üá∫ English (AU)', 'en-AU'), ('üáÆüá≥ English (IN)', 'en-IN'),
    ('üá™üá∏ Spanish (ES)', 'es-ES'), ('üá≤üáΩ Spanish (MX)', 'es-MX'),
    ('üá´üá∑ French', 'fr-FR'), ('üá©üá™ German', 'de-DE'),
    ('üáÆüáπ Italian', 'it-IT'), ('üáµüáπ Portuguese (BR)', 'pt-BR'),
    ('üá∑üá∫ Russian', 'ru-RU'), ('üáØüáµ Japanese', 'ja-JP'),
    ('üá∞üá∑ Korean', 'ko-KR'), ('üá®üá≥ Chinese', 'zh-CN'),
    ('üá∏üá¶ Arabic', 'ar-SA'), ('üáÆüá≥ Hindi', 'hi-IN'),
]

adv_language = widgets.Dropdown(
    options=language_codes,
    value='en-US',
    description='Language:',
    layout=widgets.Layout(width='100%')
)

# Starts empty; update_voices fills it for the selected locale.
adv_voice = widgets.Dropdown(
    options=[],
    description='Voice:',
    layout=widgets.Layout(width='100%')
)

def update_voices(change):
    """Refill the voice dropdown for the locale in ``change['new']``.

    Every voice registered for the locale becomes a (label, ShortName)
    option and the first one is selected. A locale without voices gets a
    single placeholder option named ``<locale>-DefaultNeural``.
    """
    locale = change['new']

    def _as_option(voice):
        # Label: gender icon, short name without the locale prefix and the
        # "Neural" suffix, then the gender in parentheses.
        icon = "üë©" if voice['Gender'] == 'Female' else "üë®"
        short_name = voice['ShortName']
        label = short_name.split('-')[-1].replace('Neural', '')
        return (f"{icon} {label} ({voice['Gender']})", short_name)

    choices = [_as_option(voice) for voice in VOICES_BY_LOCALE.get(locale, [])]

    if choices:
        adv_voice.options = choices
        adv_voice.value = choices[0][1]
    else:
        adv_voice.options = [('Default', f'{locale}-DefaultNeural')]

# Refresh the voice list whenever the language changes, and populate it once
# now for the default locale.
adv_language.observe(update_voices, names='value')
update_voices({'new': 'en-US'})

# Multipliers forwarded to tts.synthesize (1.0 = unchanged).
adv_speed = widgets.FloatSlider(value=1.0, min=0.5, max=2.0, step=0.1, description='Speed:')
adv_pitch = widgets.FloatSlider(value=1.0, min=0.5, max=1.5, step=0.1, description='Pitch:')
adv_volume = widgets.FloatSlider(value=1.0, min=0.0, max=2.0, step=0.1, description='Volume:')

# Processing toggles: text preprocessing plus optional audio effects.
adv_preprocess = widgets.Checkbox(value=True, description='üìù Preprocess text')
adv_normalize = widgets.Checkbox(value=False, description='üîä Normalize audio')
adv_fade = widgets.Checkbox(value=False, description='üåä Add fade in/out')
adv_trim = widgets.Checkbox(value=False, description='‚úÇÔ∏è Trim silence')

adv_generate = widgets.Button(
    description='üéôÔ∏è Generate',
    button_style='success',
    layout=widgets.Layout(width='48%', height='45px')
)

# Preview synthesizes only the beginning of the text (see on_adv_generate).
adv_preview = widgets.Button(
    description='üëÅÔ∏è Preview',
    button_style='info',
    layout=widgets.Layout(width='48%', height='45px')
)

# Output area for the audio player / file info, and a one-line status label.
adv_output = widgets.Output()
adv_status = widgets.HTML("<p style='color: gray;'>Ready...</p>")

def on_adv_generate(b, preview=False):
    """Click handler behind the Advanced tab's Generate and Preview buttons.

    Synthesizes the text in ``adv_text`` using the engine, voice, language,
    speed, pitch, volume and preprocessing settings currently selected, plays
    the result, then prints the output path and its duration.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).
        preview: When True, at most the first 200 characters are synthesized,
            with "..." appended if the text had to be cut.

    Any exception is caught and reported through ``adv_status`` instead of
    being raised.
    """
    with adv_output:
        clear_output()
        adv_status.value = "<p style='color: blue;'>üîÑ Generating...</p>"

        try:
            script = adv_text.value
            if preview and len(script) > 200:
                script = script[:200] + "..."

            # Translate the checkbox states into the effects mapping; keys
            # are inserted in checkbox order.
            effects = {}
            if adv_normalize.value:
                effects.update(normalize=True)
            if adv_fade.value:
                effects.update(fade_in=500, fade_out=500)
            if adv_trim.value:
                effects.update(trim_silence=True)

            engine_name = adv_engine.value
            # A named voice is only passed along for the 'edge' engine.
            chosen_voice = adv_voice.value if engine_name == 'edge' else None
            # Only the part of the locale before the first '-' is passed as language.
            language_code = adv_language.value.split('-')[0]

            output = tts.synthesize(
                script,
                engine=engine_name,
                voice=chosen_voice,
                language=language_code,
                speed=adv_speed.value,
                pitch=adv_pitch.value,
                volume=adv_volume.value,
                preprocess=adv_preprocess.value,
                effects=effects or None
            )

            adv_status.value = "<p style='color: green;'>‚úÖ Done!</p>"
            display(Audio(output, autoplay=True))

            # Reload the rendered file to report its duration.
            processor = tts.audio_processor
            details = processor.get_info(processor.load_audio(output))
            print(f"üìÅ File: {output}")
            print(f"‚è±Ô∏è Duration: {details['duration_s']:.2f} seconds")

        except Exception as e:
            adv_status.value = f"<p style='color: red;'>‚ùå Error: {e}</p>"

# Both buttons share one handler; Preview differs only by preview=True.
adv_generate.on_click(lambda b: on_adv_generate(b, preview=False))
adv_preview.on_click(lambda b: on_adv_generate(b, preview=True))

# Advanced tab layout, listed top-to-bottom in display order.
adv_tab = widgets.VBox([
    widgets.HTML("<h3>‚öôÔ∏è Advanced Mode</h3><p>Full control over all settings</p>"),
    adv_text,
    widgets.HBox([adv_engine, adv_language]),
    adv_voice,
    widgets.HBox([adv_speed, adv_pitch, adv_volume]),
    widgets.HBox([adv_preprocess, adv_normalize, adv_fade, adv_trim]),
    widgets.HBox([adv_generate, adv_preview]),
    adv_status,
    adv_output
], layout=widgets.Layout(padding='15px'))

# Append order matters: tab titles are assigned by index further down.
tab_children.append(adv_tab)

# ========== TAB 3: BATCH MODE ==========
# Input for batch mode; on_batch_generate() treats every non-blank line as a
# separate text to synthesize.
batch_text = widgets.Textarea(
    value='Line 1: First text to synthesize.\nLine 2: Second text to synthesize.\nLine 3: Third text to synthesize.',
    placeholder='Enter multiple texts, one per line...',
    layout=widgets.Layout(width='100%', height='200px')
)

# Batch mode offers a fixed choice of two US English voices.
batch_voice = widgets.Dropdown(
    options=[
        ('üë© Jenny (US Female)', 'en-US-JennyNeural'),
        ('üë® Guy (US Male)', 'en-US-GuyNeural'),
    ],
    value='en-US-JennyNeural',
    description='Voice:',
    layout=widgets.Layout(width='100%')
)

batch_speed = widgets.FloatSlider(value=1.0, min=0.5, max=2.0, step=0.1, description='Speed:')

# Merge options: whether to join the clips into one file, and the gap value
# (labelled in ms) passed to the concatenate call.
batch_merge = widgets.Checkbox(value=True, description='üîó Merge into single file')
batch_gap = widgets.IntSlider(value=500, min=0, max=2000, step=100, description='Gap (ms):')

batch_generate = widgets.Button(
    description='üöÄ Generate All',
    button_style='success',
    layout=widgets.Layout(width='100%', height='45px')
)

# `max` is a placeholder here; on_batch_generate() resets it to the number of
# lines before each run.
batch_progress = widgets.IntProgress(value=0, min=0, max=100, description='Progress:')
batch_output = widgets.Output()

def on_batch_generate(b):
    """Click handler for batch mode: synthesize every non-blank input line.

    Each non-blank line of ``batch_text`` is synthesized separately with the
    selected voice and speed (history saving disabled) while
    ``batch_progress`` advances one step per line. If merging is enabled and
    more than one clip was produced, the clips are concatenated with the
    configured gap, saved under ``BATCH_DIR`` and played; otherwise each clip
    gets its own player.

    A line that fails to synthesize is reported and skipped so the remaining
    lines are still processed, and a failed merge is reported instead of
    raising. This matches the catch-and-report behaviour of the other tab
    handlers.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).
    """
    with batch_output:
        clear_output()

        lines = [l.strip() for l in batch_text.value.split('\n') if l.strip()]
        if not lines:
            print("‚ùå No text to process!")
            return

        batch_progress.max = len(lines)
        batch_progress.value = 0

        outputs = []
        failed = 0

        for position, line in enumerate(lines, start=1):
            print(f"Processing [{position}/{len(lines)}]: {line[:50]}...")

            try:
                outputs.append(
                    tts.synthesize(
                        line,
                        voice=batch_voice.value,
                        speed=batch_speed.value,
                        save_history=False
                    )
                )
            except Exception as e:
                # Keep going: one bad line should not discard the whole batch.
                failed += 1
                print(f"‚ùå Error: {e}")

            # Advance even on failure so the bar reflects lines processed.
            batch_progress.value = position

        if failed:
            print(f"\n‚ùå Error: {failed} of {len(lines)} lines could not be synthesized")

        if not outputs:
            return

        print(f"\n‚úÖ Generated {len(outputs)} files!")

        if batch_merge.value and len(outputs) > 1:
            print("üîó Merging files...")
            try:
                audio_segments = [tts.audio_processor.load_audio(f) for f in outputs]
                merged = tts.audio_processor.concatenate(audio_segments, batch_gap.value)

                merged_file = str(BATCH_DIR / f"merged_{int(time.time())}.mp3")
                tts.audio_processor.save_audio(merged, merged_file)
            except Exception as e:
                print(f"‚ùå Error: {e}")
                return

            print(f"‚úÖ Merged file: {merged_file}")
            display(Audio(merged_file, autoplay=True))
        else:
            for output in outputs:
                display(Audio(output))

batch_generate.on_click(on_batch_generate)

# Batch tab layout, listed top-to-bottom in display order.
batch_tab = widgets.VBox([
    widgets.HTML("<h3>üì¶ Batch Mode</h3><p>Process multiple texts at once</p>"),
    batch_text,
    widgets.HBox([batch_voice, batch_speed]),
    widgets.HBox([batch_merge, batch_gap]),
    batch_generate,
    batch_progress,
    batch_output
], layout=widgets.Layout(padding='15px'))

# Append order matters: tab titles are assigned by index further down.
tab_children.append(batch_tab)

# ========== TAB 4: MULTI-VOICE ==========
# Script input for multi-voice mode: one "Name: text" entry per line.
multi_segments = widgets.Textarea(
    value='Jenny: Hello, welcome to our conversation!\nGuy: Thanks for having me here today.\nJenny: Let us discuss something interesting.',
    placeholder='Format: VoiceName: Text to speak\nExample:\nJenny: Hello!\nGuy: Hi there!',
    layout=widgets.Layout(width='100%', height='200px')
)

# Help box listing speaker shortcuts. It lists six names; `voice_map` below
# additionally accepts 'natasha' and 'william'.
multi_info = widgets.HTML("""
<div style="background: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0;">
<b>üìù Voice Name Shortcuts:</b><br>
Jenny = en-US-JennyNeural | Guy = en-US-GuyNeural | Aria = en-US-AriaNeural<br>
Davis = en-US-DavisNeural | Sonia = en-GB-SoniaNeural | Ryan = en-GB-RyanNeural
</div>
""")

# Lower-case speaker name -> voice ID. on_multi_generate() lower-cases the
# typed name before looking it up here.
voice_map = {
    'jenny': 'en-US-JennyNeural',
    'guy': 'en-US-GuyNeural',
    'aria': 'en-US-AriaNeural',
    'davis': 'en-US-DavisNeural',
    'sonia': 'en-GB-SoniaNeural',
    'ryan': 'en-GB-RyanNeural',
    'natasha': 'en-AU-NatashaNeural',
    'william': 'en-AU-WilliamNeural',
}

# Gap value (labelled in ms) forwarded as `gap_ms` to tts.multi_voice_synthesize().
multi_gap = widgets.IntSlider(value=300, min=0, max=1000, step=50, description='Gap (ms):')

multi_generate = widgets.Button(
    description='üé≠ Generate Multi-Voice',
    button_style='success',
    layout=widgets.Layout(width='100%', height='45px')
)

multi_output = widgets.Output()

def on_multi_generate(b):
    """Click handler for multi-voice mode.

    Every non-blank line of ``multi_segments`` that contains a colon is split
    at the first colon into a speaker name and the text to speak. The
    speaker name is trimmed, lower-cased and looked up in ``voice_map``;
    names not found there use ``'en-US-JennyNeural'``. Lines without a colon
    are ignored. The resulting segments are rendered with
    ``tts.multi_voice_synthesize`` and played.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).

    Synthesis errors are caught and printed rather than raised.
    """
    with multi_output:
        clear_output()

        segments = []
        for raw_line in multi_segments.value.split('\n'):
            entry = raw_line.strip()
            if not entry or ':' not in entry:
                continue

            speaker, _, spoken = entry.partition(':')
            segments.append({
                'text': spoken.strip(),
                'voice': voice_map.get(speaker.strip().lower(), 'en-US-JennyNeural'),
            })

        if not segments:
            print("‚ùå No valid segments found!")
            return

        print(f"üé≠ Generating {len(segments)} segments...")

        try:
            rendered = tts.multi_voice_synthesize(segments, gap_ms=multi_gap.value)
            print(f"‚úÖ Generated: {rendered}")
            display(Audio(rendered, autoplay=True))
        except Exception as e:
            print(f"‚ùå Error: {e}")

multi_generate.on_click(on_multi_generate)

# Multi-voice tab layout, listed top-to-bottom in display order.
multi_tab = widgets.VBox([
    widgets.HTML("<h3>üé≠ Multi-Voice Mode</h3><p>Create conversations with different voices</p>"),
    multi_segments,
    multi_info,
    multi_gap,
    multi_generate,
    multi_output
], layout=widgets.Layout(padding='15px'))

tab_children.append(multi_tab)

# ========== CREATE TABS ==========
# Titles are assigned by position, so they rely on the order in which tabs
# were appended to `tab_children` (assumes the Simple tab was appended first,
# before this section - TODO confirm).
tabs = widgets.Tab(children=tab_children)
tabs.set_title(0, 'üìù Simple')
tabs.set_title(1, '‚öôÔ∏è Advanced')
tabs.set_title(2, 'üì¶ Batch')
tabs.set_title(3, 'üé≠ Multi-Voice')

# Main UI
# Title banner stacked above the tab widget, capped at 900px wide.
main_ui = widgets.VBox([
    title_html,
    tabs
], layout=widgets.Layout(max_width='900px'))

display(main_ui)

print("\n‚úÖ Main Interface Ready!")

In [None]:
#@title üîç **CELL 4: Voice Explorer**
#@markdown Browse and test all 300+ voices!

# Banner shown at the top of the Voice Explorer.
explorer_title = widgets.HTML("""
<div style="text-align: center; padding: 15px; background: #3498db; border-radius: 10px; margin-bottom: 15px;">
    <h2 style="color: white; margin: 0;">üîç Voice Explorer</h2>
    <p style="color: #eee; margin: 5px 0 0 0;">Browse and test all available voices</p>
</div>
""")

# Filters
# Language filter: an "all" entry followed by one entry per key of
# VOICES_BY_LANGUAGE (sorted by key), labelled with the upper-cased key and
# the number of voices stored under it.
exp_language = widgets.Dropdown(
    options=[('üåç All Languages', 'all')] + [(f"{k.upper()} ({len(v)} voices)", k) for k, v in sorted(VOICES_BY_LANGUAGE.items())],
    value='all',
    description='Language:',
    layout=widgets.Layout(width='200px')
)

# Gender filter; option values are compared against each voice's 'Gender' field.
exp_gender = widgets.Dropdown(
    options=[('All Genders', 'all'), ('üë© Female', 'Female'), ('üë® Male', 'Male')],
    value='all',
    description='Gender:',
    layout=widgets.Layout(width='150px')
)

# Free-text filter, matched case-insensitively against the voice ShortName.
exp_search = widgets.Text(
    placeholder='Search voice name...',
    description='Search:',
    layout=widgets.Layout(width='200px')
)

# Sentence spoken when a voice card's Test button is pressed.
exp_test_text = widgets.Text(
    value='Hello, this is a voice test. How does this sound?',
    description='Test text:',
    layout=widgets.Layout(width='100%')
)

exp_filter_btn = widgets.Button(description='üîç Filter', button_style='info')
exp_output = widgets.Output()

# Voice cards
def create_voice_card(voice_info):
    """Build the widget card for a single voice.

    Args:
        voice_info: Mapping describing one voice; the keys 'Gender',
            'ShortName' and 'Locale' are read.

    Returns:
        A ``VBox`` holding an HTML summary (icon, short name, locale, full
        ID) above a row with a Test button and a favorite button. Test
        synthesizes the current ``exp_test_text`` with this voice (without
        saving history) and plays it in ``exp_output``; the favorite button
        registers the voice with ``tts.favorites`` and restyles itself.
    """
    icon = "üë©" if voice_info['Gender'] == 'Female' else "üë®"
    # Last dash-separated part of the ShortName, minus the "Neural" suffix.
    display_name = voice_info['ShortName'].split('-')[-1].replace('Neural', '')

    summary = widgets.HTML(f"""
    <div style="border: 1px solid #ddd; padding: 10px; margin: 5px; border-radius: 8px; display: inline-block; width: 200px;">
        <b>{icon} {display_name}</b><br>
        <small style="color: gray;">{voice_info['Locale']}</small><br>
        <small>ID: {voice_info['ShortName']}</small>
    </div>
    """)

    play_button = widgets.Button(description='‚ñ∂Ô∏è Test', button_style='success', layout=widgets.Layout(width='80px'))
    star_button = widgets.Button(description='‚≠ê', layout=widgets.Layout(width='40px'))

    def play_sample(_clicked):
        # Output is appended to exp_output (it is not cleared first).
        with exp_output:
            print(f"üé§ Testing: {voice_info['ShortName']}")
            sample = tts.synthesize(exp_test_text.value, voice=voice_info['ShortName'], save_history=False)
            display(Audio(sample, autoplay=True))

    def mark_favorite(_clicked):
        tts.favorites.add_voice(voice_info['ShortName'], f"{icon} {display_name}")
        # Visual confirmation on the button itself.
        star_button.description = '‚≠ê‚úì'
        star_button.button_style = 'warning'

    play_button.on_click(play_sample)
    star_button.on_click(mark_favorite)

    button_row = widgets.HBox([play_button, star_button])
    return widgets.VBox([summary, button_row])

def filter_voices(b):
    """Show the voices matching the current language, gender and search filters.

    Starts from ``EDGE_VOICES``, applies each active filter, prints the
    number of matches, and renders at most the first 50 as cards in rows of
    four inside ``exp_output``. If more than 50 match, the number left out is
    printed.

    The language filter matches a locale that equals the selected value or
    starts with it followed by '-'. A bare prefix test would also let e.g.
    'fi' match 'fil-PH', so the result would not agree with the
    per-language counts shown in the dropdown.

    Args:
        b: The clicked button, or None when called directly (not used).
    """
    with exp_output:
        clear_output()

        voices = EDGE_VOICES

        # Apply filters
        language = exp_language.value
        if language != 'all':
            locale_prefix = f"{language}-"
            voices = [
                v for v in voices
                if v['Locale'] == language or v['Locale'].startswith(locale_prefix)
            ]

        gender = exp_gender.value
        if gender != 'all':
            voices = [v for v in voices if v['Gender'] == gender]

        # Lower-case the search term once instead of once per voice.
        needle = exp_search.value.lower()
        if needle:
            voices = [v for v in voices if needle in v['ShortName'].lower()]

        print(f"üìä Found {len(voices)} voices\n")

        # Show voices in grid (capped at 50 cards)
        cards = [create_voice_card(v) for v in voices[:50]]

        # Display in rows of 4
        for i in range(0, len(cards), 4):
            display(widgets.HBox(cards[i:i+4]))

        if len(voices) > 50:
            print(f"\n... and {len(voices) - 50} more voices")

exp_filter_btn.on_click(filter_voices)

# Layout
# Filter row on top, then the test sentence, a rule, and the results area.
exp_ui = widgets.VBox([
    explorer_title,
    widgets.HBox([exp_language, exp_gender, exp_search, exp_filter_btn]),
    exp_test_text,
    widgets.HTML("<hr>"),
    exp_output
], layout=widgets.Layout(padding='15px'))

display(exp_ui)

# Initial filter
# Called with None in place of the button so results show before any click.
filter_voices(None)

In [None]:
#@title üìö **CELL 5: Audiobook Creator**
#@markdown Create audiobooks from chapters

# Banner shown at the top of the Audiobook Creator.
audiobook_title = widgets.HTML("""
<div style="text-align: center; padding: 15px; background: #9b59b6; border-radius: 10px; margin-bottom: 15px;">
    <h2 style="color: white; margin: 0;">üìö Audiobook Creator</h2>
    <p style="color: #eee; margin: 5px 0 0 0;">Convert text into professional audiobooks</p>
</div>
""")

# Book title
# Passed to tts.create_audiobook() as `title`.
ab_title = widgets.Text(
    value='My Audiobook',
    description='Title:',
    layout=widgets.Layout(width='100%')
)

# Chapter input
# Parsed by parse_chapters(): a line starting with "Chapter " that contains
# a colon opens a new chapter; the following non-blank lines are its text.
ab_chapters = widgets.Textarea(
    value='''Chapter 1: Introduction
This is the first chapter of our audiobook. It introduces the main concepts and sets the stage for what's to come.

Chapter 2: The Journey Begins
Our story continues with the protagonist embarking on an exciting adventure. The path ahead is full of challenges and opportunities.

Chapter 3: Conclusion
And so we reach the end of our tale. The lessons learned along the way have shaped our understanding.''',
    placeholder='Enter chapters in format:\nChapter Title\nChapter text...\n\nChapter Title\nChapter text...',
    layout=widgets.Layout(width='100%', height='300px')
)

# Narrator choice, limited to these five English voices.
ab_voice = widgets.Dropdown(
    options=[
        ('üë© Jenny (Warm, Friendly)', 'en-US-JennyNeural'),
        ('üë® Guy (Professional)', 'en-US-GuyNeural'),
        ('üë© Aria (Cheerful)', 'en-US-AriaNeural'),
        ('üë© Sonia (British)', 'en-GB-SoniaNeural'),
        ('üë® Ryan (British)', 'en-GB-RyanNeural'),
    ],
    value='en-US-JennyNeural',
    description='Narrator:',
    layout=widgets.Layout(width='100%')
)

# Narration speed (starts at 0.9) and the gap value (labelled in ms)
# forwarded as `chapter_gap_ms`.
ab_speed = widgets.FloatSlider(value=0.9, min=0.5, max=1.5, step=0.1, description='Speed:')
ab_gap = widgets.IntSlider(value=2000, min=500, max=5000, step=500, description='Chapter gap (ms):')

ab_create = widgets.Button(
    description='üìö Create Audiobook',
    button_style='success',
    layout=widgets.Layout(width='100%', height='50px')
)

# `max` is a placeholder; on_create_audiobook() resets it to the chapter count.
ab_progress = widgets.IntProgress(value=0, min=0, max=100, description='Progress:')
ab_status = widgets.HTML("")
ab_output = widgets.Output()

def parse_chapters(text):
    """Split raw audiobook text into chapters.

    A line (after trimming whitespace) that starts with ``'Chapter '`` and
    contains a colon opens a new chapter; the whole line is used as the
    chapter key. Every following non-blank line belongs to that chapter
    until the next heading. Lines before the first heading are ignored.

    If the same heading appears more than once, the later text is appended
    to the existing chapter rather than replacing it, so no input text is
    dropped.

    Args:
        text: The full text, with lines separated by ``'\\n'``.

    Returns:
        A dict mapping each heading line to its body, with the body lines
        joined by single spaces. Keys are in order of first appearance; a
        heading with no body maps to an empty string.
    """
    bodies = {}
    current_chapter = None

    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('Chapter ') and ':' in line:
            current_chapter = line
            # setdefault keeps text already collected under a repeated heading.
            bodies.setdefault(current_chapter, [])
        elif current_chapter is not None:
            bodies[current_chapter].append(line)

    return {heading: ' '.join(parts) for heading, parts in bodies.items()}

def on_create_audiobook(b):
    """Click handler that turns the chapter text into an audiobook.

    Parses ``ab_chapters`` with ``parse_chapters``, prints a short summary,
    then calls ``tts.create_audiobook`` with the chosen title, narrator,
    speed and chapter gap. Progress is mirrored into ``ab_progress`` and
    ``ab_status`` through a callback. On success the full file and each
    chapter file are listed and players are shown for the first chapter
    (autoplaying) and for the full book.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).

    Errors raised during creation are caught and shown in ``ab_status`` and
    in the output area.
    """
    with ab_output:
        clear_output()

        parsed = parse_chapters(ab_chapters.value)

        if not parsed:
            print("‚ùå No chapters found! Use format: 'Chapter N: Title' followed by text")
            return

        chapter_count = len(parsed)
        print(f"üìö Creating audiobook: {ab_title.value}")
        print(f"üìñ Chapters: {chapter_count}")
        print(f"üé§ Voice: {ab_voice.value}")
        print("\n" + "="*50)

        ab_progress.max = chapter_count
        ab_progress.value = 0

        def report_progress(current, total, chapter):
            # `total` is part of the callback signature but not needed here.
            ab_progress.value = current
            ab_status.value = f"<p>Processing: {chapter}</p>"

        try:
            book = tts.create_audiobook(
                parsed,
                title=ab_title.value,
                voice=ab_voice.value,
                speed=ab_speed.value,
                chapter_gap_ms=ab_gap.value,
                progress_callback=report_progress
            )

            ab_status.value = "<p style='color: green;'>‚úÖ Audiobook created!</p>"

            print("\n‚úÖ AUDIOBOOK COMPLETE!")
            print(f"\nüìÅ Full audiobook: {book['full_file']}")
            print(f"\nüìñ Individual chapters:")
            for chapter_path in book['chapter_files']:
                print(f"  ‚Ä¢ {chapter_path}")

            print("\nüéß Preview (first chapter):")
            display(Audio(book['chapter_files'][0], autoplay=True))

            print("\nüì• Full audiobook:")
            display(Audio(book['full_file']))

        except Exception as e:
            ab_status.value = f"<p style='color: red;'>‚ùå Error: {e}</p>"
            print(f"Error: {e}")

ab_create.on_click(on_create_audiobook)

# Audiobook Creator layout, listed top-to-bottom in display order.
ab_ui = widgets.VBox([
    audiobook_title,
    ab_title,
    widgets.HTML("<b>üìñ Chapters:</b> (Format: 'Chapter N: Title' followed by text)"),
    ab_chapters,
    widgets.HBox([ab_voice]),
    widgets.HBox([ab_speed, ab_gap]),
    ab_create,
    ab_progress,
    ab_status,
    ab_output
], layout=widgets.Layout(padding='15px'))

display(ab_ui)

In [None]:
#@title üéöÔ∏è **CELL 6: Audio Editor**
#@markdown Edit and enhance your audio files

# Banner shown at the top of the Audio Editor.
editor_title = widgets.HTML("""
<div style="text-align: center; padding: 15px; background: #e74c3c; border-radius: 10px; margin-bottom: 15px;">
    <h2 style="color: white; margin: 0;">üéöÔ∏è Audio Editor</h2>
    <p style="color: #eee; margin: 5px 0 0 0;">Edit, enhance, and transform audio</p>
</div>
""")

# File selection
import glob

def get_audio_files():
    """List the .mp3 and .wav files directly inside ``AUDIO_DIR``.

    Returns:
        A list of ``(file name, full path string)`` tuples, most recently
        modified first, in the shape expected by a Dropdown's ``options``.
    """
    candidates = [*AUDIO_DIR.glob("*.mp3"), *AUDIO_DIR.glob("*.wav")]
    candidates.sort(key=lambda path: path.stat().st_mtime, reverse=True)
    return [(path.name, str(path)) for path in candidates]

# File picker; the option list is a snapshot taken when this cell runs and is
# only updated again through refresh_files().
ed_file = widgets.Dropdown(
    options=get_audio_files(),
    description='File:',
    layout=widgets.Layout(width='100%')
)

ed_refresh = widgets.Button(description='üîÑ Refresh', layout=widgets.Layout(width='100px'))

def refresh_files(b):
    """Reload the editor's file dropdown from the current contents of AUDIO_DIR.

    Args:
        b: The clicked button, or None when called directly (not used).
    """
    current_files = get_audio_files()
    ed_file.options = current_files

ed_refresh.on_click(refresh_files)

# Effects
# Every control starts at a value that apply_effects() treats as "off"
# (unchecked, 0, or speed 1.0), so untouched controls apply nothing.
ed_normalize = widgets.Checkbox(value=False, description='üîä Normalize volume')
ed_fade_in = widgets.IntSlider(value=0, min=0, max=2000, step=100, description='Fade in (ms):')
ed_fade_out = widgets.IntSlider(value=0, min=0, max=2000, step=100, description='Fade out (ms):')
ed_speed = widgets.FloatSlider(value=1.0, min=0.5, max=2.0, step=0.1, description='Speed:')
ed_volume = widgets.IntSlider(value=0, min=-20, max=20, step=1, description='Volume (dB):')
ed_trim = widgets.Checkbox(value=False, description='‚úÇÔ∏è Trim silence')
ed_silence_start = widgets.IntSlider(value=0, min=0, max=2000, step=100, description='Add silence start (ms):')
ed_silence_end = widgets.IntSlider(value=0, min=0, max=2000, step=100, description='Add silence end (ms):')

# Action buttons, each 48% wide so the pair fits on one row.
ed_preview = widgets.Button(description='üëÅÔ∏è Preview Original', button_style='info', layout=widgets.Layout(width='48%'))
ed_apply = widgets.Button(description='‚ú® Apply Effects', button_style='success', layout=widgets.Layout(width='48%'))

ed_output = widgets.Output()

def preview_original(b):
    """Show the selected file's duration and level, plus a player for it.

    Clears ``ed_output`` first; if no file is selected nothing else is shown.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).
    """
    with ed_output:
        clear_output()

        selected = ed_file.value
        if not selected:
            return

        print(f"üéß Original: {selected}")
        processor = tts.audio_processor
        details = processor.get_info(processor.load_audio(selected))
        print(f"‚è±Ô∏è Duration: {details['duration_s']:.2f}s | üîä Volume: {details['dBFS']:.1f} dBFS")
        display(Audio(selected))

def apply_effects(b):
    """Apply the enabled editor effects to the selected file and save a copy.

    Effects run in a fixed order: normalize, volume change, speed change,
    silence trim, fade in, fade out, added silence. Each one is skipped when
    its control is at its "off" value. The result is written to a new
    ``edited_<unix time>.mp3`` in ``AUDIO_DIR``, before/after duration and
    level are printed, the new file is played, and the file dropdown is
    refreshed.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).

    Any exception during processing is caught and printed.
    """
    with ed_output:
        clear_output()

        source_path = ed_file.value
        if not source_path:
            print("‚ùå No file selected!")
            return

        print("üîÑ Processing...")

        try:
            processor = tts.audio_processor
            audio = processor.load_audio(source_path)
            before = processor.get_info(audio)

            # Snapshot the control values once; they cannot change mid-run.
            gain_db = ed_volume.value
            rate = ed_speed.value
            fade_in_ms = ed_fade_in.value
            fade_out_ms = ed_fade_out.value
            pad_start_ms = ed_silence_start.value
            pad_end_ms = ed_silence_end.value

            # Apply effects
            if ed_normalize.value:
                audio = processor.normalize_audio(audio)
                print("  ‚úì Normalized")

            if gain_db != 0:
                audio = processor.change_volume(audio, gain_db)
                print(f"  ‚úì Volume adjusted: {gain_db:+d} dB")

            if rate != 1.0:
                audio = processor.change_speed(audio, rate)
                print(f"  ‚úì Speed changed: {rate}x")

            if ed_trim.value:
                audio = processor.trim_silence(audio)
                print("  ‚úì Silence trimmed")

            if fade_in_ms > 0:
                audio = processor.fade_in(audio, fade_in_ms)
                print(f"  ‚úì Fade in: {fade_in_ms}ms")

            if fade_out_ms > 0:
                audio = processor.fade_out(audio, fade_out_ms)
                print(f"  ‚úì Fade out: {fade_out_ms}ms")

            if pad_start_ms > 0 or pad_end_ms > 0:
                audio = processor.add_silence(audio, pad_start_ms, pad_end_ms)
                print(f"  ‚úì Silence added: start={pad_start_ms}ms, end={pad_end_ms}ms")

            # Save
            edited_path = str(AUDIO_DIR / f"edited_{int(time.time())}.mp3")
            processor.save_audio(audio, edited_path)

            after = processor.get_info(audio)

            print(f"\n‚úÖ Saved: {edited_path}")
            print(f"‚è±Ô∏è Duration: {before['duration_s']:.2f}s ‚Üí {after['duration_s']:.2f}s")
            print(f"üîä Volume: {before['dBFS']:.1f} ‚Üí {after['dBFS']:.1f} dBFS")

            display(Audio(edited_path, autoplay=True))

            # Refresh file list so the new file can be picked next
            refresh_files(None)

        except Exception as e:
            print(f"‚ùå Error: {e}")

ed_preview.on_click(preview_original)
ed_apply.on_click(apply_effects)

# Audio Editor layout: file row, effect controls in pairs, then the actions.
ed_ui = widgets.VBox([
    editor_title,
    widgets.HBox([ed_file, ed_refresh]),
    widgets.HTML("<hr><b>üéõÔ∏è Effects:</b>"),
    widgets.HBox([ed_normalize, ed_trim]),
    widgets.HBox([ed_speed, ed_volume]),
    widgets.HBox([ed_fade_in, ed_fade_out]),
    widgets.HBox([ed_silence_start, ed_silence_end]),
    widgets.HTML("<hr>"),
    widgets.HBox([ed_preview, ed_apply]),
    ed_output
], layout=widgets.Layout(padding='15px'))

display(ed_ui)

In [None]:
#@title üìù **CELL 7: SSML Editor**
#@markdown Advanced speech control with SSML

# Banner shown at the top of the SSML Editor.
ssml_title = widgets.HTML("""
<div style="text-align: center; padding: 15px; background: #2ecc71; border-radius: 10px; margin-bottom: 15px;">
    <h2 style="color: white; margin: 0;">üìù SSML Editor</h2>
    <p style="color: #eee; margin: 5px 0 0 0;">Fine-grained control over speech synthesis</p>
</div>
""")

# Static cheat sheet; tags are written with &lt; / &gt; so they render as
# text instead of being interpreted as HTML.
ssml_help = widgets.HTML("""
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin: 10px 0;">
<b>üìñ SSML Quick Reference:</b><br><br>
<code>&lt;break time="500ms"/&gt;</code> - Add pause<br>
<code>&lt;emphasis level="strong"&gt;text&lt;/emphasis&gt;</code> - Add emphasis (strong/moderate/reduced)<br>
<code>&lt;prosody rate="slow" pitch="+10%"&gt;text&lt;/prosody&gt;</code> - Change rate/pitch<br>
<code>&lt;say-as interpret-as="date"&gt;01/15/2024&lt;/say-as&gt;</code> - Interpret as date/time/number<br>
<code>&lt;phoneme ph="t…ôÀàme…™to ä"&gt;tomato&lt;/phoneme&gt;</code> - Specify pronunciation<br>
</div>
""")

# Template name -> complete <speak> document. The keys double as the options
# of the template dropdown below.
ssml_templates = {
    'Basic': '''<speak>
Hello! <break time="500ms"/>
This is a basic SSML example.
</speak>''',

    'Emphasis': '''<speak>
This is <emphasis level="strong">very important</emphasis> information.
<break time="300ms"/>
But this is <emphasis level="reduced">less important</emphasis>.
</speak>''',

    'Prosody': '''<speak>
<prosody rate="slow" pitch="-10%">Speaking slowly and low.</prosody>
<break time="500ms"/>
<prosody rate="fast" pitch="+15%">Speaking quickly and high!</prosody>
</speak>''',

    'Numbers & Dates': '''<speak>
The meeting is on <say-as interpret-as="date" format="mdy">01/15/2024</say-as>.
<break time="300ms"/>
Please call <say-as interpret-as="telephone">+1-555-123-4567</say-as>.
<break time="300ms"/>
The total is <say-as interpret-as="currency">$1,234.56</say-as>.
</speak>''',

    'Dramatic': '''<speak>
<prosody rate="slow" pitch="-20%">In the beginning...</prosody>
<break time="1s"/>
<prosody rate="medium" pitch="0%">There was silence.</prosody>
<break time="500ms"/>
<prosody rate="fast" pitch="+10%">Then suddenly!</prosody>
<break time="300ms"/>
<emphasis level="strong">Everything changed!</emphasis>
</speak>'''
}

ssml_template = widgets.Dropdown(
    options=list(ssml_templates.keys()),
    value='Basic',
    description='Template:',
    layout=widgets.Layout(width='200px')
)

ssml_load = widgets.Button(description='üì• Load Template', button_style='info')

# Editable SSML source, pre-filled with the 'Basic' template.
ssml_text = widgets.Textarea(
    value=ssml_templates['Basic'],
    layout=widgets.Layout(width='100%', height='250px')
)

# The SSML editor offers a fixed choice of three US English voices.
ssml_voice = widgets.Dropdown(
    options=[
        ('üë© Jenny', 'en-US-JennyNeural'),
        ('üë® Guy', 'en-US-GuyNeural'),
        ('üë© Aria', 'en-US-AriaNeural'),
    ],
    value='en-US-JennyNeural',
    description='Voice:',
    layout=widgets.Layout(width='200px')
)

ssml_generate = widgets.Button(
    description='üéôÔ∏è Generate SSML Speech',
    button_style='success',
    layout=widgets.Layout(width='100%', height='45px')
)

ssml_output = widgets.Output()

def load_template(b):
    """Replace the editor contents with the template picked in the dropdown.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).
    """
    chosen = ssml_template.value
    ssml_text.value = ssml_templates[chosen]

# Loading overwrites whatever is currently in the SSML text area.
ssml_load.on_click(load_template)

def generate_ssml(b):
    """Click handler that speaks the text contained in the SSML editor.

    The SSML markup itself is not sent to the engine: every tag is replaced
    by a space, XML character entities (``&amp;``, ``&lt;``, ...) are decoded
    so they are spoken as the characters they stand for, and runs of
    whitespace are collapsed. The remaining plain text is synthesized with
    the selected voice and preprocessing disabled, then played.

    If nothing is left after stripping the markup, a message is printed and
    no synthesis is attempted.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).

    Errors are caught and printed rather than raised.
    """
    with ssml_output:
        clear_output()

        try:
            # For Edge TTS, we need to handle SSML differently
            ssml_content = ssml_text.value

            # Extract text from SSML for basic synthesis
            # Note: Full SSML support depends on the TTS engine
            import html
            import re
            text = re.sub(r'<[^>]+>', ' ', ssml_content)
            # Decode entities only after the tags are gone, so an escaped
            # "&lt;" in the text is never mistaken for markup.
            text = html.unescape(text)
            text = ' '.join(text.split())

            if not text:
                print("‚ùå No text to process!")
                return

            print("üîÑ Generating from SSML...")
            print(f"üìù Extracted text: {text[:100]}...")

            output = tts.synthesize(text, voice=ssml_voice.value, preprocess=False)

            print(f"‚úÖ Generated: {output}")
            display(Audio(output, autoplay=True))

        except Exception as e:
            print(f"‚ùå Error: {e}")

# Generate button -> generate_ssml handler.
ssml_generate.on_click(generate_ssml)

# Quick insert buttons
def create_insert_button(text, insert_text):
    """Create a button that appends an SSML snippet to the editor.

    Args:
        text: Label shown on the button.
        insert_text: Snippet added to the end of ``ssml_text`` on each click.

    Returns:
        The configured ``widgets.Button``.
    """
    def append_snippet(_clicked):
        # Always appended at the end of the current editor contents.
        ssml_text.value += insert_text

    button = widgets.Button(description=text, layout=widgets.Layout(width='auto'))
    button.on_click(append_snippet)
    return button

# One-click snippet buttons; each appends its snippet to the end of the editor.
quick_buttons = widgets.HBox([
    create_insert_button('‚è∏Ô∏è Pause', '<break time="500ms"/>'),
    create_insert_button('üì¢ Emphasis', '<emphasis level="strong">text</emphasis>'),
    create_insert_button('üêå Slow', '<prosody rate="slow">text</prosody>'),
    create_insert_button('üê∞ Fast', '<prosody rate="fast">text</prosody>'),
])

# SSML Editor layout, listed top-to-bottom in display order.
ssml_ui = widgets.VBox([
    ssml_title,
    ssml_help,
    widgets.HTML("<b>üìù Quick Insert:</b>"),
    quick_buttons,
    widgets.HBox([ssml_template, ssml_load]),
    ssml_text,
    ssml_voice,
    ssml_generate,
    ssml_output
], layout=widgets.Layout(padding='15px'))

display(ssml_ui)

In [None]:
#@title üìú **CELL 8: History & Export Manager**
#@markdown View history and export files

# Banner shown at the top of the History & Export manager.
history_title = widgets.HTML("""
<div style="text-align: center; padding: 15px; background: #34495e; border-radius: 10px; margin-bottom: 15px;">
    <h2 style="color: white; margin: 0;">üìú History & Export</h2>
    <p style="color: #eee; margin: 5px 0 0 0;">View history and manage files</p>
</div>
""")

# Tab pages are collected here in display order; titles are set by index later.
hist_tabs = []

# ========== HISTORY TAB ==========
hist_output = widgets.Output()
hist_refresh = widgets.Button(description='üîÑ Refresh', button_style='info')
hist_clear = widgets.Button(description='üóëÔ∏è Clear All', button_style='danger')

def show_history(b=None):
    """Print the entries returned by ``tts.history.get_recent(20)``.

    Entries are printed in the reverse of the order in which they are
    returned. For each one the timestamp (first 19 characters), the first 60
    characters of the text, the voice, the speed and the output path are
    shown, with placeholders for missing keys.

    Args:
        b: The clicked button, or None when called directly (not used).
    """
    with hist_output:
        clear_output()

        recent = tts.history.get_recent(20)

        if not recent:
            print("üì≠ No history yet!")
            return

        print(f"üìú Recent generations ({len(recent)} items):\n")

        for entry in reversed(recent):
            lookup = entry.get
            print(f"üïê {lookup('timestamp', 'Unknown')[:19]}")
            print(f"   üìù {lookup('text', '')[:60]}...")
            print(f"   üé§ {lookup('voice', 'Unknown')} | ‚ö° {lookup('speed', 1.0)}x")
            print(f"   üìÅ {lookup('output', 'N/A')}")
            print()

def clear_history(b):
    """Clear the stored history, then redraw the (now empty) history view.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).
    """
    tts.history.clear()
    show_history(None)

hist_refresh.on_click(show_history)
hist_clear.on_click(clear_history)

# History page: button row above the printed list.
hist_tab = widgets.VBox([
    widgets.HBox([hist_refresh, hist_clear]),
    hist_output
], layout=widgets.Layout(padding='10px'))

hist_tabs.append(hist_tab)

# ========== FILES TAB ==========
files_output = widgets.Output()
files_refresh = widgets.Button(description='üîÑ Refresh', button_style='info')

def show_files(b=None):
    """List generated files per output directory and report their total size.

    For each of ``AUDIO_DIR``, ``BATCH_DIR``, ``AUDIOBOOK_DIR`` and
    ``EXPORT_DIR`` that contains entries matching ``*.*``, prints a header
    with the full entry count followed by the five most recently modified
    entries and their sizes in KB.

    The total at the end covers every matched entry, not only the five
    listed per directory, so it agrees with the counts shown in the headers.

    Args:
        b: The clicked button, or None when called directly (not used).
    """
    with files_output:
        clear_output()

        all_files = []

        for directory, name in [(AUDIO_DIR, 'Audio'), (BATCH_DIR, 'Batch'),
                                 (AUDIOBOOK_DIR, 'Audiobooks'), (EXPORT_DIR, 'Exports')]:
            files = list(directory.glob("*.*"))
            if not files:
                continue

            # Count every file towards the total, even those not listed below.
            all_files.extend(files)

            print(f"\nüìÅ {name} ({len(files)} files):")
            newest_first = sorted(files, key=lambda x: x.stat().st_mtime, reverse=True)
            for f in newest_first[:5]:
                size_kb = f.stat().st_size / 1024
                print(f"   ‚Ä¢ {f.name} ({size_kb:.1f} KB)")

        if not all_files:
            print("üì≠ No files yet!")
        else:
            # Total size
            total_size = sum(f.stat().st_size for f in all_files) / (1024 * 1024)
            print(f"\nüíæ Total size: {total_size:.2f} MB")

files_refresh.on_click(show_files)

# Files page: refresh button above the printed listing.
files_tab = widgets.VBox([
    files_refresh,
    files_output
], layout=widgets.Layout(padding='10px'))

hist_tabs.append(files_tab)

# ========== EXPORT TAB ==========
# NOTE(review): this dropdown is displayed on the Export page, but
# export_all_files() never reads its value - confirm whether format
# conversion was intended.
export_format = widgets.Dropdown(
    options=['mp3', 'wav', 'ogg', 'flac'],
    value='mp3',
    description='Format:',
    layout=widgets.Layout(width='150px')
)

export_all = widgets.Button(description='üì¶ Export All', button_style='success')
export_output = widgets.Output()

def export_all_files(b):
    """Zip the contents of ``AUDIO_DIR`` into ``EXPORT_DIR`` and show a link.

    The archive is named ``tts_export_<unix time>.zip``. Only ``AUDIO_DIR``
    is archived, and the files are packed as they are; the ``export_format``
    dropdown is not consulted here.

    Args:
        b: The clicked button (supplied by ipywidgets; not used).
    """
    with export_output:
        clear_output()

        print("üì¶ Exporting all files...")

        # Create zip
        import shutil

        # make_archive takes the target path without the ".zip" suffix.
        archive_base = EXPORT_DIR / f"tts_export_{int(time.time())}"
        shutil.make_archive(str(archive_base), 'zip', AUDIO_DIR)
        archive_file = f"{archive_base}.zip"

        print(f"‚úÖ Created: {archive_file}")
        print("\nüì• Download link:")
        display(FileLink(archive_file))

export_all.on_click(export_all_files)

# Export page: heading, format dropdown, export button, output area.
export_tab = widgets.VBox([
    widgets.HTML("<b>üì§ Export Settings:</b>"),
    export_format,
    export_all,
    export_output
], layout=widgets.Layout(padding='10px'))

hist_tabs.append(export_tab)

# ========== FAVORITES TAB ==========
fav_output = widgets.Output()
fav_refresh = widgets.Button(description='üîÑ Refresh', button_style='info')

def show_favorites(b=None):
    """Print the favorite voices and saved presets held by ``tts.favorites``.

    Reads the ``'voices'`` and ``'presets'`` lists from
    ``tts.favorites.favorites``; each section prints a placeholder line when
    its list is empty.

    Args:
        b: The clicked button, or None when called directly (not used).
    """
    with fav_output:
        clear_output()

        saved = tts.favorites.favorites

        print("‚≠ê Favorite Voices:")
        if not saved['voices']:
            print("   No favorites yet!")
        else:
            for voice in saved['voices']:
                print(f"   ‚Ä¢ {voice['name']} ({voice['id']})")

        print("\nüìã Saved Presets:")
        if not saved['presets']:
            print("   No presets yet!")
        else:
            for preset in saved['presets']:
                print(f"   ‚Ä¢ {preset['name']}")

fav_refresh.on_click(show_favorites)

# Favorites page: refresh button above the printed lists.
fav_tab = widgets.VBox([
    fav_refresh,
    fav_output
], layout=widgets.Layout(padding='10px'))

hist_tabs.append(fav_tab)

# ========== CREATE TABS ==========
# NOTE: this rebinds the module-level name `tabs`, which the main interface
# cell also assigns. Titles are set by index and must follow the order in
# which the pages were appended to `hist_tabs`.
tabs = widgets.Tab(children=hist_tabs)
tabs.set_title(0, 'üìú History')
tabs.set_title(1, 'üìÅ Files')
tabs.set_title(2, 'üì§ Export')
tabs.set_title(3, '‚≠ê Favorites')

history_ui = widgets.VBox([
    history_title,
    tabs
], layout=widgets.Layout(padding='15px'))

display(history_ui)

# Initial load
# Fill the three listing pages once so they are not empty before any click.
show_history()
show_files()
show_favorites()

In [None]:
#@title ‚ö° **CELL 9: Quick Functions & API**
#@markdown Easy-to-use functions for quick access

# Banner printed when the quick-functions cell runs.
print("="*60)
print("‚ö° QUICK FUNCTIONS READY!")
print("="*60)

# Quick synthesis functions
def speak(text, voice="en-US-JennyNeural", speed=1.0):
    """Synthesize ``text`` through ``tts.synthesize`` and return its result.

    Args:
        text: Text to speak.
        voice: Voice identifier forwarded as ``voice``.
        speed: Value forwarded as ``speed``.
    """
    options = {"voice": voice, "speed": speed}
    return tts.synthesize(text, **options)

def jenny(text, speed=1.0):
    """Speak ``text`` with the "en-US-JennyNeural" voice via ``speak``."""
    voice_id = "en-US-JennyNeural"
    return speak(text, voice_id, speed)

def guy(text, speed=1.0):
    """Speak ``text`` with the "en-US-GuyNeural" voice via ``speak``."""
    voice_id = "en-US-GuyNeural"
    return speak(text, voice_id, speed)

def aria(text, speed=1.0):
    """Speak ``text`` with the "en-US-AriaNeural" voice via ``speak``."""
    voice_id = "en-US-AriaNeural"
    return speak(text, voice_id, speed)

def sonia(text, speed=1.0):
    """Speak ``text`` with the "en-GB-SoniaNeural" voice via ``speak``."""
    voice_id = "en-GB-SoniaNeural"
    return speak(text, voice_id, speed)

# Multi-language quick functions
def spanish(text):
    """Synthesize ``text`` with the 'google' engine and language 'es'."""
    options = {"engine": "google", "language": "es"}
    return tts.synthesize(text, **options)

def french(text):
    """Synthesize ``text`` with the 'google' engine and language 'fr'."""
    options = {"engine": "google", "language": "fr"}
    return tts.synthesize(text, **options)

def german(text):
    """Synthesize ``text`` with the 'google' engine and language 'de'."""
    options = {"engine": "google", "language": "de"}
    return tts.synthesize(text, **options)

def japanese(text):
    """Synthesize ``text`` with the 'google' engine and language 'ja'."""
    options = {"engine": "google", "language": "ja"}
    return tts.synthesize(text, **options)

def chinese(text):
    """Synthesize ``text`` with the 'google' engine and language 'zh-CN'."""
    options = {"engine": "google", "language": "zh-CN"}
    return tts.synthesize(text, **options)

def hindi(text):
    """Synthesize ``text`` with the 'google' engine and language 'hi'."""
    options = {"engine": "google", "language": "hi"}
    return tts.synthesize(text, **options)

# Batch function
def batch(texts, voice="en-US-JennyNeural"):
    """Forward ``texts`` to ``tts.batch_synthesize`` and return its result.

    Args:
        texts: The texts to synthesize, passed through unchanged.
        voice: Voice identifier forwarded as ``voice``.
    """
    options = {"voice": voice}
    return tts.batch_synthesize(texts, **options)

# List voices
def voices(language='en'):
    """Display and return a table of voices whose locale starts with ``language``.

    Args:
        language: Locale prefix matched with ``str.startswith`` against each
            voice's ``'Locale'`` (for example ``'en'`` or ``'es'``).

    Returns:
        A ``pandas.DataFrame`` with columns ``Voice``, ``Gender`` and
        ``Locale``, one row per matching entry of ``EDGE_VOICES``.
    """
    rows = [
        {'Voice': v['ShortName'], 'Gender': v['Gender'], 'Locale': v['Locale']}
        for v in EDGE_VOICES if v['Locale'].startswith(language)
    ]
    # Name the columns explicitly: with no matching voices a DataFrame built
    # from an empty list would otherwise have no columns at all.
    df = pd.DataFrame(rows, columns=['Voice', 'Gender', 'Locale'])
    display(df)
    return df

# Usage cheat sheet for the helpers defined in this cell. The sample strings
# are meant to be copied into code cells, so they must be valid text in the
# language of each example.
print("""
📖 QUICK FUNCTIONS:
────────────────────────────────────────

🎤 VOICES:
   speak("Hello!")           # Default Jenny
   jenny("Hello!")           # Jenny voice
   guy("Hello!")             # Guy voice
   aria("Hello!")            # Aria voice
   sonia("Hello!")           # British Sonia

   speak("Hello!", speed=1.5)  # Faster
   speak("Hello!", speed=0.7)  # Slower

🌍 LANGUAGES:
   spanish("Hola!")
   french("Bonjour!")
   german("Hallo!")
   japanese("こんにちは!")
   chinese("你好!")
   hindi("नमस्ते!")

📦 BATCH:
   batch(["Text 1", "Text 2", "Text 3"])

🔍 EXPLORE:
   voices()        # List English voices
   voices('es')    # List Spanish voices
   voices('fr')    # List French voices

🔧 ADVANCED:
   tts.synthesize(
       text="Your text",
       engine='edge',
       voice='en-US-JennyNeural',
       speed=1.0,
       pitch=1.0,
       volume=1.0,
       preprocess=True,
       effects={'normalize': True, 'fade_in': 500}
   )

────────────────────────────────────────
""")

# Demo
print("\n🚀 Try it now:")
print('>>> speak("Hello, welcome to the Ultimate TTS Studio!")')

In [None]:
#@title 📊 **CELL 10: Dashboard**
#@markdown System overview and statistics

# Gradient banner shown at the top of the dashboard.
dash_title = widgets.HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%); border-radius: 15px; margin-bottom: 20px;">
    <h1 style="color: white; margin: 0;">📊 TTS Studio Dashboard</h1>
</div>
""")

# Output area that show_dashboard() clears and renders into.
dash_output = widgets.Output()
# Button that re-runs show_dashboard() on click.
dash_refresh = widgets.Button(description='🔄 Refresh Stats', button_style='success')

def show_dashboard(b=None):
    """Render the dashboard into ``dash_output``.

    Shows four stat cards (voice count, language count, history length, and
    stored file count with total size in MB), then the five most recent
    history entries and the five languages with the most voices.

    Args:
        b: The clicked button when used as an ``on_click`` callback; unused,
            and optional so the function can also be called directly.
    """
    with dash_output:
        # Replace whatever the previous refresh rendered.
        clear_output()

        # Stats
        history_count = len(tts.history.history)

        # "*.*" also matches directories whose names contain a dot, so keep
        # regular files only before counting and summing sizes.
        audio_files = [f for f in AUDIO_DIR.glob("*.*") if f.is_file()]
        batch_files = [f for f in BATCH_DIR.glob("*.*") if f.is_file()]

        total_files = len(audio_files) + len(batch_files)
        # Bytes -> megabytes.
        total_size = sum(f.stat().st_size for f in audio_files + batch_files) / (1024 * 1024)

        # Display stats
        stats_html = f"""
        <div style="display: flex; flex-wrap: wrap; gap: 15px; margin: 20px 0;">
            <div style="flex: 1; min-width: 200px; background: #3498db; color: white; padding: 20px; border-radius: 10px; text-align: center;">
                <h2 style="margin: 0; font-size: 2.5em;">{len(EDGE_VOICES)}</h2>
                <p style="margin: 5px 0 0 0;">🎤 Available Voices</p>
            </div>
            <div style="flex: 1; min-width: 200px; background: #9b59b6; color: white; padding: 20px; border-radius: 10px; text-align: center;">
                <h2 style="margin: 0; font-size: 2.5em;">{len(VOICES_BY_LANGUAGE)}</h2>
                <p style="margin: 5px 0 0 0;">🌍 Languages</p>
            </div>
            <div style="flex: 1; min-width: 200px; background: #e74c3c; color: white; padding: 20px; border-radius: 10px; text-align: center;">
                <h2 style="margin: 0; font-size: 2.5em;">{history_count}</h2>
                <p style="margin: 5px 0 0 0;">📜 Generations</p>
            </div>
            <div style="flex: 1; min-width: 200px; background: #2ecc71; color: white; padding: 20px; border-radius: 10px; text-align: center;">
                <h2 style="margin: 0; font-size: 2.5em;">{total_files}</h2>
                <p style="margin: 5px 0 0 0;">📁 Files ({total_size:.1f} MB)</p>
            </div>
        </div>
        """
        display(HTML(stats_html))

        # Recent activity
        print("\n📜 Recent Activity:")
        print("-" * 40)
        for h in tts.history.get_recent(5):
            # `or` also covers entries where the key exists but holds None.
            text = h.get('text') or ''
            # Only mark the preview as truncated when it actually was.
            preview = f"{text[:50]}..." if len(text) > 50 else text
            # Keep the last dash-separated part of the voice id,
            # e.g. "en-US-JennyNeural" -> "JennyNeural".
            voice_name = (h.get('voice') or 'Unknown').split('-')[-1]
            print(f"• {preview} ({voice_name})")

        # Top languages
        print("\n🌍 Top Languages by Voices:")
        print("-" * 40)
        top_langs = sorted(VOICES_BY_LANGUAGE.items(), key=lambda x: len(x[1]), reverse=True)[:5]
        # Loop variable deliberately not called `voices`, which is also the
        # name of the notebook's voice-listing helper.
        for lang, lang_voices in top_langs:
            print(f"  {lang.upper()}: {len(lang_voices)} voices")

# Clicking the refresh button re-renders the dashboard.
dash_refresh.on_click(show_dashboard)

# Quick actions: static reminder of the helper functions usable in code cells.
quick_actions = widgets.HTML("""
<div style="background: #f8f9fa; padding: 15px; border-radius: 10px; margin-top: 20px;">
<h3>⚡ Quick Actions</h3>
<p>Use these functions in code cells:</p>
<code>
speak("Hello!")  |  jenny("Hi!")  |  guy("Hello!")  |  voices('en')  |  batch(['a', 'b', 'c'])
</code>
</div>
""")

# Full dashboard: banner, refresh button, rendered stats, then the reminder.
dash_ui = widgets.VBox([
    dash_title,
    dash_refresh,
    dash_output,
    quick_actions
], layout=widgets.Layout(padding='15px', max_width='900px'))

display(dash_ui)
# Render once up front so the dashboard is populated before any click.
show_dashboard()