In [None]:
!pip install elevenlabs gradio==5.29.0 httpx pydub

import gradio as gr
from elevenlabs import ElevenLabs, VoiceSettings
import httpx
import re
import time
import random
from pydub import AudioSegment
import os
import pandas as pd
import numpy as np


def get_model_list():
    """Return the ElevenLabs model ids offered in the "Model" dropdowns.

    A fresh list is built on every call, so callers may mutate the result.
    """
    # NOTE(review): the two "*_sts_*" ids look like speech-to-speech models;
    # confirm that the text-to-speech endpoint accepts them.
    model_ids = (
        "eleven_multilingual_v2",
        "eleven_multilingual_v1",
        "eleven_flash_v2",
        "eleven_flash_v2_5",
        "eleven_english_sts_v2",
        "eleven_english_sts_v1",
    )
    return list(model_ids)

# Maps the display name shown in the voice dropdowns to the ElevenLabs voice id
# that is passed to the API. Keys are user-facing strings; values are voice ids.
VOICE_LIST = {
    "Rachel (Legacy)": "21m00Tcm4TlvDq8ikWAM",
    "Drew (Legacy)": "29vD33N1CtxCmqQRPOHJ",
    "Clyde (Legacy)": "2EiwWnXFnvU5JabPnv8n",
    "Paul (Legacy)": "5Q0t7uMcjvnagumLfvZi",
    "Aria": "9BWtsMINqrJLrRacOk9x",
    "Domi (Legacy)": "AZnzlk1XvdvUeBnXmlld",
    "Dave (Legacy)": "CYw3kZ02Hs0563khs1Fj",
    "Roger": "CwhRBWXzGAHq8TQ4Fs17",
    "Fin (Legacy)": "D38z5RcWu1voky8WS1ja",
    "Sarah": "EXAVITQu4vr4xnSDxMaL",
    "Antoni (Legacy)": "ErXwobaYiN019PkySvjV",
    "Laura": "FGY2WhTYpPnrIDTdsKH5",
    "Thomas (Legacy)": "GBv7mTt0atIp3Br8iCZE",
    "Charlie": "IKne3meq5aSn9XLyUdCD",
    "George": "JBFqnCBsd6RMkjVDRZzb",
    "Emily (Legacy)": "LcfcDJNUP1GQjkzn1xUU",
    "Elli (Legacy)": "MF3mGyEYCl7XYWbV9V6O",
    "Callum": "N2lVS1w4EtoT3dr4eOWO",
    "Patrick (Legacy)": "ODq5zmih8GrVes37Dizd",
    "River": "SAz9YHcvj6GT2YYXdXww",
    "Harry (Legacy)": "SOYHLrjzK2X1ezoPC6cr",
    "Liam": "TX3LPaxmHKxFdv7VOQHJ",
    "Dorothy (Legacy)": "ThT5KcBeYPX3keUQqHPh",
    "Josh (Legacy)": "TxGEqnHWrfWFTfGW9XjX",
    "Will": "bIHbv24MWmeRgasZH58o",
    "Jeremy (Legacy)": "bVMeCyTHy58xNoL34h3p",
    "Jessica": "cgSgspJ2msm6clMCkdW9",
    "Eric": "cjVigY5qzO86Huf0OWal",
    "Michael (Legacy)": "flq6f7yk4E4fJM5XTYuZ",
    "Ethan (Legacy)": "g5CIjZEefAph4nQFvHAz",
    "Chris": "iP95p4xoKVk53GoZ742B",
    "Gigi (Legacy)": "jBpfuIE2acCO8z3wKNLl",
    "Freya (Legacy)": "jsCqWAovK2LkecY7zXl4",
    # NOTE(review): the leading characters of the next key look like a
    # mis-encoded emoji; confirm against the file's intended encoding.
    "üéÖ Santa Claus (Legacy)": "knrPHWnBmmDHMoiMeP3l",
    "Brian": "nPczCjzI2devNBz1zQrb",
    "Grace (Legacy)": "oWAxZDx7w5VEj9dCyTzz",
    "Daniel": "onwK4e9ZLuTAKqWW03F9",
    "Lily": "pFZP5JQG7iQjIQuC4Bku",
    "Serena (Legacy)": "pMsXgVXv3BLzUgSXRplE",
    "Adam (Legacy)": "pNInz6obpgDQGcFmaJgB",
    "Nicole (Legacy)": "piTKgcLEGmPE4e6mEKli",
    "Bill": "pqHfZKP75CvOlQylNhV4",
    # NOTE(review): "essie" may be a truncated name (possibly "Jessie"); confirm.
    "essie (Legacy)": "t0jbNlBVZ17f02VDIeMI",
    "Arnold (Legacy)": "wViXBPUzp2ZZixB1xQuM",
    "Sam (Legacy)": "yoZ06aMxZJJ28mfd3POQ",
    "Glinda (Legacy)": "z9fAnlkpzviPz146aGWa",
    "Giovanni (Legacy)": "zcAOhNBS3c14rBihAFp1",
    "Mimi (Legacy)": "zrHiDhphv9ZnVXBqCLjz"
}

# Initial values of the Stability / Similarity Boost sliders; also the fallback
# returned by get_voice_settings when the voice lookup fails.
DEFAULT_STABILITY = 0.5
DEFAULT_SIMILARITY = 0.75

def get_voice_settings(api_key, voice_id):
    """Look up the stored stability and similarity boost of a voice.

    Args:
        api_key: ElevenLabs API key; surrounding whitespace is stripped.
        voice_id: Id of the voice to query.

    Returns:
        A ``(stability, similarity_boost)`` tuple. If anything goes wrong
        (bad key, unknown voice, missing settings, network failure) the error
        is printed and ``(DEFAULT_STABILITY, DEFAULT_SIMILARITY)`` is returned.
    """
    try:
        settings = ElevenLabs(api_key=api_key.strip()).voices.get(voice_id).settings
        stability = settings.stability
        similarity = settings.similarity_boost
    except Exception as err:
        # Best effort: the UI still needs slider values, so fall back to defaults.
        print(f"L·ªói get_voice_settings: {err}")
        return DEFAULT_STABILITY, DEFAULT_SIMILARITY
    return stability, similarity

def check_api_key(api_key):
    """Validate an ElevenLabs API key and report the remaining character quota.

    Args:
        api_key: Key typed by the user; may be empty or ``None``.

    Returns:
        A user-facing status string: the remaining character count on HTTP 200,
        an "invalid key" message on HTTP 401, the response body for any other
        status, or the exception text if the request itself fails.
    """
    try:
        key = api_key.strip() if api_key else ""
        if not key:
            return "‚ùå Key ch∆∞a nh·∫≠p."

        reply = httpx.get("https://api.elevenlabs.io/v1/user", headers={"xi-api-key": key})
        status = reply.status_code
        if status == 401:
            return "‚ùå Key kh√¥ng t·ªìn t·∫°i ho·∫∑c kh√¥ng h·ª£p l·ªá."
        if status != 200:
            return f"‚ùå L·ªói: {reply.text}"

        # Remaining quota = plan limit minus characters already used.
        subscription = reply.json()["subscription"]
        remaining = subscription["character_limit"] - subscription["character_count"]
        return f"‚úÖ C√≤n l·∫°i {remaining:,} k√Ω t·ª±."
    except Exception as err:
        return f"‚ùå L·ªói: {err}"

def split_text_into_sentences(text):
    """Split *text* into trimmed, non-empty sentences.

    Any run of ``.``, ``!`` or ``?`` ends a sentence; the punctuation itself is
    dropped from the returned pieces.
    """
    pieces = map(str.strip, re.split(r'[.!?]+', text))
    return [piece for piece in pieces if piece]

def conditional_display_sentences(text, split):
    """Build the checkbox choices and the status table previewing *text*.

    Args:
        text: Raw input text. Anything empty or not a ``str`` yields an empty
            preview.
        split: When true, the text is split into sentences on runs of ``.``,
            ``!`` or ``?`` (the same delimiters as ``split_text_into_sentences``,
            so the numbers shown here match the sentences that are converted).
            When false, the whole stripped text is a single entry.

    Returns:
        A ``(checkboxes, table, audio)`` tuple: the 1-based sentence numbers as
        strings, a DataFrame with the number / sentence / status columns, and
        ``None`` to clear the audio output.
    """
    columns = ["STT", "C√¢u", "Tr·∫°ng th√°i"]
    if not text or not isinstance(text, str):
        return [], pd.DataFrame(columns=columns), None

    if split:
        # Must agree with the splitter used at conversion time; splitting on
        # "." alone would number sentences differently when "!" or "?" occur.
        sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
    else:
        sentences = [text.strip()]

    numbers = list(range(1, len(sentences) + 1))
    checkboxes = [str(n) for n in numbers]

    display_df = pd.DataFrame(
        {
            "STT": numbers,
            "C√¢u": sentences,
            "Tr·∫°ng th√°i": ["üü° Ch∆∞a chuy·ªÉn"] * len(sentences),
        },
        columns=columns,
    )

    return checkboxes, display_df, None

def text_to_speech_single(api_key, text, index, model_id, stability, similarity, voice_id, folder=None, is_single_sentence=False, base_filename=None):
    """Convert one piece of text to an MP3 file through the ElevenLabs API.

    Args:
        api_key: ElevenLabs API key; surrounding whitespace is stripped.
        text: Text to synthesise.
        index: Zero-based sentence index; shown 1-based in status messages and
            used as the default file name.
        model_id: ElevenLabs model id.
        stability: Voice stability setting.
        similarity: Voice similarity-boost setting.
        voice_id: ElevenLabs voice id.
        folder: Optional output directory, created when missing.
        is_single_sentence: When true, the file is named after *base_filename*
            instead of the sentence number.
        base_filename: File stem used in single-sentence mode. If it is missing
            or empty, the sentence number is used instead.

    Returns:
        A ``(status_message, path)`` tuple; ``path`` is ``None`` on failure.
        Errors are never raised: they are printed and reported in the message.
    """
    # The API key is deliberately not logged: it is a secret.
    try:
        if not api_key or not api_key.strip():
            return f"‚ùå C√¢u {index+1} l·ªói: Key ch∆∞a nh·∫≠p.", None

        client = ElevenLabs(api_key=api_key.strip())
        audio = client.text_to_speech.convert(
            text=text,
            voice_id=voice_id,
            model_id=model_id,
            voice_settings=VoiceSettings(stability=stability, similarity_boost=similarity),
            output_format="mp3_44100_128"
        )

        # Fall back to the sentence number so a missing or empty base name can
        # never produce "None.mp3" or a hidden ".mp3" file.
        if is_single_sentence and base_filename:
            stem = base_filename
        else:
            stem = str(index + 1)
        filename = f"{stem}.mp3"
        if folder:
            os.makedirs(folder, exist_ok=True)
            filename = os.path.join(folder, filename)

        # The SDK yields the audio in chunks; write them out as they arrive.
        with open(filename, "wb") as f:
            for chunk in audio:
                f.write(chunk)

        # Randomised pause after each successful request (presumably to space
        # out consecutive API calls).
        time.sleep(random.uniform(1.5, 2.5))
        return f"‚úÖ C√¢u {index+1}", filename
    except Exception as e:
        print(f"L·ªói text_to_speech_single: {str(e)}")
        if "401" in str(e) or "invalid_api_key" in str(e).lower():
            return f"‚ùå C√¢u {index+1} l·ªói: Key kh√¥ng t·ªìn t·∫°i ho·∫∑c kh√¥ng h·ª£p l·ªá.", None
        return f"‚ùå C√¢u {index+1} l·ªói: {str(e)}", None

def text_to_speech(api_key, text, split, model_id, stability, similarity, voice_name, selected_sentences, file_info=None):
    """Convert the selected sentences (or the whole text) to speech.

    Args:
        api_key: ElevenLabs API key.
        text: Text to convert; ``None`` is treated as empty.
        split: When true, *text* is split into sentences and only the selected
            ones are converted; otherwise the text is converted as one piece.
        model_id: ElevenLabs model id.
        stability: Voice stability setting.
        similarity: Voice similarity-boost setting.
        voice_name: Display name looked up in ``VOICE_LIST``.
        selected_sentences: 1-based sentence numbers as strings; ``None`` is
            treated as "nothing selected".
        file_info: Optional uploaded file; when it has a ``name`` attribute,
            its base name (without extension) names the output folder/file.

    Returns:
        A ``(status_table, audio_path)`` tuple. ``status_table`` is a DataFrame
        with number / sentence / status columns (or a single error column when
        the voice is unknown). ``audio_path`` is the first file produced, or
        ``None`` when nothing was converted.
    """
    voice_id = VOICE_LIST.get(voice_name)
    if not voice_id:
        return pd.DataFrame({"L·ªói": ["‚ùå Gi·ªçng ƒë·ªçc kh√¥ng h·ª£p l·ªá."]}), None

    # Normalise optional inputs so slicing and membership tests cannot raise.
    text = text or ""
    selected_sentences = selected_sentences or []

    if file_info and hasattr(file_info, 'name'):
        base_filename = os.path.splitext(os.path.basename(file_info.name))[0]
    else:
        # Derive a file-system-safe name from the first 15 characters of the text.
        base_filename = re.sub(r'[^a-zA-Z0-9_]', '_', text[:15]).strip('_')

    folder = base_filename or "output"
    pending = "üü° Ch∆∞a chuy·ªÉn"

    if split:
        sentences = split_text_into_sentences(text)
        statuses = [pending] * len(sentences)
        # Keep only selections that are valid 1-based indices into `sentences`.
        selected_indices = [
            int(idx) - 1
            for idx in selected_sentences
            if idx.isdigit() and 0 <= int(idx) - 1 < len(sentences)
        ]
        # A lone selection is saved under the base name instead of its number.
        is_single = len(selected_indices) == 1
        first_path = None
        for i in selected_indices:
            msg, path = text_to_speech_single(
                api_key, sentences[i], i, model_id, stability, similarity, voice_id,
                folder, is_single, base_filename
            )
            statuses[i] = msg
            if path and first_path is None:
                first_path = path
        table = pd.DataFrame({
            "STT": list(range(1, len(sentences) + 1)),
            "C√¢u": sentences,
            "Tr·∫°ng th√°i": statuses,
        })
        # Only the first generated file is handed to the audio player.
        return table, first_path

    status, path = pending, None
    if "1" in selected_sentences:
        status, path = text_to_speech_single(
            api_key, text, 0, model_id, stability, similarity, voice_id,
            folder, True, base_filename
        )
    table = pd.DataFrame({
        "STT": [1],
        "C√¢u": [text],
        "Tr·∫°ng th√°i": [status],
    })
    return table, path

def read_file_content(file):
    """Read an uploaded text file as UTF-8.

    Args:
        file: A path string, an ``os.PathLike`` object, or an upload object
            exposing the path through a ``name`` attribute.

    Returns:
        The file content, or a user-facing error message if the file cannot be
        read (the underlying error is printed).
    """
    try:
        if isinstance(file, os.PathLike):
            # For path objects ``.name`` is only the last component, so take
            # the full path instead.
            path = os.fspath(file)
        else:
            # Upload objects carry the path in ``.name``; plain strings are
            # used as the path itself.
            path = getattr(file, "name", file)
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as e:
        print(f"L·ªói read_file_content: {str(e)}")
        return "‚ùå Kh√¥ng th·ªÉ ƒë·ªçc n·ªôi dung file."

def get_ip_info():
    """Fetch the caller's public IP details from ipinfo.io as an HTML snippet.

    Returns:
        An HTML ``<div>`` showing IP, location (city, region, country) and
        organisation, with ``N/A`` for missing fields; or a red error ``<div>``
        containing the exception text if the lookup fails.
    """
    try:
        info = httpx.get("https://ipinfo.io/json").json()
        ip = info.get('ip', 'N/A')
        city = info.get('city', 'N/A')
        region = info.get('region', 'N/A')
        country = info.get('country', 'N/A')
        org = info.get('org', 'N/A')
        return f"""<div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; border: 1px solid #ddd;">
            <h3 style="color: #2c3e50; margin: 0 0 10px 0;">üìç Th√¥ng tin IP</h3>
            <p><span style="color: #3498db;">üåê IP:</span> {ip}</p>
            <p><span style="color: #3498db;">üìç V·ªã tr√≠:</span> {city}, {region}, {country}</p>
            <p><span style="color: #3498db;">üè¢ ISP:</span> {org}</p>
        </div>"""
    except Exception as err:
        return f"<div style='color: red; padding: 10px;'>‚ùå L·ªói: {err}</div>"

# Gradio UI: an API-key row, two tabs (typed text / uploaded .txt file) that
# share the same conversion pipeline, and an IP-info row. The custom CSS fixes
# the height of the checkbox column and of the status table and pins the
# table's column widths.
with gr.Blocks(theme=gr.themes.Default(), css="""
    .sync-scroll {
        overflow-y: auto !important;
        overflow-x: hidden !important;
        height: 350px !important;
        width: 100% !important;
    }

    .checkbox-group {
        display: flex !important;
        flex-direction: column !important;
        width: 100% !important;
        max-width: 150px !important;
        height: 350px !important;
        overflow-y: auto !important;
    }
    .checkbox-group .checkbox {
        display: block !important;
        margin-bottom: 10px !important;
        width: 100% !important;
    }

    .gradio-container .dataframe {
        width: 100% !important;
        table-layout: fixed !important;
        border-collapse: collapse !important;
    }

    .gradio-container .dataframe th:nth-child(1),
    .gradio-container .dataframe td:nth-child(1) {
        width: 60px !important;
        min-width: 60px !important;
        max-width: 60px !important;
        text-align: center !important;
    }

    .gradio-container .dataframe th:nth-child(2),
    .gradio-container .dataframe td:nth-child(2) {
        width: calc(100% - 160px) !important;
        min-width: 200px !important;
        max-width: calc(100% - 160px) !important;
        word-break: break-word !important;
        white-space: pre-wrap !important;
        overflow-wrap: anywhere !important;
        padding: 8px !important;
    }

    .gradio-container .dataframe th:nth-child(3),
    .gradio-container .dataframe td:nth-child(3) {
        width: 100px !important;
        min-width: 100px !important;
        max-width: 100px !important;
        text-align: center !important;
        position: sticky !important;
        right: 0 !important;
        background: white !important;
        z-index: 1 !important;
    }

    .gradio-container .dataframe tr {
        height: auto !important;
    }

    .gradio-container .dataframe tr:hover td:nth-child(3) {
        background: #f0f0f0 !important;
    }
""") as demo:
    gr.Markdown("<h1 style='text-align:center;'>üé§ Text to Speech v·ªõi ElevenLabs</h1>")

    # --- API key entry and validation (shared by both tabs) ---
    with gr.Row():
        api_input = gr.Textbox(label="üîë API Key", type="password")
        check_button = gr.Button("‚úÖ Ki·ªÉm tra")
    check_output = gr.Textbox(label="K·∫øt qu·∫£ ki·ªÉm tra API Key", interactive=False)
    check_button.click(fn=check_api_key, inputs=api_input, outputs=check_output)

    with gr.Tabs():
        # --- Tab 1: text typed directly into a textbox ---
        with gr.Tab("üìù Nh·∫≠p VƒÉn B·∫£n"):
            text_input = gr.Textbox(label="VƒÉn b·∫£n", lines=4)
            split_toggle = gr.Checkbox(label="‚úÇÔ∏è T√°ch c√¢u")

            # Sentence selection (left) next to the per-sentence status table (right).
            with gr.Row():
                with gr.Column(scale=1, min_width=150):
                    sentence_checkboxes = gr.CheckboxGroup(
                        label="Ch·ªçn",
                        choices=[],
                        value=[],
                        elem_classes=["checkbox-group"]
                    )
                with gr.Column(scale=9):
                    status_output = gr.DataFrame(
                        label="",
                        headers=["STT", "C√¢u", "Tr·∫°ng th√°i"],
                        datatype=["number", "str", "str"],
                        interactive=False,
                        type="pandas",
                        elem_classes=["sync-scroll"],
                        wrap=True
                    )

            model_dropdown = gr.Dropdown(label="Model", choices=get_model_list(), value="eleven_multilingual_v2")
            voice_dropdown = gr.Dropdown(label="Gi·ªçng ƒë·ªçc", choices=["‚ùó H√£y ch·ªçn gi·ªçng ƒë·ªçc"] + list(VOICE_LIST.keys()), value="‚ùó H√£y ch·ªçn gi·ªçng ƒë·ªçc")
            stability_slider = gr.Slider(0, 1, label="Stability", step=0.01, value=DEFAULT_STABILITY)
            similarity_slider = gr.Slider(0, 1, label="Similarity Boost", step=0.01, value=DEFAULT_SIMILARITY)

            # Picking a real voice loads its stored settings into the sliders;
            # the placeholder entry resets them to the defaults.
            voice_dropdown.change(
                fn=lambda k, v: get_voice_settings(k, VOICE_LIST.get(v, "")) if not v.startswith("‚ùó") else (DEFAULT_STABILITY, DEFAULT_SIMILARITY),
                inputs=[api_input, voice_dropdown],
                outputs=[stability_slider, similarity_slider]
            )

            tts_button = gr.Button("üöÄ Chuy·ªÉn gi·ªçng")
            audio_output = gr.Audio()

            def update_checkboxes(text, split):
                """Rebuild the sentence checkboxes and status table for *text*.

                All sentences start out selected. On any error the preview is
                reset to empty and the error is printed.
                """
                try:
                    checkboxes, df, audio = conditional_display_sentences(text, split)
                    return gr.CheckboxGroup(choices=checkboxes, value=checkboxes), df, audio
                except Exception as e:
                    print(f"L·ªói khi c·∫≠p nh·∫≠t checkbox: {str(e)}")
                    return gr.CheckboxGroup(choices=[], value=[]), pd.DataFrame(columns=["STT", "C√¢u", "Tr·∫°ng th√°i"]), None

            # Refresh the preview whenever the text or the split option changes.
            text_input.change(
                fn=update_checkboxes,
                inputs=[text_input, split_toggle],
                outputs=[sentence_checkboxes, status_output, audio_output]
            )

            split_toggle.change(
                fn=update_checkboxes,
                inputs=[text_input, split_toggle],
                outputs=[sentence_checkboxes, status_output, audio_output]
            )

            tts_button.click(
                fn=text_to_speech,
                inputs=[api_input, text_input, split_toggle, model_dropdown, stability_slider, similarity_slider, voice_dropdown, sentence_checkboxes],
                outputs=[status_output, audio_output]
            )

        # --- Tab 2: text loaded from an uploaded .txt file ---
        with gr.Tab("üìÇ Nh·∫≠p t·ª´ File"):
            file_input = gr.File(label="T·∫£i l√™n file (.txt)", file_types=[".txt"])
            file_text_output = gr.Textbox(label="N·ªôi dung file", lines=4, interactive=True)
            split_toggle_file = gr.Checkbox(label="‚úÇÔ∏è T√°ch c√¢u")

            with gr.Row():
                with gr.Column(scale=1, min_width=150):
                    sentence_checkboxes_file = gr.CheckboxGroup(
                        label="Ch·ªçn",
                        choices=[],
                        value=[],
                        elem_classes=["checkbox-group"]
                    )
                with gr.Column(scale=9):
                    status_output_file = gr.DataFrame(
                        label="",
                        headers=["STT", "C√¢u", "Tr·∫°ng th√°i"],
                        datatype=["number", "str", "str"],
                        interactive=False,
                        type="pandas",
                        elem_classes=["sync-scroll"],
                        wrap=True
                    )

            model_dropdown_file = gr.Dropdown(label="Model", choices=get_model_list(), value="eleven_multilingual_v2")
            voice_dropdown_file = gr.Dropdown(label="Gi·ªçng ƒë·ªçc", choices=["‚ùó H√£y ch·ªçn gi·ªçng ƒë·ªçc"] + list(VOICE_LIST.keys()), value="‚ùó H√£y ch·ªçn gi·ªçng ƒë·ªçc")
            stability_slider_file = gr.Slider(0, 1, label="Stability", step=0.01, value=DEFAULT_STABILITY)
            similarity_slider_file = gr.Slider(0, 1, label="Similarity Boost", step=0.01, value=DEFAULT_SIMILARITY)

            voice_dropdown_file.change(
                fn=lambda k, v: get_voice_settings(k, VOICE_LIST.get(v, "")) if not v.startswith("‚ùó") else (DEFAULT_STABILITY, DEFAULT_SIMILARITY),
                inputs=[api_input, voice_dropdown_file],
                outputs=[stability_slider_file, similarity_slider_file]
            )

            tts_button_file = gr.Button("üöÄ Chuy·ªÉn gi·ªçng")
            audio_output_file = gr.Audio()

            def update_file_checkboxes(file, split):
                """Load *file* into the textbox and rebuild its sentence preview.

                With no file the content is empty. On any error everything is
                reset to empty and the error is printed.
                """
                try:
                    content = read_file_content(file) if file else ""
                    checkboxes, df, audio = conditional_display_sentences(content, split)
                    return content, gr.CheckboxGroup(choices=checkboxes, value=checkboxes), df, audio
                except Exception as e:
                    print(f"L·ªói khi c·∫≠p nh·∫≠t checkbox t·ª´ file: {str(e)}")
                    return "", gr.CheckboxGroup(choices=[], value=[]), pd.DataFrame(columns=["STT", "C√¢u", "Tr·∫°ng th√°i"]), None

            file_input.change(
                fn=update_file_checkboxes,
                inputs=[file_input, split_toggle_file],
                outputs=[file_text_output, sentence_checkboxes_file, status_output_file, audio_output_file]
            )

            # The loaded text stays editable, so edits and the split option
            # refresh the preview through the same handler as the text tab.
            file_text_output.change(
                fn=update_checkboxes,
                inputs=[file_text_output, split_toggle_file],
                outputs=[sentence_checkboxes_file, status_output_file, audio_output_file]
            )

            split_toggle_file.change(
                fn=update_checkboxes,
                inputs=[file_text_output, split_toggle_file],
                outputs=[sentence_checkboxes_file, status_output_file, audio_output_file]
            )

            # file_input is passed last so the output is named after the upload.
            tts_button_file.click(
                fn=text_to_speech,
                inputs=[api_input, file_text_output, split_toggle_file, model_dropdown_file,
                        stability_slider_file, similarity_slider_file, voice_dropdown_file,
                        sentence_checkboxes_file, file_input],
                outputs=[status_output_file, audio_output_file]
            )

    # --- Public IP lookup and footer ---
    with gr.Row():
        ip_button = gr.Button("Xem IP", variant="primary")
        ip_output = gr.Markdown(label="Th√¥ng tin IP")
    ip_button.click(fn=get_ip_info, outputs=ip_output)
    gr.Markdown('<p style="text-align:center;">Source: <a href="https://github.com/PhamThangbn" target="_blank">github.com/PhamThangbn</a><br>Crafted with care by PhamThangbn</p>')

demo.launch()