In [None]:
!pip install numpy==1.25.2

In [None]:
!pip install torch==2.6.0 torchvision==0.17.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu121
!pip install chatterbox-tts


In [None]:
!pip install gradio

In [None]:
from pathlib import Path

import torch
import torchaudio as ta
from chatterbox.tts import ChatterboxTTS

# Smoke test: synthesize one sentence with the default voice, then (optionally)
# again with a reference voice taken from an audio prompt.

# Use the GPU when torch can see one; otherwise fall back to CPU so the cell
# still runs on a CPU-only runtime instead of failing on device="cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ChatterboxTTS.from_pretrained(device=device)

text = "Ezreal and Jinx teamed up with Ahri, Yasuo, and Teemo to take down the enemy's Nexus in an epic late-game pentakill."
wav = model.generate(text)
ta.save("test-1.wav", wav, model.sr)

# If you want to synthesize with a different voice, specify the audio prompt
AUDIO_PROMPT_PATH="/content/Voicy_The akatsuki.mp3"
if Path(AUDIO_PROMPT_PATH).is_file():
    wav = model.generate(text, audio_prompt_path=AUDIO_PROMPT_PATH)
    ta.save("test-2.wav", wav, model.sr)
else:
    # The prompt file has to be uploaded by hand; without it, skip the cloning
    # step with a clear message rather than aborting a Restart & Run All.
    print(f"Skipping voice-cloning demo: no audio prompt found at {AUDIO_PROMPT_PATH!r}")


In [None]:
import torch
import gradio as gr
import torchaudio as ta
from chatterbox.tts import ChatterboxTTS
import tempfile

# Load the pretrained Chatterbox model on the GPU when torch reports one,
# otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ChatterboxTTS.from_pretrained(device=device)

# Gradio click handler: text + style settings in, path of a WAV file out
def tts_generate(text, audio_prompt_path, exaggeration, cfg_weight, temperature):
    """Synthesize `text` with the module-level `model` and save it as a WAV file.

    Args:
        text: Text to speak.
        audio_prompt_path: Path to a reference audio file, or None. It is only
            forwarded to `model.generate` when it is not None.
        exaggeration: Forwarded to `model.generate` unchanged.
        cfg_weight: Forwarded to `model.generate` unchanged.
        temperature: Forwarded to `model.generate` unchanged.

    Returns:
        The filesystem path of a newly created temporary ``.wav`` file holding
        the generated audio. The file is created with ``delete=False`` and is
        not removed by this function.
    """
    generate_args = dict(
        exaggeration=exaggeration,
        cfg_weight=cfg_weight,
        temperature=temperature,
    )
    # Only mention the prompt when the caller actually supplied one.
    if audio_prompt_path is not None:
        generate_args["audio_prompt_path"] = audio_prompt_path

    wav = model.generate(text, **generate_args)

    out_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with out_file:
        ta.save(out_file.name, wav, model.sr)
    return out_file.name

# Gradio Interface

# Styling for the generate button. It is handed to gr.Blocks(css=...) instead of
# being embedded as a <style> tag inside a Markdown component. The selectors
# cover both cases: the "generate-btn" class sitting on the <button> itself, or
# on an element wrapping it.
# NOTE(review): !important is there because the theme's own button rules may
# otherwise take precedence — verify the result in the browser.
CUSTOM_CSS = """
button.generate-btn, .generate-btn button {
    background-color: #4CAF50 !important;
    color: white !important;
    font-size: 18px;
    padding: 14px 24px;
    border-radius: 10px;
}
button.generate-btn:hover, .generate-btn button:hover {
    background-color: #45a049 !important;
}
"""

with gr.Blocks(title="Chatterbox TTS", css=CUSTOM_CSS) as demo:
    # Page header (title + one-line description).
    gr.Markdown(
        """
        <div style="text-align: center; font-size: 24px; font-weight: bold;">🗣️ Chatterbox TTS</div>
        <div style="text-align: center; font-size: 16px; margin-top: -10px; color: gray;">
        Generate expressive speech from text — with optional voice cloning using audio prompts.
        </div>
        <br>
        """
    )

    with gr.Row():
        # Left column: what to say and, optionally, whose voice to imitate.
        with gr.Column():
            text_input = gr.Textbox(
                label="📝 Enter your text",
                placeholder="e.g. Ezreal and Jinx teamed up with Ahri...",
                lines=5
            )
            audio_prompt = gr.Audio(
                label="🎤 Audio Prompt (optional)",
                type="filepath",
                show_label=True
            )
        # Right column: sliders forwarded to tts_generate as
        # exaggeration / cfg_weight / temperature.
        with gr.Column():
            with gr.Group():
                gr.Markdown("🎛️ **Voice Style Settings**")
                exaggeration = gr.Slider(
                    0.0, 1.5, 0.5, step=0.05,
                    label="🎭 Exaggeration",
                    info="Controls emotional expression of the voice."
                )
                cfg_weight = gr.Slider(
                    0.0, 1.0, 0.5, step=0.05,
                    label="🎯 CFG Weight",
                    info="Strength of resemblance to the prompt voice."
                )
                temperature = gr.Slider(
                    0.01, 1.0, 0.8, step=0.01,
                    label="🌡️ Temperature",
                    info="Higher = more creative/random intonation."
                )

    generate_btn = gr.Button("🔊 Generate Speech", size="lg", elem_classes="generate-btn")

    output_audio = gr.Audio(
        label="✅ Generated Audio",
        type="filepath",
        show_download_button=True,
        autoplay=True
    )

    # The order of `inputs` must match tts_generate's parameter order.
    generate_btn.click(
        fn=tts_generate,
        inputs=[text_input, audio_prompt, exaggeration, cfg_weight, temperature],
        outputs=output_audio
    )

# Start the Gradio app. share=True requests a public share link in addition to
# the local URL — anyone with that link can drive the model on this runtime.
demo.launch(share=True)
