# TTS Worker - Chatterbox + Higgs Audio V2
**Purpose**: Run production-quality TTS engines on a free Colab GPU

**Engines**:
- **Chatterbox** (Tier 2): Emotion exaggeration, 89/100
- **Higgs Audio V2** (Tier 3): Ultimate quality, 92/100

**Output**: An HTTP API exposed through an ngrok tunnel, so it can be called remotely from your local machine

## Cell 1: Install Chatterbox

In [None]:
!pip install chatterbox-tts
print("âœ… Chatterbox installed")

## Cell 2: Install Higgs Audio V2

In [None]:
!git clone https://github.com/boson-ai/higgs-audio.git
%cd higgs-audio
!pip install -r requirements.txt
!pip install -e .
%cd ..
print("âœ… Higgs Audio V2 installed")

## Cell 3: Load Models (This takes ~2 minutes)

In [None]:
import torch
import torchaudio
from chatterbox.tts import ChatterboxTTS
from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine
from boson_multimodal.data_types import ChatMLSample, Message

# Both engines are placed on the GPU below. Fail early with an actionable
# message instead of a low-level CUDA error when the runtime has no GPU.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. In Colab choose Runtime > Change runtime type "
        "> GPU, then re-run this notebook."
    )

# Load Chatterbox
print("Loading Chatterbox...")
chatterbox = ChatterboxTTS.from_pretrained(device="cuda")
print(f"✅ Chatterbox loaded (SR: {chatterbox.sr}Hz)")

# Load Higgs: generation model first, then its audio tokenizer.
print("\nLoading Higgs Audio V2...")
higgs = HiggsAudioServeEngine(
    "bosonai/higgs-audio-v2-generation-3B-base",
    "bosonai/higgs-audio-v2-tokenizer",
    device="cuda"
)
print("✅ Higgs Audio V2 loaded")

print("\n🚀 Both models ready for inference")

## Cell 4: Test Chatterbox

In [None]:
from IPython.display import Audio, display

# Sample narration line used for the smoke tests in this notebook.
test_text = "In a world where true crime narratives captivate millions, one story stands above the rest."

# Test basic generation with the engine's default settings.
print("Generating with Chatterbox...")
wav = chatterbox.generate(test_text)
torchaudio.save("test_chatterbox_neutral.wav", wav, chatterbox.sr)
print("✅ Saved: test_chatterbox_neutral.wav")

# Test emotion exaggeration: same text with exaggeration=0.8 and
# cfg_weight=0.3, saved separately so the two renders can be compared.
print("\nTesting emotion exaggeration...")
wav_dramatic = chatterbox.generate(test_text, exaggeration=0.8, cfg_weight=0.3)
torchaudio.save("test_chatterbox_dramatic.wav", wav_dramatic, chatterbox.sr)
print("✅ Saved: test_chatterbox_dramatic.wav")

# Inline players for both renders.
print("\nNeutral:")
display(Audio("test_chatterbox_neutral.wav"))
print("\nDramatic (exaggeration=0.8):")
display(Audio("test_chatterbox_dramatic.wav"))

## Cell 5: Test Higgs Audio V2

In [None]:
# Imported here as well so this cell does not depend on the Chatterbox test
# cell having been run just to get the audio player helpers.
from IPython.display import Audio, display

# System prompt describing the recording scene. The Flask /generate handler
# reads this same module-level name for Higgs requests.
system_prompt = (
    "Generate audio following instruction.\n\n<|scene_desc_start|>\n"
    "Audio is recorded from a quiet room.\n<|scene_desc_end|>"
)

# Conversation passed to the engine: the scene description followed by the
# text to speak (test_text comes from the Chatterbox test cell).
messages = [
    Message(role="system", content=system_prompt),
    Message(role="user", content=test_text),
]

print("Generating with Higgs Audio V2...")
output = higgs.generate(
    chat_ml_sample=ChatMLSample(messages=messages),
    max_new_tokens=1024,
    temperature=0.3,
    top_p=0.95,
    top_k=50,
    stop_strings=["<|end_of_text|>", "<|eot_id|>"],
)

# output.audio is converted from a NumPy array and given a leading axis
# before being written at the sampling rate the engine reports.
torchaudio.save(
    "test_higgs.wav",
    torch.from_numpy(output.audio)[None, :],
    output.sampling_rate
)
print("✅ Saved: test_higgs.wav")

print("\nHiggs Audio V2:")
display(Audio("test_higgs.wav"))

## Cell 6: Create Flask API

In [None]:
from flask import Flask, request, jsonify, send_file
import hashlib
import os

# Application object that the route decorators in this cell attach to.
app = Flask(__name__)


@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: return a fixed status plus the engine names served."""
    payload = {"status": "healthy", "engines": ["chatterbox", "higgs"]}
    return jsonify(payload)

import json
import tempfile

# Fallback Higgs system prompt, so the API keeps working even when the Higgs
# test cell (which defines the module-level `system_prompt`) was never run.
HIGGS_SYSTEM_PROMPT = (
    "Generate audio following instruction.\n\n<|scene_desc_start|>\n"
    "Audio is recorded from a quiet room.\n<|scene_desc_end|>"
)

# Tunable request fields per engine, with their defaults. This table is also
# the whitelist of valid engine names.
ENGINE_DEFAULTS = {
    "chatterbox": {"exaggeration": 0.5, "cfg_weight": 0.5},
    "higgs": {"temperature": 0.3, "top_p": 0.95},
}


def _json_error(message, status):
    """Build a JSON error response with the given HTTP status code."""
    return jsonify({"error": message}), status


@app.route('/generate', methods=['POST'])
def generate():
    """Synthesize speech for the posted text and return it as a WAV file.

    Expects a JSON object with:
        text (str, required): the text to speak.
        engine (str, optional): "chatterbox" (default) or "higgs".
        exaggeration, cfg_weight (number, optional): Chatterbox settings,
            both defaulting to 0.5.
        temperature, top_p (number, optional): Higgs sampling settings,
            defaulting to 0.3 and 0.95.

    Returns:
        The audio as an "audio/wav" response, or a JSON ``{"error": ...}``
        body with status 400 when the request is malformed.

    Renders are cached under /tmp, keyed by engine, text and every setting
    that influences the audio.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _json_error("Request body must be a JSON object", 400)

    text = data.get("text")
    if not isinstance(text, str) or not text.strip():
        return _json_error("'text' must be a non-empty string", 400)

    # Validate the engine before it is interpolated into a filesystem path.
    engine = data.get("engine", "chatterbox")
    if not isinstance(engine, str) or engine not in ENGINE_DEFAULTS:
        return _json_error(
            f"'engine' must be one of {sorted(ENGINE_DEFAULTS)}", 400
        )

    params = {}
    for name, default in ENGINE_DEFAULTS[engine].items():
        value = data.get(name, default)
        # bool is a subclass of int, so exclude it explicitly.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return _json_error(f"'{name}' must be a number", 400)
        params[name] = float(value)

    prompt = None
    if engine == "higgs":
        # Prefer the notebook-level prompt when it exists, so edits made in
        # the Higgs test cell still apply to the API.
        prompt = globals().get("system_prompt", HIGGS_SYSTEM_PROMPT)

    # The cache key covers everything that changes the audio. Hashing the
    # text alone would return a stale render when only the settings differ.
    key_material = json.dumps(
        {"text": text, "params": params, "system_prompt": prompt},
        sort_keys=True,
    )
    content_hash = hashlib.sha256(key_material.encode("utf-8")).hexdigest()[:16]
    output_path = f"/tmp/tts_{engine}_{content_hash}.wav"

    if not os.path.exists(output_path):
        # Render into a scratch file and move it into place only on success,
        # so a failed render never leaves a truncated WAV in the cache.
        fd, scratch_path = tempfile.mkstemp(suffix=".wav", dir="/tmp")
        os.close(fd)
        try:
            if engine == "chatterbox":
                wav = chatterbox.generate(text, **params)
                torchaudio.save(scratch_path, wav, chatterbox.sr)
            else:
                messages = [
                    Message(role="system", content=prompt),
                    Message(role="user", content=text),
                ]
                output = higgs.generate(
                    chat_ml_sample=ChatMLSample(messages=messages),
                    max_new_tokens=1024,
                    temperature=params["temperature"],
                    top_p=params["top_p"],
                    stop_strings=["<|end_of_text|>", "<|eot_id|>"],
                )
                torchaudio.save(
                    scratch_path,
                    torch.from_numpy(output.audio)[None, :],
                    output.sampling_rate,
                )
            os.replace(scratch_path, output_path)
        finally:
            # After a successful replace the scratch file no longer exists.
            if os.path.exists(scratch_path):
                os.remove(scratch_path)

    return send_file(output_path, mimetype="audio/wav")

print("✅ Flask API configured")

## Cell 7: Start ngrok Tunnel & Run Server

In [None]:
!pip install flask-ngrok pyngrok

from pyngrok import ngrok
import threading

# Start ngrok tunnel
public_url = ngrok.connect(5000)
print(f"\n{'='*60}")
print(f"ðŸš€ TTS WORKER READY")
print(f"{'='*60}")
print(f"Public URL: {public_url}")
print(f"\nTest with:")
print(f"curl -X POST {public_url}/generate \\")
print(f"  -H 'Content-Type: application/json' \\")
print(f"  -d '{{\"text\": \"Hello world\", \"engine\": \"chatterbox\"}}' \\")
print(f"  --output test.wav")
print(f"\n{'='*60}\n")

# Run Flask in background
threading.Thread(target=app.run, kwargs={"port": 5000, "use_reloader": False}).start()

print("Server running... Keep this cell alive!")

## Usage from Local Machine

```python
import requests

# Copy the public URL from Cell 7
WORKER_URL = "https://xxxx-xx-xxx-xxx-xx.ngrok-free.app"

# Generate with Chatterbox
response = requests.post(
    f"{WORKER_URL}/generate",
    json={
        "text": "Your narration text here",
        "engine": "chatterbox",
        "exaggeration": 0.7,
        "cfg_weight": 0.4,
    },
    timeout=300,  # synthesis can be slow, but never wait forever
)
# Fail loudly on an HTTP error instead of writing an error body to output.wav.
response.raise_for_status()

with open("output.wav", "wb") as f:
    f.write(response.content)

print("✅ Audio saved to output.wav")
```