# Chatterbox TTS Server — UdemyCrores Voice Clone

**Setup:**
1. Make sure GPU runtime is enabled: Runtime → Change runtime type → T4 GPU
2. Run all cells in order
3. Copy the ngrok URL printed at the end
4. Paste it in your `.env` file as `CHATTERBOX_API_URL=<url>`

The voice reference file is downloaded automatically from the GitHub repo (Cell 2).

In [None]:
# Cell 1: Install Chatterbox TTS + dependencies
# Every line starting with `!` is executed by the notebook as a shell command,
# not as Python. `-q` keeps pip/git output quiet.
# Refresh the packaging tools before installing anything else.
!pip install -q --upgrade pip setuptools wheel
# Install torch/torchaudio from the PyTorch `cu121` wheel index.
!pip install -q torch torchaudio --index-url https://download.pytorch.org/whl/cu121
# Clone the Chatterbox sources into /content/chatterbox. stderr is discarded and
# `|| true` forces the line to succeed, so re-running the cell with the
# directory already present does not abort.
# NOTE(review): this also hides genuine clone failures (e.g. no network); the
# next line would then fail on `cd` instead.
!git clone -q https://github.com/resemble-ai/chatterbox.git /content/chatterbox 2>/dev/null || true
# Remove strict numpy pin so it uses Colab's pre-installed version
# (sed rewrites every quoted "numpy..." entry in pyproject.toml to plain
# "numpy"), then install the package in editable mode.
!cd /content/chatterbox && sed -i 's/"numpy[^"]*"/"numpy"/g' pyproject.toml && pip install -q -e .
# Extra packages imported by the server cell (Cell 4).
!pip install -q pyngrok flask soundfile

In [None]:
# Cell 2: Download voice reference from GitHub repo
import urllib.request
import os

# Remote location of the reference recording and where it is stored locally.
# VOICE_FILE is read again by the model test cell and by the TTS endpoint.
VOICE_URL = 'https://github.com/JineeshTS/UdemyTrainings/raw/main/data/voice-reference.wav'
VOICE_FILE = '/content/voice-reference.wav'

# A zero-byte file is treated like a missing one so that a previously failed
# download is retried instead of being mistaken for a valid reference.
if not os.path.exists(VOICE_FILE) or os.path.getsize(VOICE_FILE) == 0:
    print('Downloading voice reference from GitHub...')
    # Download to a temporary sibling path and move it into place only after
    # the transfer finished: an interrupted download must not leave a
    # truncated VOICE_FILE behind that later runs would silently reuse.
    partial_path = VOICE_FILE + '.part'
    try:
        urllib.request.urlretrieve(VOICE_URL, partial_path)
        os.replace(partial_path, VOICE_FILE)
    finally:
        # Only still present when the download or the move failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f'Downloaded: {VOICE_FILE} ({os.path.getsize(VOICE_FILE)} bytes)')
else:
    print(f'Voice reference already exists: {VOICE_FILE}')

# Or upload your own:
# from google.colab import files
# uploaded = files.upload()
# VOICE_FILE = list(uploaded.keys())[0]

In [None]:
# Cell 3: Load Chatterbox model
import torch
import torchaudio
from chatterbox.tts import ChatterboxTTS

# Use the GPU when torch can see one, otherwise fall back to the CPU.
# `device` is also reported by the /health endpoint in the server cell.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Device: {device}')

# Load the pretrained Chatterbox model onto the selected device.
# `model` is reused by the TTS endpoint in the server cell.
model = ChatterboxTTS.from_pretrained(device=device)
print('Chatterbox model loaded!')

# Test with voice reference
# Generates one short sentence using VOICE_FILE (set in Cell 2) as the audio
# prompt and writes it to disk, so problems show up before the server starts.
# NOTE(review): assumes generate() returns a tensor in the layout
# torchaudio.save expects and that model.sr is the sample rate — confirm.
test_wav = model.generate("Hello, welcome to the course.", audio_prompt_path=VOICE_FILE)
torchaudio.save('/content/test-output.wav', test_wav, model.sr)
print('Test generation successful!')

# The Audio(...) expression must stay the last statement of the cell so the
# notebook renders it as the cell output.
from IPython.display import Audio
Audio('/content/test-output.wav')

In [None]:
# Cell 4: Start OpenAI-compatible TTS API server with ngrok
from pyngrok import ngrok
from flask import Flask, request, send_file, jsonify
import io
import soundfile as sf
import numpy as np
import threading

# ============================================
# SET YOUR NGROK AUTH TOKEN HERE
# Get free token from: https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_AUTH_TOKEN = 'YOUR_NGROK_TOKEN_HERE'  # <-- REPLACE THIS
# ============================================

# Fail fast with an actionable message: an empty or unreplaced placeholder
# token would otherwise be handed to ngrok as-is and only fail later, when
# the tunnel is opened.
if not NGROK_AUTH_TOKEN or NGROK_AUTH_TOKEN == 'YOUR_NGROK_TOKEN_HERE':
    raise ValueError(
        'NGROK_AUTH_TOKEN is not set. Replace the placeholder with your token '
        'from https://dashboard.ngrok.com/get-started/your-authtoken'
    )

# Register the token with pyngrok before any tunnel is opened.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Flask application that the route handlers below attach to.
app = Flask(__name__)

@app.route('/health', methods=['GET'])
def health():
    """Answer GET /health with a small JSON status document.

    The payload carries a fixed status and model name plus the torch device
    string selected when the model was loaded.
    """
    payload = {
        'status': 'ok',
        'model': 'chatterbox',
        'device': device,
    }
    return jsonify(payload)

@app.route('/v1/audio/speech', methods=['POST'])
def tts():
    """Synthesize speech for the text in the JSON body and return it as WAV.

    Expects a JSON object whose 'input' key holds the text to speak. No other
    key of the request body is read. The audio is generated with the global
    `model`, using VOICE_FILE as the audio prompt.

    Returns:
        A 'speech.wav' attachment (audio/wav) on success, or a JSON error
        document with status 400 when the body is not a JSON object or
        contains no usable text.
    """
    # silent=True yields None for a missing, non-JSON or malformed body
    # instead of raising, so all bad requests get the same JSON 400 answer.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('input', '')
    # Reject non-string and whitespace-only input before it reaches the
    # model; slicing a non-string below would otherwise end in a 500.
    if not isinstance(text, str) or not text.strip():
        return jsonify({'error': 'No input text'}), 400

    print(f'Generating: {text[:80]}...')
    wav = model.generate(text, audio_prompt_path=VOICE_FILE)

    # Encode as WAV into an in-memory buffer (no temporary file on disk).
    # NOTE(review): assumes generate() returns a torch tensor whose squeezed
    # form is what soundfile should write — confirm.
    wav_np = wav.squeeze().cpu().numpy()
    buf = io.BytesIO()
    sf.write(buf, wav_np, model.sr, format='WAV')
    # Rewind so send_file streams the buffer from its first byte.
    buf.seek(0)

    return send_file(buf, mimetype='audio/wav', as_attachment=True, download_name='speech.wav')

import time

# Start ngrok tunnel
# Port 8000 is the port the Flask server below listens on.
tunnel = ngrok.connect(8000)
print(f'''
╔══════════════════════════════════════════════════════════╗
║  CHATTERBOX TTS SERVER RUNNING                         ║
║                                                        ║
║  API URL: {str(tunnel.public_url):45s}║
║                                                        ║
║  Add to .env:                                          ║
║  CHATTERBOX_API_URL={str(tunnel.public_url):37s}║
╚══════════════════════════════════════════════════════════╝
''')

try:
    # Run Flask in thread so Colab stays responsive
    threading.Thread(target=lambda: app.run(host='0.0.0.0', port=8000), daemon=True).start()

    # Keep alive
    # Blocks the cell and prints the public URL once a minute until the cell
    # is interrupted.
    while True:
        time.sleep(60)
        print(f'Server running... {tunnel.public_url}')
finally:
    # Close the tunnel when the cell is stopped (the interrupt still
    # propagates), so that re-running the cell does not pile up open tunnels.
    ngrok.disconnect(tunnel.public_url)