# 🎙️ AI Companion: XTTS v2 Remote Voice Bridge

### 🛠️ Setup Instructions
1. **GPU**: `Runtime` > `Change runtime type` > **T4 GPU**.
2. **Ngrok**: Add `NGROK_TOKEN` to **Secrets** (key icon) and enable "Notebook access".
3. **Run All**: `Ctrl + F9`.

**Note**: This version uses PyTorch 2.5.1 to ensure compatibility with XTTS v2.

In [None]:
# @title 1. Prepare Environment
# Builds an isolated Python 3.10 virtual environment ("xtts_env") and installs
# the XTTS bridge dependencies into it, leaving the notebook kernel's own
# packages untouched.
import os

# Install the Python 3.10 interpreter plus its venv module and headers via apt.
# `-qq` and `> /dev/null` keep the cell output short (stderr is still shown).
print("Installing Python 3.10...")
!sudo apt-get update -qq
!sudo apt-get install -y python3.10 python3.10-venv python3.10-dev > /dev/null

# Only create the venv when the directory is absent, so re-running this cell
# reuses the existing environment.
print("Creating virtual environment...")
if not os.path.exists("xtts_env"):
    !python3.10 -m venv xtts_env

# Everything below goes through the venv's own pip (./xtts_env/bin/pip), not
# the kernel's pip.
print("Installing XTTS dependencies (3-5 minutes)...")
!./xtts_env/bin/pip install -q -U pip
# Downgrade torch to 2.5.1 to avoid the PyTorch 2.6 weights_only security error
print("Installing stable PyTorch 2.5.1...")
!./xtts_env/bin/pip install -q fastapi uvicorn torch==2.5.1 torchaudio==2.5.1 transformers==4.33.0 TTS

# NOTE(review): printed unconditionally -- a failing `!` command above does not
# stop the cell, so this message alone does not prove the install succeeded.
print("\n‚úÖ Environment Ready!")

In [None]:
# @title 2. Create Bridge Script
# Writes the FastAPI server source to bridge_server.py in the working
# directory; the launch cell later runs it with ./xtts_env/bin/uvicorn.
# The file is written as UTF-8 explicitly because the script contains
# non-ASCII text and Python reads source files as UTF-8 by default.
# NOTE: the generated source must not contain a triple double-quote or a
# backslash, since it lives inside this (non-raw) triple-quoted literal.
with open("bridge_server.py", "w", encoding="utf-8") as f:
    f.write("""
import torch
import os
import shutil
import uuid
from TTS.api import TTS
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import FileResponse
from starlette.background import BackgroundTask
import uvicorn

# The model is loaded once at import time and shared by every request.
print("-> Loading XTTS v2 model... (This may take a minute)")
device = "cuda" if torch.cuda.is_available() else "cpu"
tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
print("‚úÖ Model Loaded.")

app = FastAPI()


def _remove_if_exists(path):
    # Best-effort delete of a per-request scratch file; a missing file is fine.
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate_tts")
async def generate_tts_endpoint(
    text: str = Form(...),
    language: str = Form("en"),
    speaker_file: UploadFile = File(...)
):
    # Synthesize `text` in `language` using the uploaded `speaker_file` as the
    # reference voice, and return the result as a WAV download.
    #
    # Both scratch files are named with a fresh UUID so concurrent requests
    # never collide, and both are removed once they are no longer needed.
    job_id = str(uuid.uuid4())
    temp_voice_path = f"voice_{job_id}.wav"
    output_path = f"out_{job_id}.wav"

    try:
        # Persist the upload to disk: tts_to_file takes a path for speaker_wav.
        with open(temp_voice_path, "wb") as buffer:
            shutil.copyfileobj(speaker_file.file, buffer)

        tts_model.tts_to_file(
            text=text,
            speaker_wav=temp_voice_path,
            language=language,
            file_path=output_path
        )
    except Exception:
        # Synthesis failed: do not leave a partial output file behind.
        _remove_if_exists(output_path)
        raise
    finally:
        # The reference sample is only needed while synthesis runs.
        _remove_if_exists(temp_voice_path)

    # The output must outlive this function because FileResponse streams it
    # afterwards, so deletion is deferred to a background task that runs once
    # the response has been sent.
    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename="speech.wav",
        background=BackgroundTask(_remove_if_exists, output_path),
    )
""")
print("‚úÖ Bridge script created.")

In [None]:
# @title 3. Start XTTS Bridge
# Opens an ngrok tunnel to local port 8001, prints its public URL, then runs
# the bridge server in the foreground using the virtual environment's uvicorn.
import sys
import subprocess
import time

# If pyngrok cannot be imported, install it into the interpreter running this
# notebook (sys.executable) and retry the import.
try:
    from pyngrok import ngrok
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pyngrok"])
    from pyngrok import ngrok

from google.colab import userdata

# Read the token from Colab Secrets. On any failure (secret missing, notebook
# access not granted, empty value) print the hint and re-raise, so the cell
# stops here with the real cause instead of failing later inside
# ngrok.connect() without credentials.
try:
    NGROK_TOKEN = userdata.get('NGROK_TOKEN')
    if not NGROK_TOKEN:
        raise ValueError("NGROK_TOKEN secret is empty")
    ngrok.set_auth_token(NGROK_TOKEN)
except Exception:
    print("‚ùå ERROR: NGROK_TOKEN not found in Secrets!")
    raise

# Stop any ngrok process before connecting -- presumably to clear a tunnel
# left over from an earlier run of this cell.
ngrok.kill()
public_url = ngrok.connect(8001).public_url

# The URL is printed before the server starts because the uvicorn command
# below runs in the foreground and blocks the cell; the endpoint only answers
# once the server process has finished loading.
print("="*50)
print(f"\nüöÄ XTTS BRIDGE ONLINE!\n")
print(f"URL: {public_url}\n")
print("="*50)

# Run server
!./xtts_env/bin/uvicorn bridge_server:app --host 0.0.0.0 --port 8001 --log-level error