# Launch Kokoro FastAPI GPU on Google Colab

This notebook launches the GPU-accelerated version of the Kokoro TTS API and exposes it through an ngrok tunnel (optionally on a static custom domain) for use in SillyTavern or other applications.

**Important:** Before running the cell, set your ngrok authentication token in the code below. Setting a custom domain is optional.

**Just run the cell below to start everything!**

In [None]:
# All-in-one Kokoro FastAPI GPU launcher
# Runs top to bottom in a single notebook cell: installs dependencies, starts
# the server, waits for it to come up, then opens an ngrok tunnel to port 8880.

# ===== CONFIGURATION - EDIT THESE VALUES =====
# Your ngrok authentication token (required). The tunnel setup refuses to run
# while this is empty or still holds the placeholder value below.
NGROK_AUTH_TOKEN = "your_ngrok_auth_token_here"

# Your ngrok custom domain (optional, leave empty if you don't have one)
# Example: "your-subdomain.ngrok.io"
NGROK_CUSTOM_DOMAIN = ""
# ============================================

# Install required packages first (the leading "!" runs the line as a shell
# command from the notebook rather than as Python)
!pip install -q docker requests loguru pyngrok

# Import libraries (must come after the pip install above so that requests
# and pyngrok are available)
import subprocess, time, json, requests, threading, os, sys
from pyngrok import ngrok, conf
print("🚀 Starting Kokoro FastAPI GPU setup...")

# Function to run a direct command and return output
def run_cmd(cmd):
    """Run *cmd* through the shell and return its combined stdout/stderr text.

    When the command exits with a non-zero status, the captured output is
    printed with a "Command failed" prefix and still returned, so the caller
    always receives a string.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into the returned text
        universal_newlines=True,
    )
    if completed.returncode != 0:
        print(f"Command failed: {completed.stdout}")
    return completed.stdout

# Install the tooling used to run the server: espeak-ng via apt and uv via pip
!apt-get update -qq && apt-get install -y -qq espeak-ng
!pip install uv

# Fetch the Kokoro-FastAPI repo and download the model files.
# Any existing checkout is deleted first so each run starts from a clean clone.
!rm -rf Kokoro-FastAPI
!git clone https://github.com/remsky/Kokoro-FastAPI.git
# Run the repo's download script, writing the model under api/src/models/v1_0
!cd Kokoro-FastAPI && pip install loguru && python docker/scripts/download_model.py --output api/src/models/v1_0

# Shared state written by the server log reader and read by wait_for_api():
# server_ready becomes True once startup has been detected, and server_status
# holds a human-readable label for the most recent startup stage seen.
server_ready = False
server_status = "Starting"

# Launch Kokoro using a proper background thread approach instead of relying on bash &
def start_kokoro_server():
    """Launch the Kokoro FastAPI GPU server and mirror its log output.

    Runs ``start-gpu.sh`` from the ``Kokoro-FastAPI`` directory under the
    current working directory as a shell subprocess, with stderr merged into
    stdout. A daemon thread echoes every output line prefixed with
    ``"Server: "`` and updates the module-level ``server_status`` and
    ``server_ready`` globals when it recognises known startup log messages.

    Returns:
        The ``subprocess.Popen`` handle of the launched shell command.
    """
    import shlex  # local import: only needed to quote the directory path

    print("\nStarting Kokoro FastAPI server in background thread...")
    kokoro_dir = os.path.join(os.getcwd(), "Kokoro-FastAPI")
    # Quote the path so a working directory containing spaces or shell
    # metacharacters cannot break (or be injected into) the command line.
    cmd = f"cd {shlex.quote(kokoro_dir)} && bash start-gpu.sh"
    # Output is piped (stderr folded into stdout) and drained by the reader
    # thread below, so the child never blocks on a full pipe.
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Log fragments that only advance the status label, checked in order.
    stage_markers = (
        ('Loading TTS model', "Loading model"),
        ('Loading Kokoro model', "Loading Kokoro"),
        ('Creating new pipeline', "Creating pipeline"),
        ('Generating audio for text', "Generating warmup audio"),
        ('Got audio chunk with shape', "Finishing warmup"),
        ('Warmup completed', "Warmup completed"),
    )

    def log_output():
        """Echo server output line by line and track startup progress."""
        global server_ready, server_status
        # Close the pipe once the child reaches EOF so the handle is not leaked.
        with process.stdout:
            for line in iter(process.stdout.readline, b''):
                line_str = line.decode('utf-8', errors='replace').strip()
                print(f"Server: {line_str}")

                # Track server progress through log messages
                for marker, status in stage_markers:
                    if marker in line_str:
                        server_status = status
                        break
                else:
                    if '0.0.0.0:8880' in line_str and 'Uvicorn running' in line_str:
                        server_status = "Server running"
                        print("✓ Server started successfully and is listening on port 8880")
                    elif 'Application startup complete' in line_str:
                        server_status = "Ready"
                        server_ready = True
                        print("✓ Kokoro API is now fully initialized and ready!")

    # Daemon thread: it must not keep the interpreter alive on its own.
    threading.Thread(target=log_output, daemon=True).start()
    return process

# Launch the server as a subprocess now; a background thread echoes its output
server_process = start_kokoro_server()

# Function to wait for API to be ready with better diagnostics
def wait_for_api(max_attempts=180):
    """Wait until the Kokoro API is ready, the server exits, or attempts run out.

    Each attempt first checks the ``server_ready`` flag (set when readiness is
    detected in the server logs), then probes
    ``http://localhost:8880/v1/audio/voices``. Attempts are separated by a
    one-second sleep. If the server process exits while waiting, the wait is
    abandoned immediately instead of polling a dead server until the limit.

    After the attempts are exhausted with the server still running, one final
    probe with a longer timeout is made and the user is asked whether to
    continue anyway.

    Args:
        max_attempts: Maximum number of polling attempts.

    Returns:
        True if the API responded with HTTP 200, readiness was detected from
        the logs, or the user chose to continue; False otherwise.
    """
    global server_ready
    voices_url = "http://localhost:8880/v1/audio/voices"

    print("Waiting for Kokoro API to initialize...")
    for attempt in range(1, max_attempts + 1):
        # First check if we've detected readiness from logs
        if server_ready:
            print("✓ Kokoro API is ready (detected from logs)!")
            return True

        # Then try API connection
        try:
            response = requests.get(voices_url, timeout=2)
        except requests.exceptions.RequestException:
            if attempt % 10 == 0:  # Only print every 10 attempts to avoid flooding
                print(f"Still waiting for API ({attempt}/{max_attempts}s): Current status: {server_status}")
        else:
            if response.status_code == 200:
                print(f"✓ Kokoro API is ready after {attempt} seconds!")
                server_ready = True
                return True
            print(f"API returned status code {response.status_code}, retrying...")

        # A dead server will never become ready; stop polling right away.
        if server_process.poll() is not None:
            break
        time.sleep(1)

    exit_code = server_process.poll()
    if exit_code is not None:
        print(f"⚠️ Server process has exited with code {exit_code}")
        return False

    print(f"⚠️ API initialization timeout, but server is still running (status: {server_status})")
    # Let's try one more time with a longer timeout
    try:
        print("Making one final attempt with a longer timeout...")
        response = requests.get(voices_url, timeout=10)
        if response.status_code == 200:
            print("✓ API is actually responding - continuing despite timeout!")
            server_ready = True
            return True
    except requests.exceptions.RequestException as e:
        print(f"Final attempt failed: {str(e)[:100]}")

    # Ask user if they want to continue anyway. Only ordinary errors are
    # swallowed (e.g. a front end that cannot provide stdin); an interrupt
    # at the prompt still propagates.
    try:
        user_input = input("Server appears to be starting but taking longer than expected.\nDo you want to continue anyway? (y/n): ")
    except Exception:
        user_input = ""
    if user_input.lower() in ('y', 'yes'):
        print("Continuing despite timeout...")
        return True

    print(f"⚠️ API initialization timed out after {max_attempts} seconds.")
    return False

# Function to test the API
def test_api(api_url="http://localhost:8880", timeout=120):
    """Smoke-test the Kokoro API: list voices, then synthesize a short clip.

    Fetches ``/v1/audio/voices`` and prints a summary of the response, then
    posts a fixed sentence to ``/v1/audio/speech``. On success the returned
    audio bytes are written to ``test_output.mp3`` in the working directory.

    Args:
        api_url: Base URL of the API, without a trailing slash.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        True if the speech request returned HTTP 200, False on any other
        status code or on any error.
    """
    response = None  # kept so the error handler can show the last response
    try:
        # Get available voices
        response = requests.get(f"{api_url}/v1/audio/voices", timeout=timeout)
        response_json = response.json()
        # First, print the structure to debug
        if isinstance(response_json, dict):
            print(f"API response structure: {list(response_json.keys())}")

        # Handle different possible response structures
        if isinstance(response_json, dict) and isinstance(response_json.get("voices"), list):
            voices = response_json["voices"]
            shown = voices[:5]
            if voices and isinstance(voices[0], dict) and "voice_id" in voices[0]:
                names = [v['voice_id'] for v in shown]
            elif voices and all(isinstance(v, str) for v in shown):
                # Voices given as plain identifier strings
                names = list(shown)
            else:
                names = None

            if names is None:
                # List but different structure
                print(f"Found {len(voices)} voices in a different format than expected")
            else:
                remaining = len(voices) - len(shown)
                listing = ', '.join(names)
                if remaining > 0:
                    print(f"Available voices: {listing}... (and {remaining} more)")
                else:
                    # Avoid reporting a negative "more" count for short lists
                    print(f"Available voices: {listing}")
        else:
            # Different structure entirely
            print(f"Found voice data in a different format: {str(response_json)[:200]}...")

        # Test text generation
        print("\nGenerating test audio... (this tests the API is working)")
        response = requests.post(
            f"{api_url}/v1/audio/speech",
            json={
                "model": "kokoro",
                "input": "Hello, this is a test of the Kokoro text-to-speech API!",
                "voice": "af_bella",
                "response_format": "mp3",
            },
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"⚠️ API test failed with status code {response.status_code}: {response.text}")
            return False

        print("✓ API test successful! Audio generated correctly.")
        # Save test audio
        with open("test_output.mp3", "wb") as f:
            f.write(response.content)
        print("✓ Test audio saved to test_output.mp3")
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic helper: report and signal failure.
        print(f"⚠️ Error testing API: {str(e)}")
        print(f"Exception details: {type(e).__name__}")
        # Show the last response body if one was received
        if response is not None:
            print(f"Response text: {response.text[:200]}...")
        return False

# Function to start the ngrok tunnel with custom domain support
def start_ngrok_tunnel():
    """Open an ngrok tunnel to local port 8880 and return its public URL.

    Uses the module-level ``NGROK_AUTH_TOKEN`` for authentication. When
    ``NGROK_CUSTOM_DOMAIN`` is set, the tunnel is first requested on that
    domain; if that fails, a regular tunnel is opened instead.

    Returns:
        The tunnel's public URL as a string, or None if the token is not
        configured or the tunnel could not be established.
    """
    if NGROK_CUSTOM_DOMAIN:
        print("\nSetting up ngrok tunnel with custom static domain...")
    else:
        print("\nSetting up ngrok tunnel...")

    # Refuse to continue with a missing or placeholder token
    if not NGROK_AUTH_TOKEN or NGROK_AUTH_TOKEN == "your_ngrok_auth_token_here":
        print("\n⚠️ Error: ngrok authentication token not configured!")
        print("Please edit the NGROK_AUTH_TOKEN variable at the top of this notebook.")
        return None

    try:
        # Set the environment variable and authenticate with ngrok
        os.environ['NGROK_AUTHTOKEN'] = NGROK_AUTH_TOKEN
        ngrok.set_auth_token(NGROK_AUTH_TOKEN)
        print("✓ Ngrok authentication configured successfully")

        # Start the tunnel with optional custom domain
        tunnel = None
        if NGROK_CUSTOM_DOMAIN:
            try:
                print(f"Attempting to use custom domain: {NGROK_CUSTOM_DOMAIN}")
                tunnel = ngrok.connect(addr=8880, domain=NGROK_CUSTOM_DOMAIN)
                print(f"✓ Ngrok tunnel established with custom domain: {NGROK_CUSTOM_DOMAIN}")
            except Exception as e:
                print(f"Failed to use custom domain: {str(e)}")
                print("Falling back to regular ngrok tunnel...")
        else:
            print("No custom domain configured, using standard ngrok URL")

        # Either no domain was requested or the domain attempt failed
        if tunnel is None:
            tunnel = ngrok.connect(8880)

        # Get the public URL
        public_url = tunnel.public_url

        # Display tunnel information
        print(f"\n🚀 Kokoro API is now available at: {public_url}")
        print(f"\nTest URL: {public_url}/v1/audio/voices")
        print(f"Web UI: {public_url}/web")
        print(f"API Docs: {public_url}/docs")
        print("\nUse this URL in SillyTavern or other applications as an OpenAI-compatible TTS endpoint")

        # Show active tunnels
        print("\nActive ngrok tunnels:")
        for t in ngrok.get_tunnels():
            print(f" - {t.public_url} -> {t.config['addr']}")

        return public_url
    except Exception as e:
        # Boundary handler: any ngrok failure is reported and turned into None
        print(f"⚠️ Error setting up ngrok tunnel: {str(e)}")
        return None

# Give the server subprocess a head start before polling it
print("Giving the server a moment to start...")
time.sleep(10)

# Wait for the API to be ready
api_ready = wait_for_api()
print(f"API Ready status: {api_ready}")

if api_ready:
    # Test local API first; a failed test is reported but does not block the tunnel
    local_test = test_api()
    print(f"Local API test: {'✓ Passed' if local_test else '❌ Failed'}")

    # Start ngrok tunnel
    print("\nStarting ngrok tunnel to make the API publicly accessible...")
    public_url = start_ngrok_tunnel()

    if public_url:
        print(f"\n✅ Setup complete! The Kokoro API is now running and accessible at {public_url}")
        print("Keep this notebook running to maintain the API connection.")
        print("The ngrok tunnel will remain active as long as this notebook is running.")

        # Keep the cell busy with a status line once a minute, but stop
        # claiming the server is up once its process has actually exited.
        try:
            while True:
                time.sleep(60)
                exit_code = server_process.poll()
                if exit_code is not None:
                    print(f"\n❌ Kokoro server process exited with code {exit_code}; the API is no longer available.")
                    break
                print("Server still running - API remains available at the ngrok URL")
        except KeyboardInterrupt:
            print("Notebook stopped by user")
    else:
        print("\n❌ Failed to start ngrok tunnel. Please check the logs above for details.")
else:
    # Even if API didn't respond in time, still try ngrok if server is running
    if server_process.poll() is None:
        print("\n⚠️ API didn't respond in time, but server is still running.")
        print("Attempting to start ngrok tunnel anyway...")
        public_url = start_ngrok_tunnel()
        if public_url:
            print(f"\n⚠️ Ngrok tunnel established at {public_url}, but API might not be fully ready.")
            print("The server may need more time to initialize. Try accessing the URL in a few minutes.")
    else:
        print("\n❌ Failed to start Kokoro API. Please check the logs above for details.")
        print("You might want to try running the cell again or check for any error messages.")

To keep this Google Colab session alive, run the following JavaScript code in the browser console: `function ClickConnect() { console.log("Clicked on connect button"); document.querySelector("colab-toolbar-button#connect").click() } setInterval(ClickConnect, 60000)`

## Guide: Creating a Free Ngrok Account, Getting the Auth Token, and Reserving a Free Custom Static Domain

### Step 1: Create a Free Ngrok Account
1. Go to the [ngrok website](https://ngrok.com/).
2. Click on the "Sign Up" button.
3. Fill in the required information to create a free account.
4. Verify your email address if required.

### Step 2: Get Your Ngrok Auth Token
1. Log in to your ngrok account.
2. Go to the "Auth" section in the dashboard.
3. Copy your auth token from the provided field.

### Step 3: Create a Free Custom Static Ngrok Domain
1. In the ngrok dashboard, go to the "Reserved" section.
2. Click on "Reserve a Domain".
3. Choose a subdomain name and reserve it.
4. Your custom static ngrok domain will be something like `your-subdomain.ngrok.io`.

### Step 4: Set Up Ngrok in the Notebook
1. In the code cell at the top of this notebook, set the `NGROK_AUTH_TOKEN` variable to your auth token.
2. Set the `NGROK_CUSTOM_DOMAIN` variable to your reserved custom domain (optional).
3. Run the notebook to start the Kokoro FastAPI server with the ngrok tunnel.