In [None]:
!pip install fastapi uvicorn pyngrok deep-translator langdetect soundfile kokoro


Collecting fastapi
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting kokoro
  Downloading kokoro-0.9.2-py3-none-any.whl.metadata (21 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)
Collecting loguru (from kokoro)
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting misaki>=0.9.2 (from misaki[en]>=0.9.2->kokoro)
  Downloading misaki-0.9.3-py3-none-any.whl.metadata (

In [None]:
# Removed: a raw ngrok auth token had been pasted into this cell. It is not valid
# Python (the cell fails with a SyntaxError on Restart & Run All) and it exposes
# a credential to anyone who can read the notebook. Revoke that token in the
# ngrok dashboard and keep tokens out of the notebook (e.g. in an environment
# variable or a secrets manager).

In [None]:
import os
import uuid
import numpy as np
import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from langdetect import detect
from deep_translator import GoogleTranslator
from kokoro import KPipeline
from fastapi.staticfiles import StaticFiles
import uvicorn
from pyngrok import ngrok
import nest_asyncio

# Apply nest_asyncio patch (needed for Colab/Jupyter or similar environments,
# where an event loop is already running when the server is started).
nest_asyncio.apply()

# Read the ngrok authtoken from the environment instead of hardcoding it in the
# notebook: a token committed with the notebook is readable by anyone who can
# see the file. Any token previously stored here should be revoked.
_ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN")
if not _ngrok_authtoken:
    raise RuntimeError(
        "NGROK_AUTHTOKEN is not set. Set the environment variable "
        "(e.g. os.environ['NGROK_AUTHTOKEN'] = ...) before running this cell."
    )
ngrok.set_auth_token(_ngrok_authtoken)

# Create the FastAPI app instance
app = FastAPI()

# Enable CORS for all origins (adjust for production if needed).
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; restrict allow_origins to the real frontend before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Start the ngrok tunnel early so that public_url is available for all endpoints
# (the TTS endpoint embeds it in the audio URLs it returns).
public_url = ngrok.connect(8000).public_url
print(f'ngrok tunnel "{public_url}" -> "http://127.0.0.1:8000"')
print("Use this public URL in your frontend to access the API.")

# Mapping of target languages to TTS language codes.
# Both Chinese variants ("zh-cn", "zh-tw") share the single "z" code.
LANGUAGE_CODES = {
    "en": "a", "es": "e", "fr": "f", "hi": "h", "it": "i",
    "pt": "p", "ja": "j", "zh-cn": "z", "zh-tw": "z"
}

# Available voices for TTS based on language code and gender.
# The endpoint uses the first entry of the matching list; an empty list
# (e.g. male French, "f") means that combination is rejected.
TOP_VOICES = {
    "a": {"male": ["am_fenrir"], "female": ["af_heart"]},
    "e": {"male": ["em_alex"], "female": ["ef_dora"]},
    "f": {"male": [], "female": ["ff_siwis"]},
    "h": {"male": ["hm_omega"], "female": ["hf_alpha"]},
    "i": {"male": ["im_nicola"], "female": ["if_sara"]},
    "p": {"male": ["pm_alex"], "female": ["pf_dora"]},
    "j": {"male": ["jm_kumo"], "female": ["jf_alpha"]},
    "z": {"male": ["zm_yunxi"], "female": ["zf_xiaoxiao"]}
}

# Ensure the 'audio' directory exists for saving generated files
os.makedirs("audio", exist_ok=True)

# Mount the 'audio' directory to serve static files under the /audio URL prefix
app.mount("/audio", StaticFiles(directory="audio"), name="audio")

# Request model for translation and TTS: the JSON body accepted by the
# /translate_and_tts/ endpoint.
class TranslationRequest(BaseModel):
    # Text to translate; the endpoint auto-detects its source language.
    text: str
    target_language: str  # e.g., "fr", "es"; looked up in LANGUAGE_CODES for TTS
    gender: str           # "male" or "female"; selects the voice list in TOP_VOICES

# Request model for language detection: the JSON body accepted by the
# /detect_language/ endpoint.
class DetectLanguageRequest(BaseModel):
    text: str  # text whose language should be detected

# Cache of KPipeline instances keyed by TTS language code, so a pipeline is
# constructed once per language instead of on every request.
_TTS_PIPELINES = {}


def _get_tts_pipeline(lang_code):
    """Return the cached KPipeline for ``lang_code``, creating it on first use."""
    pipeline = _TTS_PIPELINES.get(lang_code)
    if pipeline is None:
        pipeline = KPipeline(lang_code=lang_code, repo_id="hexgrad/Kokoro-82M")
        _TTS_PIPELINES[lang_code] = pipeline
    return pipeline


@app.post("/translate_and_tts/")
async def translate_and_generate_audio(request: TranslationRequest):
    """Translate the request text and synthesize it to a WAV file.

    The source language is auto-detected, the text is translated to
    ``request.target_language``, spoken with the first voice configured for
    that language and ``request.gender``, and saved under ``audio/``.

    Args:
        request: Body with ``text``, ``target_language`` and ``gender``.

    Returns:
        dict with ``original_language`` (detected code), ``translated_text``
        and ``audio_url`` (public ngrok URL of the generated WAV file).

    Raises:
        HTTPException: 400 if the text is empty or no voice exists for the
            language/gender pair; 500 if no audio is produced or any other
            step fails.
    """
    try:
        print("Starting translation and TTS process...")

        # Reject blank input up front: language detection cannot work on it and
        # would otherwise surface as an opaque 500.
        if not request.text.strip():
            raise HTTPException(status_code=400, detail="Text must not be empty")

        # Detect language from the input text
        detected_lang = detect(request.text)
        print("Detected Language:", detected_lang)

        # Translate text using GoogleTranslator
        translated_text = GoogleTranslator(
            source=detected_lang, target=request.target_language
        ).translate(request.text)
        print("Translated Text:", translated_text)

        # Get the corresponding language code for TTS; unknown target languages
        # fall back to "a" (the code mapped from "en").
        lang_code = LANGUAGE_CODES.get(request.target_language, "a")
        print("Language code for TTS:", lang_code)

        # Select an available voice based on gender
        available_voices = TOP_VOICES.get(lang_code, {}).get(request.gender, [])
        if not available_voices:
            raise HTTPException(status_code=400, detail="No voices available for this language and gender")
        voice = available_voices[0]
        print("Selected voice:", voice)

        # Generate speech audio using the (cached) KPipeline for this language
        pipeline = _get_tts_pipeline(lang_code)
        generator = pipeline(translated_text, voice=voice, speed=1, split_pattern=r'\n+')

        # Collect and concatenate audio segments if there are multiple segments
        audio_segments = [segment for _, _, segment in generator]
        if not audio_segments:
            raise HTTPException(status_code=500, detail="No audio segments generated")
        final_audio = np.concatenate(audio_segments)

        # Save the audio file to disk. The full UUID is used as the name so two
        # requests cannot collide and overwrite each other's file.
        file_id = uuid.uuid4().hex
        file_path = f"audio/{file_id}.wav"
        sf.write(file_path, final_audio, 24000)  # written with a 24000 Hz sample rate
        print("Audio file saved:", file_path)

        # Construct the audio file's public URL using the ngrok public URL
        audio_url = f"{public_url}/audio/{file_id}.wav"
        print("Audio URL:", audio_url)

        # Return the translated text and the public URL for the audio file
        return {
            "original_language": detected_lang,
            "translated_text": translated_text,
            "audio_url": audio_url
        }
    except HTTPException:
        # Deliberate HTTP errors raised above must keep their own status code;
        # without this clause the generic handler below rewrote them as 500s.
        raise
    except Exception as e:
        print("Error during TTS processing:", e)
        raise HTTPException(status_code=500, detail=f"Error: {e}")

@app.post("/detect_language/")
async def detect_language(request: DetectLanguageRequest):
    """Detect the language of ``request.text``.

    Returns:
        dict with a single ``language`` key holding the detected language code.

    Raises:
        HTTPException: 500 with the underlying error message if detection fails.
    """
    try:
        return {"language": detect(request.text)}
    except Exception as err:
        # Log the failure, then report it to the client as a server error.
        print("Error detecting language:", err)
        raise HTTPException(status_code=500, detail=f"Error: {err}")

# Launch Uvicorn on all interfaces, port 8000 (the port the ngrok tunnel
# forwards to). This call blocks until the server is stopped.
if __name__ == "__main__":
    uvicorn.run(
        app,
        port=8000,
        host="0.0.0.0",
    )


ngrok tunnel "https://1ed6-34-106-125-95.ngrok-free.app" -> "http://127.0.0.1:8000"
Use this public URL in your frontend to access the API.


INFO:     Started server process [904]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     14.139.251.152:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /translate_and_tts/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "OPTIONS /detect_language/ HTTP/1

hm_omega.pt:   0%|          | 0.00/523k [00:00<?, ?B/s]

Audio file saved: audio/f65d96fa.wav
Audio URL: https://1ed6-34-106-125-95.ngrok-free.app/audio/f65d96fa.wav
INFO:     14.139.251.152:0 - "POST /translate_and_tts/ HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "GET /audio/f65d96fa.wav HTTP/1.1" 200 OK
INFO:     14.139.251.152:0 - "GET /audio/f65d96fa.wav HTTP/1.1" 206 Partial Content
