In [None]:
# --- 1. Provide your ngrok Authtoken ---
# Paste your token from https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_AUTHTOKEN = "30OkHyquCV644UVTcVqMjCCTP48_wLu147RoDuXk56Y7SR4L"

# --- 2. Install all necessary libraries ---
# This installs FastAPI, the correct TTS version, pyngrok, and Bark dependencies
!pip uninstall -y torch TTS
!pip install "fastapi[all]"
!pip install "TTS==0.22.0"
!pip install pyngrok
!pip install soundfile scipy
!pip install transformers==4.33.0 # Downgrade transformers to a compatible version


# --- 3. Authenticate ngrok ---
from pyngrok import ngrok
ngrok.set_auth_token(NGROK_AUTHTOKEN)

print("âœ… Setup and Authentication Complete! You can now run the application cell.")

Found existing installation: torch 2.7.1
Uninstalling torch-2.7.1:
  Successfully uninstalled torch-2.7.1
Found existing installation: TTS 0.22.0
Uninstalling TTS-0.22.0:
  Successfully uninstalled TTS-0.22.0
Collecting TTS==0.22.0
  Using cached TTS-0.22.0-cp311-cp311-manylinux1_x86_64.whl.metadata (21 kB)
Collecting torch>=2.1 (from TTS==0.22.0)
  Using cached torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)
Using cached TTS-0.22.0-cp311-cp311-manylinux1_x86_64.whl (937 kB)
Using cached torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl (821.2 MB)
Installing collected packages: torch, TTS
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 4.1.0 requires transformers<5.0.0,>=4.41.0, but you have transformers 4.33.0 which is incompatible.
fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompat

In [None]:
import re
import shutil
import uuid
from pathlib import Path

import nest_asyncio
import torch
import uvicorn
from fastapi import BackgroundTasks, FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware # Import CORS middleware
from fastapi.responses import FileResponse, JSONResponse
from pyngrok import ngrok
from TTS.api import TTS

# --- 1. FastAPI App Setup with CORS ---
app = FastAPI()

# Add CORS middleware to allow browser requests from any origin.
# Credentials (cookies, Authorization headers) are deliberately NOT allowed:
# FastAPI's CORS documentation states that wildcard origins/methods/headers
# must not be combined with allow_credentials=True, and this API does not use
# cookies or authentication, so nothing depends on credentialed requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=False,  # Must stay False while origins are wildcarded
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# --- 2. Model Loading ---
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

print("Loading YourTTS model...")
try:
    tts = TTS(
        "tts_models/multilingual/multi-dataset/your_tts",
        gpu=(device == "cuda"),
    )
    print("âœ… Model loaded successfully!")
except Exception as load_error:
    # Best effort: report the failure and leave `tts` as None so the
    # /synthesize/ endpoint can answer with an error instead of crashing.
    print(f"An error occurred while loading the model: {load_error}")
    tts = None

# --- 3. API Endpoints ---
# Directory (relative to the working directory) used for uploaded samples
# and generated audio; created on first run.
output_dir = Path("api_outputs")
output_dir.mkdir(exist_ok=True)

@app.get("/")
async def root():
    """Landing endpoint: report that the API is up and point to the /docs page."""
    status_payload = {
        "message": "Voice Cloning API with YourTTS is running. Go to /docs to test."
    }
    return status_payload


def _safe_upload_suffix(filename: str | None) -> str:
    """Return a harmless file extension derived from a client-supplied filename.

    Only a short alphanumeric extension is kept; anything else (missing name,
    no extension, unusual characters) falls back to ".wav".
    """
    suffix = Path(filename or "").suffix.lower()
    return suffix if re.fullmatch(r"\.[a-z0-9]{1,8}", suffix) else ".wav"


@app.post("/synthesize/")
async def synthesize_voice(
    text: str = Form(...),
    voice_sample: UploadFile = File(...)
):
    """Synthesize `text` in the voice of the uploaded `voice_sample`.

    Args:
        text: The text to speak (form field).
        voice_sample: Reference audio of the voice to clone (file upload).

    Returns:
        A WAV file download named "cloned_voice.wav" on success, or a JSON
        body with an "error" key and status 500 when the model is not loaded
        or when saving the sample / synthesis fails.
    """
    if tts is None:
        return JSONResponse(status_code=500, content={"error": "Model is not loaded."})

    # The client-controlled filename is never used as a path (it could contain
    # "../" or be absolute). Every request gets its own file names so that
    # concurrent requests cannot overwrite each other's sample or output.
    request_id = uuid.uuid4().hex
    speaker_wav_path = output_dir / f"{request_id}_speaker{_safe_upload_suffix(voice_sample.filename)}"
    output_wav_path = output_dir / f"{request_id}_output.wav"

    try:
        with open(speaker_wav_path, "wb") as f:
            shutil.copyfileobj(voice_sample.file, f)

        print(f"Synthesizing text: '{text}'")
        tts.tts_to_file(
            text=text,
            file_path=str(output_wav_path),
            speaker_wav=str(speaker_wav_path),
            language="en"
        )
        print("âœ… Synthesis complete!")
    except Exception as e:
        # Do not leave a partially written output behind on failure.
        output_wav_path.unlink(missing_ok=True)
        return JSONResponse(status_code=500, content={"error": f"An internal error occurred: {e}"})
    finally:
        # The reference sample is only needed while synthesizing.
        speaker_wav_path.unlink(missing_ok=True)

    # Delete the generated file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(output_wav_path.unlink, missing_ok=True)
    return FileResponse(
        path=str(output_wav_path),
        media_type="audio/wav",
        filename="cloned_voice.wav",
        background=cleanup,
    )

# --- 4. Run the App ---
# The notebook kernel already runs an asyncio event loop; nest_asyncio is
# applied so uvicorn.run() can be called from inside this cell.
nest_asyncio.apply()

# ngrok.connect() returns an NgrokTunnel object, not a string. Formatting the
# object itself prints its repr (NgrokTunnel: "https://..." -> "http://...")
# and produces an unusable "/docs" link, so the address is read from its
# .public_url attribute. The `public_url` name stays bound to the tunnel
# object so any later cell that uses it keeps working.
public_url = ngrok.connect(8000)
print(f"ðŸš€ FastAPI server is live at: {public_url.public_url}")
print(f"ðŸ“œ Swagger UI (for testing): {public_url.public_url}/docs")

# Serves the app on port 8000, the same port the ngrok tunnel forwards to.
uvicorn.run(app, host="0.0.0.0", port=8000)

  import pkg_resources


Using device: cpu
Loading YourTTS model...
 > tts_models/multilingual/multi-dataset/your_tts is already downloaded.
 > Using model: vits
 > Setting up Audio Processor...
 | > sample_rate:16000
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:0
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:None
 | > fft_size:1024
 | > power:None
 | > preemphasis:0.0
 | > griffin_lim_iters:None
 | > signal_norm:None
 | > symmetric_norm:None
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:None
 | > pitch_fmax:None
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:1.0
 | > clip_norm:True
 | > do_trim_silence:False
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024
 > Model fully restored. 
 > Setting up Audio Processor...
 | > sample_rate:16000
 | > resample:False
 | > num_mels:64

INFO:     Started server process [30680]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


ðŸš€ FastAPI server is live at: NgrokTunnel: "https://81226f981c70.ngrok-free.app" -> "http://localhost:8000"
ðŸ“œ Swagger UI (for testing): NgrokTunnel: "https://81226f981c70.ngrok-free.app" -> "http://localhost:8000"/docs




Synthesizing text: 'Hello, this is a clone of my voice. I can say anything you want.'
 > Text splitted to sentences.
['Hello, this is a clone of my voice.', 'I can say anything you want.']
 > Processing time: 4.329472303390503
 > Real-time factor: 0.8839265625542064
âœ… Synthesis complete!
INFO:     152.59.21.235:0 - "POST /synthesize/ HTTP/1.1" 200 OK
Synthesizing text: 'Hello, this is a clone of my voice. I can say anything you want.I am rajesh patel I want to do lot's of thing in my life can you help me to this '
 > Text splitted to sentences.
['Hello, this is a clone of my voice.', 'I can say anything you want.', "I am rajesh patel I want to do lot's of thing in my life can you help me to this"]
 > Processing time: 7.145496845245361
 > Real-time factor: 0.6889882215066397
âœ… Synthesis complete!
INFO:     152.59.21.235:0 - "POST /synthesize/ HTTP/1.1" 200 OK
Synthesizing text: 'Hey I am Kairavi ,I want to do lot's of things in my life ,My friend name Divy But I don't like him any m