# Hindi Voice Cloning TTS with Coqui XTTS v2 (Google Colab)

This notebook targets the **latest Google Colab runtime (Python 3.12)** and uses **XTTS v2** for multilingual voice cloning.

Default output language: **Hindi (`hi`)**.

## 1) Installation

In [None]:
# Upgrade pip
!pip -q install --upgrade pip

# Install PyTorch CUDA 11.8 wheels (T4 compatible in Colab)
!pip -q install --upgrade --index-url https://download.pytorch.org/whl/cu118 torch torchvision torchaudio

# Install Coqui TTS + soundfile (minimal dependencies)
!pip -q install --upgrade TTS soundfile

print('‚úÖ Installation complete: torch/torchvision/torchaudio (cu118), TTS, soundfile')

## 2) Model setup

In [None]:
import torch
from TTS.api import TTS

# Identifier of the XTTS v2 multilingual voice-cloning model to load.
MODEL_NAME = 'tts_models/multilingual/multi-dataset/xtts_v2'

# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

if device == 'cuda':
    print(f'✅ GPU detected: {torch.cuda.get_device_name(0)}')
else:
    print('⚠️ GPU not detected. XTTS can run on CPU but will be much slower.')

# Build the model and move it to the selected device. The resulting `tts`
# object is reused by the UI cell below.
print('⏳ Loading XTTS v2 model...')
tts = TTS(MODEL_NAME).to(device)
print('✅ XTTS v2 model ready.')

## 3) Interactive UI (upload, generate, play, download)

In [None]:
import os
import ipywidgets as widgets
from IPython.display import Audio, display, clear_output

# Language code passed to the model and path of the generated audio file.
DEFAULT_LANGUAGE = 'hi'
OUTPUT_FILE = 'output.wav'

# File picker for the reference voice sample (single .wav file).
upload = widgets.FileUpload(
    accept='.wav',
    multiple=False,
    description='Upload .wav'
)

# Text to synthesize. The default value and placeholder are Hindi
# (Devanagari) text; they must stay valid UTF-8 because the value is passed
# verbatim to the TTS model.
text_area = widgets.Textarea(
    value='नमस्ते! यह हिंदी में XTTS v2 के साथ बनाया गया क्लोन किया गया आवाज़ नमूना है।',
    placeholder='यहाँ हिंदी टेक्स्ट लिखें...',
    description='Story:',
    layout=widgets.Layout(width='100%', height='150px')
)

generate_btn = widgets.Button(
    description='Generate output.wav',
    button_style='success',
    icon='play'
)

# Separate output areas for status messages and for the audio player.
status_out = widgets.Output()
media_out = widgets.Output()

# Starts disabled; it is enabled only after a successful generation.
download_btn = widgets.Button(
    description='Download output.wav',
    button_style='info',
    icon='download',
    disabled=True
)

def _save_uploaded_wav(file_upload_widget):
    """Write the uploaded speaker sample to disk and return where it went.

    Both shapes of ``FileUpload.value`` are accepted:

    * ipywidgets 7.x: a dict mapping filename to
      ``{'metadata': {'name': ...}, 'content': bytes}``.
    * ipywidgets 8.x: a tuple of dicts shaped like
      ``{'name': ..., 'content': memoryview, ...}``.

    Args:
        file_upload_widget: Object whose ``value`` attribute holds the upload.

    Returns:
        A ``(path, filename)`` tuple: the local path the bytes were written
        to and the name of the uploaded file.

    Raises:
        ValueError: If nothing was uploaded or the file name does not end
            with ``.wav`` (case-insensitive).
    """
    uploaded = file_upload_widget.value
    if not uploaded:
        raise ValueError('Please upload a .wav file before generating.')

    if isinstance(uploaded, dict):
        # ipywidgets 7.x layout: the name lives under the 'metadata' key.
        item = next(iter(uploaded.values()))
        filename = item.get('metadata', {}).get('name', 'uploaded.wav')
    else:
        # ipywidgets 8.x layout: a sequence of dicts with a top-level 'name'.
        item = uploaded[0]
        filename = item.get('name', 'uploaded.wav')

    if not filename.lower().endswith('.wav'):
        raise ValueError('Only .wav files are allowed.')

    # Always written to the same fixed path, so a new upload overwrites the
    # previous reference sample.
    temp_path = 'speaker_reference.wav'
    with open(temp_path, 'wb') as f:
        f.write(item['content'])

    return temp_path, filename

def on_generate_clicked(_):
    """Handle a click on the Generate button.

    Validates the text and the uploaded voice sample, synthesizes speech into
    ``OUTPUT_FILE`` with the loaded ``tts`` model, shows an audio player, and
    enables the download button on success. Any failure is reported in the
    status area instead of being raised.

    Args:
        _: The clicked button (unused).
    """
    download_btn.disabled = True
    # Block repeat clicks while a generation is running; re-enabled in the
    # `finally` clause below.
    generate_btn.disabled = True

    with status_out:
        clear_output()
        print('🔎 Validating inputs...')

    with media_out:
        clear_output()

    try:
        text = text_area.value.strip()
        if not text:
            raise ValueError('Text cannot be empty. Please enter Hindi text.')

        speaker_wav_path, uploaded_name = _save_uploaded_wav(upload)

        with status_out:
            print(f'✅ Voice sample accepted: {uploaded_name}')
            print('⏳ Generating speech...')

        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav_path,
            language=DEFAULT_LANGUAGE,
            file_path=OUTPUT_FILE
        )

        with status_out:
            print(f'✅ Done! Saved as {OUTPUT_FILE} (language={DEFAULT_LANGUAGE})')

        with media_out:
            display(Audio(OUTPUT_FILE, autoplay=False))

        download_btn.disabled = False

    except Exception as e:
        # Deliberately broad: this is the UI boundary, so every failure is
        # shown to the user in the status area rather than propagated.
        with status_out:
            print(f'❌ Error: {e}')
    finally:
        generate_btn.disabled = False

def on_download_clicked(_):
    """Handle a click on the Download button.

    Sends ``OUTPUT_FILE`` to the browser through ``google.colab.files``.
    If the file does not exist, or the Colab helper cannot be imported, a
    message is printed in the status area instead.

    Args:
        _: The clicked button (unused).
    """
    if not os.path.exists(OUTPUT_FILE):
        with status_out:
            print(f'⚠️ {OUTPUT_FILE} not found. Please generate audio first.')
        return

    try:
        # Imported lazily so the rest of the cell still works when the
        # google.colab package is not installed.
        from google.colab import files
    except ImportError:
        with status_out:
            print(
                '⚠️ Browser download requires Google Colab. '
                f'The file is saved at {os.path.abspath(OUTPUT_FILE)}'
            )
        return

    files.download(OUTPUT_FILE)

# Assemble the interface top to bottom: title, upload control, text box,
# the two action buttons side by side, then the status and playback areas.
ui = widgets.VBox(
    children=[
        widgets.HTML('<h4>Hindi Voice Clone Generator (XTTS v2)</h4>'),
        upload,
        text_area,
        widgets.HBox(children=[generate_btn, download_btn]),
        widgets.HTML('<b>Status</b>'),
        status_out,
        widgets.HTML('<b>Playback</b>'),
        media_out,
    ]
)

# Connect each button to its click handler.
generate_btn.on_click(on_generate_clicked)
download_btn.on_click(on_download_clicked)

# Render the interface and show the initial hint in the status area.
display(ui)
with status_out:
    print('Ready. Upload a .wav file, enter text, and click Generate output.wav.')