# Eryx Swahili TTS v1 - Interactive Testing

Test the Swahili text-to-speech model with custom text.

In [1]:
import os
import sys
sys.path.insert(0, '..')

import torch
import torchaudio
from IPython.display import Audio, display, Markdown
from pathlib import Path

## 1. Play Existing Test Audio

In [2]:
# Pre-rendered test clips, each paired with the Swahili sentence it speaks
test_samples = [
    ("../checkpoints/test_outputs/swahili_test_1.wav", "Habari yako, mimi ni msaidizi wa Kiswahili."),
    ("../checkpoints/test_outputs/swahili_test_2.wav", "Karibu sana katika Eryx Labs."),
    ("../checkpoints/test_outputs/swahili_test_3.wav", "Teknolojia ya akili bandia inabadilisha dunia."),
]

for audio_path, text in test_samples:
    if not Path(audio_path).exists():
        # Report missing clips instead of skipping them silently, so an empty
        # cell output is not mistaken for a successful run.
        print(f"Missing audio file, skipping: {audio_path}")
        continue
    display(Markdown(f"**Text:** {text}"))
    # Be explicit that this argument is a file path, not raw sample data
    display(Audio(filename=audio_path))
    print("---")

**Text:** Habari yako, mimi ni msaidizi wa Kiswahili.

---


**Text:** Karibu sana katika Eryx Labs.

---


**Text:** Teknolojia ya akili bandia inabadilisha dunia.

---


## 2. Load Model for Custom Synthesis

In [3]:
# Patch torch.load for compatibility: recent PyTorch releases (2.6+) default
# to weights_only=True, so this wrapper restores weights_only=False whenever
# the caller does not pass the argument explicitly.
#
# SECURITY NOTE: weights_only=False allows full pickle deserialization, which
# can execute arbitrary code. Only load checkpoints from sources you trust.
#
# The marker attribute keeps the cell idempotent: re-running it does not wrap
# the already-patched function a second time.
if not getattr(torch.load, "_weights_only_default_patched", False):
    _original_torch_load = torch.load

    def _patched_torch_load(*args, **kwargs):
        """Call the original ``torch.load``, defaulting ``weights_only`` to False.

        An explicit ``weights_only`` keyword from the caller is left untouched.
        """
        kwargs.setdefault('weights_only', False)
        return _original_torch_load(*args, **kwargs)

    _patched_torch_load._weights_only_default_patched = True
    torch.load = _patched_torch_load

from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

# Load XTTS-v2 model
# The default location is a macOS application-support path. On other systems
# (or for a custom download location) set the XTTS_MODEL_DIR environment
# variable, introduced by this notebook, to the directory that holds
# config.json and the checkpoint files.
model_path = os.environ.get("XTTS_MODEL_DIR") or os.path.expanduser(
    "~/Library/Application Support/tts/tts_models--multilingual--multi-dataset--xtts_v2"
)

# Fail early with an actionable message rather than deep inside the loader
config_path = os.path.join(model_path, "config.json")
if not os.path.isfile(config_path):
    raise FileNotFoundError(
        f"XTTS-v2 config not found at {config_path!r}. Download the model or set "
        "XTTS_MODEL_DIR to the directory containing config.json."
    )

print("Loading XTTS-v2 model...")
config = XttsConfig()
config.load_json(config_path)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=model_path, eval=True, use_deepspeed=False)
print("Model loaded!")

  from .autonotebook import tqdm as notebook_tqdm


Loading XTTS-v2 model...
Model loaded!


In [4]:
# Load Swahili speaker embeddings
embeddings_path = "../checkpoints/speaker_embeddings/swahili_speaker.pt"
# Map tensors onto the CPU so a file that was saved on another device (e.g. a
# GPU) still deserializes on machines without that device. No cell in this
# notebook moves the model to a different device.
embeddings = torch.load(embeddings_path, map_location="cpu")
gpt_cond_latent = embeddings['gpt_cond_latent']
speaker_embedding = embeddings['speaker_embedding']
print(f"Loaded speaker embeddings from: {embeddings_path}")

Loaded speaker embeddings from: ../checkpoints/speaker_embeddings/swahili_speaker.pt


## 3. Synthesize Custom Swahili Text

In [5]:
def synthesize_swahili(text: str, language: str = "en", sample_rate: int = 24000) -> Audio:
    """Synthesize Swahili text to speech and return a playable audio object.

    Uses the notebook-level ``model``, ``gpt_cond_latent`` and
    ``speaker_embedding`` loaded in the earlier cells.

    Args:
        text: The text to speak. Must contain non-whitespace characters.
        language: Language code passed to the model. Defaults to 'en'
            since XTTS-v2 doesn't support 'sw'.
        sample_rate: Playback rate in Hz for the returned audio. Defaults to
            24000, the rate this notebook uses for the model output.

    Returns:
        An IPython ``Audio`` object built from the ``"wav"`` entry of the
        model's inference result.

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
    """
    # Reject empty input up front with a clear message instead of handing it to the model
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    out = model.inference(
        text=text,
        language=language,
        gpt_cond_latent=gpt_cond_latent,
        speaker_embedding=speaker_embedding,
    )
    return Audio(out["wav"], rate=sample_rate)

In [6]:
# Sample Swahili phrases to synthesize - edit this list to try your own!
custom_texts = [
    "Jambo! Habari za asubuhi?",
    "Ninafuraha kukutana nawe.",
    "Kenya ni nchi nzuri sana.",
    "Asante sana kwa msaada wako.",
]

# For every phrase: show the text, synthesize it, then render the player
for phrase in custom_texts:
    caption = Markdown(f"**Text:** {phrase}")
    display(caption)
    audio = synthesize_swahili(phrase)
    display(audio)
    print("---")

**Text:** Jambo! Habari za asubuhi?

---


**Text:** Ninafuraha kukutana nawe.

---


**Text:** Kenya ni nchi nzuri sana.

---


**Text:** Asante sana kwa msaada wako.

---


## 4. Interactive Input

In [9]:
# Put the Swahili sentence you want to hear in `my_text`
my_text = "Habari yako, jina langu ni Eryx. Welcome sana!"

# Show the input as a heading, then synthesize it and render the player
heading = Markdown(f"### Input: {my_text}")
display(heading)
audio = synthesize_swahili(my_text)
display(audio)

### Input: Habari yako, jina langu ni Eryx. Welcome sana!

## 5. Save Custom Audio

In [None]:
def save_audio(text: str, filename: str, language: str = "en", sample_rate: int = 24000):
    """Synthesize and save audio to file.

    Uses the notebook-level ``model``, ``gpt_cond_latent`` and
    ``speaker_embedding`` loaded in the earlier cells.

    Args:
        text: The text to speak. Must contain non-whitespace characters.
        filename: Destination path of the audio file. Missing parent
            directories are created.
        language: Language code passed to the model. Defaults to 'en',
            the value this notebook uses for Swahili text.
        sample_rate: Sample rate in Hz written to the file. Defaults to
            24000, the rate this notebook uses for the model output.

    Returns:
        An IPython ``Audio`` object that plays the saved file.

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
    """
    # Reject empty input before running inference or touching the filesystem
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    out = model.inference(
        text=text,
        language=language,
        gpt_cond_latent=gpt_cond_latent,
        speaker_embedding=speaker_embedding,
    )

    # Make sure the output folder exists so saving into a new directory works
    Path(filename).parent.mkdir(parents=True, exist_ok=True)

    # Add a leading axis so the waveform is 2-D before handing it to torchaudio.save
    waveform = torch.as_tensor(out["wav"]).unsqueeze(0)
    torchaudio.save(filename, waveform, sample_rate)
    print(f"Saved: {filename}")
    return Audio(filename=filename)

# Example: save custom audio
# audio = save_audio("Karibu Tanzania!", "../checkpoints/test_outputs/custom_output.wav")