In [None]:
%load_ext autoreload
%autoreload 2

from setup_imports import *  # noqa: F401,F403
from src.phrases.phrase_model import Phrase, Translation, get_phrase, get_phrase_by_english
from src.models import BCP47Language
from src.connections.gcloud_auth import get_firestore_client

In [None]:
# Step 1: Build a Phrase from raw English text (source="manual") and show
# its English text, its hash, and the full model dump.
phrase = Phrase.create_phrase("She runs to the store daily", source="manual")

for label, value in (
    ("Created phrase", phrase.english),
    ("Phrase hash", phrase.phrase_hash),
):
    print(f"{label}: {value}")

# The dump goes on its own line(s) below the heading.
print(f"Phrase model dump:\n{phrase.model_dump()}")

In [None]:
# Step 2: Translate the phrase to French (refine=False) and inspect the
# fields of the returned translation.
french = BCP47Language.get("fr-FR")
fr_translation = phrase.translate(french, refine=False)

for label, value in (
    ("French translation", fr_translation.text),
    ("Language", fr_translation.language.to_tag()),
    ("Tokens", fr_translation.tokens),
    ("Audio", fr_translation.audio),  # Should be None initially
):
    print(f"{label}: {value}")

In [None]:
# Step 3: Request a German translation as well, then list every translation
# currently held on the phrase object.
german = BCP47Language.get("de-DE")
de_translation = phrase.translate(german, refine=False)
print(f"German translation: {de_translation.text}")

translation_count = len(phrase.translations)
print(f"\nTotal translations on phrase: {translation_count}")
for trans in phrase.translations:
    tag, text = trans.language.to_tag(), trans.text
    print(f"  - {tag}: {text}")

In [None]:
# Step 4: Upload the phrase to Firestore.
# "firephrases" is handed verbatim to get_firestore_client
# (presumably the database name - confirm against gcloud_auth).
FIRESTORE_TARGET = "firephrases"
firestore_client = get_firestore_client(FIRESTORE_TARGET)

# upload_phrase returns the hash used in the next step to read the phrase back.
uploaded_hash = phrase.upload_phrase(firestore_client)
print(f"Uploaded phrase with hash: {uploaded_hash}")

In [None]:
# Step 5: Fetch the phrase back using the hash returned by upload_phrase and
# print its fields so they can be compared with the original by eye.
retrieved_phrase = get_phrase(uploaded_hash)

for label, value in (
    ("Retrieved phrase", retrieved_phrase.english),
    ("Phrase hash", retrieved_phrase.phrase_hash),
    ("Source", retrieved_phrase.source),
    ("Translations", len(retrieved_phrase.translations)),
):
    print(f"{label}: {value}")

# One bullet per stored translation: "<language tag>: <text>".
for trans in retrieved_phrase.translations:
    tag, text = trans.language.to_tag(), trans.text
    print(f"  - {tag}: {text}")

print(f"\nRetrieved phrase model_dump:\n{retrieved_phrase.model_dump()}")

In [None]:
# Step 6: Look the phrase up by its English text instead of its hash
# (get_phrase_by_english is the convenience wrapper for this).
english_phrase = "She runs to the store daily"
retrieved_by_english = get_phrase_by_english(english_phrase)

for label, value in (
    ("Retrieved by English text", retrieved_by_english.english),
    ("Hash", retrieved_by_english.phrase_hash),
    ("Translations", len(retrieved_by_english.translations)),
):
    print(f"{label}: {value}")

# One bullet per stored translation: "<language tag>: <text>".
for trans in retrieved_by_english.translations:
    tag, text = trans.language.to_tag(), trans.text
    print(f"  - {tag}: {text}")

In [None]:
# Step 7: Verify data integrity - compare the in-memory phrase with the copy
# read back by hash in Step 5. Results are printed as True/False only.
def _text_by_language(translations):
    """Return a {language tag: translated text} dict for the given translations.

    If two translations share a language tag, the later one wins.
    """
    return {item.language.to_tag(): item.text for item in translations}


print("=== Data Integrity Check ===")

same_english = phrase.english == retrieved_phrase.english
print(f"English matches: {same_english}")

same_hash = phrase.phrase_hash == retrieved_phrase.phrase_hash
print(f"Hash matches: {same_hash}")

same_count = len(phrase.translations) == len(retrieved_phrase.translations)
print(f"Translation count matches: {same_count}")

# Compare the translations themselves, keyed by language tag.
original_translations = _text_by_language(phrase.translations)
retrieved_translations = _text_by_language(retrieved_phrase.translations)

print(f"\nOriginal translations: {original_translations}")
print(f"Retrieved translations: {retrieved_translations}")

translations_equal = original_translations == retrieved_translations
print(f"Translations match: {translations_equal}")

In [21]:
# ============================================================
# AUDIO MODULE EXAMPLES
# ============================================================
# The remaining cells exercise src.audio, which provides TTS, audio
# processing, and voice management. Advertised features:
#   * Multiple TTS providers (Google, Azure, ElevenLabs)
#   * Slow speech with word breaks for learning
#   * Local fast audio processing (no API calls)
#   * Parameter-driven API (no global config)

from src.audio import (
    export_audio,
    generate_fast_audio,
    generate_normal_and_fast_audio,
    generate_translation_audio,
    get_voice_model,
    get_voice_models,
    join_audio_segments,
    load_voices_from_json,
)

print("✓ Audio module imported successfully")

# The keys of the loaded voice configuration are the language tags shown below.
supported_languages = load_voices_from_json().keys()
print(f"✓ Supported languages: {', '.join(supported_languages)}")

✓ Audio module imported successfully
✓ Supported languages: fr-FR, ru-RU, it-IT, nb-NO, sv-SE, cmn-CN, en-GB, es-ES, de-DE, cy-GB


In [None]:
# Step 1: Look up the voice for a language / gender / use case.
# The cell previously held only the fragment `get_`, which raises NameError on a
# fresh run. This call matches the VoiceInfo repr saved as the cell's output.
get_voice_model("fr-FR", "FEMALE", "flashcards")

VoiceInfo(provider=<VoiceProvider.GOOGLE: 'google'>, voice_id='fr-FR-Neural2-G', language_code='fr-FR')

In [23]:
# Step 2: Show how to generate audio for a translation at different speeds.
# This cell only PRINTS example calls; it never invokes
# generate_translation_audio, so no TTS request is made here.

test_text = "Bonjour, comment allez-vous?"
fr_voice = get_voice_model("fr-FR", "FEMALE", "flashcards")

print("Example: Generating audio for French phrase")
print(f"Text: '{test_text}'")
print(f"Voice: {fr_voice.provider.value} - {fr_voice.voice_id}")
print("\nTo generate audio, you would call:")
print()

# Each entry: (heading comment, name of the result variable, argument lines that
# follow `voice_model=fr_voice,`). One template is rendered per entry instead of
# repeating the same block of print() calls three times.
call_examples = (
    ("# Normal speed audio", "normal_audio", ("speed='normal'",)),
    ("# Slow speech with word breaks (for learning)", "slow_audio", ("speed='slow'",)),
    (
        "# Custom word break timing (in milliseconds)",
        "slow_audio",
        ("speed='slow',", "word_break_ms=500  # 500ms between words"),
    ),
)

for heading, result_name, trailing_args in call_examples:
    print(heading)
    print(f"{result_name} = generate_translation_audio(")
    print(f'    translated_text="{test_text}",')
    print("    voice_model=fr_voice,")
    for arg_line in trailing_args:
        print(f"    {arg_line}")
    print(")")
    print()

print("✓ Note: Audio generation requires API credentials:")
print("  - Google Cloud TTS: GOOGLE_APPLICATION_CREDENTIALS env var")
print("  - Azure: AZURE_API_KEY and AZURE_REGION env vars")
print("  - ElevenLabs: ELEVENLABS_API_KEY env var")

Example: Generating audio for French phrase
Text: 'Bonjour, comment allez-vous?'
Voice: google - fr-FR-Neural2-G

To generate audio, you would call:

# Normal speed audio
normal_audio = generate_translation_audio(
    translated_text="Bonjour, comment allez-vous?",
    voice_model=fr_voice,
    speed='normal'
)

# Slow speech with word breaks (for learning)
slow_audio = generate_translation_audio(
    translated_text="Bonjour, comment allez-vous?",
    voice_model=fr_voice,
    speed='slow'
)

# Custom word break timing (in milliseconds)
slow_audio = generate_translation_audio(
    translated_text="Bonjour, comment allez-vous?",
    voice_model=fr_voice,
    speed='slow',
    word_break_ms=500  # 500ms between words
)

✓ Note: Audio generation requires API credentials:
  - Google Cloud TTS: GOOGLE_APPLICATION_CREDENTIALS env var
  - Azure: AZURE_API_KEY and AZURE_REGION env vars
  - ElevenLabs: ELEVENLABS_API_KEY env var


In [None]:
# Step 3: Audio processing on placeholder audio.
# Silent segments stand in for real TTS output, so this cell is meant to run
# without API credentials.
import tempfile
from pathlib import Path

from pydub import AudioSegment

print("=== Audio Processing Examples ===\n")

# Three independent 500ms silent clips (in practice these would be TTS outputs).
segment1, segment2, segment3 = (AudioSegment.silent(duration=500) for _ in range(3))
print("Created 3 audio segments of 500ms each")

# Concatenate the clips, asking for a 100ms gap between neighbours.
joined = join_audio_segments([segment1, segment2, segment3], gap_ms=100)
print("\n✓ Joined segments with 100ms gaps")
print(f"  Total duration: {len(joined)}ms (500 + 100 + 500 + 100 + 500 = 1700ms)")

# Export into a throwaway directory; the file is inspected inside the `with`
# block because the directory is deleted when the block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    output_file = Path(tmpdir, "test_audio.mp3")
    export_audio(joined, filename=str(output_file))
    print(f"\n✓ Exported audio to: {output_file.name}")
    print(f"  File exists: {output_file.exists()}")
    print(f"  File size: {output_file.stat().st_size} bytes")

print("\n✓ Audio processing complete!")

In [None]:
# Step 4: Integration example - using audio with phrases.
# Relies on `phrase` (with its translations) from the earlier phrase cells and
# on get_voice_model from the audio import cell. Like Step 2, this cell only
# prints example calls; it does not generate audio.
print("=== Integration: Phrase + Audio Workflow ===\n")

# Recall our phrase and translations from earlier
print(f"Phrase: '{phrase.english}'")
print(f"Has {len(phrase.translations)} translations:")
for trans in phrase.translations:
    lang_tag = trans.language.to_tag()
    print(f"  - {lang_tag}: {trans.text}")

print("\n✓ To generate audio for each translation:")
print()

# Show the workflow for each translation
for trans in phrase.translations:
    lang_tag = trans.language.to_tag()  # e.g., "fr-FR", "de-DE"

    # Get voice for this language (using female voice for flashcards).
    # Only the lookup sits inside the try: it is the one call whose ValueError
    # this cell reports as "Not configured".
    try:
        voice = get_voice_model(lang_tag, "FEMALE", "flashcards")
    except ValueError as exc:
        print(f"{lang_tag}: Not configured - {exc}")
        continue

    print(f"{lang_tag}: {voice.provider.value}")
    print("  # Generate audio")
    # Same call template rendered once per speed.
    for result_name, speed in (("normal_audio", "normal"), ("slow_audio", "slow")):
        print(f"  {result_name} = generate_translation_audio(")
        print(f'      translated_text="{trans.text}",')
        print("      voice_model=voice,")
        print(f"      speed='{speed}'")
        print("  )")
    print()

print("✓ This workflow would generate:")
print("  - Normal speed audio for natural listening")
print("  - Slow speed audio with word breaks for learning")
print("  - All audio linked to the translation")

In [24]:
# Step 5: Print the constants and the provider enum exported by src.audio.
from src.audio import (
    AUDIO_SPEED_FAST,
    AUDIO_SPEED_NORMAL,
    DEFAULT_GAP_MS,
    DEFAULT_WORD_BREAK_MS,
    SPEAKING_RATE_NORMAL,
    SPEAKING_RATE_SLOW,
    VoiceProvider,
)

print("=== Audio Module Constants ===\n")

# Each section is emitted with a single print(); the trailing "" yields the
# blank separator line after the section.
print(
    "Speaking Rates:",
    f"  SPEAKING_RATE_NORMAL: {SPEAKING_RATE_NORMAL}x (natural speed)",
    f"  SPEAKING_RATE_SLOW:   {SPEAKING_RATE_SLOW}x (slower for learning)",
    "",
    sep="\n",
)

print(
    "Audio Processing:",
    f"  AUDIO_SPEED_NORMAL: {AUDIO_SPEED_NORMAL}x",
    f"  AUDIO_SPEED_FAST:   {AUDIO_SPEED_FAST}x (local processing, no API call)",
    "",
    sep="\n",
)

print(
    "Timing:",
    f"  DEFAULT_WORD_BREAK_MS: {DEFAULT_WORD_BREAK_MS}ms (break between words)",
    f"  DEFAULT_GAP_MS:        {DEFAULT_GAP_MS}ms (gap between audio segments)",
    "",
    sep="\n",
)

# One bullet per member of the VoiceProvider enum.
print("Voice Providers:")
for provider in VoiceProvider:
    print(f"  - {provider.value}")
print()

print(
    "✓ All audio features are now ready to use!",
    "  See AUDIO_MODULE_GUIDE.md for complete documentation",
    sep="\n",
)

=== Audio Module Constants ===

Speaking Rates:
  SPEAKING_RATE_NORMAL: 1.0x (natural speed)
  SPEAKING_RATE_SLOW:   0.85x (slower for learning)

Audio Processing:
  AUDIO_SPEED_NORMAL: 1.0x
  AUDIO_SPEED_FAST:   2.0x (local processing, no API call)

Timing:
  DEFAULT_WORD_BREAK_MS: 250ms (break between words)
  DEFAULT_GAP_MS:        100ms (gap between audio segments)

Voice Providers:
  - google
  - azure
  - elevenlabs

✓ All audio features are now ready to use!
  See AUDIO_MODULE_GUIDE.md for complete documentation
