In [1]:
%load_ext autoreload
%autoreload 2

from setup_imports import *  # noqa: F401,F403
from src.phrases.phrase_model import Phrase, PhraseAudio, Translation, get_phrase, get_phrase_by_english
from src.phrases.search import get_phrases_by_collection
from src.phrases.utils import generate_phrase_hash
from src.models import BCP47Language
from src.connections.gcloud_auth import get_firestore_client
from src.audio import get_voice_model, generate_translation_audio
from storage import read_from_gcs
from src.storage import PRIVATE_BUCKET, download_from_gcs
from src.llm_tools.image_generation import generate_phrase_image_prompt
from src.llm_tools.refine_story_translation import refine_story_translation
from src.images.generator import generate_image
from src.story import Story, StoryPhrase, get_story
from src.utils import load_json
from langcodes import Language


In [3]:
# Inputs for the first test story: title, summary, a hash derived from the title,
# and the dialogue loaded from ../data/test_story.json.
# NOTE(review): the next cell rebinds all four names to the "A Test Story 2" data, so on a
# top-to-bottom run these values are overwritten before any later cell reads them.
story_title = "A Test Story"
story_summary="A brief summary of the test story."
story_title_hash = generate_phrase_hash(story_title)
story_dialogue = load_json("../data/test_story.json")


In [2]:
# Inputs for "A Test Story 2"; the title, summary and dialogue bound here are the values
# passed to Story.create in the cell below.
story_title = "A Test Story 2"
story_summary="A brief summary of the test story 2."
# NOTE(review): story_title_hash is not read by any cell shown in this notebook.
story_title_hash = generate_phrase_hash(story_title)
story_dialogue = load_json("../data/test_story2.json")

In [3]:
ts = Story.create(story_title, story_summary, story_dialogue)

✅ Authenticated with Google Cloud project: swedish-course
✅ Natural Language API client initialized


In [4]:
ts.translate("fr-FR")
ts.translate("it-IT")

✅ Google Translate API client initialized


In [7]:
ts.published

{'en-GB-fr-FR': PublishedStory(source_language_tag='en-GB', target_language_tag='fr-FR', gcs_path='stories/en-GB/fr-FR/a_test_story_2_a5e6f2/', active=True, public_url='https://storage.googleapis.com/audio-language-trainer-stories/stories/en-GB/fr-FR/a_test_story_2_a5e6f2/index.html'),
 'en-GB-it-IT': PublishedStory(source_language_tag='en-GB', target_language_tag='it-IT', gcs_path='stories/en-GB/it-IT/a_test_story_2_a5e6f2/', active=True, public_url='https://storage.googleapis.com/audio-language-trainer-stories/stories/en-GB/it-IT/a_test_story_2_a5e6f2/index.html')}

In [5]:
ts.publish_story("fr-FR")
ts.publish_story("it-IT")

2026-01-02 16:46:12 - audio-language-trainer - INFO - phrase_model.py:337 - Translation for fr-FR already exists
2026-01-02 16:46:13 - audio-language-trainer - INFO - phrase_model.py:337 - Translation for fr-FR already exists
2026-01-02 16:46:13 - audio-language-trainer - INFO - phrase_model.py:337 - Translation for fr-FR already exists
2026-01-02 16:46:14 - audio-language-trainer - INFO - phrase_model.py:337 - Translation for fr-FR already exists
2026-01-02 16:46:14 - audio-language-trainer - INFO - phrase_model.py:337 - Translation for fr-FR already exists
2026-01-02 16:46:15 - audio-language-trainer - INFO - phrase_model.py:337 - Translation for fr-FR already exists
✅ Google Cloud Storage client initialized
✅ Google Text-to-Speech API client initialized
2026-01-02 16:46:26 - audio-language-trainer - INFO - story.py:385 - Published story at gs://audio-language-trainer-stories/stories/en-GB/fr-FR/a_test_story_2_a5e6f2/index.html
2026-01-02 16:46:27 - audio-language-trainer - INFO - st

In [6]:
ts.upload()

2026-01-02 16:46:53 - audio-language-trainer - INFO - phrase_model.py:405 - Uploading phrase story_a_test_story_2_a5e6f2#part_1#0#hey_sam_this_is_our_second_story_60669d with all translations to Firestore and GCS
2026-01-02 16:46:53 - audio-language-trainer - INFO - phrase_model.py:1037 - Uploading all multimedia for en-GB translation
2026-01-02 16:46:53 - audio-language-trainer - INFO - phrase_model.py:1037 - Uploading all multimedia for fr-FR translation
2026-01-02 16:46:53 - audio-language-trainer - INFO - phrase_model.py:815 - Uploading audio: fr-FR story/normal to phrases/fr-FR/audio/story/normal/story_a_test_story_2_a5e6f2#part_1#0#hey_sam_this_is_our_second_story_60669d.mp3 (local cache enabled)
2026-01-02 16:46:53 - audio-language-trainer - INFO - phrase_model.py:1037 - Uploading all multimedia for it-IT translation
2026-01-02 16:46:53 - audio-language-trainer - INFO - phrase_model.py:815 - Uploading audio: it-IT story/normal to phrases/it-IT/audio/story/normal/story_a_test_sto

<google.cloud.firestore_v1.document.DocumentReference at 0x1e519b20210>

In [17]:
ts.story_parts['part_2'][1].story_phrase.translations['fr-FR'].text

"Quoi ? Oh non ! J'ai caché mes chaussures de randonnée quelque part et je ne les retrouve pas !"

In [25]:
ts.upload(overwrite=True)

2026-01-01 18:55:09 - audio-language-trainer - INFO - phrase_model.py:404 - Uploading phrase story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92 with all translations to Firestore and GCS
2026-01-01 18:55:09 - audio-language-trainer - INFO - phrase_model.py:1036 - Uploading all multimedia for en-GB translation
2026-01-01 18:55:09 - audio-language-trainer - INFO - phrase_model.py:1036 - Uploading all multimedia for fr-FR translation
2026-01-01 18:55:09 - audio-language-trainer - INFO - phrase_model.py:814 - Uploading audio: fr-FR story/normal to phrases/fr-FR/audio/story/normal/story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92.mp3 (local cache enabled)
2026-01-01 18:55:09 - audio-language-trainer - INFO - phrase_model.py:404 - Uploading phrase story_a_test_story_232dea#part_1#1#yes_i_just_signed_the_forms_yesterday_its_going_to_9a3ca4 with all translations to Firestore and GCS
2026-01-01 18:55:10 - audio

<google.cloud.firestore_v1.document.DocumentReference at 0x227d1efbe90>

In [18]:
ts.generate_audio(language="fr-FR", overwrite=True)

✅ Google Text-to-Speech API client initialized


In [21]:
ts.publish_story(target_language="fr-FR", overwrite=True)

2026-01-01 18:52:57 - audio-language-trainer - INFO - phrase_model.py:336 - Translation for fr-FR already exists
2026-01-01 18:52:57 - audio-language-trainer - INFO - phrase_model.py:336 - Translation for fr-FR already exists
2026-01-01 18:52:58 - audio-language-trainer - INFO - phrase_model.py:336 - Translation for fr-FR already exists
2026-01-01 18:52:58 - audio-language-trainer - INFO - phrase_model.py:336 - Translation for fr-FR already exists
2026-01-01 18:52:58 - audio-language-trainer - INFO - phrase_model.py:336 - Translation for fr-FR already exists
2026-01-01 18:52:59 - audio-language-trainer - INFO - phrase_model.py:336 - Translation for fr-FR already exists
2026-01-01 18:53:00 - audio-language-trainer - INFO - phrase_model.py:1165 - Audio already exists for fr-FR story normal, skipping generation
2026-01-01 18:53:00 - audio-language-trainer - INFO - phrase_model.py:1165 - Audio already exists for fr-FR story normal, skipping generation
2026-01-01 18:53:00 - audio-language-t

In [23]:
ts.upload()

2026-01-01 18:54:16 - audio-language-trainer - INFO - phrase_model.py:404 - Uploading phrase story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92 with all translations to Firestore and GCS
2026-01-01 18:54:17 - audio-language-trainer - INFO - phrase_model.py:1036 - Uploading all multimedia for en-GB translation
2026-01-01 18:54:17 - audio-language-trainer - INFO - phrase_model.py:1036 - Uploading all multimedia for fr-FR translation
2026-01-01 18:54:17 - audio-language-trainer - INFO - phrase_model.py:1042 - Audio already exists at phrases/fr-FR/audio/story/normal/story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92.mp3, skipping upload
2026-01-01 18:54:17 - audio-language-trainer - INFO - phrase_model.py:404 - Uploading phrase story_a_test_story_232dea#part_1#1#yes_i_just_signed_the_forms_yesterday_its_going_to_9a3ca4 with all translations to Firestore and GCS
2026-01-01 18:54:17 - audio-language-trainer -

<google.cloud.firestore_v1.document.DocumentReference at 0x227d1efbe90>

In [19]:
import os

# Directory part of a sample story HTML path. The path mixes "\\" and "/" separators;
# its last separator is "/", so the head returned by os.path.split is everything before it.
story_html_path = '../outputs/gcs\\audio-language-trainer-stories\\stories/en-GB/fr-FR/a_test_story_232dea.html'
os.path.split(story_html_path)[0]

'../outputs/gcs\\audio-language-trainer-stories\\stories/en-GB/fr-FR'

In [16]:
ts.story_parts['part_1'][0].story_phrase.translations


{'fr-FR': Translation(firestore_collection='phrases', bucket_name='audio-language-trainer-private-content', firestore_database='firephrases', firestore_document_ref=None, key='story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92', language=Language.make(language='fr', territory='FR'), text='Salut Sam, as-tu réservé les billets pour la randonnée de ce week-end ?', text_lower='salut sam, as-tu réservé les billets pour la randonnée de ce week-end ?', tokens=['Salut', 'Sam', ',', 'as', '-', 'tu', 'réservé', 'les', 'billets', 'pour', 'la', 'randonnée', 'de', 'ce', 'week', '-', 'end', '?'], audio={'story': {'normal': PhraseAudio(firestore_collection='phrases', bucket_name='audio-language-trainer-private-content', firestore_database='firephrases', firestore_document_ref=None, key='story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92', text='Salut Sam, as-tu réservé les billets pour la randonnée de ce week-end ?', 

In [50]:
ts.translate(target_language="fr-FR")

In [87]:
ts.upload()

<google.cloud.firestore_v1.document.DocumentReference at 0x1b96a7779d0>

In [53]:
ts.get_story_translation("fr-FR")

Story(firestore_collection='stories', bucket_name='audio-language-trainer-private-content', firestore_database='firephrases', firestore_document_ref=<google.cloud.firestore_v1.document.DocumentReference object at 0x000001B96A7779D0>, key='a_test_story_232dea', title='A Test Story', summary='A brief summary of the test story.', story_parts={'part_1': [Utterance(sequence=0, speaker='Alex', text='Hey Sam, did you book the tickets for the hiking trip this weekend?', story_phrase=StoryPhrase(firestore_collection='phrases', bucket_name='audio-language-trainer-private-content', firestore_database='firephrases', firestore_document_ref=<google.cloud.firestore_v1.document.DocumentReference object at 0x000001B966F7F050>, key='story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92', english='Hey Sam, did you book the tickets for the hiking trip this weekend?', english_lower='hey sam, did you book the tickets for the hiking trip this weekend?', tokens=['hiking'

In [55]:
from jinja2 import Environment, FileSystemLoader

In [85]:
# Build a Jinja2 environment rooted at ../src/templates and load the story page template.
# autoescape=False: rendered values are inserted into the page without HTML escaping.
# NOTE(review): the model_dump() output shown further down carries raw <a href=...> markup in
# 'wiktionary_links', which presumably relies on escaping being off — confirm that no
# untrusted text reaches this template.
loader = FileSystemLoader("../src/templates")
env = Environment(loader=loader, autoescape=False)
template = env.get_template("story_template.html")

In [86]:
# Render the story template with the dumped story model as its context, then save the
# resulting page next to the templates as UTF-8.
story_context = ts.model_dump()
html_string = template.render(story_context)
output_path = "../src/templates/test_story/test_story.html"
with open(output_path, mode="w", encoding="utf-8") as html_file:
    html_file.write(html_string)

In [54]:
ts.model_dump()

{'firestore_collection': 'stories',
 'bucket_name': 'audio-language-trainer-private-content',
 'firestore_database': 'firephrases',
 'key': 'a_test_story_232dea',
 'title': 'A Test Story',
 'summary': 'A brief summary of the test story.',
 'story_parts': {'part_1': [{'sequence': 0,
    'speaker': 'Alex',
    'text': 'Hey Sam, did you book the tickets for the hiking trip this weekend?',
    'phrase_hash': 'story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92',
    'part_name': 'part_1',
    'target_text': 'Salut Sam, as-tu réservé les billets pour la randonnée de ce week-end ?',
    'wiktionary_links': '<a href="https://en.wiktionary.org/wiki/salut#French" target="_blank" rel="noopener">Salut</a> Sam , <a href="https://en.wiktionary.org/wiki/as#French" target="_blank" rel="noopener">as</a> - <a href="https://en.wiktionary.org/wiki/tu#French" target="_blank" rel="noopener">tu</a> <a href="https://en.wiktionary.org/wiki/r%C3%A9serv%C3%A9#French" tar

In [18]:
# Print every utterance of the story, part by part, in the order story_parts holds them.
all_utterances = [
    utterance
    for part_utterances in ts.story_parts.values()
    for utterance in part_utterances
]
for utterance in all_utterances:
    print(utterance)

sequence=0 speaker='Alex' text='Hey Sam, did you book the tickets for the hiking trip this weekend?' story_phrase=StoryPhrase(firestore_collection='phrases', bucket_name='audio-language-trainer-private-content', firestore_database='firephrases', firestore_document_ref=None, key='story_a_test_story_232dea#part_1#0#hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e92', english='Hey Sam, did you book the tickets for the hiking trip this weekend?', english_lower='hey sam, did you book the tickets for the hiking trip this weekend?', tokens=['do', 'ticket', 'for', 'the', 'trip', 'you', 'this', 'book', 'sam', 'weekend', 'hey', 'hiking'], verbs=['do', 'book'], vocab=['for', 'ticket', 'the', 'trip', 'you', 'this', 'sam', 'weekend', 'hey', 'hiking'], translations={'en-GB': Translation(firestore_collection='phrases', bucket_name='audio-language-trainer-private-content', firestore_database='firephrases', firestore_document_ref=None, key='hey_sam_did_you_book_the_tickets_for_the_hiking_tr_5a2e

In [26]:
l.language_name()

'English'

In [27]:
# "en-G5B" is not a valid tag: the recorded output for this cell is
# LanguageTagError: Expected a valid subtag, got 'g5b'.
# NOTE(review): `l` is read by the cell listed just above this one, so a fresh top-to-bottom
# run reaches that cell before `l` exists, and this call never binds `l` because it raises.
l = BCP47Language.get("en-G5B")

LanguageTagError: Expected a valid subtag, got 'g5b'

In [None]:
ts = get_story("A Test Story")

In [None]:
ts.translate(BCP47Language("sv-SE"), refine=True, overwrite=True)

In [None]:
aud = ts.generate_audio(BCP47Language("sv-SE"))

In [None]:
ts.upload(language=BCP47Language("sv-SE"), overwrite=True)

In [None]:
ts._get_dialogue_with_translations(target_language=BCP47Language("sv-SE"))

In [None]:
ts.model_dump()

In [None]:
ts._get_dialogue_with_translations(BCP47Language("fr-FR"))

In [None]:
ts.translate(BCP47Language("fr-FR"))

In [None]:
ts.generate_audio(BCP47Language("fr-FR"))

In [None]:
ts.upload(language=BCP47Language("fr-FR"), overwrite=True)

In [None]:
story_json = ts._get_dialogue_with_translations(BCP47Language("fr-FR"))

In [None]:
aud = ts._load_audio("sv-SE")

In [None]:
aud['part_3'][2]

In [None]:
import json

from src.utils import load_json, save_json

# Load the JSON stored at ../data/test_story_translated.json into new_story.
new_story = load_json("../data/test_story_translated.json")

In [None]:
new_story = refine_story_translation(story_json, language=BCP47Language("fr-FR"))

In [None]:
ts._replace_translations(new_story, BCP47Language("fr-FR"))

In [None]:
ts._get_dialogue_with_translations(BCP47Language("fr-FR"))

In [None]:
story_json

In [None]:

ts = Story.create(title=story_title, summary=story_summary, story_dialogue=story_dialogue)

In [None]:
new_story

In [None]:
ts.upload_phrase_entries()

In [None]:
ts.model_dump()

In [None]:
story_name = "Test Story"

In [None]:
# NOTE(review): the other Story.create calls in this notebook pass a title, a summary and the
# dialogue (keyword `story_dialogue=`); this call uses `story_parts=` and omits the summary —
# confirm it still matches the current Story.create signature.
# NOTE(review): `new_story` is also the name of the story JSON handed to
# ts._replace_translations in an earlier cell; rebinding it here changes what that name holds.
new_story  = Story.create(
    title=story_name,
    story_parts=story_dialogue)

In [None]:
# NOTE(review): FieldFilter is imported here but not used by any cell shown in this notebook.
from google.cloud.firestore_v1 import FieldFilter
# NOTE(review): the collection is passed as a list here but as a plain string ("WarmUp150")
# in a later cell — confirm which form get_phrases_by_collection expects.
p = get_phrases_by_collection(["Pack01"])


In [None]:
len(p)

In [None]:
phrase = "Do you collect rare coins?"

prompt = generate_phrase_image_prompt(phrase)


In [None]:
style = "new_default"
# Generate the image using available providers
image = generate_image(
    prompt=prompt,
    style=style
)
image

In [None]:
image

In [None]:
WarmUp150_phrase = download_from_gcs(bucket_name=PRIVATE_BUCKET, file_path="collections/WarmUp150/phrases.json")

In [None]:
phrases = get_phrases_by_collection("WarmUp150")

In [None]:
phrases[0].translations['fr-FR'].get_wiktionary_links()

In [None]:
phrases[0].translations['fr-FR'].tokens

In [None]:
# French translation text for the WarmUp150 collection, downloaded from PRIVATE_BUCKET.

french_translations = download_from_gcs(bucket_name=PRIVATE_BUCKET, file_path="collections/WarmUp150/french/translations.json")

In [None]:
from itertools import islice
from tqdm import tqdm

# Rebuild each WarmUp150 phrase from its stored English/French pair, upload it, and collect
# the resulting Phrase objects in WARUMUP_PHRASES for the cells below.
WARUMUP_PHRASES = []

for entry in tqdm(french_translations.values()):
    source_text, target_text = entry["english"], entry["french"]

    p = Phrase.create(source_text)
    p.collections.append('WarmUp150')
    # The stored French text is passed through as translated_text.
    p.translate('fr-FR', translated_text=target_text)
    p.upload()
    WARUMUP_PHRASES.append(p)

In [None]:
WARUMUP_PHRASES[5].model_dump()

In [None]:
for p in WARUMUP_PHRASES:
    p.download()

In [None]:
WARUMUP_PHRASES[5].translations['fr-FR'].image

In [None]:
# Create, translate (fr-FR) and generate audio for the first ten WarmUp150 phrases.
# Each iteration builds the Phrase from the current loop item (previously every pass used
# WarmUp150_phrase[0], and a stray character after the translate call made the cell a
# syntax error). `p1` is left bound to the last phrase processed, which the following
# cells keep working with.
for phrase in WarmUp150_phrase[:10]:
    p1 = Phrase.create(phrase)
    p1.collections.append('WarmUp150')
    p1.translate('fr-FR')
    p1.generate_audio()

In [None]:
p1.translate(BCP47Language('fr-FR'))

In [None]:
p1.upload()

In [None]:
p1.generate_audio("flashcard", BCP47Language('fr-FR'))

In [None]:
p1.get_audio('fr-FR', "flashcard", "slow")

In [None]:
p1.translations['fr-FR'].audio[0].model_dump()

In [None]:
p1.generate_audio(context="flashcard", language=BCP47Language('fr-FR'))

In [None]:
p1.translations['fr-FR'].audio[0].audio_segment

In [None]:
p1.upload()

In [None]:
p1 = get_phrase('lets_eat_lunch_tomorrow_b765fb')

In [None]:
p1.model_dump()

In [None]:
p1.generate_audio(context="flashcard", language="fr-FR")


In [None]:
p1.tran

In [None]:
phrase.translations[0].firestore_document_ref

In [None]:
phrase.translations[1]._upload_to_gcs()

In [None]:

# Add German translation
de = phrase.translate(BCP47Language.get("de-DE"), refine=True)
print(f"✓ German: {de.text}")


In [None]:
# Generate audio for each translation using the new generate_audio() method
# For flashcard context: generates slow and normal speeds
# For story context: generates normal and fast speeds

def _report_flashcard_audio(translation, language_name, announce_prefix=""):
    """Generate flashcard audio for one translation and print each resulting audio entry.

    Args:
        translation: object exposing generate_audio(...) and an iterable ``audio`` attribute.
        language_name: label used in the progress messages (e.g. "French").
        announce_prefix: text printed immediately before the "Generating ..." banner.
    """
    print(f"{announce_prefix}Generating {language_name} audio (flashcard)...")
    translation.generate_audio(context="flashcard", bucket_name="audio-language-trainer-private-content")
    print(f"✓ {language_name} audio generated: {len(translation.audio)} audio files")
    # NOTE(review): other cells in this notebook show Translation.audio as a nested dict keyed
    # by context and speed; iterating it as a sequence of audio objects may be stale — confirm.
    for audio in translation.audio:
        print(f"  - {audio.context} / {audio.speed}: {audio.file_path}")


# French first, then German; the German banner is preceded by a blank line.
_report_flashcard_audio(fr, "French")
_report_flashcard_audio(de, "German", announce_prefix="\n")

In [None]:
# Get the English translation and generate image
# NOTE(review): translations is indexed by position here, while other cells in this notebook
# index it by language tag (e.g. translations['fr-FR']) — confirm which shape `phrase` has.
en_gb = phrase.translations[0]  # The en-GB translation created in create_phrase()
print(f"✓ English translation: {en_gb.text}")

# Generate image for the English translation (shared across all language variants)
# The prompt is hard-coded here rather than produced by generate_phrase_image_prompt.
image_prompt = "A cheerful greeting scene with two people waving hello, bright morning sunlight"
image = generate_image(image_prompt, style="ghibli", project_id="swedish-course")
# Attach the generated image to the translation, then upload it to the named bucket.
en_gb.image = image
en_gb.upload_image("audio-language-trainer-private-content")
print(f"✓ Image generated and attached: {en_gb.image_file_path}")

In [None]:
# Test phrase generation with small vocab_dict
#
# Runs generate_phrases_from_vocab_dict once (max_iterations=1) on a tiny vocabulary and
# prints the tracking counters, any recorded errors and the first 20 generated phrases.
# Any exception is reported with its traceback instead of aborting the notebook run.
import traceback

from src.phrases.generation import generate_phrases_from_vocab_dict

# Small test vocab_dict with a few verbs and vocabs
test_vocab_dict = {
    "verbs": ["want", "go", "see"],
    "vocab": ["apple", "table", "red", "big", "old", "door", "window", "house"]
}

print("=" * 60)
print("TESTING PHRASE GENERATION")
print("=" * 60)
print("\nInput vocab_dict:")
print(f"  Verbs: {test_vocab_dict['verbs']}")
print(f"  Vocab: {test_vocab_dict['vocab']}")
print("\nGenerating phrases...")
print("-" * 60)

try:
    # NOTE(review): `phrases` is also the name bound to the WarmUp150 phrase list in an
    # earlier cell; this assignment replaces it — rename if both are needed at once.
    phrases, tracking = generate_phrases_from_vocab_dict(
        test_vocab_dict,
        max_iterations=1  # Just one iteration for testing
    )

    print("\n✓ Generation complete!")
    print("\nResults:")
    print(f"  Total phrases generated: {tracking['total_phrases']}")
    print(f"  Verb phrases: {tracking['verb_phrases']}")
    print(f"  Vocab phrases: {tracking['vocab_phrases']}")
    print(f"  Verbs processed: {tracking['verbs_processed']}")
    print(f"  Vocab processed: {tracking['vocab_processed']}")
    print(f"  Additional words found: {len(tracking['words_used'])}")

    if tracking['errors']:
        print("\n⚠ Errors encountered:")
        for error in tracking['errors']:
            print(f"  - {error}")

    print("\nGenerated Phrases (first 20):")
    # Use a dedicated loop name so the notebook-level `phrase` that other cells read
    # is not overwritten by this listing.
    for i, generated_phrase in enumerate(phrases[:20], 1):
        print(f"  {i}. {generated_phrase}")

    if len(phrases) > 20:
        print(f"  ... and {len(phrases) - 20} more phrases")

    print("\nAdditional words tracked:")
    print(f"  {tracking['words_used']}")

except Exception as e:
    # Best-effort smoke test: show what failed and where, then let the notebook continue.
    print("\n✗ Error during phrase generation:")
    print(f"  {type(e).__name__}: {e}")
    traceback.print_exc()