In [14]:
from google.auth import default
credentials, project = default()

In [15]:
%load_ext autoreload
%autoreload 2
import os
import sys
from pathlib import Path
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from dotenv import load_dotenv
load_dotenv()
from tqdm import tqdm

from src.utils import (load_json, get_first_n_items, save_json, save_text_file, load_text_file)  # noqa: E402
from src.story import upload_story_image, prepare_dialogue_with_wiktionary
from src.translation import review_story_dialogue_translations
from src.gcs_storage import check_blob_exists, read_from_gcs, upload_to_gcs, get_story_translated_dialogue_path, get_story_dialogue_path
from src.config_loader import config
print(config.TARGET_LANGUAGE_NAME)
COLLECTION = "WarmUp150"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
French


In [None]:
config._load_config()
language_name = config.TARGET_LANGUAGE_NAME.lower()
print(language_name)
config.get_voice_models(enum_type="stories")

In [4]:
from src.gcs_storage import get_stories_from_collection

all_stories = get_stories_from_collection(collection=COLLECTION)

In [13]:
sorted(all_stories)

['story_airport_wedding_chaos',
 'story_birthday_train_adventure',
 'story_concert_plans_unravel',
 'story_midnight_bakery',
 'story_mysterious_wedding_gift',
 'story_sunset_adventure_documentary',
 'story_the_wedding_planner',
 'story_underwater_community_centre']

## Loop all stories (text)

Translate and re-upload

In [None]:
# loop stories and translate
from src.gcs_storage import check_blob_exists, get_story_dialogue_path, get_story_translated_dialogue_path
from src.dialogue_generation import translate_and_upload_dialogue
from src.translation import review_story_dialogue_translations

for story_name in all_stories:
    # get the dialogue
    story_file_path = get_story_dialogue_path(story_name, collection=COLLECTION)
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    if check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        print(f"{story_name} already translated")
        #continue
    story_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, story_file_path)
    translate_and_upload_dialogue(story_dialogue, story_name, collection=COLLECTION)

## Review translations with LLM

In [None]:
for story_name in all_stories:
    print(f"processing {story_name}")
    # get the dialogue
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
    reviewed_dialogue = review_story_dialogue_translations(translated_dialogue)
    upload_to_gcs(obj=reviewed_dialogue, bucket_name=config.GCS_PRIVATE_BUCKET, file_name=translated_file_path)

## Add wiktionary links to each story utterance

In [None]:
for story_name in all_stories:

    # get the dialogue
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    if not check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        print(f"{story_name} not yet translated")
        continue
    translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
    translated_dialogue_with_links = prepare_dialogue_with_wiktionary(translated_dialogue)
    # now re-upload it with embedded witkionary_links
    upload_to_gcs(obj=translated_dialogue_with_links, bucket_name=config.GCS_PRIVATE_BUCKET, file_name=translated_file_path)


Generate audio and upload

### Generate audio and upload

### Loop through stories to generate audio

In [16]:
all_stories

['story_mysterious_wedding_gift',
 'story_airport_wedding_chaos',
 'story_birthday_train_adventure',
 'story_underwater_community_centre',
 'story_concert_plans_unravel',
 'story_sunset_adventure_documentary',
 'story_midnight_bakery',
 'story_the_wedding_planner']

In [None]:
from src.audio_generation import generate_dialogue_audio_and_upload
for story_name in all_stories:
    # get the dialogue
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)

    translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
    generate_dialogue_audio_and_upload(translated_dialogue, story_name, collection=COLLECTION, overwrite=True)

### Fast Audio 
1 file for each story part

In [19]:
from src.audio_generation import generate_and_upload_fast_audio

for story_name in tqdm(all_stories):
    # uploads 1 fast mp3 file for each story part. 
    generate_and_upload_fast_audio(story_name, collection=COLLECTION, overwrite=True)

[autoreload of src.story failed: Traceback (most recent call last):
  File "y:\Python Scripts\audio-language-trainer\.venv\Lib\site-packages\IPython\extensions\autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "y:\Python Scripts\audio-language-trainer\.venv\Lib\site-packages\IPython\extensions\autoreload.py", line 475, in superreload
    module = reload(module)
             ^^^^^^^^^^^^^^
  File "C:\Users\i5\AppData\Local\Programs\Python\Python311\Lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 621, in _exec
  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "y:\Python Scripts\audio-language-trainer\src\story.py", line 24, in <module>
    from src.gcs_storage import (
ImportError: cannot import name 'get_fast_audio_path' from 'src.gcs_storage' (y:\Python Scripts\a

Default FFmpeg path added to system PATH: C:\Program Files\ffmpeg-7.0-essentials_build\bin


  0%|          | 0/8 [00:00<?, ?it/s]
[A
[A
Collecting utterance audio for introduction: 5it [00:00, 15.15it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_01_story_mysterious_wedding_gift_introduction_FAST.m4a



[A
[A
[A
Collecting utterance audio for development: 5it [00:00, 12.32it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_01_story_mysterious_wedding_gift_development_FAST.m4a



[A
[A
Collecting utterance audio for resolution: 5it [00:00, 16.13it/s]


Generating fast audio for resolution...


Processing story_mysterious_wedding_gift in french: 100%|██████████| 3/3 [00:13<00:00,  4.36s/it]
 12%|█▎        | 1/8 [00:13<01:31, 13.09s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_01_story_mysterious_wedding_gift_resolution_FAST.m4a



[A
[A
Collecting utterance audio for introduction: 5it [00:00, 16.56it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_02_story_airport_wedding_chaos_introduction_FAST.m4a



[A
[A
Collecting utterance audio for development: 5it [00:00, 14.84it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_02_story_airport_wedding_chaos_development_FAST.m4a



[A
[A
Collecting utterance audio for resolution: 5it [00:00, 15.62it/s]


Generating fast audio for resolution...


Processing story_airport_wedding_chaos in french: 100%|██████████| 3/3 [00:12<00:00,  4.30s/it]
 25%|██▌       | 2/8 [00:26<01:17, 12.99s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_02_story_airport_wedding_chaos_resolution_FAST.m4a



[A
[A
[A
Collecting utterance audio for introduction: 6it [00:00, 12.96it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_03_story_birthday_train_adventure_introduction_FAST.m4a



[A
[A
[A
Collecting utterance audio for development: 6it [00:00, 13.86it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_03_story_birthday_train_adventure_development_FAST.m4a



[A
[A
[A
Collecting utterance audio for resolution: 6it [00:00, 17.00it/s]


Generating fast audio for resolution...


Processing story_birthday_train_adventure in french: 100%|██████████| 3/3 [00:13<00:00,  4.38s/it]
 38%|███▊      | 3/8 [00:39<01:05, 13.05s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_03_story_birthday_train_adventure_resolution_FAST.m4a



[A
[A
[A
Collecting utterance audio for setup: 7it [00:00, 16.99it/s]


Generating fast audio for setup...




Uploaded fast audio for setup to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_04_story_underwater_community_centre_setup_FAST.m4a



[A
[A
[A
Collecting utterance audio for resolution: 6it [00:00, 12.12it/s]


Generating fast audio for resolution...


Processing story_underwater_community_centre in french: 100%|██████████| 2/2 [00:08<00:00,  4.35s/it]
 50%|█████     | 4/8 [00:47<00:45, 11.33s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_04_story_underwater_community_centre_resolution_FAST.m4a



[A
[A
[A
Collecting utterance audio for introduction: 6it [00:00, 14.63it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_05_story_concert_plans_unravel_introduction_FAST.m4a



[A
[A
[A
Collecting utterance audio for development: 6it [00:00, 14.82it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_05_story_concert_plans_unravel_development_FAST.m4a



[A
[A
[A
Collecting utterance audio for resolution: 6it [00:00, 12.70it/s]


Generating fast audio for resolution...


Processing story_concert_plans_unravel in french: 100%|██████████| 3/3 [00:12<00:00,  4.09s/it]
 62%|██████▎   | 5/8 [01:00<00:35, 11.68s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_05_story_concert_plans_unravel_resolution_FAST.m4a



[A
[A
Collecting utterance audio for introduction: 5it [00:00, 14.04it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_06_story_sunset_adventure_documentary_introduction_FAST.m4a



[A
[A
[A
Collecting utterance audio for development: 6it [00:00, 12.46it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_06_story_sunset_adventure_documentary_development_FAST.m4a



[A
[A
Collecting utterance audio for resolution: 5it [00:00, 14.85it/s]


Generating fast audio for resolution...


Processing story_sunset_adventure_documentary in french: 100%|██████████| 3/3 [00:12<00:00,  4.24s/it]
 75%|███████▌  | 6/8 [01:12<00:24, 12.04s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_06_story_sunset_adventure_documentary_resolution_FAST.m4a



[A
[A
[A
Collecting utterance audio for introduction: 6it [00:00, 14.82it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_07_story_midnight_bakery_introduction_FAST.m4a



[A
[A
[A
[A
Collecting utterance audio for development: 8it [00:00, 13.14it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_07_story_midnight_bakery_development_FAST.m4a



[A
[A
[A
[A
Collecting utterance audio for resolution: 8it [00:00, 14.92it/s]


Generating fast audio for resolution...


Processing story_midnight_bakery in french: 100%|██████████| 3/3 [00:13<00:00,  4.53s/it]
 88%|████████▊ | 7/8 [01:26<00:12, 12.55s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_07_story_midnight_bakery_resolution_FAST.m4a



[A
[A
[A
[A
[A
Collecting utterance audio for introduction: 10it [00:00, 14.79it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_08_story_the_wedding_planner_introduction_FAST.m4a



[A
[A
[A
[A
[A
[A
Collecting utterance audio for development: 12it [00:00, 13.24it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_08_story_the_wedding_planner_development_FAST.m4a



[A
[A
[A
[A
[A
[A
Collecting utterance audio for resolution: 12it [00:00, 14.79it/s]


Generating fast audio for resolution...


Processing story_the_wedding_planner in french: 100%|██████████| 3/3 [00:15<00:00,  5.01s/it]
100%|██████████| 8/8 [01:41<00:00, 12.69s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/french/audio/french_08_story_the_wedding_planner_resolution_FAST.m4a





## Upload challenges

In [11]:
from src.chat import get_html_challenge_inputs, create_html_challenges
from src.gcs_storage import upload_to_gcs, get_story_challenges_path, get_story_translated_challenges_path, read_from_gcs

for story_name in all_stories:
    challenge_file_path = get_story_challenges_path(story_name, collection=COLLECTION)
    scenario_dicts = read_from_gcs(bucket_name=config.GCS_PRIVATE_BUCKET, file_path=challenge_file_path)
    challenges = get_html_challenge_inputs(scenario_dicts)
    chat_webpage_file = create_html_challenges(challenges, story_name=story_name, collection=COLLECTION) # this creates and uploades


FileNotFoundError: File not found in GCS: audio-language-trainer-private-content/collections/LM1000/common/stories/story_mysterious_wedding_gift/challenges.json

In [10]:
COLLECTION="LM1000"