In [1]:
# Look up Google Application Default Credentials and the project they resolve to.
# NOTE(review): neither `credentials` nor `project` is referenced by any cell
# visible in this notebook — presumably this only verifies that ADC is set up
# before the GCS helpers are used; confirm.
from google.auth import default
credentials, project = default()

In [2]:
%load_ext autoreload
%autoreload 2
import os
import sys
from pathlib import Path
# Put the notebook's parent directory on sys.path (once) so that the
# `src.*` imports further down in this cell can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
from dotenv import load_dotenv
load_dotenv()
from tqdm import tqdm

from src.utils import (load_json, get_first_n_items, save_json, save_text_file, load_text_file)  # noqa: E402
from src.story import upload_story_image, prepare_dialogue_with_wiktionary
from src.translation import review_story_dialogue_translations
from src.gcs_storage import check_blob_exists, read_from_gcs, upload_to_gcs, get_story_translated_dialogue_path, get_story_dialogue_path
from src.config_loader import config
# Show which target language the loaded configuration points at.
print(config.TARGET_LANGUAGE_NAME)
# Name of the story collection that the loops in this notebook pass as `collection=`.
COLLECTION = "WarmUp150"

Default FFmpeg path added to system PATH: C:\Program Files\ffmpeg-7.0-essentials_build\bin
Spanish


In [None]:
# NOTE(review): `_load_config` is underscore-prefixed (private by convention);
# presumably it re-reads the configuration so that edits are picked up without
# restarting the kernel — confirm, and prefer a public reload hook if one exists.
config._load_config()
# Lower-cased form of the configured target language name.
language_name = config.TARGET_LANGUAGE_NAME.lower()
print(language_name)
# Last expression in the cell, so its return value is rendered as the cell output.
config.get_voice_models(enum_type="stories")

In [3]:
from src.gcs_storage import get_stories_from_collection

# Fetch the stories that belong to COLLECTION; every loop in this notebook
# iterates over this object and treats each item as a story name.
all_stories = get_stories_from_collection(collection=COLLECTION)

In [4]:
# Display the story names alphabetically. sorted() returns a new list, so
# all_stories itself keeps its original order for the loops that follow.
sorted(all_stories)

['story_airport_wedding_chaos',
 'story_birthday_train_adventure',
 'story_concert_plans_unravel',
 'story_midnight_bakery',
 'story_mysterious_wedding_gift',
 'story_sunset_adventure_documentary',
 'story_the_wedding_planner',
 'story_underwater_community_centre']

## Loop over all stories (text)

Translate and re-upload

In [None]:
# loop stories and translate
from src.gcs_storage import check_blob_exists, get_story_dialogue_path, get_story_translated_dialogue_path
from src.dialogue_generation import translate_and_upload_dialogue
from src.translation import review_story_dialogue_translations

# Translate every story's dialogue and upload the result.
#
# RETRANSLATE_EXISTING makes the re-run policy explicit. It defaults to True,
# which matches what this cell did before (a story whose translated file
# already exists was translated again). Set it to False to skip such stories.
RETRANSLATE_EXISTING = True

for story_name in all_stories:
    # Paths of the source dialogue and of its translated counterpart.
    story_file_path = get_story_dialogue_path(story_name, collection=COLLECTION)
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)

    if check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        if not RETRANSLATE_EXISTING:
            print(f"{story_name} already translated - skipping")
            continue
        # Say what is actually about to happen, rather than implying a skip.
        print(f"{story_name} already translated - translating again")

    story_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, story_file_path)
    translate_and_upload_dialogue(story_dialogue, story_name, collection=COLLECTION)

## Review translations with LLM

In [None]:
# Run the LLM review over each story's translated dialogue and write the
# reviewed version back to the same path it was read from.
for story_name in all_stories:
    print(f"processing {story_name}")
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)

    # A story without a translated file cannot be reviewed; report it and move
    # on instead of attempting to read a blob that is not there.
    if not check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        print(f"{story_name} not yet translated")
        continue

    translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
    reviewed_dialogue = review_story_dialogue_translations(translated_dialogue)
    upload_to_gcs(obj=reviewed_dialogue, bucket_name=config.GCS_PRIVATE_BUCKET, file_name=translated_file_path)

## Add wiktionary links to each story utterance

In [None]:
# For each story that already has a translated dialogue, pass it through
# prepare_dialogue_with_wiktionary and upload the result to the same path.
for story_name in all_stories:
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    if check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
        translated_dialogue_with_links = prepare_dialogue_with_wiktionary(translated_dialogue)
        # Re-upload the dialogue, now with embedded wiktionary links.
        upload_to_gcs(
            obj=translated_dialogue_with_links,
            bucket_name=config.GCS_PRIVATE_BUCKET,
            file_name=translated_file_path,
        )
    else:
        # Nothing to annotate until the translation step has run for this story.
        print(f"{story_name} not yet translated")


Generate audio and upload

### Generate audio and upload

### Loop through stories to generate audio

In [None]:
# Display the stories that the audio-generation loop in the next cell will process.
all_stories

In [None]:
from src.audio_generation import generate_dialogue_audio_and_upload
# Generate audio for each story's translated dialogue and upload it.
# overwrite=True is passed on every call, so this cell asks the helper to
# overwrite rather than keep whatever audio a previous run produced.
for story_name in all_stories:
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)

    # Audio is produced from the translated dialogue; if that file is missing,
    # report the story and continue with the rest instead of failing mid-loop.
    if not check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        print(f"{story_name} not yet translated")
        continue

    translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
    generate_dialogue_audio_and_upload(translated_dialogue, story_name, collection=COLLECTION, overwrite=True)

### Fast Audio 
One fast MP3 file is generated and uploaded for each story part.

In [5]:
from src.audio_generation import generate_and_upload_fast_audio

# One pass per story, with a tqdm progress bar over the whole collection.
# Each call uploads a single fast mp3 file per story part.
for story_name in tqdm(all_stories):
    generate_and_upload_fast_audio(
        story_name,
        collection=COLLECTION,
        overwrite=True,
    )

  0%|          | 0/8 [00:00<?, ?it/s]
[A
[A
[A
Collecting utterance audio for introduction: 5it [00:01,  4.93it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_mysterious_wedding_gift/audio/introduction/fast.mp3



[A
[A
Collecting utterance audio for development: 5it [00:00, 11.96it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_mysterious_wedding_gift/audio/development/fast.mp3



[A
[A
[A
Collecting utterance audio for resolution: 5it [00:00,  9.65it/s]


Generating fast audio for resolution...


Processing story_mysterious_wedding_gift in spanish: 100%|██████████| 3/3 [00:40<00:00, 13.34s/it]
 12%|█▎        | 1/8 [00:40<04:40, 40.04s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_mysterious_wedding_gift/audio/resolution/fast.mp3



[A
[A
Collecting utterance audio for introduction: 5it [00:00, 12.05it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_airport_wedding_chaos/audio/introduction/fast.mp3



[A
[A
Collecting utterance audio for development: 5it [00:00, 13.74it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_airport_wedding_chaos/audio/development/fast.mp3



[A
[A
[A
Collecting utterance audio for resolution: 5it [00:00,  5.39it/s]


Generating fast audio for resolution...


Processing story_airport_wedding_chaos in spanish: 100%|██████████| 3/3 [00:15<00:00,  5.09s/it]
 25%|██▌       | 2/8 [00:55<02:32, 25.49s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_airport_wedding_chaos/audio/resolution/fast.mp3



[A
[A
[A
Collecting utterance audio for introduction: 6it [00:00, 12.29it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_birthday_train_adventure/audio/introduction/fast.mp3



[A
[A
[A
Collecting utterance audio for development: 6it [00:00,  7.93it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_birthday_train_adventure/audio/development/fast.mp3



[A
[A
[A
Collecting utterance audio for resolution: 6it [00:00, 11.61it/s]


Generating fast audio for resolution...


Processing story_birthday_train_adventure in spanish: 100%|██████████| 3/3 [00:15<00:00,  5.14s/it]
 38%|███▊      | 3/8 [01:10<01:44, 20.91s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_birthday_train_adventure/audio/resolution/fast.mp3



[A
[A
[A
[A
Collecting utterance audio for setup: 7it [00:00, 10.29it/s]


Generating fast audio for setup...




Uploaded fast audio for setup to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_underwater_community_centre/audio/setup/fast.mp3



[A
[A
[A
Collecting utterance audio for resolution: 6it [00:00, 10.08it/s]


Generating fast audio for resolution...


Processing story_underwater_community_centre in spanish: 100%|██████████| 2/2 [00:11<00:00,  5.63s/it]
 50%|█████     | 4/8 [01:22<01:08, 17.11s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_underwater_community_centre/audio/resolution/fast.mp3



[A
[A
[A
Collecting utterance audio for introduction: 6it [00:00,  8.55it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_concert_plans_unravel/audio/introduction/fast.mp3



[A
[A
[A
Collecting utterance audio for development: 6it [00:00, 12.12it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_concert_plans_unravel/audio/development/fast.mp3



[A
[A
[A
Collecting utterance audio for resolution: 6it [00:00, 10.27it/s]


Generating fast audio for resolution...


Processing story_concert_plans_unravel in spanish: 100%|██████████| 3/3 [00:14<00:00,  4.69s/it]
 62%|██████▎   | 5/8 [01:36<00:48, 16.03s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_concert_plans_unravel/audio/resolution/fast.mp3



[A
[A
Collecting utterance audio for introduction: 5it [00:00, 12.56it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_sunset_adventure_documentary/audio/introduction/fast.mp3



[A
[A
[A
Collecting utterance audio for development: 6it [00:00, 12.74it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_sunset_adventure_documentary/audio/development/fast.mp3



[A
[A
Collecting utterance audio for resolution: 5it [00:00, 11.76it/s]


Generating fast audio for resolution...


Processing story_sunset_adventure_documentary in spanish: 100%|██████████| 3/3 [00:13<00:00,  4.56s/it]
 75%|███████▌  | 6/8 [01:49<00:30, 15.24s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_sunset_adventure_documentary/audio/resolution/fast.mp3



[A
[A
[A
[A
Collecting utterance audio for introduction: 6it [00:00,  6.34it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_midnight_bakery/audio/introduction/fast.mp3



[A
[A
[A
[A
[A
[A
Collecting utterance audio for development: 8it [00:00,  8.19it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_midnight_bakery/audio/development/fast.mp3



[A
[A
[A
[A
Collecting utterance audio for resolution: 8it [00:00,  8.67it/s]


Generating fast audio for resolution...


Processing story_midnight_bakery in spanish: 100%|██████████| 3/3 [00:14<00:00,  4.91s/it]
 88%|████████▊ | 7/8 [02:04<00:15, 15.08s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_midnight_bakery/audio/resolution/fast.mp3



[A
[A
[A
[A
[A
Collecting utterance audio for introduction: 10it [00:01,  8.60it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_the_wedding_planner/audio/introduction/fast.mp3



[A
[A
[A
[A
[A
[A
Collecting utterance audio for development: 12it [00:01, 10.80it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_the_wedding_planner/audio/development/fast.mp3



[A
[A
[A
[A
[A
[A
Collecting utterance audio for resolution: 12it [00:02,  5.66it/s]


Generating fast audio for resolution...


Processing story_the_wedding_planner in spanish: 100%|██████████| 3/3 [00:19<00:00,  6.47s/it]
100%|██████████| 8/8 [02:24<00:00, 18.02s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/WarmUp150/spanish/stories/story_the_wedding_planner/audio/resolution/fast.mp3





## Upload challenges

In [None]:
from src.chat import get_html_challenge_inputs, create_html_challenges
from src.gcs_storage import upload_to_gcs, get_story_challenges_path, get_story_translated_challenges_path, read_from_gcs

# Build the HTML challenge page for every story from its stored challenge scenarios.
for story_name in all_stories:
    # Read the scenario definitions for this story from the private bucket.
    challenge_file_path = get_story_challenges_path(story_name, collection=COLLECTION)
    scenario_dicts = read_from_gcs(
        bucket_name=config.GCS_PRIVATE_BUCKET,
        file_path=challenge_file_path,
    )
    challenges = get_html_challenge_inputs(scenario_dicts)
    # This call both creates and uploads the page.
    chat_webpage_file = create_html_challenges(
        challenges,
        story_name=story_name,
        collection=COLLECTION,
    )


In [10]:
# Point the notebook at a different collection.
# NOTE(review): this rebinds the constant set in the setup cell, so any earlier
# cell re-run after this one will also use the new value — confirm that is intended.
COLLECTION = "LM1000"