In [1]:
# Resolve Google credentials and the associated project via google.auth.default().
# NOTE(review): neither `credentials` nor `project` is referenced again in the
# visible cells — presumably this is a fail-fast authentication check; confirm.
from google.auth import default
credentials, project = default()

In [7]:
%load_ext autoreload
%autoreload 2
import os
import sys
from pathlib import Path
# Append the parent of the current working directory to sys.path (once) so that
# the `src.*` packages imported in this cell can be resolved.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from dotenv import load_dotenv
load_dotenv()
from tqdm import tqdm

from src.utils import get_first_n_items
from src.story import upload_story_image, prepare_dialogue_with_wiktionary
from src.gcs_storage import check_blob_exists, read_from_gcs, upload_to_gcs, get_story_translated_dialogue_path, get_story_dialogue_path
from src.translation import review_story_dialogue_translations
from src.config_loader import config
# Bare last expression: shows the configured target language as the cell output.
config.TARGET_LANGUAGE_NAME

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'Spanish'

# Story Data

* based around a story_name which is part of a collection (LM1000 > story_the_birthday_party)
* translated dialogue (text file)
    * comes from google translate
    * enriched with Wiktionary links for each utterance ('wiktionary_links')
* audio file for each utterance of Sam and Alex
* 1 x fast audio file for each story part, introduction etc
* an image for each story part
* challenges for each story
    * from a single challenges.json file for each story
    * language customisation happens when the challenges.html file is created: the language name is added into the prompt

## Storage overview

We want to save the English story dialogue as dialogue.json

Translations are then stored by language_name, and each phrase is stored as its own audio clip. We might want higher-quality audio for the stories, e.g. a Chirp3 voice

This allows us to quickly retrieve a phrase based on the bucket name and the phrase key, as well as modify individual phrases for later correction

In [3]:
# NOTE(review): `_load_config` is a private method; presumably it re-reads the
# configuration so the values below are current — confirm, and prefer a public
# reload hook if one exists.
config._load_config()
# Lower-cased target language name, printed as a quick visual check.
language_name = config.TARGET_LANGUAGE_NAME.lower()
print(language_name)
# Last expression: its value (the voice models for stories) is the cell output.
config.get_voice_models(enum_type="stories")

spanish


(VoiceInfo(name='en-GB-Chirp3-HD-Algenib', provider=<VoiceProvider.GOOGLE: 'google'>, voice_id='en-GB-Chirp3-HD-Algenib', language_code='en-GB'),
 VoiceInfo(name='es-ES-Chirp-HD-F', provider=<VoiceProvider.GOOGLE: 'google'>, voice_id='es-ES-Chirp-HD-F', language_code='es-ES'),
 VoiceInfo(name='es-ES-Chirp-HD-D', provider=<VoiceProvider.GOOGLE: 'google'>, voice_id='es-ES-Chirp-HD-D', language_code='es-ES'))

In [5]:
from src.gcs_storage import get_stories_from_collection

# Story names in the LM1000 collection; the loops in the later cells iterate
# over this list (or a slice of it).
all_stories = get_stories_from_collection(collection="LM1000")

In [6]:
# Display the story names; later cells process either the full list or all_stories[0:1].
all_stories

['story_sunset_wedding_blues',
 'story_better_than_a_movie',
 'story_unexpected_wedding_guests',
 'story_midnight_garden_mystery',
 'story_workplace_stress_vacation',
 'story_the_marathon',
 'story_teaching_music',
 'story_job_interview_gone_wrong',
 'story_winter_in_sweden',
 'story_a_fishing_trip',
 'story_team_spirit',
 'story_brussels_blend',
 'story_rainy_football_match',
 'story_camping_trip_gone_awry',
 'story_unexpected_career_change',
 'story_a_missed_stop',
 'story_road_trip',
 'story_forgetful_diver',
 'story_the_birthday_party',
 'story_the_power_cut']

## Loop all stories (text)

Translate and re-upload

In [8]:
# Translate each story's English dialogue and upload the translated version.
from src.gcs_storage import check_blob_exists, get_story_dialogue_path, get_story_translated_dialogue_path
from src.dialogue_generation import translate_and_upload_dialogue

# The translation review cell and the Wiktionary-links cell both write their
# results back to the same translated-dialogue path, so re-translating a story
# replaces that downstream work. Existing translations are therefore skipped
# unless this flag is deliberately set to True.
overwrite_existing_translations = False

for story_name in all_stories:
    # Source (English) dialogue and the destination for its translation.
    story_file_path = get_story_dialogue_path(story_name, collection="LM1000")
    translated_file_path = get_story_translated_dialogue_path(story_name, collection="LM1000")

    already_translated = check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path)
    if already_translated and not overwrite_existing_translations:
        print(f"{story_name} already translated")
        continue
    if already_translated:
        # Make the destructive path visible in the cell output.
        print(f"{story_name} already translated - overwriting")

    story_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, story_file_path)
    translate_and_upload_dialogue(story_dialogue, story_name, collection="LM1000")

adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.89s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.97s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.98s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:02<00:04,  2.04s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:04<00:02,  2.05s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:06<00:00,  2.01s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_better_than_a_movie/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.90s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.95s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.91s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_unexpected_wedding_guests/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.84s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.95s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.94s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_midnight_garden_mystery/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.92s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.87s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.89s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_workplace_stress_vacation/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:02<00:04,  2.09s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.95s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.93s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_the_marathon/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:02<00:04,  2.03s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:04<00:02,  2.01s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.95s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_teaching_music/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:02<00:04,  2.16s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:04<00:02,  2.09s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:06<00:00,  2.02s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_job_interview_gone_wrong/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.91s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.95s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.96s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_winter_in_sweden/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.94s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.99s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.97s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_a_fishing_trip/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.90s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.90s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.92s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_team_spirit/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.90s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.91s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.94s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_brussels_blend/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:02<00:04,  2.05s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:04<00:01,  2.00s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:06<00:00,  2.01s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_rainy_football_match/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.98s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.97s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.94s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_camping_trip_gone_awry/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:02<00:04,  2.11s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.97s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.94s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_unexpected_career_change/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.87s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  2.00s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.98s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_a_missed_stop/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.88s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  1.99s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.98s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_road_trip/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.88s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:03<00:01,  2.00s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.97s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_forgetful_diver/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:02<00:04,  2.08s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:04<00:02,  2.01s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:05<00:00,  1.99s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_the_birthday_party/translated_dialogue.json


adding translations:   0%|          | 0/3 [00:00<?, ?it/s]

Beginning translation for introduction


adding translations:  33%|███▎      | 1/3 [00:01<00:03,  1.92s/it]

Translated dialogue
Beginning translation for development


adding translations:  67%|██████▋   | 2/3 [00:04<00:02,  2.10s/it]

Translated dialogue
Beginning translation for resolution


adding translations: 100%|██████████| 3/3 [00:06<00:00,  2.03s/it]

Translated dialogue





Translated dialogue uploaded to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_the_power_cut/translated_dialogue.json


In [None]:
# Review the stored translations (Anthropic-backed, per the original note) and
# write the corrected dialogue back to the same GCS path.
for story_name in tqdm(all_stories[0:1], desc="Reviewing story translations"):
    dialogue_blob = get_story_translated_dialogue_path(story_name, collection="LM1000")

    if check_blob_exists(config.GCS_PRIVATE_BUCKET, dialogue_blob):
        # Fetch the current translation.
        print(f"Downloading translated dialogue for {story_name}...")
        translated_story = read_from_gcs(config.GCS_PRIVATE_BUCKET, dialogue_blob)

        # Run the review pass quietly (verbose=False).
        print(f"Reviewing translations for {story_name}...")
        reviewed_story = review_story_dialogue_translations(
            verbose=False, story_dialogue=translated_story
        )

        # Replace the stored translation with the reviewed one.
        print(f"Uploading reviewed translations for {story_name}...")
        upload_to_gcs(
            obj=reviewed_story,
            file_name=dialogue_blob,
            bucket_name=config.GCS_PRIVATE_BUCKET,
        )
        print(f"Successfully processed {story_name}")
    else:
        # Nothing to review until the story has been translated.
        print(f"No translated dialogue found for {story_name}, skipping...")


Reviewing story translations:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading translated dialogue for story_sunset_wedding_blues...
Reviewing translations for story_sunset_wedding_blues...


Reviewing story translations:   0%|          | 0/1 [00:20<?, ?it/s]

Uploading reviewed translations for story_sunset_wedding_blues...





TypeError: upload_to_gcs() got an unexpected keyword argument 'blob_name'

## Add wiktionary links to each story utterance

In [23]:
# Hand-written sample whose Spanish lines are deliberately flawed, used to see
# what the review step changes.
from src.translation import review_story_dialogue_translations

_sample_lines = [
    # (speaker, English text, deliberately poor Spanish text)
    ("Sam", "Hello, how are you?", "Hola, como estas tu?"),  # Poor: too literal
    ("Alex", "I'm doing great, thanks!", "Estoy haciendo grande, gracias!"),  # Poor: literal translation of "doing great"
    ("Sam", "Would you like to go for a coffee?", "Te gustaría ir para un café?"),  # Poor: "para" instead of "por"
]

test_story = {
    "part1": {
        "dialogue": [
            {"speaker": who, "text": english} for who, english, spanish in _sample_lines
        ],
        "translated_dialogue": [
            {"speaker": who, "text": spanish} for who, english, spanish in _sample_lines
        ],
    }
}

# verbose=True prints each English / old / new triple so the changes are visible.
reviewed = review_story_dialogue_translations(
    verbose=True,
    target_language="spanish",
    story_dialogue=test_story,
)

English: Hello, how are you?
Old: Hola, como estas tu?
New: Hola, ¿cómo estás?

English: I'm doing great, thanks!
Old: Estoy haciendo grande, gracias!
New: Estoy muy bien, ¡gracias!

English: Would you like to go for a coffee?
Old: Te gustaría ir para un café?
New: ¿Quieres ir a tomar un café?



In [22]:
# Display the reviewed story produced by review_story_dialogue_translations.
reviewed

{'part1': {'dialogue': [{'speaker': 'Sam', 'text': 'Hello, how are you?'},
   {'speaker': 'Alex', 'text': "I'm doing great, thanks!"},
   {'speaker': 'Sam', 'text': 'Would you like to go for a coffee?'}],
  'translated_dialogue': [{'speaker': 'Sam', 'text': 'Hola, ¿cómo estás?'},
   {'speaker': 'Alex', 'text': 'Estoy muy bien, ¡gracias!'},
   {'speaker': 'Sam', 'text': '¿Quieres ir a tomar un café?'}]}}

In [26]:
# Enrich each stored translation with Wiktionary links and write it back in place.
for story_name in all_stories[0:1]:
    print(f"processing {story_name}")
    translated_file_path = get_story_translated_dialogue_path(story_name, collection="LM1000")
    has_translation = check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path)

    if has_translation:
        translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
        translated_dialogue_with_links = prepare_dialogue_with_wiktionary(translated_dialogue)
        # Overwrite the same blob, now carrying the embedded wiktionary_links.
        uploaded = upload_to_gcs(
            bucket_name=config.GCS_PRIVATE_BUCKET,
            file_name=translated_file_path,
            obj=translated_dialogue_with_links,
        )
        print(f"uploaded {story_name} : {uploaded}")
    else:
        # Links can only be added once a translation exists.
        print(f"{story_name} not yet translated")

processing story_sunset_wedding_blues
Got word link cache of size 1885 from GCS


Getting dialogue links for story_parts: 100%|██████████| 3/3 [00:23<00:00,  7.92s/it]


uploaded story_sunset_wedding_blues : gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/translated_dialogue.json


Generate audio and upload

### Generate audio and upload

### Loop through stories to generate audio

In [None]:
# Display the story names; the audio cells that follow work on all_stories[0:1].
all_stories

In [28]:
from src.audio_generation import generate_dialogue_audio_and_upload

# Generate and upload the per-utterance audio for each selected story,
# reading the dialogue from its translated-dialogue file in the private bucket.
for story_name in all_stories[0:1]:
    dialogue_blob = get_story_translated_dialogue_path(story_name, collection="LM1000")
    generate_dialogue_audio_and_upload(
        read_from_gcs(config.GCS_PRIVATE_BUCKET, dialogue_blob),
        story_name,
        overwrite=True,
    )

Processing story parts:   0%|          | 0/3 [00:00<?, ?it/s]

Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/introduction/part_0_alex.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/introduction/part_1_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/introduction/part_2_alex.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/introduction/part_3_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/introduction/part_4_alex.mp3


Processing story parts:  33%|███▎      | 1/3 [01:00<02:01, 60.57s/it]

Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/introduction/part_5_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_0_alex.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_1_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_2_alex.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_3_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_4_alex.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_5_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_6_alex.mp3


Processing story parts:  67%|██████▋   | 2/3 [02:18<01:10, 70.84s/it]

Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/part_7_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/resolution/part_0_alex.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/resolution/part_1_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/resolution/part_2_alex.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/resolution/part_3_sam.mp3




Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/resolution/part_4_alex.mp3


Processing story parts: 100%|██████████| 3/3 [03:16<00:00, 65.37s/it]

Generated and uploaded: gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/resolution/part_5_sam.mp3





### Fast Audio 
1 file for each story part

In [29]:
from src.audio_generation import generate_and_upload_fast_audio

# Each call uploads one fast mp3 per story part for the given story.
for story_name in tqdm(all_stories[0:1]):
    generate_and_upload_fast_audio(
        story_name,
        overwrite=True,
        collection="LM1000",
    )

  0%|          | 0/1 [00:00<?, ?it/s]
[A
[A
[A
Collecting utterance audio for introduction: 6it [00:00, 14.25it/s]


Generating fast audio for introduction...




Uploaded fast audio for introduction to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/introduction/fast.mp3



[A
[A
[A
[A
Collecting utterance audio for development: 8it [00:00, 11.91it/s]


Generating fast audio for development...




Uploaded fast audio for development to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/development/fast.mp3



[A
[A
[A
Collecting utterance audio for resolution: 6it [00:00, 13.91it/s]


Generating fast audio for resolution...


Processing story_sunset_wedding_blues in spanish: 100%|██████████| 3/3 [00:31<00:00, 10.48s/it]
100%|██████████| 1/1 [00:31<00:00, 31.45s/it]

Uploaded fast audio for resolution to gs://audio-language-trainer-private-content/collections/LM1000/spanish/stories/story_sunset_wedding_blues/audio/resolution/fast.mp3





## Upload images
For when we have them stored locally

In [None]:
# Upload locally stored story images: one PNG per story part, expected at
# <parent of cwd>/outputs/stories/<story_name>/<story_name>_<story_part>.png

# Resolved once up front: neither directory depends on the story being processed.
notebook_dir = Path().absolute()  # current working directory (src/notebooks per the original note)
story_dir = notebook_dir.parent / "outputs" / "stories"

for story_name in all_stories:
    # The English dialogue supplies the story parts to upload images for.
    story_file_path = get_story_dialogue_path(story_name, collection="LM1000")
    story_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, story_file_path)
    # NOTE(review): presumably a dict keyed by story part, so iteration yields the part names — confirm.
    for story_part in story_dialogue:
        image_file = story_dir / story_name / f"{story_name}_{story_part}.png"
        # Fail with the offending path so a missing image can be located immediately.
        assert image_file.exists(), (
            f"Missing image for {story_name} / {story_part}: {image_file}"
        )
        upload_story_image(image_file, story_part, story_name)

## Upload challenges

In [30]:
from src.chat import get_html_challenge_inputs, create_html_challenges
from src.gcs_storage import upload_to_gcs, get_story_challenges_path, get_story_translated_challenges_path, read_from_gcs

# Build the challenges page for each selected story from its stored challenges file.
for story_name in all_stories[0:1]:
    challenge_file_path = get_story_challenges_path(story_name, collection="LM1000")
    challenges = get_html_challenge_inputs(
        read_from_gcs(file_path=challenge_file_path, bucket_name=config.GCS_PRIVATE_BUCKET)
    )
    # create_html_challenges both creates and uploads the page (per the original note).
    chat_webpage_file = create_html_challenges(challenges, story_name=story_name)


HTML challenges created at: gs://audio-language-trainer-stories/spanish/lm1000/story_sunset_wedding_blues/challenges.html
