In [1]:
# Look up Google credentials and the associated project id via google.auth.default().
# Neither `credentials` nor `project` is referenced again in the visible cells.
from google.auth import default
credentials, project = default()

In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys
from pathlib import Path
# Put the parent of the current working directory on sys.path (only once),
# ahead of the `src.*` imports that follow in this cell.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
from dotenv import load_dotenv
load_dotenv()
from tqdm import tqdm

from src.utils import (load_json, get_first_n_items, save_json, save_text_file, load_text_file)  # noqa: E402
from src.story import upload_story_image, prepare_dialogue_with_wiktionary
from src.translation import review_story_dialogue_translations
from src.gcs_storage import check_blob_exists, read_from_gcs, upload_to_gcs, get_story_translated_dialogue_path, get_story_dialogue_path
from src.config_loader import config
# Show which target language the loaded config points at before running anything.
print(config.TARGET_LANGUAGE_NAME)
# Story collection that the cells below pass as `collection=`.
COLLECTION = "LM1000"

In [None]:
# Re-run the config loader (private method; presumably re-reads the config source — TODO confirm).
config._load_config()
# Lower-cased target language name, printed as a quick sanity check.
language_name = config.TARGET_LANGUAGE_NAME.lower()
print(language_name)
# Last expression in the cell, so its return value is shown as the cell output.
config.get_voice_models(enum_type="stories")

In [4]:
from src.gcs_storage import get_stories_from_collection

# Fetch the stories belonging to COLLECTION; the loops below iterate this
# sequence and use each item as a story name.
all_stories = get_stories_from_collection(collection=COLLECTION)

In [None]:
# Preview the stories from index 7 onward — the same slice the audio-generation loop iterates.
all_stories[7:]

## Loop all stories (text)

Translate and re-upload

In [None]:
# Translate every story's dialogue in the collection and upload the result.
from src.gcs_storage import check_blob_exists, get_story_dialogue_path, get_story_translated_dialogue_path
from src.dialogue_generation import translate_and_upload_dialogue
from src.translation import review_story_dialogue_translations

# Set to True to leave stories that already have a translated dialogue in the
# bucket untouched. False keeps the previous behaviour of this cell: every
# story is passed to translate_and_upload_dialogue again, whether or not a
# translated dialogue already exists.
SKIP_ALREADY_TRANSLATED = False

for story_name in all_stories:
    # Source dialogue and the path its translation is stored under.
    story_file_path = get_story_dialogue_path(story_name, collection=COLLECTION)
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    if check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        print(f"{story_name} already translated")
        if SKIP_ALREADY_TRANSLATED:
            continue
    story_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, story_file_path)
    translate_and_upload_dialogue(story_dialogue, story_name, collection=COLLECTION)

## Review translations with LLM

In [None]:
# LLM review pass over each story's translated dialogue. The reviewed dialogue
# is written back to the same blob path it was read from.
for story_name in all_stories:
    print(f"processing {story_name}")
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    # Same guard the Wiktionary cell uses: a story with no translated dialogue
    # is reported and skipped instead of being read.
    if not check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        print(f"{story_name} not yet translated")
        continue
    translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
    reviewed_dialogue = review_story_dialogue_translations(translated_dialogue)
    upload_to_gcs(obj=reviewed_dialogue, bucket_name=config.GCS_PRIVATE_BUCKET, file_name=translated_file_path)

## Add Wiktionary links to each story utterance

In [None]:
# For every story that has a translated dialogue, run it through
# prepare_dialogue_with_wiktionary and store the result under the same path.
for story_name in all_stories:
    dialogue_blob = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    if check_blob_exists(config.GCS_PRIVATE_BUCKET, dialogue_blob):
        linked_dialogue = prepare_dialogue_with_wiktionary(
            read_from_gcs(config.GCS_PRIVATE_BUCKET, dialogue_blob)
        )
        # Re-upload the dialogue, now with the embedded Wiktionary links.
        upload_to_gcs(
            obj=linked_dialogue,
            bucket_name=config.GCS_PRIVATE_BUCKET,
            file_name=dialogue_blob,
        )
    else:
        print(f"{story_name} not yet translated")


## Generate audio and upload

### Generate audio and upload

### Loop through stories to generate audio

In [None]:
# Display the full story list; the audio loop in the next cell starts at index 7 of it.
all_stories

In [None]:
from src.audio_generation import generate_dialogue_audio_and_upload

# Index of the first story to generate audio for. 7 is the value this cell was
# previously hard-coded to; set it to 0 to process the whole collection.
AUDIO_START_INDEX = 7

for story_name in all_stories[AUDIO_START_INDEX:]:
    translated_file_path = get_story_translated_dialogue_path(story_name, collection=COLLECTION)
    # Same guard the Wiktionary cell uses: a story with no translated dialogue
    # is reported and skipped instead of being read.
    if not check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        print(f"{story_name} not yet translated")
        continue

    translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, translated_file_path)
    # overwrite=True is passed through unchanged (presumably regenerates audio
    # that already exists — confirm against generate_dialogue_audio_and_upload).
    generate_dialogue_audio_and_upload(translated_dialogue, story_name, collection=COLLECTION, overwrite=True)

### Fast Audio 
One fast-audio MP3 file is generated for each story part.

In [None]:
from src.audio_generation import generate_and_upload_fast_audio

# Keyword arguments shared by every call below.
fast_audio_options = dict(collection=COLLECTION, overwrite=True)

# Per the original note, each call uploads one fast MP3 file per story part.
# tqdm reports progress over the stories.
story_progress = tqdm(all_stories)
for story_name in story_progress:
    generate_and_upload_fast_audio(story_name, **fast_audio_options)

## Upload challenges

In [None]:
from src.chat import get_html_challenge_inputs, create_html_challenges
from src.gcs_storage import upload_to_gcs, get_story_challenges_path, get_story_translated_challenges_path, read_from_gcs

# Build the HTML challenges page for each story from its stored scenario data.
for story_name in all_stories:
    challenge_blob = get_story_challenges_path(story_name, collection=COLLECTION)
    scenario_dicts = read_from_gcs(bucket_name=config.GCS_PRIVATE_BUCKET, file_path=challenge_blob)
    # Per the original note, create_html_challenges both creates and uploads the page.
    chat_webpage_file = create_html_challenges(
        get_html_challenge_inputs(scenario_dicts),
        story_name=story_name,
        collection=COLLECTION,
    )


In [10]:
# Repeats the COLLECTION assignment from the setup cell near the top of the notebook (same value).
COLLECTION="LM1000"