In [None]:
# Resolve Google Cloud Application Default Credentials for this session.
# google.auth.default() returns a (credentials, project_id) tuple.
from google.auth import default
credentials, project = default()

In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys
from pathlib import Path
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from dotenv import load_dotenv
load_dotenv()

from src.utils import (load_json, get_first_n_items, list_story_folders, save_json, save_text_file, load_text_file, clean_filename, read_from_gcs, upload_to_gcs)  # noqa: E402
from src.config_loader import config
config.TARGET_LANGUAGE_NAME

# Story Data

## Storage overview

We want to save each story's English dialogue as `dialogue.json`.

Translations are then stored per `language_name`, and each translated phrase is stored as its own audio clip. We might want higher-quality audio for the stories, e.g. a Chirp 3 voice.

```raw
LM1000/stories/story_murder_mystery/dialogue.json
LM1000/stories/story_murder_mystery/dialogue/french/translated_dialogue.json
LM1000/stories/story_murder_mystery/images/introduction.png etc
LM1000/stories/story_murder_mystery/audio/french/introduction/part_0_sam.mp3
LM1000/stories/story_murder_mystery/audio/french/introduction/part_1_alex.mp3
```


This layout lets us quickly retrieve a phrase from the bucket name and the phrase key, and modify individual phrases later if they need correcting.

In [None]:
# Lower-cased target language name; the same value is interpolated into the
# GCS blob paths built further down in this notebook.
language_name = config.TARGET_LANGUAGE_NAME.lower()
print(language_name)
# Display whatever config.get_voice_models() returns as the cell output.
# NOTE(review): presumably the TTS voices for the target language — confirm in src.config_loader.
config.get_voice_models()

In [None]:
# Local data locations, resolved relative to the current working directory.
# NOTE(review): the original comment said this directory is src/notebooks — confirm,
# since everything below is addressed relative to its parent.
notebook_dir = Path.cwd()
phrase_dir = notebook_dir.parent.joinpath("data", "phrases")  # text files of phrases
story_dir = notebook_dir.parent.joinpath("outputs", "stories")  # generated stories


In [None]:
# Story names; each one is used below both as a sub-folder of story_dir
# and as a component of the GCS blob paths.
all_stories = list_story_folders()

In [None]:
# Work on a single story first: the first entry of all_stories.
story_name = all_stories[0]
# The dialogue JSON is named after its folder: <story_dir>/<story_name>/<story_name>.json
story_dialogue_file = story_dir.joinpath(story_name, f"{story_name}.json")
story_dialogue = load_json(story_dialogue_file)

Upload the base dialogue

In [None]:
from src.dialogue_generation import (
    translate_and_upload_dialogue,
    upload_dialogue_to_gcs,
)

# Upload the untranslated (base) dialogue of the selected story under the "LM1000" collection.
upload_dialogue_to_gcs(story_dialogue, story_name, collection="LM1000")

Translate and upload the story

In [None]:
# Translate the selected story's dialogue and upload the result under the "LM1000" collection.
# NOTE(review): the target language is not passed here — presumably taken from config; confirm.
translate_and_upload_dialogue(story_dialogue, story_name, collection="LM1000")

## Loop all stories (text)

In [None]:
# Initial upload of the English base dialogue, one story at a time.
# NOTE(review): the [8:] slice skips the first eight stories — presumably they were
# uploaded in an earlier run; use all_stories to (re)upload everything.
for story_name in all_stories[8:]:
    # Local file layout: <story_dir>/<story_name>/<story_name>.json
    story_dialogue_file = story_dir.joinpath(story_name, f"{story_name}.json")
    story_dialogue = load_json(story_dialogue_file)
    upload_dialogue_to_gcs(story_dialogue, story_name, collection="LM1000")

In [None]:
# Translate every story whose translation is not yet present in the private bucket.
from src.utils import check_blob_exists

for story_name in all_stories:
    # Blob holding the base dialogue, and the blob the translation is expected at.
    story_file_path = f"LM1000/stories/{story_name}/dialogue.json"
    translated_file_path = f"LM1000/stories/{story_name}/dialogue/{language_name}/translated_dialogue.json"

    if check_blob_exists(config.GCS_PRIVATE_BUCKET, translated_file_path):
        # Skip stories already translated so that re-running the cell is cheap.
        print(f"{story_name} already translated")
    else:
        story_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, story_file_path)
        translate_and_upload_dialogue(story_dialogue, story_name, collection="LM1000")

Load the translated dialogue (the input for audio generation)

In [None]:
# get the translated dialogue
# NOTE(review): story_name is whatever the most recently executed cell left it as —
# after the loops above it is the last entry of all_stories, not all_stories[0].
file_path = f"LM1000/stories/{story_name}/dialogue/{language_name}/translated_dialogue.json"
translated_dialogue = read_from_gcs(config.GCS_PRIVATE_BUCKET, file_path)

In [None]:
# Inspect the translated dialogue read from GCS (displayed as the cell output).
translated_dialogue

### Generate audio and upload

In [None]:
# NOTE(review): repeats the inspection cell a few cells earlier; one of the two can probably be removed.
translated_dialogue

In [None]:
# Re-display the configured voice models before generating audio
# (same call as in the language set-up cell near the top of the notebook).
config.get_voice_models()

In [None]:
from src.audio_generation import generate_dialogue_audio_and_upload

# Generate audio for the translated dialogue of the current story_name and upload it.
# NOTE(review): no collection or language argument is passed — presumably the helper
# falls back to defaults / config values; confirm in src.audio_generation.
generate_dialogue_audio_and_upload(translated_dialogue, story_name)

### Loop through stories to generate audio