# Import

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Virtual-environment site-packages directory to put at the front of the
# import path.
# NOTE(review): this is a hard-coded absolute path that only exists on the
# original author's machine; adjust it (or remove this cell) elsewhere.
VENV_SITE_PACKAGES = "Y:/python scripts/audio-language-trainer/.venv/lib/site-packages"

# Only touch sys.path when the directory really exists, so the notebook does
# not carry a dead entry on machines without this drive/layout.
if os.path.isdir(VENV_SITE_PACKAGES):
    # Drop any earlier copy first: re-running the cell must not stack
    # duplicate entries, but the directory should keep top priority.
    while VENV_SITE_PACKAGES in sys.path:
        sys.path.remove(VENV_SITE_PACKAGES)
    sys.path.insert(0, VENV_SITE_PACKAGES)

In [3]:
import os
import pickle
import sys

from dotenv import load_dotenv

# Make the 'src' package importable: append the parent of the current working
# directory (resolved to an absolute path) to sys.path, unless already present.
module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)
# Load environment variables from .env file.
# This runs before the src imports below on purpose: importing them has
# side effects (the cell output shows the config file being located and loaded).
# NOTE(review): presumably some src modules read environment variables at
# import time — confirm before reordering.
load_dotenv()
from src.anki_tools import export_to_anki
from src.config_loader import config
from src.generate import (
    add_audio,
    add_practice_phrases,
    add_translations,
    create_album_files,
    create_story_plan_and_dialogue,
)
from src.utils import (
    create_html_story,
    generate_story_image,
    load_json,
    save_defaultdict,
)

Found config file at: Y:\Python Scripts\audio-language-trainer\src\config.json
Language name: Swedish determined from code sv
Successfully loaded config from: Y:\Python Scripts\audio-language-trainer\src\config.json
Multiple country codes available for en: en-AU, en-GB, en-IN, en-US
FFmpeg path added to system PATH: C:\Program Files\ffmpeg-7.0-essentials_build\bin


## Set up Google Cloud credentials and prerequisites
You will need a Google Cloud project with the following APIs enabled:
* Text-to-Speech
* Translate
* Vertex AI, with the following models enabled from the Model Garden:
    * Claude 3.5 Sonnet (Anthropic)
    * Imagen 3 (Google)

Add your GOOGLE_PROJECT_ID to the .env file.

Edit src/config.json to set your target language.


In [4]:
from google.auth import default

# Resolve Google Application Default Credentials from the environment.
# default() returns a (credentials, project_id) pair; the project ID may be
# None when it cannot be determined.
credentials, project = default()

### Set up story parameters

In [5]:
# Free-text story title; this phrase will be used to prompt the story creation
# and the cover image creation.
STORY_NAME = "lost in stockholm"

# Filesystem-friendly variant of the title: every space becomes an underscore,
# which keeps directory and file names simple.
STORY_NAME_CLEAN = "_".join(STORY_NAME.split(" "))

# Everything produced for this story is written under ../outputs/<clean name>/.
OUTPUT_DIR = "../outputs/" + STORY_NAME_CLEAN
STORY_DATA_PATH = OUTPUT_DIR + "/story_data_" + STORY_NAME_CLEAN + ".json"

# Make sure the output folder is present (no error if it already exists).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Re-read the configuration, then report which target language is active.
config._load_config()
language_name = config.get_language_name()
print(f"Your target language is {language_name}")

Language name: Swedish determined from code sv
Successfully loaded config from: Y:\Python Scripts\audio-language-trainer\src\config.json
Multiple country codes available for en: en-AU, en-GB, en-IN, en-US
Your target language is Swedish


### Generate story content and outputs

In [9]:
# NOTE(review): this import sits mid-notebook while the other src.utils names
# are imported in the top import cell; its execution count (9) is also later
# than the cell below that uses it (7), so the saved run was out of order.
from src.utils import anthropic_generate, extract_json_from_llm_response

In [7]:
# Smoke test: send a trivial prompt through anthropic_generate and keep the
# returned response for the next cell. The recorded output shows the call
# logging a 20-second sleep.
response = anthropic_generate(prompt="provide a test response in JSON")

Function that called this one: <module>. Sleeping for 20 seconds


In [10]:
# Extract the JSON payload from the response obtained above; as the last
# expression in the cell, the parsed value is displayed as the cell output.
extract_json_from_llm_response(response)

{'status': 'success',
 'code': 200,
 'data': {'id': 12345,
  'name': 'Test User',
  'email': 'testuser@example.com',
  'age': 30,
  'is_active': True,
  'interests': ['programming', 'reading', 'hiking'],
  'address': {'street': '123 Main St',
   'city': 'Anytown',
   'state': 'CA',
   'zip': '12345'}},
 'metadata': {'timestamp': '2023-04-15T14:30:00Z', 'version': '1.0.0'}}

In [None]:
# Generate the story plan and dialogue from STORY_NAME, then save the result
# to STORY_DATA_PATH.
# NOTE(review): this cell has no execution count in the saved notebook, yet
# the following cells need story_data_dict — run it (or load a saved file)
# before continuing.
story_data_dict = create_story_plan_and_dialogue(STORY_NAME, output_dir=OUTPUT_DIR)
save_defaultdict(story_data_dict, STORY_DATA_PATH)

In [13]:
# Add practice phrases for each story part (the output logs one line per
# part), then re-save to STORY_DATA_PATH.
# NOTE(review): story_data_dict is rebound from itself, so re-running this
# cell feeds already-processed data back in — confirm that is safe.
story_data_dict = add_practice_phrases(story_data_dict)
save_defaultdict(story_data_dict, STORY_DATA_PATH)

Function that called this one: generate_practice_phrases_from_dialogue. Sleeping for 20 seconds
Added practice phrases for exposition

Function that called this one: generate_practice_phrases_from_dialogue. Sleeping for 20 seconds


Waiting for API cooldown: 100%|[34m████████████[0m| 12/12 [00:12<00:00,  1.00s/it][0m


Added practice phrases for rising_action

Function that called this one: generate_practice_phrases_from_dialogue. Sleeping for 20 seconds


Waiting for API cooldown: 100%|[34m████████████[0m| 11/11 [00:11<00:00,  1.00s/it][0m


Added practice phrases for climax

Function that called this one: generate_practice_phrases_from_dialogue. Sleeping for 20 seconds


Waiting for API cooldown: 100%|[34m████████████[0m| 12/12 [00:12<00:00,  1.00s/it][0m


Added practice phrases for falling_action

Function that called this one: generate_practice_phrases_from_dialogue. Sleeping for 20 seconds


Waiting for API cooldown: 100%|[34m████████████[0m| 11/11 [00:11<00:00,  1.00s/it][0m


Added practice phrases for resolution



In [14]:
# Translate the dialogue and phrases of every story part, and save the result
# to STORY_DATA_PATH before starting the audio step (about 7 minutes in the
# recorded run).
story_data_dict = add_translations(story_data_dict)
save_defaultdict(story_data_dict, STORY_DATA_PATH)
# add_audio is awaited directly: top-level await works inside a notebook cell.
story_data_dict = await add_audio(story_data_dict)


# Saving as pickle file to retain audio data.
# The file name is STORY_DATA_PATH with ".pkl" appended (so it ends ".json.pkl").
with open(f"{STORY_DATA_PATH}.pkl", "wb") as file:
    # Converted to a plain dict before pickling.
    pickle.dump(dict(story_data_dict), file)

# Generate the cover artwork; the returned value is written as bytes below.
# NOTE(review): the STORY_NAME comment says that phrase prompts the cover
# image, but the underscored STORY_NAME_CLEAN is passed here — confirm which
# one is intended.
IMAGE_DATA = generate_story_image(STORY_NAME_CLEAN)

with open(f"{OUTPUT_DIR}/cover_artwork.jpg", "wb") as f:
    f.write(IMAGE_DATA)

# Build the album files in OUTPUT_DIR (the recorded output shows M4A tracks
# being saved), passing the cover image data along.
create_album_files(story_data_dict, IMAGE_DATA, OUTPUT_DIR, STORY_NAME_CLEAN)

adding translations:   0%|                                                                       | 0/5 [00:00<?, ?it/s]

Beginning translation for exposition
Translated dialogue


adding translations:  20%|████████████▌                                                  | 1/5 [00:00<00:02,  1.85it/s]

Translated phrases

Beginning translation for rising_action
Translated dialogue


adding translations:  40%|█████████████████████████▏                                     | 2/5 [00:01<00:01,  1.73it/s]

Translated phrases

Beginning translation for climax
Translated dialogue


adding translations:  60%|█████████████████████████████████████▊                         | 3/5 [00:01<00:01,  1.72it/s]

Translated phrases

Beginning translation for falling_action
Translated dialogue


adding translations:  80%|██████████████████████████████████████████████████▍            | 4/5 [00:02<00:00,  1.74it/s]

Translated phrases

Beginning translation for resolution
Translated dialogue


adding translations: 100%|███████████████████████████████████████████████████████████████| 5/5 [00:02<00:00,  1.75it/s]


Translated phrases



adding audio:   0%|                                                                              | 0/5 [00:00<?, ?it/s]

Beginning text-to-speech for exposition
Text-to-speech for dialogue done


adding audio:  20%|██████████████                                                        | 1/5 [01:34<06:16, 94.15s/it]

Text-to-speech for phrases done

Beginning text-to-speech for rising_action
Text-to-speech for dialogue done


adding audio:  40%|████████████████████████████                                          | 2/5 [03:03<04:33, 91.23s/it]

Text-to-speech for phrases done

Beginning text-to-speech for climax
Text-to-speech for dialogue done


adding audio:  60%|██████████████████████████████████████████                            | 3/5 [04:28<02:56, 88.42s/it]

Text-to-speech for phrases done

Beginning text-to-speech for falling_action
Text-to-speech for dialogue done


adding audio:  80%|████████████████████████████████████████████████████████              | 4/5 [05:46<01:24, 84.54s/it]

Text-to-speech for phrases done

Beginning text-to-speech for resolution
Text-to-speech for dialogue done


adding audio: 100%|██████████████████████████████████████████████████████████████████████| 5/5 [07:07<00:00, 85.51s/it]

Text-to-speech for phrases done




creating album:  20%|█████████████▌                                                      | 1/5 [00:08<00:34,  8.61s/it]

Saving M4A file track number 1


creating album:  40%|███████████████████████████▏                                        | 2/5 [00:17<00:26,  8.72s/it]

Saving M4A file track number 2


creating album:  60%|████████████████████████████████████████▊                           | 3/5 [00:25<00:16,  8.26s/it]

Saving M4A file track number 3


creating album:  80%|██████████████████████████████████████████████████████▍             | 4/5 [00:33<00:08,  8.50s/it]

Saving M4A file track number 4


creating album: 100%|████████████████████████████████████████████████████████████████████| 5/5 [00:43<00:00,  8.72s/it]

Saving M4A file track number 5





Saving M4A file track number 6


In [16]:
# Render the story as an HTML page at OUTPUT_DIR/<clean name>.html, using the
# StoryViewer.js component from ../src and the human-readable story name as
# the title.
create_html_story(
    story_data_dict,
    f"{OUTPUT_DIR}/{STORY_NAME_CLEAN}.html",
    component_path="../src/StoryViewer.js",
    title=STORY_NAME,
)

# Anki export is currently disabled; uncomment the line below to run it.
# export_to_anki(story_data_dict, OUTPUT_DIR, STORY_NAME_CLEAN)

HTML story created at: ../outputs/lost_in_stockholm/lost_in_stockholm.html


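### If picking up from a saved file:

The next two cells are alternatives that both assign `story_data_dict`: the pickle file retains the audio data, while the JSON file was saved before audio was added.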

In [10]:
# Restore the pickled story data (the copy saved to retain audio data).
# NOTE: pickle.load can execute arbitrary code while loading — only open
# pickle files you created yourself.
with open(f"{STORY_DATA_PATH}.pkl", "rb") as file:
    story_data_dict = pickle.load(file)

In [11]:
# Load the story data from the JSON file at STORY_DATA_PATH instead.
# NOTE(review): this rebinds story_data_dict, replacing whatever the pickle
# cell above loaded.
story_data_dict = load_json(STORY_DATA_PATH)

In [12]:
# Inspect the loaded data. NOTE(review): this displays the entire nested
# dict, including the long generation prompts; showing only the keys would
# keep the output readable.
story_data_dict

{'exposition': {'dialogue': [{'speaker': 'Alex',
    'text': "Hello, Sam! Isn't Stockholm great? I'm so excited to explore the city!"},
   {'speaker': 'Sam',
    'text': "It's amazing! We'll learn so much Swedish here, won't we?"},
   {'speaker': 'Alex',
    'text': 'Absolutely! What should we do first? Visit the port or walk through the old town?'},
   {'speaker': 'Sam',
    'text': "Let's walk through the old town. How do you spell it in Swedish?"},
   {'speaker': 'Alex',
    'text': "I think it's Gamla Stan. Why don't we ask that person over there?"},
   {'speaker': 'Sam',
    'text': 'Great idea! Excuse me, hello! Could you help us, please?'}],
  'dialogue_generation_prompt': 'Create a complete multi-part dialogue for language learners following these guidelines:\n\n    1. Create separate dialogues for each part of this story plan:\n    exposition: Alex and Sam, two American students, arrive in Stockholm for a language exchange program. They\'re excited to explore the city and prac