# Import

In [5]:
%load_ext autoreload

In [6]:
%autoreload 2
import os
from dotenv import load_dotenv
import sys
import os
import networkx as nx
import ipywidgets as widgets
from collections import defaultdict
from tqdm import tqdm

# Make the folder one level above the working directory (the one that holds
# 'src') importable, so the `src.*` imports in this cell resolve.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)
# Load environment variables from the .env file
# (the setup notes in this notebook ask you to put GOOGLE_PROJECT_ID there)
load_dotenv()

from src.dialogue_generation import get_vocab_from_dialogue, update_vocab_usage
from src.dialogue_generation import generate_story_plan, generate_dialogue_prompt, generate_dialogue, generate_recap
from src.dialogue_generation import get_least_used_words, add_usage_to_words
from src.audio_generation import  text_to_speech, play_audio, generate_audio_from_dialogue, generate_normal_and_fast_audio, generate_translated_phrase_audio, join_audio_segments, export_audio, async_process_phrases
from src.phrase import correct_phrases, generate_practice_phrases_from_dialogue
from src.initialise import initialise_usage_data
from src.utils import save_json, convert_defaultdict, save_defaultdict, load_json, create_pdf_booklet
from src.translation import translate_dialogue, translate_phrases

# JSON file the story data is saved to and reloaded from between the stages of this notebook
STORY_DATA_PATH = "../outputs/story_data.json"


Searching for config.json...
Checking: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\notebooks\config.json
Checking: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\src\config.json
Found config file at: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\src\config.json
Successfully loaded config from: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\src\config.json
Multiple country codes available for en: en-AU, en-GB, en-IN, en-US
Config loader initialized.
Config file location: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\src\config.json
Current working directory: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\notebooks
Searching for config.json...
Checking: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\notebooks\config.json
Checking: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\src\config.json
Found config file at: c:\Users\i5\Documents\Python Scripts\audio-language-trainer\src\con



## Setup Google Cloud credentials and prerequisites
You will need a Google Project with the following APIs enabled:
* Text to Speech
* Translate
* Vertex AI with the following Anthropic models enabled (from the model garden)
    * Sonnet 3.5
    * Haiku
* Add your GOOGLE_PROJECT_ID to the .env file

You should alter src/config.json which contains your target language.


In [2]:
# Resolve Google credentials from the environment; default() is unpacked here
# into the credentials object and the associated project.
from google.auth import default
credentials, project = default()

# Audio Language Trainer Workflow

The aim of this project is to create audio material for you to practise a foreign language. It needs to be engaging and be tailored to words you want to practise. 

The overall steps we follow are:

1. Create an outline story plan based on a theme you select (e.g. 'an adventure', 'a holiday in Rome'). An LLM produces a story plan following a typical story arc (exposition, rising action, climax, falling action, resolution). This ensures an engaging plot.
2. Flesh out the story using your practice vocabulary and grammatical concepts. Vocab and concepts are sampled from lists you provide in the 'data' folder (vocab_usage.json and grammar_concepts_usage.json), with sampling being skewed towards words you haven't heard yet. The output here is a dialogue between two people (Sam and Alex).

Recaps are generated between each story part so when the LLM generates the next dialogue it logically continues from the previous one.

3. The dialogue is broken up into shorter practice phrases via a 'language graph' concept, so we give you not just the long-form dialogue to listen to and practise with, but also smaller, mixed-up phrases based on the vocab in the story, starting small and building up to more complex phrases.
4. Your vocab list is updated based on the produced dialogue.
5. The smaller phrases and main dialogue are translated into your target language and converted to speech.
6. Research shows that listening to double-speed audio (of words you already know) can help with your listening comprehension for a foreign language (it helps the brain with the ability to separate distinct words). We therefore create a fast version of the dialogue for listening practice.
7. The audio files are stitched together to create an MP3 file for each part in the story (there are 5 parts to the story). The stages for each audio lesson are:
* dialogue in the target language
* practice phrases of the form 'how do you say "practice phrase" in "target language"?', followed by a pause (where you speak in the foreign language); then the correct translation is played twice, first fast, then slow.
* repeat of the dialogue in the target language so you can satisfy yourself you understand it properly
* 12 repeated playings of the fast version of the dialogue to improve your listening comprehension.

The intent is then you would listen to the next audio lesson in the story.


## Setup your vocab and grammatical concepts
You should populate or edit
* known_vocab_list.json 
* grammar_concepts.json

### Initialise the vocab and grammar counters
This creates vocab_usage.json (setting all values to 0) and grammar_concepts_usage.json (setting all values to 'true' and counts to 0)

You can edit these files to limit which words and concepts you are exposed to.

In [4]:
initialise_usage_data(overwrite=False) # overwrite=False stops you wiping your usage data if it already exists

Data saved to ../data/grammar_concepts_usage.json
Data saved to ../data/vocab_usage.json


# Begin Lesson Generation

## Create a story plan

In [53]:
# Choose the words the story should be built around: 10 verbs and 30 vocab
# items (by its name, get_least_used_words favours the least-practised ones).
verbs_for_story = get_least_used_words("verbs", 10)
vocab_for_story = get_least_used_words("vocab", 30)

# Ask the LLM for a story plan on the chosen theme.
# test=True returns pre-canned responses instead, avoiding LLM costs.
story_plan = generate_story_plan(
    story_guide="a mystery",
    verb_list=verbs_for_story,
    vocab_list=vocab_for_story,
    test=False,
)

Create a brief story plan (story guide: a mystery) for a language learning dialogue series.The story should be simple enough for beginners to follow but interesting enough to maintain engagement.Use the following structure:
exposition: Introduce the main characters (Alex and Sam) and setting rising_action: Present a challenge or conflict climax: The turning point where the conflict reaches its peak falling_action: Events following the climax resolution: The conclusion of the story
Keep each part of the story plan to 1-2 sentences. The entire plan should be no more than 200 words.
You should consider the following verbs and vocab list to write the plan: verbs: (ask, drink, buy, went, dance, come, shave, play, shall, recognise), vocab:(funny, which, his, day, ourselves, sure, hour, she, lot, angry, match, about, teacher, cold, theatre, theatre, shop, month, anything, Swedish, entire, big, coffee, food, ourselves, sick, television, crown, present, tea).
Output the story plan as a JSON obj

{'exposition': "Alex and Sam are two friends who meet at a coffee shop. They're discussing their Swedish teacher, who has been acting strangely lately.",
 'rising_action': "The next day, their teacher doesn't show up for class. Alex and Sam decide to investigate, asking other students if they've seen her. They buy a funny hat as a disguise and go to the teacher's favorite tea shop.",
 'climax': "At the shop, they recognise their teacher wearing a crown and speaking in code. She doesn't seem to know them, which makes Alex and Sam sure something is wrong.",
 'falling_action': 'They follow her to a theatre, where they see her hand over an envelope to a mysterious person. Alex and Sam dance closer to overhear the conversation, learning their teacher is part of a secret language club.',
 'resolution': 'The friends confront their teacher, who explains the club is planning a surprise language festival for the entire class. Alex and Sam laugh about their wild imagination and help organize the 

In [49]:
# Attach usage information to the chosen words; the results are passed to
# generate_dialogue_prompt as verb_usage_str / vocab_usage_str when the dialogue is created.
verbs_for_story_usage = add_usage_to_words(verbs_for_story, "verbs")
vocab_for_story_usage = add_usage_to_words(vocab_for_story, "vocab")

## Create all dialogue

1. Create the dialogue LLM prompt based on the story part
2. LLM generates dialogue
3. LLM generates recap
4. move to next story part and repeat


In [63]:
# Guard for the paid LLM calls in this cell; nothing below runs when it is False.
PAY_FOR_LLM = True

if PAY_FOR_LLM:
    # story part -> {"dialogue_generation_prompt", "dialogue", "recap"}
    story_data_dict = defaultdict(lambda: defaultdict(str))

    # Each prompt receives the recap of the previous part so the story continues logically.
    recap = "This is the beginning of the story."

    for story_part, story_part_outline in story_plan.items():
        prompt = generate_dialogue_prompt(
            story_part=story_part,
            story_part_outline=story_part_outline,
            last_recap=recap,
            verb_usage_str=verbs_for_story_usage,
            vocab_usage_str=vocab_for_story_usage,
            verb_use_count=5,
            vocab_use_count=10,
            grammar_concept_count=5,
            grammar_use_count=3,
        )
        dialogue = generate_dialogue(prompt)

        # Record the vocab this dialogue used, then rebuild the usage info so
        # the prompt for the next story part is generated from fresh values.
        vocab_used = get_vocab_from_dialogue(dialogue)
        update_vocab_usage(vocab_used)
        verbs_for_story_usage = add_usage_to_words(verbs_for_story, "verbs")
        vocab_for_story_usage = add_usage_to_words(vocab_for_story, "vocab")

        recap = generate_recap(dialogue, test=False)

        part_data = story_data_dict[story_part]
        part_data["dialogue_generation_prompt"] = prompt
        part_data["dialogue"] = dialogue
        part_data["recap"] = recap


Data saved to ../data/vocab_usage.json
Data saved to ../data/vocab_usage.json
Data saved to ../data/vocab_usage.json
Data saved to ../data/vocab_usage.json
Data saved to ../data/vocab_usage.json


In [64]:
# Checkpoint: write the prompts, dialogues and recaps to STORY_DATA_PATH
save_defaultdict(story_data_dict, STORY_DATA_PATH)


Data saved to ../outputs/story_data.json


### Build phrases from dialogue

Here we:
1. Break up the dialogue into separate sentences. For this bit we don't care who the speaker is, we just want to create different phrases of different lengths and combinations based on the vocab in the dialogue
2. We use another LLM call to do this, with some one-shot learning

In [65]:
# Rebind story_data_dict to the contents of the saved story file, so the
# following cells work from what is on disk.
story_data_dict = load_json(STORY_DATA_PATH)

In [66]:
# For every story part, derive practice phrases from its dialogue and keep
# them next to the dialogue under "corrected_phrase_list".
for story_part, part_data in story_data_dict.items():
    dialogue = part_data["dialogue"]
    part_data["corrected_phrase_list"] = generate_practice_phrases_from_dialogue(dialogue)

In [67]:
# Checkpoint: write the story data, now including the practice phrases, to STORY_DATA_PATH
save_defaultdict(story_data_dict, STORY_DATA_PATH)

Data saved to ../outputs/story_data.json


### Translate dialogue and phrases

In [68]:
# Guard for the translation calls in this cell; nothing below runs when it is False.
PAY_FOR_TRANSLATE_API = True

if PAY_FOR_TRANSLATE_API:
    for story_part in tqdm(story_data_dict):
        part_data = story_data_dict[story_part]

        # Translate the full dialogue first...
        dialogue = part_data["dialogue"]
        translated_dialogue = translate_dialogue(dialogue)

        # ...then the practice phrases built from it.
        corrected_phrase_list = part_data["corrected_phrase_list"]
        translated_phrase_list = translate_phrases(corrected_phrase_list)

        # Store both translations alongside the originals for this story part.
        part_data["translated_dialogue"] = translated_dialogue
        part_data["translated_phrase_list"] = translated_phrase_list


100%|██████████| 5/5 [00:44<00:00,  9.00s/it]


In [70]:
# Checkpoint: write the story data, now including the translations, to STORY_DATA_PATH
save_defaultdict(story_data_dict, STORY_DATA_PATH)

Data saved to ../outputs/story_data.json


## Generate Audio Lesson

The steps here are
1. The target language dialogue at normal speed
2. Each corrected and translated phrase in the form english - target fast - target slow
3. Each dialogue utterance in the form english - target fast - target slow
4. The 2 x sped up target language dialogue

In [18]:
%autoawait
PAY_FOR_TEXT_TO_SPEECH = True

audio_lessons = []
dialogues =[]

story_data_dict =load_json(STORY_DATA_PATH)
if PAY_FOR_TEXT_TO_SPEECH:
    for story_part in tqdm(story_data_dict):

        single_audio_lesson = []
        single_audio_lesson_filename = story_part + ".mp3"
        translated_dialogue_audio_segments = generate_audio_from_dialogue(story_data_dict[story_part]["translated_dialogue"])
        normal_translated_clip, fast_translated_clips = generate_normal_and_fast_audio(translated_dialogue_audio_segments)
        dialogues.append(normal_translated_clip)
        single_audio_lesson.append(normal_translated_clip)

        #now do phrases asynchronoulsy (still unsure if Google API allows this, not getting huge speed up)
        translated_phrases = story_data_dict[story_part]["translated_phrase_list"]
        tranlsated_phrases_audio = await async_process_phrases(translated_phrases)
        single_audio_lesson.extend(tranlsated_phrases_audio)

        #now add fast bit at the end
        single_audio_lesson.append(fast_translated_clips)
        single_audio_lesson.append(normal_translated_clip)
        single_audio_lesson = join_audio_segments(single_audio_lesson)
        audio_lessons.append(single_audio_lesson)
        
        export_audio(single_audio_lesson, f"../outputs/{single_audio_lesson_filename}")
    
    full_lesson = join_audio_segments(audio_lessons, gap_ms=3000)
    all_dialogues = join_audio_segments(dialogues, gap_ms=3000)
    full_lesson = join_audio_segments([full_lesson, all_dialogues], gap_ms=3000)
    export_audio(full_lesson, filename="../outputs/full_lesson.mp3")

IPython autoawait is `on`, and set to use `asyncio`


100%|██████████| 7/7 [00:12<00:00,  1.79s/it]
100%|██████████| 7/7 [00:12<00:00,  1.80s/it]]
100%|██████████| 8/8 [00:14<00:00,  1.76s/it]]
100%|██████████| 7/7 [00:12<00:00,  1.75s/it]]
100%|██████████| 8/8 [00:13<00:00,  1.70s/it]]
100%|██████████| 5/5 [15:09<00:00, 181.95s/it]


## Create PDF Booklet
So you can see the spelling of the phrases and dialogue

In [19]:
# Build the booklet from the story data; the PDF is written to the outputs folder
create_pdf_booklet(story_data_dict, output_filename="../outputs/story_booklet.pdf")
