In [6]:
# Enable IPython's autoreload extension in mode 2, so imported modules are reloaded
# before each cell runs and edits to the project's .py files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Make the directory that contains 'src' importable from this notebook.
import os
import sys

# '..' (relative to the working directory) is the parent directory of 'src'; store it as an absolute path.
module_path = os.path.abspath('..')
# Append only when absent, so re-running the cell never adds a duplicate entry.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Longman phrase and anki deck generation
To make the core vocabulary defined in the Longman Communication vocab lists more memorable, we take the vocab and use an LLM
to generate phrases that use it.

## Longman 1000, 2000 and 3000 already provided
English phrases for the Longman vocab have already been created and can be found in the 'data' folder.

In [None]:
from src.utils import get_longman_verb_vocab_dict
from src.phrase import generate_phrases_from_vocab_dict

# Path to the Longman Communication 3000 word list (JSON), relative to this notebook.
file_path = '../data/longman-communication-3000.json'
# The second argument selects the frequency band: S1 = 1st 1000 words used in Speech; options are S1-3 and W1-3.
# NOTE(review): "S3" is selected here, while the save cell further down writes to
# longman_1000_phrases.txt — confirm which band is intended.
vocab_dict = get_longman_verb_vocab_dict(file_path, "S3")

In [None]:
# Uses LLM calls. It sometimes generates phrases that are terminated too early
# (e.g. "Mind the pot on the"), so scan through the results and check them.

# English phrases only initially.
longman_phrases = generate_phrases_from_vocab_dict(vocab_dict, max_iterations=15)

In [None]:
# Save the generated phrases as UTF-8 text, one phrase per line.
with open("../outputs/longman/longman_1000_phrases.txt", "w", encoding="utf-8") as f:
    f.writelines(phrase + "\n" for phrase in longman_phrases)

## Create an Anki deck from those phrases

Or, load one of the English Longman phrase lists already created in data/ and apply it to your language

This function:
1. translates
2. generates audio using text to speech
3. packages up the text and audio into several anki decks (in batches), that can be imported into Anki.

The deck_name is used to derive the deck_id, so although several *.apkg files are created, they will all merge successfully into the same deck


In [9]:

from src.anki_tools import create_anki_deck_from_english_phrase_list, export_to_anki_with_images, export_to_anki_with_images_english
from src.config_loader import config
from src.audio_generation import generate_phrase_audio_files
# Re-read the configuration; the log output of this cell reports the config.json path it was loaded from.
# NOTE(review): _load_config is underscore-prefixed (private by convention) — confirm there is no public reload API.
config._load_config()

# Show the voice settings currently configured for English and for the target language.
print(config.english_voice_models, config.target_language_voice_models)


Language name: French determined from code fr
Successfully loaded config from: y:\Python Scripts\audio-language-trainer\src\config.json
Multiple country codes available for en: en-AU, en-GB, en-IN, en-US
Multiple country codes available for fr: fr-CA, fr-FR
{'language_code': 'en-GB', 'male_voice': 'en-GB-Studio-B', 'female_voice': 'en-GB-Studio-C'} {'language_code': 'fr-CA', 'male_voice': 'fr-CA-Neural2-B', 'female_voice': 'fr-CA-Neural2-A'}


#### Load longman phrases

In [10]:
# Load the pre-generated English phrases, one per line, stripping surrounding whitespace.
# Forward slashes are used because they work on every OS; the previous "..\data\longman_..."
# literal relied on the invalid escapes "\d" and "\l" being passed through unchanged
# (a SyntaxWarning on Python 3.12+) and only resolved on Windows.
with open("../data/longman_phrases_convo_1000.txt", "r", encoding="utf-8") as f:
    longman_phrases = [line.strip() for line in f]

#### Add English audio for each Longman phrase

In [16]:
# Two sample sentences used by the audio-generation and export cells that follow.
example_phrases = [
    "Raise your hand if you have a question",
    "The professor will teach us about science tomorrow",
]

In [17]:
# Generate audio files for each example phrase into ../data/longman_audio
# (the same directory the export cell below uses as AUDIO_DIR).
generate_phrase_audio_files(example_phrases, output_dir="../data/longman_audio")

 50%|█████     | 1/2 [00:06<00:06,  6.20s/it]

Generated audio files for phrase: Raise your hand if you have a question


100%|██████████| 2/2 [00:12<00:00,  6.24s/it]

Generated audio files for phrase: The professor will teach us about science tomorrow





In [20]:
# Directories holding the per-phrase image and audio assets handed to the export.
IMAGE_DIR = "../data/longman_phrase_images"
AUDIO_DIR = "../data/longman_audio"

# Package the example phrases into an English-learning Anki deck under ../outputs/test.
export_to_anki_with_images_english(
    example_phrases,
    output_dir="../outputs/test",
    image_dir=IMAGE_DIR,
    audio_dir=AUDIO_DIR,
    story_name="eal_swedish",
    deck_name="Swedish EAL",
)

English learning Anki deck exported to ../outputs/test\eal_swedish_english_anki_deck.apkg
Cleanup of temporary files completed.


#### Create an Anki flash deck in any language

In [16]:
# Re-read config.json so the current target language is used by the cells below
# (the log output of this cell reports which language was detected).
# NOTE(review): _load_config is underscore-prefixed (private by convention) — confirm there is no public reload API.
config._load_config()

Language name: Spanish determined from code es
Successfully loaded config from: y:\Python Scripts\audio-language-trainer\src\config.json
Multiple country codes available for en: en-AU, en-GB, en-IN, en-US
Multiple country codes available for es: es-ES, es-US


In [17]:
# The Longman 1000 images are already generated, so they can be reused for different languages.
# Only the first 10 phrases are processed here; the deck name and file prefix follow the
# language currently loaded in config.
# NOTE(review): a saved output further down shows `anki_dict` as a coroutine object. If
# create_anki_deck_from_english_phrase_list is async in your checkout, this call needs `await`.
anki_dict = create_anki_deck_from_english_phrase_list(
    longman_phrases[:10],
    deck_name=f"Longman 1000 Images - {config.language_name}",
    anki_filename_prefix=f"longman_1000_{config.language_name.lower()}",
    batch_size=50,
    image_dir="../data/longman_phrase_images",
)

adding translations:   0%|          | 0/1 [00:00<?, ?it/s]

Beginning translation for anki


adding translations: 100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Translated phrases



Generating audio: 100%|██████████| 10/10 [01:31<00:00,  9.11s/it]
adding audio: 100%|██████████| 1/1 [01:31<00:00, 91.07s/it]


Text-to-speech for phrases done



Processing story parts: 100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
generating image and sound files: 10it [01:00,  6.04s/it]
adding notes to deck: 100%|██████████| 10/10 [00:00<00:00, 10012.66it/s]


Anki deck exported to ../outputs/longman\longman_1000_spanish_0_anki_deck.apkg


deleting temp files: 100%|██████████| 30/30 [00:00<00:00, 3764.41it/s]

Cleanup of temporary files completed.





In [6]:
# Display the value returned by the deck-creation call.
# NOTE(review): the saved output for this cell is a coroutine object, i.e. the call that
# produced it was not awaited — re-run and confirm whether `await` is required.
anki_dict

<coroutine object create_anki_deck_from_english_phrase_list at 0x00000218DE51A200>

## Get anki flash card data
This is bespoke for my flashcard format

In [None]:
import os

from src.anki_tools import convert_anki_to_story_dict, print_deck_info

# Location of the Anki collection database. The path is machine-specific, so it can be
# overridden with the ANKI_COLLECTION_PATH environment variable; when that is not set,
# the original hard-coded path is used.
collection_path = os.environ.get(
    "ANKI_COLLECTION_PATH",
    r"C:\Users\i5\AppData\Roaming\Anki2\User 1\collection.anki2",
)
# Name of the deck to read from that collection.
deck_name = "Longman 1000 - Swedish"

# Convert the Anki deck to story_dict format
story_dict = convert_anki_to_story_dict(collection_path, deck_name)

# Generate Images for Longman Phrases

In [None]:
from src.utils import create_image_generation_prompt, generate_image_imagen, add_images_to_phrases, clean_filename, save_json, generate_image_stability, load_json, generate_image_deepai, add_image_paths
from src.config_loader import config



In [None]:

# Read the phrase list with an explicit UTF-8 encoding, matching the other phrase-file
# reads and writes in this notebook. Without it, open() falls back to the locale's
# default encoding, which can mis-decode non-ASCII characters on Windows.
with open("../data/longman_1000_phrases.txt", "r", encoding="utf-8") as f:
    LONGMAN_PHRASES = [line.strip() for line in f]
# Previously saved phrase/image dictionary, loaded from the shared image directory.
PROMPT_DICT = load_json("../data/longman_phrase_images/phrase_image_dict.json")

In [None]:
# Generate images for the Longman phrases, using the shared image directory as output.
# NOTE(review): the result is merged into PROMPT_DICT in the following cell, so it is
# presumably a mapping keyed by phrase — confirm against src.utils.add_images_to_phrases.
new_prompt_dict = add_images_to_phrases(LONGMAN_PHRASES, output_dir="../data/longman_phrase_images")


In [None]:

# Merge the newly produced entries into the dictionary loaded earlier (in place),
# then write the combined dictionary back to the same JSON file it was loaded from.
PROMPT_DICT.update(new_prompt_dict)
save_json(PROMPT_DICT, "../data/longman_phrase_images/phrase_image_dict.json")

### Add image paths to story_data_dict

In [None]:
# Attach image paths from the shared image directory to the story_dict built from the Anki deck.
# The result replaces story_dict, so this cell depends on the conversion cell having run first.
story_dict = add_image_paths(story_dict, image_dir="../data/longman_phrase_images")

Now that image_paths are linked to each phrase, we can create an Anki flash deck with images

In [None]:
# Export the story_dict, now carrying image paths, as an Anki deck into ../outputs/longman.
export_to_anki_with_images(
    story_dict,
    output_dir="../outputs/longman",
    deck_name="Longman 1000 Images - Swedish",
    story_name="longman_swedish_images",
)