In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Put the directory one level above this notebook (the one that holds 'src')
# on the import path so that `from src...` imports resolve.
import os
import sys

module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Longman phrase and anki deck generation
To make the core vocabulary defined in the Longman Communication 3000 vocabulary lists more memorable, we take the vocab and use an LLM
to generate phrases that use it.

## Longman 1000, 2000 and 3000 already provided
English phrases for the Longman vocab have already been created and can be found in the 'data' folder.

In [None]:
from src.utils import get_longman_verb_vocab_dict
from src.phrase import generate_phrases_from_vocab_dict

# Second argument selects the frequency band. Per the original note:
# S1 = 1st 1000 words used in Speech; options are S1-3 and W1-3.
# NOTE(review): "S3" is selected here, while a later cell saves the generated
# phrases as longman_1000_phrases.txt — confirm the band and file name agree.
file_path = '../data/longman-communication-3000.json'
vocab_dict = get_longman_verb_vocab_dict(
    file_path,
    "S3",
)

In [None]:
# Generation goes through LLM calls, which sometimes return a phrase that is
# terminated too early (e.g. "Mind the pot on the"), so scan through the
# results and check them before using them.

# Only English phrases are produced at this stage.
longman_phrases = generate_phrases_from_vocab_dict(
    vocab_dict,
    max_iterations=15,
)

In [None]:
# Persist the generated phrases as UTF-8 text, one phrase per line.
# NOTE(review): the file is named "longman_1000" — confirm this matches the
# frequency band that was selected when building vocab_dict.
phrases_path = "../outputs/longman/longman_1000_phrases.txt"

# open(..., "w") does not create missing directories, so make sure the output
# folder exists first (`os` is imported in the path-setup cell at the top).
os.makedirs(os.path.dirname(phrases_path), exist_ok=True)

with open(phrases_path, "w", encoding="utf-8") as f:
    f.writelines(phrase + "\n" for phrase in longman_phrases)

## Create an Anki deck from those phrases

Or, load one of the English Longman phrase lists already created in data/ and apply it to your language.

This function:
1. translates
2. generates audio using text to speech
3. packages up the text and audio into several anki decks (in batches), that can be imported into Anki.

The deck_name is used to derive the deck_id, so although several *.apkg files are created, they will all merge successfully into the same deck when imported.


In [3]:

from src.anki_tools import (
    create_anki_deck_from_english_phrase_list,
    export_to_anki_with_images,
)
from src.config_loader import config

# Re-read the configuration so the settings displayed below are current.
# NOTE(review): _load_config is underscore-prefixed (private by convention);
# prefer a public reload entry point if config_loader offers one.
config._load_config()

# Cell output: the voice settings for the configured target language.
config.target_language_voice_models



Found config file at: y:\Python Scripts\audio-language-trainer\src\config.json
Language name: Italian determined from code it
Successfully loaded config from: y:\Python Scripts\audio-language-trainer\src\config.json
Multiple country codes available for en: en-AU, en-GB, en-IN, en-US
FFmpeg path added to system PATH: C:\Program Files\ffmpeg-7.0-essentials_build\bin
Language name: Italian determined from code it
Successfully loaded config from: y:\Python Scripts\audio-language-trainer\src\config.json
Multiple country codes available for en: en-AU, en-GB, en-IN, en-US


{'language_code': 'it-IT',
 'male_voice': 'it-IT-Neural2-C',
 'female_voice': 'it-IT-Neural2-A'}

In [4]:
# Load the ready-made English phrase list: one entry per line of the file,
# with surrounding whitespace (including the trailing newline) stripped.
with open("../data/longman_1000_phrases.txt", "r", encoding="utf-8") as f:
    longman_phrases = list(map(str.strip, f))

In [None]:
# Spot-check a single entry of the loaded phrase list.
longman_phrases[1]

'The driver will face terrible traffic on holiday'

In [7]:
# NOTE(review): a bare `_` echoes IPython's most recent cell output, so its
# value depends on execution history. This looks like leftover scratch work —
# consider deleting the cell.
_

In [10]:
#the longman 1000 images are already generated, so can be reused for different languages
anki_dict = await create_anki_deck_from_english_phrase_list(longman_phrases[0:2],
                                                     deck_name="Longman 1000 Images - Italian",
                                                       anki_filename_prefix="longman_1000_italian",
                                                         batch_size=50,
                                                         image_dir="../data/longman_phrase_images")

FFmpeg path added to system PATH: C:\Program Files\ffmpeg-7.0-essentials_build\bin


adding translations:   0%|          | 0/1 [00:00<?, ?it/s]

Beginning translation for anki


adding translations: 100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


Translated phrases



adding audio: 100%|██████████| 1/1 [00:12<00:00, 12.69s/it]


Text-to-speech for phrases done



Processing story parts: 100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
generating image and sound files: 2it [00:33, 16.61s/it]
adding notes to deck: 100%|██████████| 2/2 [00:00<?, ?it/s]


Anki deck exported to ../outputs/longman\longman_1000_italian_0_anki_deck.apkg


deleting temp files: 100%|██████████| 8/8 [00:00<00:00, 2018.19it/s]

Cleanup of temporary files completed.





In [11]:
# Display the structure returned by create_anki_deck_from_english_phrase_list.
anki_dict

{'anki': {'translated_phrase_list': [('The driver will face terrible traffic on holiday',
    'L&#39;autista dovrà affrontare un traffico terribile durante le vacanze'),
   ('I need to read the important letter quickly',
    'Ho bisogno di leggere velocemente la lettera importante')],
  'translated_phrase_list_audio': [[<pydub.audio_segment.AudioSegment at 0x24cd3feae50>,
    <pydub.audio_segment.AudioSegment at 0x24cf8f9ac90>,
    <pydub.audio_segment.AudioSegment at 0x24cf8bedd50>],
   [<pydub.audio_segment.AudioSegment at 0x24cf8c51e10>,
    <pydub.audio_segment.AudioSegment at 0x24cf8c48750>,
    <pydub.audio_segment.AudioSegment at 0x24cfa760b50>]],
  'image_path': ['../data/longman_phrase_images\\the_driver_will_face_terrible_traffic_on_holiday.png',
   '../data/longman_phrase_images\\i_need_to_read_the_important_letter_quickly.png']}}

## Get anki flash card data
This is bespoke for my flashcard format

In [None]:
from src.anki_tools import convert_anki_to_story_dict, print_deck_info

# Example usage — edit both values for your own machine and deck.
# NOTE(review): collection_path is an absolute, user-specific path; consider
# reading it from configuration instead of hardcoding it.
deck_name = "Longman 1000 - Swedish"
collection_path = r"C:\Users\i5\AppData\Roaming\Anki2\User 1\collection.anki2"  # Your Anki database path

# Convert the named Anki deck into the story_dict format.
story_dict = convert_anki_to_story_dict(
    collection_path,
    deck_name,
)

# Generate Images for Longman Phrases

In [None]:
from src.utils import create_image_generation_prompt, generate_image_imagen, add_images_to_phrases, clean_filename, save_json, generate_image_stability, load_json, generate_image_deepai, add_image_paths
from src.config_loader import config



In [None]:

# Load the English phrase list, one phrase per line with surrounding
# whitespace stripped.
# The file is read explicitly as UTF-8, matching how the other cells in this
# notebook read and write phrase lists; without `encoding`, open() falls back
# to the platform's locale encoding, which is not UTF-8 on every system.
with open("../data/longman_1000_phrases.txt", "r", encoding="utf-8") as f:
    LONGMAN_PHRASES = [line.strip() for line in f]

# Previously saved contents of phrase_image_dict.json.
PROMPT_DICT = load_json("../data/longman_phrase_images/phrase_image_dict.json")

In [None]:
# Run add_images_to_phrases over the full phrase list, targeting the shared
# image folder; the returned mapping is merged into PROMPT_DICT afterwards.
new_prompt_dict = add_images_to_phrases(
    LONGMAN_PHRASES,
    output_dir="../data/longman_phrase_images",
)


In [None]:

# Fold the new entries into the loaded mapping, then write the combined
# result to phrase_image_dict.json in the image folder.
PROMPT_DICT.update(new_prompt_dict)
save_json(
    PROMPT_DICT,
    "../data/longman_phrase_images/phrase_image_dict.json",
)

### Add image paths to story_data_dict

In [None]:
# Rebind story_dict to the result of add_image_paths, pointing it at the
# Longman phrase image folder.
story_dict = add_image_paths(
    story_dict,
    image_dir="../data/longman_phrase_images",
)

Now that image paths are linked to each phrase, we can create an Anki flash deck with images.

In [None]:
# Export story_dict (now carrying image paths) as an Anki deck under the
# Longman outputs folder.
export_to_anki_with_images(
    story_dict,
    output_dir="../outputs/longman",
    deck_name="Longman 1000 Images - Swedish",
    story_name="longman_swedish_images",
)