In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell executes,
# so edits to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Add the parent directory of 'src' to the Python path
import os
import sys

# Resolve the directory one level above the current working directory and make it
# importable (once), so that `src.*` imports in later cells resolve.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Longman phrase and anki deck generation
This notebook offers a more memorable way to learn the core vocabulary defined in the Longman Communication vocabulary lists: we take the vocabulary and use an LLM
to generate phrases that use it.

## Longman 1000, 2000 and 3000 already provided
English phrases for the Longman vocab have already been created and can be found in the 'data' folder

In [None]:
from src.utils import get_longman_verb_vocab_dict
from src.phrase import generate_phrases_from_vocab_dict

# Location of the Longman Communication 3000 word list, relative to this notebook.
file_path = '../data/longman-communication-3000.json'

# Frequency band selector: S1 = first 1000 words used in speech; valid options are S1-3 and W1-3.
# NOTE(review): "S3" is selected here while later cells use "longman_1000" file names — confirm the intended band.
vocab_dict = get_longman_verb_vocab_dict(
    file_path,
    "S3",
)

In [None]:
# This step makes LLM calls. The model occasionally cuts a phrase off too early
# (e.g. "Mind the pot on the"), so scan through the results and check them by hand.

# Only English phrases are produced at this stage.
longman_phrases = generate_phrases_from_vocab_dict(
    vocab_dict,
    max_iterations=15,
)

### Save the Longman phrases if generated

In [None]:
# Save the generated English phrases to disk, one phrase per line (UTF-8).
# Create the output folder first: open(..., "w") raises FileNotFoundError if it is missing.
os.makedirs("../outputs/longman", exist_ok=True)
with open("../outputs/longman/longman_1000_phrases.txt", "w", encoding="utf-8") as f:
    f.writelines(phrase + "\n" for phrase in longman_phrases)

## Create an Anki deck from those phrases

Or, load one of the English Longman phrase lists in data/ already created and apply it to your language

This function:
1. translates
2. generates audio using text to speech
3. packages up the text and audio into several anki decks (in batches), that can be imported into Anki.

The deck_name is used to derive the deck_id, so although several *.apkg files are created, they will all merge successfully into the same deck


In [None]:

from src.anki_tools import create_anki_deck_from_english_phrase_list, export_to_anki_with_images
from src.utils import add_image_paths



In [None]:
#the longman 1000 images are already generated, so can be reused for different languages
_ = await create_anki_deck_from_english_phrase_list(longman_phrases,
                                                     deck_name="Longman 1000 Images - Basque",
                                                       anki_filename_prefix="longman_1000_basque",
                                                         batch_size=50,
                                                         image_dir="../data/longman_phrase_images")

## Get anki flash card data
This is bespoke for my flashcard format

In [42]:
from src.anki_tools import convert_anki_to_story_dict, print_deck_info
# Example usage.
# The Anki collection lives at a machine-specific location, so it can be overridden with the
# ANKI_COLLECTION_PATH environment variable; the original path is kept as the default.
collection_path = os.environ.get(
    "ANKI_COLLECTION_PATH",
    r"C:\Users\i5\AppData\Roaming\Anki2\User 1\collection.anki2",  # Your Anki database path
)
deck_name = "Longman 1000 - Swedish"

# Convert the Anki deck to story_dict format
story_dict = convert_anki_to_story_dict(collection_path, deck_name)

100%|██████████| 841/841 [01:38<00:00,  8.50it/s]


# Generate Images for Longman Phrases

In [None]:
from src.utils import create_image_generation_prompt, generate_image_imagen, add_images_to_phrases, clean_filename, save_json, generate_image_stability, load_json, generate_image_deepai, add_image_paths
from src.config_loader import config



In [None]:

# Load the English phrases, one per line. Read explicitly as UTF-8 (the encoding this notebook
# uses when it writes phrase files) instead of relying on the platform default encoding.
with open("../data/longman_1000_phrases.txt", "r", encoding="utf-8") as f:
    # Strip surrounding whitespace and skip blank lines so no empty phrase is carried forward.
    LONGMAN_PHRASES = [line.strip() for line in f if line.strip()]

# Load the existing phrase image dictionary from disk.
PROMPT_DICT = load_json("../data/longman_phrase_images/phrase_image_dict.json")

In [None]:
# Process every loaded phrase with the image helper, using the shared image folder as output_dir.
# NOTE(review): presumably this calls an image-generation service per phrase — confirm cost before re-running.
new_prompt_dict = add_images_to_phrases(
    LONGMAN_PHRASES,
    output_dir="../data/longman_phrase_images",
)


In [None]:

# Merge the newly produced entries into the loaded dictionary (in place), then write the
# combined dictionary back to the same JSON file it was loaded from.
PROMPT_DICT.update(new_prompt_dict)
save_json(
    PROMPT_DICT,
    "../data/longman_phrase_images/phrase_image_dict.json",
)

### Add image paths to story_data_dict

In [43]:
# Rebind story_dict to the result of add_image_paths, pointing it at the shared image folder.
story_dict = add_image_paths(
    story_dict,
    image_dir="../data/longman_phrase_images",
)

Now that image_paths are linked to each phrase, we can create an Anki flash deck with images

In [None]:
# Export the image-linked story_dict as an Anki deck under the Longman outputs folder.
export_to_anki_with_images(
    story_dict,
    output_dir="../outputs/longman",
    deck_name="Longman 1000 Images - Swedish",
    story_name="longman_swedish_images",
)