In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Make the directory that contains 'src' (this notebook's parent folder) importable
import os
import sys

# os.pardir is the parent-directory marker (".."); abspath resolves it against the working directory
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    # Append only when absent so re-running this cell never adds duplicate entries
    sys.path.append(module_path)

# Longman phrase and Anki deck generation
To make the core vocabulary defined in the Longman Communication vocab lists more memorable to learn, we take the vocab and use an LLM
to generate phrases that use it.

## Longman 1000, 2000 and 3000 already provided
English phrases for the Longman vocab have already been created and can be found in the 'data' folder.

In [32]:
from src.utils import get_longman_verb_vocab_dict
from src.phrase import generate_phrases_from_vocab_dict

# Relative path to the Longman Communication 3000 word list
# (assumes the kernel's working directory is this notebook's folder)
file_path = '../data/longman-communication-3000.json'
# The second argument selects the word band: S1 = 1st 1000 words used in Speech; options are S1-3 and W1-3.
# "S3" is requested here — presumably the third 1000 spoken words; confirm against get_longman_verb_vocab_dict.
vocab_dict = get_longman_verb_vocab_dict(file_path, "S3")

In [33]:
# Uses LLM calls. The model sometimes returns phrases that are cut off too early
# (e.g. "Mind the pot on the"), so scan through the results and check them before relying on them.

# English phrases only at this stage; the loop is capped at 15 iterations
longman_phrases = generate_phrases_from_vocab_dict(vocab_dict, max_iterations=15)

Iteration 1/15
Generated 101 phrases
We have 127 verbs and 728 vocab words left
Iteration 2/15
Generated 106 phrases
We have 58 verbs and 660 vocab words left
Iteration 3/15
Generated 105 phrases
We have 3 verbs and 594 vocab words left
Iteration 4/15
Generated 98 phrases
We have 195 verbs and 520 vocab words left
Iteration 5/15
Generated 105 phrases
We have 112 verbs and 434 vocab words left
Iteration 6/15
Generated 103 phrases
We have 43 verbs and 344 vocab words left
Iteration 7/15
Generated 87 phrases
We have 2 verbs and 307 vocab words left
Iteration 8/15
Generated 99 phrases
We have 250 verbs and 272 vocab words left
Iteration 9/15
Generated 101 phrases
We have 173 verbs and 232 vocab words left
Iteration 10/15
Generated 109 phrases
We have 95 verbs and 161 vocab words left
Iteration 11/15
Generated 100 phrases
We have 37 verbs and 136 vocab words left
Iteration 12/15
Generated 60 phrases - with minimal phrase prompt
We have 30 verbs and 25 vocab words left
Iteration 13/15
Genera

In [34]:
# Save the generated English phrases, one per line, so they can be reviewed and reused later.
import os

# open(..., "w") does not create missing folders, so make sure the output directory exists first;
# exist_ok=True keeps the cell safe to re-run.
os.makedirs("../outputs/longman", exist_ok=True)
with open("../outputs/longman/longman_3000_phrases.txt", "w", encoding="utf-8") as f:
    for phrase in longman_phrases:
        f.write(phrase + "\n")

## Create an Anki deck from those phrases

Alternatively, load one of the English Longman phrase lists already created in data/ and apply it to your language.

This function:
1. translates
2. generates audio using text to speech
3. packages up the text and audio into several anki decks (in batches), that can be imported into Anki.

The deck_name is used to derive the deck_id, so although several *.apkg files are created, they will all merge successfully into the same deck.


In [37]:

from src.anki import create_anki_deck_from_english_phrase_list

# NOTE(review): only the first three phrases are passed (longman_phrases[:3]) — presumably a quick smoke test;
# pass the full list to build the complete deck. The result is bound to `_` because it is not used afterwards.
# Top-level `await` works here because IPython/Jupyter runs cells inside an event loop.
_ = await create_anki_deck_from_english_phrase_list(longman_phrases[:3], deck_name="Longman 3000 - Swedish", anki_filename_prefix="longman_3000_swedish", batch_size=50)

Beginning translation for anki
Translated phrases

Text-to-speech for phrases done

Anki deck exported to ../outputs/longman\longman_3000_0_anki_deck.apkg
Cleanup of temporary MP3 files completed.


In [42]:
from src.utils import create_image_generation_prompt


# Build an image-generation prompt from the first generated phrase
prompt = create_image_generation_prompt(longman_phrases[0])

In [44]:
# Show the first generated phrase (the one passed to create_image_generation_prompt)
longman_phrases[0]

'The lazy engineer dared to repair the cooker.'

In [43]:
# print() renders the multi-line prompt as plain text instead of a quoted repr with escaped newlines
print(prompt)


    Create a vivid, memorable image for language learners based on the following description:
    
    An exhausted-looking engineer in wrinkled clothes and messy hair, slouching on a couch with a half-eaten pizza nearby. Next to the couch is a broken cooker with its parts scattered around. The engineer is lazily reaching towards the cooker with a screwdriver, barely extending their arm. The room has a warm, dim lighting, creating a cozy yet slightly chaotic atmosphere. The engineer's facial expression shows a mix of reluctance and determination.
    
    The image should be:
    - Colorful and engaging
    - Styled like a modern, slightly stylized illustration (not photorealistic)
    - Suitable for display on a mobile phone screen (square 1:1 aspect ratio)
    - Clear and easily understandable at a glance
    


In [None]:
from diffusers import DiffusionPipeline
import torch

# Load the SDXL base pipeline from its half-precision (fp16) safetensors weights
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe.to("cuda")  # moves the pipeline to the GPU; needs a CUDA-capable device

# if using torch < 2.0
# pipe.enable_xformers_memory_efficient_attention()

# NOTE: this reassigns `prompt`, replacing any value set by an earlier cell
prompt = "An astronaut riding a green horse"

# Despite the plural name, `images` holds only the first generated image
images = pipe(prompt=prompt).images[0]


Fetching 19 files:   0%|          | 0/19 [00:00<?, ?it/s]



diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/5.14G [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [10]:
import subprocess
import sys

# Install required packages (kept commented out; enable once if the packages are missing from the kernel's environment)
#subprocess.check_call([sys.executable, "-m", "pip", "install", "torch", "torchvision", "torchaudio", "--index-url", "https://download.pytorch.org/whl/cu117"])
#subprocess.check_call([sys.executable, "-m", "pip", "install", "diffusers", "transformers", "accelerate"])

from diffusers import StableDiffusionPipeline
import torch

# Load the model in half precision (float16).
# The SDXL base checkpoint is kept below as a commented-out alternative.
# NOTE(review): SDXL likely needs a different pipeline class than StableDiffusionPipeline — confirm before switching.
model_id = "runwayml/stable-diffusion-v1-5"
#model_id = "stabilityai/stable-diffusion-xl-base-1.0"
# NOTE: this rebinds `pipe`, replacing any pipeline created by an earlier cell
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")  # needs a CUDA-capable device


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [12]:

# Generate an image from a shortened, hand-written version of the couch/cooker scene description.
# NOTE: this reassigns `prompt`, replacing any value set by an earlier cell.
prompt = "An exhausted engineer on a couch with a half-eaten pizza nearby. Next to the couch is a broken cooker with its parts scattered around"
# Keep only the first image returned by the pipeline
image = pipe(prompt).images[0]
# NOTE(review): if `image` is a PIL image, .show() opens it in an external viewer rather than in the notebook;
# ending the cell with a bare `image` would render it inline instead — confirm which is wanted.
image.show()


  0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
# FIXME(review): this looks like an unfinished identifier left over from editing. Unless a name
# `generate_im` exists in the kernel, this cell raises NameError on Run All — complete or delete it.
generate_im

In [46]:
# Show the generated image again; requires `image` to already exist in the kernel from the generation cell
image.show()