In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys

# Resolve the parent of the current working directory to an absolute path and
# put it on sys.path (once) so modules located there can be imported.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Cost switch: change to True to run cells that cost money via API calls.
PAY_FOR_API = False

# Flash Card Generation 01

## Generate English phrases

The core way we store vocabulary for generating phrases, and then flashcards, is in a dictionary with two keys: 'verbs' (for verbs in the infinitive form, like 'be', 'run') and 'vocab' (for everything else).

The intent is that a vocab list is a core learning requirement (e.g. for an exam), and that it is easier to remember words in the context of common phrases. For example, learning the phrase 'I want' and separately learning the noun 'cake' is less efficient than learning the phrase 'I want some cake, please'.

It is even better if we link that phrase to an image and associated audio. This is the dual-coding theory applied to language learning, and it leads to retention and recall benefits.

The first step is generating your English phrases from your vocab list.

### Longman corpus

A common 'starter' corpus containing the core words you should learn, grouped into the 1st 1000 words, the 2nd 1000 words, etc.

You can replace `vocab_dict` with any custom-made Python dictionary that has 'verbs' and 'vocab' keys.

In [None]:
from src.utils import get_longman_verb_vocab_dict
from src.phrase import generate_phrases_from_vocab_dict

# Location of the Longman Communication 3000 word list; the relative path is
# resolved against the notebook's current working directory.
file_path = '../data/longman-communication-3000.json'
# The second argument selects which Longman band to load.
# Band codes: S1 = 1st 1000 words used in speech, W2 = 2nd 1000 words used in writing, etc.
# NOTE(review): by that scheme "S2" is presumably the 2nd 1000 spoken words -
# confirm against get_longman_verb_vocab_dict.
vocab_dict = get_longman_verb_vocab_dict(file_path, "S2")

In [None]:
# Sanity check: show up to the first ten entries stored under each of the
# 'verbs' and 'vocab' keys of vocab_dict.
print(f" first 10 verbs: {vocab_dict['verbs'][:10]}, \nand first 10 other words: {vocab_dict['vocab'][:10]}")

### Creating conversational phrases from a vocabulary dictionary

This function iterates through the vocabulary dictionary (by sampling from it) until it is exhausted.
Each batch of generated phrases is checked against the dictionary so that we can 'tick off' the words already used.

Phrases are generated using an LLM

In [None]:
# Paid step: the generation call only runs when PAY_FOR_API is True.
# When the flag is False, generated_phrases is NOT assigned by this cell, so
# any later cell that reads it needs the value to come from elsewhere.
if PAY_FOR_API:
    generated_phrases = generate_phrases_from_vocab_dict(vocab_dict, max_iterations=15)
    # Author's estimate: it takes about 15 iterations to go through 200 verbs and
    # 800 vocab (1000 words total). You will end up with about 1000 phrases, so
    # you get practice of the same verb etc. in different contexts.

In [None]:
# Save the generated phrases to ../outputs/my_phrases.txt, one phrase per line.
output_dir = "../outputs"
filename = "my_phrases.txt"

file_path = os.path.join(output_dir, filename)

# generated_phrases is only assigned when the paid generation cell ran
# (PAY_FOR_API is True). Without this guard, a fresh "Restart & Run All" with
# the default flag would truncate any existing phrase file and then raise
# NameError.
if PAY_FOR_API:
    # open(..., "w") does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(phrase + "\n" for phrase in generated_phrases)
else:
    print(f"PAY_FOR_API is False: no phrases were generated, so {file_path} was not written.")