In [10]:
%load_ext autoreload
%autoreload 2
import os
import sys

# Put the parent of the current working directory on the import path so that
# the `src` package imported below can be found when running from this folder.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from src.utils import load_json
PAY_FOR_API = True # True runs the cells that cost money via API calls; set to False to skip them

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Flash Card Generation 01

## Generate English phrases

The core way we store vocabulary for generating phrases, and then flashcards, is in a dictionary with two keys: 'verbs' (for verbs in the infinitive form, like 'be', 'run') and 'vocab' (for everything else).

The intent is that a vocab list is a core learning requirement (e.g. for an exam), and that it is easier to remember words in the context of common phrases, i.e. learning the phrase 'I want' and separately learning the noun 'cake' is less efficient than learning the phrase 'I want some cake, please'.

Even better if we link that phrase to an image and associated audio. This is the dual-coding theory of language learning, and it leads to retention and recall benefits.

The first step is generating your English phrases from your vocab list.

### Longman corpus

A common 'starter' corpus containing the core words you should learn, organised into the 1st 1000 words, the 2nd 1000 words, etc.

You can replace vocab_dict with any custom-made Python dictionary with 'verbs' and 'vocab' keys.

In [3]:
from src.utils import get_longman_verb_vocab_dict
from src.phrase import generate_phrases_from_vocab_dict

# Path to the Longman word list; the loader expects this file's specific JSON format
file_path = '../data/longman-communication-3000.json'
# The second argument selects the frequency band:
# S1 = 1st 1000 words used in speech, W2 = 2nd 1000 words used in writing, etc.
# NOTE(review): "S2" is presumably the 2nd 1000 spoken words - confirm against get_longman_verb_vocab_dict
vocab_dict = get_longman_verb_vocab_dict(file_path, "S2")

In [4]:
# Sanity check: show the first ten entries under each of the 'verbs' and 'vocab' keys
print(f" first 10 verbs: {vocab_dict['verbs'][:10]}, \nand first 10 other words: {vocab_dict['vocab'][:10]}")

 first 10 verbs: ['achieve', 'act', 'address', 'admit', 'advise', 'affect', 'aim', 'announce', 'apologize', 'appear'], 
and first 10 other words: ['ability', 'abuse', 'access', 'accident', 'accommodation', 'activity', 'address', 'administration', 'adult', 'advance']


### Creating conversational phrases from a vocabulary dictionary

This function will iterate through (by sampling) the vocabulary dictionary until it is exhausted.
We run a check against generated phrases so we can 'tick off' words already used.

Phrases are generated using an LLM

In [11]:
# or for GCSE vocab (note: this replaces the vocab_dict loaded from the Longman list):
# Forward slashes keep the path portable across operating systems and avoid the
# invalid "\d" / "\g" escape sequences that a backslash path in a normal string produces.
vocab_dict = load_json("../data/gcse_vocab_list_cambridge.json")

In [None]:
if PAY_FOR_API:
    # Prompt settings for the phrase generator. Per the author's note the defaults are
    # 6 - 9 word phrases and no more than 2 verbs; to use them, remove these two
    # settings AND the matching keyword arguments in the call below.
    length_phrase = "4-5 words long, for beginner GCSE level, but treat common lexical chunks (I'm going to.., Do you.., Let us.. etc) as a single 'word'"
    verbs_per_phrase = "one verb (but OK for an additional auxillary verb if necessary)"
    # whether to tweak the prompt to set phrases within the target country
    localise = False

    # Author's note: it takes about 15 iterations to go through 200 verbs, 800 vocab
    # (1000 words total). You will end up with about 1000 phrases, so you get practice
    # of the same verb etc in different contexts.
    generated_phrases = generate_phrases_from_vocab_dict(
        vocab_dict,
        max_iterations=1,
        length_phrase=length_phrase,
        verbs_per_phrase=verbs_per_phrase,
        localise=localise,
    )

Config file has been modified. Reloading...
Function that called this one: generate_phrases_with_llm. Sleeping for 20 seconds
Config file has been modified. Reloading...
Iteration 1/1
Generated 99 phrases
We have 128 verbs and 551 vocab words left
Reached maximum number of iterations (1). Stopping phrase generation.


In [16]:
generated_phrases

['Please open the door',
 'Do you need help?',
 "I'm going to visit Paris",
 "Let's dance in the rain",
 "Don't worry about the test",
 'Can you lend me money?',
 'I love eating cheese',
 'Shall we shop for clothes?',
 'The sun is shining brightly',
 'Did you hear the news?',
 "I'm trying to lose weight",
 "Let's plan a city trip",
 "Don't forget to pack socks",
 'Can you smell the flowers?',
 "We're going to the zoo",
 'Do you want some tea?',
 'I need new shoes',
 "Let's chat about work",
 "Don't complain about the weather",
 'Can you drive me home?',
 "I hope it doesn't rain",
 'Shall we rest a bit?',
 'The cake tastes delicious',
 'Did they find your keys?',
 "I'm learning to cook",
 "Let's climb that hill",
 "Don't smoke in here",
 'Can you accompany me tomorrow?',
 "We're going to escape Paris",
 'Do you like French food?',
 'I want to learn French',
 "Let's visit the Eiffel Tower",
 "Don't forget your umbrella",
 'Can you help me please?',
 "I'm trying to stay awake",
 'Shall we

In [None]:
# save phrases: one phrase per line, UTF-8 encoded
output_dir = "../outputs"
filename = "my_phrases.txt"

# NOTE: this rebinds `file_path`, which earlier in the notebook held the Longman JSON path.
file_path = os.path.join(output_dir, filename)

# Create the output folder if it is missing; open(..., "w") does not create
# parent directories and would otherwise raise FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)

with open(file_path, "w", encoding="utf-8") as f:
    for phrase in generated_phrases:
        f.write(phrase + "\n")