# Import

In [1]:
%load_ext autoreload

In [21]:
%autoreload 2
%autoawait
import os
from dotenv import load_dotenv
import sys
import os
from collections import defaultdict
from tqdm import tqdm
import pickle

# Add the parent directory of 'src' to the Python path
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
# Load environment variables from .env file
load_dotenv()

from src.audio_generation import async_process_phrases, generate_audio_from_dialogue
from pydub import AudioSegment
from src.config_loader import config
from src.dialogue_generation import get_vocab_from_dialogue, update_vocab_usage
from src.dialogue_generation import generate_story_plan, generate_dialogue_prompt, generate_dialogue, generate_recap
from src.audio_generation import  text_to_speech, play_audio, generate_audio_from_dialogue, generate_normal_and_fast_audio, join_audio_segments, export_audio, async_process_phrases
from src.phrase import generate_practice_phrases_from_dialogue
from src.initialise import initialise_usage_data, initialise_vocab_usage
from src.utils import save_json, convert_defaultdict, save_defaultdict, load_json, create_pdf_booklet, generate_story_image, create_test_story_dict
from src.translation import translate_dialogue, translate_phrases
from src.audio_generation import create_m4a_with_timed_lyrics
from src.anki import export_to_anki


STORY_NAME = "the chocolate murder"
STORY_NAME = STORY_NAME.replace(" ", "_") #just to make directory names easier
OUTPUT_DIR = f"../outputs/{STORY_NAME}"
STORY_DATA_PATH = f"{OUTPUT_DIR}/story_data_{STORY_NAME}.json"

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

IPython autoawait is `on`, and set to use `asyncio`


In [22]:
with open(F'{STORY_DATA_PATH}.pkl', 'rb') as file:
    story_data_dict = pickle.load(file)

## Setup Google Cloud credentials and prerequisites
You will need a Google Project with the following APIs enabled:
* Text to Speech
* Translate
* Vertex AI with the following Anthropic models enabled (from the model garden)
    * Sonnet 3.5
    * Haiku
* Add your GOOGLE_PROJECT_ID to the .env file

You should alter src/config.json which contains your target language.


In [4]:
from google.auth import default
credentials, project = default()

# Audio Language Trainer Workflow

The aim of this project is to create audio material for you to practise a foreign language. It needs to be engaging and be tailored to words you want to practise. 

The overall steps we follow are:

1. Create an outline story plan based on a theme you select (e.g. 'an adventure', 'a holiday in Rome'). An LLM produces a story plan following a typical story arc (exposition, rising action, climax, falling action, resolution). This ensures an engaging plot.
2. Flesh out the story using your practice vocabulary and grammatical concepts. Vocab and concepts are sampled from lists you provide in the 'data' folder (vocab_usage.json and grammar_concepts_usage.json), with sampling being skewed towards words you haven't heard yet. The output here is a dialogue between two people (Sam and Alex).

Recaps are generated between each story part so when the LLM generates the next dialogue it logically continues from the previous one.

3. The dialogue is broken up into shorter practice phrases via a 'language graph' concept, so we give you not just the long-form dialogue to listen and practise to, but smaller, mixed-up phrases based on the vocab in the story, starting small and buliding to more complex phrases.
4. Your vocab list is updated based on the produced dialogue.
5. The smaller phrases and main dialogue are translated into your target language and convert to speech.
6. Research shows that listening to double-speed audio (on words you already known) can help with your listening comprehension for a foreign language (it helps the brain with the ability to separate distinct words). We therefore create a fast version of the dialogue for listening practice.
7. The audio files are stiched together to create an MP3 file for each part in the story (there are 5 parts to the story). The stages for each audio lesson are: 
* dialogue in the target language
* practice phrases of the form 'how do you say: "practice phrase' in 'target language'?". A pause (where you speak in the foreign language), then the correct translation is played twice, first fast, then slow.
* repeat of the dialogue in the target language so you can satisfy yourself you understand it properly
* 12 repeated playings of the fast version of the dialogue to improve your listening comprehension.

The intent is then you would listen to the next audio lesson in the story.


## Setup your vocab and grammatical concepts
You should populate or edit
* known_vocab_list.json 
* grammar_concepts.json

### Initiliase the vocab and grammar counters
This creates vocab_usage.json (setting all values to 0) and grammar_concepts_usage.json (setting all values to 'true' and counts to 0)

You can tweak these to minimise what words and concepts you are exposed to

In [37]:
#initialise_vocab_usage()
#initialise_usage_data(overwrite=False) #the overwrite commands stops you wiping all your usage data if it already exists

Data saved to ../data/1000_vocab_usage.json
Usage files already exist. Set overwrite=True to reinitialize.


## Create a story plan

In [12]:
%autoreload 2
from src.dialogue_generation import get_least_used_words, add_usage_to_words

verbs_for_story = get_least_used_words("verbs", 10)
vocab_for_story = get_least_used_words("vocab", 30)

story_plan = generate_story_plan(story_guide = STORY_NAME, verb_list=verbs_for_story, vocab_list=vocab_for_story, test = False, story_name= STORY_NAME) #the test parameter will provide pre-canned responses avoiding LLM costs

[('suggest', 'VERB'), ('could', 'VERB'), ('wash', 'VERB'), ('listen', 'VERB'), ('lose', 'VERB'), ('speak', 'VERB'), ('knit', 'VERB'), ('meet', 'VERB'), ('bake', 'VERB'), ('marry', 'VERB')]
Data saved to ../data/vocab_usage.json
[('buddy', 'vocab'), ('because', 'vocab'), ('buddy', 'vocab'), ('two', 'vocab'), ('someone', 'vocab'), ('married', 'vocab'), ('emergency', 'vocab'), ('track', 'vocab'), ('nicely', 'vocab'), ('news', 'vocab'), ('anyone', 'vocab'), ('late', 'vocab'), ('certainly', 'vocab'), ('high', 'vocab'), ('even', 'vocab'), ('huh', 'vocab'), ('entrance', 'vocab'), ('move', 'vocab'), ('lunch', 'vocab'), ('summer', 'vocab'), ('anymore', 'vocab'), ('any', 'vocab'), ('clear', 'vocab'), ('anyone', 'vocab'), ('lady', 'vocab'), ('police', 'vocab'), ('first', 'vocab'), ('case', 'vocab'), ('doctor', 'vocab'), ('none', 'vocab')]
Data saved to ../data/vocab_usage.json
Data saved to ../outputs/story_plan_the_chocolate_murder.json


In [13]:
verbs_for_story_usage = add_usage_to_words(verbs_for_story, "verbs")
vocab_for_story_usage = add_usage_to_words(vocab_for_story, "vocab")

## Create all dialogue

1. Create dialouge LLM prompt based on the story part
2. LLM generates dialogue
3. LLM generates recap
4. move to next story part and repeat


In [14]:
PAY_FOR_LLM = True

if PAY_FOR_LLM:
    story_data_dict = defaultdict(lambda: defaultdict(str))
    recap = "This is the beginning of the story."
    for step, story_part in enumerate(list(story_plan.keys())):
        prompt = generate_dialogue_prompt(story_part=story_part,
                                        story_part_outline=story_plan[story_part],
                                        last_recap = recap,
                                        verb_usage_str=verbs_for_story_usage,
                                        vocab_usage_str=vocab_for_story_usage,
                                        verb_use_count=5,
                                        vocab_use_count=10,
                                        grammar_concept_count=5,
                                        grammar_use_count=3)
        dialogue = generate_dialogue(prompt)
        vocab_used = get_vocab_from_dialogue(dialogue)
        update_vocab_usage(vocab_used)
        verbs_for_story_usage = add_usage_to_words(verbs_for_story, "verbs")
        vocab_for_story_usage = add_usage_to_words(vocab_for_story, "vocab")
        recap = generate_recap(dialogue, test=False)
        story_data_dict[story_part]["dialogue_generation_prompt"] = prompt
        story_data_dict[story_part]["dialogue"] = dialogue
        story_data_dict[story_part]["recap"] = recap

save_defaultdict(story_data_dict, STORY_DATA_PATH)


{('to', 'ADP'), ('at', 'ADP'), ('case', 'NOUN'), ('be', 'AUX'), ('tough', 'ADJ'), ('hear', 'VERB'), ('we', 'PRON'), ('wash', 'VERB'), ('so', 'ADV'), ('of', 'ADP'), ('track', 'VERB'), ('entrance', 'NOUN'), ('need', 'VERB'), ('do', 'AUX'), ('a', 'DET'), ('suspect', 'NOUN'), ('officer', 'NOUN'), ('more', 'ADV'), ('detective', 'NOUN'), ('lose', 'VERB'), ('news', 'NOUN'), ('good', 'ADJ'), ('excited', 'ADJ'), ('first', 'ADJ'), ('gossip', 'NOUN'), ('for', 'ADP'), ('one', 'NOUN'), ('should', 'AUX'), ('listen', 'VERB'), ('by', 'ADP'), ('lady', 'NOUN'), ('my', 'PRON'), ('idea', 'NOUN'), ('yourself', 'PRON'), ('i', 'PRON'), ('any', 'DET'), ('way', 'NOUN'), ('the', 'DET'), ('could', 'AUX'), ('do', 'VERB'), ('not', 'PART'), ('and', 'CCONJ'), ('marry', 'VERB'), ('nice', 'ADJ'), ('even', 'ADV'), ('sometimes', 'ADV'), ('huh', 'INTJ'), ('getting', 'AUX'), ('update', 'NOUN'), ('about', 'ADP'), ('hey', 'INTJ'), ('certainly', 'ADV'), ('meet', 'VERB'), ('as', 'ADP'), ('to', 'PART'), ('evidence', 'NOUN'), (

### Build phrases from dialogue

Here we:
1. Break up the dialogue into separate sentences. For this bit we don't care who the speaker is, we just want to create different phrases of different lengths and combinations based on the vocab int the dialogue
2. We use another LLM call to do this, with some one-shot learning

In [15]:
story_data_dict = load_json(STORY_DATA_PATH)

In [16]:
for story_part in story_data_dict:
    dialogue = story_data_dict[story_part]["dialogue"]
    story_data_dict[story_part]["corrected_phrase_list"] = generate_practice_phrases_from_dialogue(dialogue)

In [17]:
save_defaultdict(story_data_dict, STORY_DATA_PATH)

Data saved to ../outputs/the_chocolate_murder/story_data_the_chocolate_murder.json


### Translate dialogue and phrases

In [18]:
PAY_FOR_TRANSLATE_API = True

if PAY_FOR_TRANSLATE_API:

    for story_part in tqdm(story_data_dict):
        dialogue = story_data_dict[story_part]["dialogue"]
        translated_dialogue = translate_dialogue(dialogue)

        corrected_phrase_list = story_data_dict[story_part]["corrected_phrase_list"]
        translated_phrase_list = translate_phrases(corrected_phrase_list)

        story_data_dict[story_part]["translated_dialogue"] = translated_dialogue
        story_data_dict[story_part]["translated_phrase_list"] = translated_phrase_list


100%|██████████| 5/5 [00:17<00:00,  3.46s/it]


In [19]:
save_defaultdict(story_data_dict, STORY_DATA_PATH)

Data saved to ../outputs/the_chocolate_murder/story_data_the_chocolate_murder.json


## Generate Audio Lesson

The steps here are
1. The target language dialogue at normal speed
2. Each corrected and translated phrase in the form english - target fast - target slow
3. Each dialogue utterance in the form english - target fast - target slow
4. The 2 x sped up target language dialogue

In [20]:
story_data_dict =load_json(STORY_DATA_PATH)

In [21]:

PAY_FOR_TEXT_TO_SPEECH = True

story_data_dict =load_json(STORY_DATA_PATH)
if PAY_FOR_TEXT_TO_SPEECH:
    for story_part in tqdm(story_data_dict):

        translated_dialogue_audio_segments = generate_audio_from_dialogue(story_data_dict[story_part]["translated_dialogue"])
        story_data_dict[story_part]["translated_dialogue_audio"] = translated_dialogue_audio_segments
        normal_translated_clip, fast_translated_clips = generate_normal_and_fast_audio(translated_dialogue_audio_segments)
        story_data_dict[story_part]["translated_dialogue_audio_fast"] = fast_translated_clips
        print(f"{story_part} dialogue done")
        #now do phrases asynchronoulsy (still unsure if Google API allows this, not getting huge speed up)
        translated_phrases = story_data_dict[story_part]["translated_phrase_list"]
        tranlsated_phrases_audio = await async_process_phrases(translated_phrases)
        story_data_dict[story_part]["translated_phrase_list_audio"] = tranlsated_phrases_audio
        print(f"{story_part} phrases done")


       


  0%|          | 0/5 [00:00<?, ?it/s]

exposition dialogue done


 20%|██        | 1/5 [04:45<19:01, 285.32s/it]

exposition phrases done
rising_action dialogue done


 40%|████      | 2/5 [09:49<14:48, 296.20s/it]

rising_action phrases done
climax dialogue done


 60%|██████    | 3/5 [15:04<10:10, 305.02s/it]

climax phrases done
falling_action dialogue done


 80%|████████  | 4/5 [20:18<05:08, 308.39s/it]

falling_action phrases done
resolution dialogue done


100%|██████████| 5/5 [25:26<00:00, 305.22s/it]

resolution phrases done





In [22]:

# Save the dictionary to a file
with open(F'{STORY_DATA_PATH}.pkl', 'wb') as file:
    pickle.dump(story_data_dict, file)

In [27]:
with open(F'{STORY_DATA_PATH}.pkl', 'rb') as file:
    story_data_dict = pickle.load(file)

### Generate album artwork

In [26]:
IMAGE_DATA = generate_story_image(STORY_NAME.replace("_", " "))

with open(f"{OUTPUT_DIR}/cover_artwork.jpg", "wb") as f:
    f.write(IMAGE_DATA)

### Generate M4A file with synchronised captions

In [28]:

#get lists and audio clips synced together
full_audio_list = []
full_captions_list = []


#fast dialogue (no text)
PAUSE_TEXT = "---------"
THINKING_GAP = AudioSegment.silent(duration=config.THINKING_GAP_MS)
GAP_BETWEEN_PHRASES = AudioSegment.silent(duration=500)
#translated dialogue

TOTAL_TRACKS = len(story_data_dict) + 1 #to account for the full dialogue as a separate track
ALBUM_NAME = STORY_NAME.replace("_", " ")
TRACK_NUMBER = 0

for story_part in story_data_dict:
    TRACK_NUMBER += 1 #so we don't start at 0

    audio_list = []
    captions_list = []
    dialogue_list = [utterence["text"] for utterence in story_data_dict[story_part]["translated_dialogue"]]
    dialogue_audio_list = story_data_dict[story_part]["translated_dialogue_audio"]

    audio_list.append(GAP_BETWEEN_PHRASES)
    captions_list.append(f"{story_part} - First dialogue")

    audio_list.extend(dialogue_audio_list)
    captions_list.extend(dialogue_list)
    #print(f"audio {len(audio_list)} - captions {len(captions_list)}")

    audio_list.append(GAP_BETWEEN_PHRASES)
    captions_list.append(f"{story_part} - Practice phrases")
    
    for step, phrase in enumerate(story_data_dict[story_part]["translated_phrase_list"]):
        english_text = phrase[0]
        target_text = phrase[1]

        english_audio = story_data_dict[story_part]["translated_phrase_list_audio"][step][0]
        target_audio_slow = story_data_dict[story_part]["translated_phrase_list_audio"][step][1]
        target_audio_normal = story_data_dict[story_part]["translated_phrase_list_audio"][step][2]

        audio_list.append(english_audio)
        captions_list.append(english_text)

        audio_list.append(THINKING_GAP)
        captions_list.append(PAUSE_TEXT)

        audio_list.append(target_audio_normal)
        captions_list.append(target_text)

        audio_list.append(GAP_BETWEEN_PHRASES)
        captions_list.append(PAUSE_TEXT)

        audio_list.append(target_audio_slow)
        captions_list.append(target_text)

        audio_list.append(GAP_BETWEEN_PHRASES)
        captions_list.append(PAUSE_TEXT)

    audio_list.append(story_data_dict[story_part]["translated_dialogue_audio_fast"])
    captions_list.append(f"{story_part} - Repeated Fast Dialogue")

    audio_list.append(GAP_BETWEEN_PHRASES)
    captions_list.append(f"{story_part} - Final Dialogue")
    
    audio_list.extend(dialogue_audio_list)
    captions_list.extend(dialogue_list)

    create_m4a_with_timed_lyrics(audio_segments=audio_list,
                                  phrases=captions_list,
                                   output_file= F"{OUTPUT_DIR}/{STORY_NAME}_{story_part}.m4a",
                                   album_name=ALBUM_NAME,
                                   track_title=story_part,
                                   track_number=TRACK_NUMBER,
                                   total_tracks=TOTAL_TRACKS,
                                   image_data=IMAGE_DATA)
    full_audio_list.extend(audio_list)
    full_captions_list.extend(captions_list)



In [29]:
#add the complete story as a single dialogue at the end

all_dialogue_audio = []
all_dialogue_captions = []

for story_part in story_data_dict:
    dialogue_list = [utterence["text"] for utterence in story_data_dict[story_part]["translated_dialogue"]]
    dialogue_audio_list = story_data_dict[story_part]["translated_dialogue_audio"]
    all_dialogue_audio.extend(dialogue_audio_list)
    all_dialogue_captions.extend(dialogue_list)

    all_dialogue_audio.append(GAP_BETWEEN_PHRASES)
    all_dialogue_captions.append(PAUSE_TEXT)

full_audio_list.extend(all_dialogue_audio)
full_captions_list.extend(all_dialogue_captions)

TRACK_NUMBER += 1
create_m4a_with_timed_lyrics(audio_segments=all_dialogue_audio,
                             phrases= all_dialogue_captions,
                              output_file= F"{OUTPUT_DIR}/{STORY_NAME}_full_dialogue.m4a",
                             album_name=ALBUM_NAME,
                                   track_title="Full Dialogue - All episodes",
                                   track_number=TRACK_NUMBER,
                                   total_tracks=TOTAL_TRACKS,
                                   image_data=IMAGE_DATA)

## Create PDF Booklet
So you can see the spelling of the phrases and dialogue

In [25]:
create_pdf_booklet(story_data_dict, output_filename=f"{OUTPUT_DIR}/story_booklet_{STORY_NAME}.pdf")


## Create Anki Deck for phrases

In [23]:
export_to_anki(story_data_dict, OUTPUT_DIR, STORY_NAME)

Anki deck exported to ../outputs/the_chocolate_murder\the_chocolate_murder_anki_deck.apkg
Deleted temporary file: ../outputs/the_chocolate_murder\14c4cb10-c58f-40ac-ad97-608ba3fff2ff.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\67e959c3-5658-4c8b-8297-333360e12a30.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\a2de6125-d5c0-4578-a5a9-b90ecb537dfa.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\c50ac7e9-7f3a-430e-8c51-ceaf2ca4ec01.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\f1246c09-006d-438e-835b-60347b37eab9.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\18508074-c19c-42e7-949b-cf1e16581335.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\efd1ca35-6e6e-4d4a-b9d9-84baeaddbe49.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\f5b9f12b-1c60-4984-be61-795f71b334c8.mp3
Deleted temporary file: ../outputs/the_chocolate_murder\81ffb4f6-ba9d-4f57-a977-d9046bc78f5e.mp3
Deleted temporary file: ../outputs/th

## Import Anki Deck for new vocab

In [99]:
%autoreload 2
from src.anki import import_anki_deck, inspect_anki_deck

inspect_anki_deck("../data/swedish_3000.txt")

english_phrases, target_phrases, all_fields = import_anki_deck("../data/swedish_3000.txt", english_field_index=4, target_field_index=3)
target_phrases = [phrase.split('<br>')[0].strip() for phrase in target_phrases]

Header: #separator:tab
Header: #html:true
Header: #guid column:1
Header: #notetype column:2
Header: #deck column:3
Header: #tags column:11

Field inspection:
0: ll-}IfsmOR
1: 0 Neri's Sentences (Read)
2: swedish 10 000 sentences easiest to hardest::Neri'...
3: Är han?
4: Is he?
5: Är han?
6: sv
7: Swedish
8: 
9: 
10: 

Total number of fields: 11


In [148]:
# Remove duplicates while maintaining order
unique_pairs = list(set(zip(english_phrases, target_phrases)))

# Create a dictionary in the format similar to story_data_dict
anki_dictionary = {"anki_import": {"translated_phrase_list": unique_pairs, "translated_phrase_list_audio" : []}}

In [149]:
LENGTH = len(unique_pairs)

for step, pair in enumerate(unique_pairs):
    target_text = pair[1]
    audio_segment  = text_to_speech(text = target_text, language_code=config.TARGET_LANGUAGE, voice_name=config.target_language_voice_models["female_voice"])
    print(f"{step} of {LENGTH} -- {pair[1]}")
    anki_dictionary["anki_import"]["translated_phrase_list_audio"].append(audio_segment)

0 of 2304 -- Men det är jag.
1 of 2304 -- Mig med.
2 of 2304 -- Inte det?
3 of 2304 -- Jag gillar den.
4 of 2304 -- Jag är klar.
5 of 2304 -- Så är det bara.
6 of 2304 -- Ja, faktiskt.
7 of 2304 -- Ser bra ut.
8 of 2304 -- Jag känner dig.
9 of 2304 -- Kommer du ihåg?
10 of 2304 -- Är alla här?
11 of 2304 -- Jag tror dig.
12 of 2304 -- Vänta lite nu.
13 of 2304 -- Vad heter hon?
14 of 2304 -- Jag ska gå nu.
15 of 2304 -- Strax tillbaka.
16 of 2304 -- Vad vill han?
17 of 2304 -- Jag bara vet det.
18 of 2304 -- Låter bra.
19 of 2304 -- Gör som jag säger.
20 of 2304 -- Jag tar den.
21 of 2304 -- Vad ska ni göra?
22 of 2304 -- Tack, pappa.
23 of 2304 -- Får jag vara med?
24 of 2304 -- Den här är min.
25 of 2304 -- Allt ordnar sig.
26 of 2304 -- Det här är otroligt.
27 of 2304 -- Just det.
28 of 2304 -- Vad mer?
29 of 2304 -- Är du det?
30 of 2304 -- Ja, självklart.
31 of 2304 -- Två år.
32 of 2304 -- Du klarar det här.
33 of 2304 -- Mycket mer.
34 of 2304 -- Den är vacker.
35 of 2304 -- Du 

In [150]:
%autoreload 2
from src.anki import export_to_anki 
export_to_anki(anki_dictionary, OUTPUT_DIR, "swedish_3000")

Anki deck exported to ../outputs/swedish_anki\swedish_3000_anki_deck.apkg
Deleted temporary file: ../outputs/swedish_anki\236795b9-215d-47a7-b382-da3406baa503.mp3
Deleted temporary file: ../outputs/swedish_anki\17f7d72d-ba24-4d46-8815-45d3cd0cb89b.mp3
Deleted temporary file: ../outputs/swedish_anki\b2b7779f-61b1-43f5-903b-bf485fbedea3.mp3
Deleted temporary file: ../outputs/swedish_anki\f62b41bf-0ef6-4181-98c7-739458250a3e.mp3
Deleted temporary file: ../outputs/swedish_anki\3575d0ac-90ca-4dac-89f4-06b706100fdf.mp3
Deleted temporary file: ../outputs/swedish_anki\d97156d9-a181-4431-abb3-22fd6903b1f2.mp3
Deleted temporary file: ../outputs/swedish_anki\79b53acb-f599-418a-835c-fc53316d600a.mp3
Deleted temporary file: ../outputs/swedish_anki\fe251c5a-518a-4a0c-a824-b50835848ee8.mp3
Deleted temporary file: ../outputs/swedish_anki\cd705aa5-8036-4be8-b712-901c4a929986.mp3
Deleted temporary file: ../outputs/swedish_anki\f2d29fdf-fe4a-474d-a8e4-d0288c70ae05.mp3
Deleted temporary file: ../outputs/s

In [151]:
#update vocab with new english phrases
from src.utils import extract_vocab_and_pos, update_vocab_usage

vocab_and_pos = extract_vocab_and_pos(english_phrases)
update_vocab_usage(vocab_and_pos, update_amount = 0)

{('receive', 'VERB'), ('late', 'ADV'), ('honor', 'NOUN'), ('there', 'ADV'), ('girl', 'NOUN'), ('time', 'NOUN'), ('anything', 'PRON'), ('over', 'ADV'), ('dare', 'VERB'), ('more', 'ADJ'), ('beautiful', 'ADJ'), ('correct', 'ADJ'), ('say', 'VERB'), ('to', 'ADP'), ('what', 'PRON'), ('preferably', 'ADV'), ('actually', 'ADV'), ('follow', 'VERB'), ('true', 'ADJ'), ('favor', 'NOUN'), ('fast', 'ADJ'), ('week', 'NOUN'), ('try', 'VERB'), ('every', 'DET'), ('amazing', 'ADJ'), ('should', 'AUX'), ('mind', 'VERB'), ('yes', 'ADV'), ('no', 'INTJ'), ('excuse', 'VERB'), ('last', 'ADJ'), ('one', 'NUM'), ('darling', 'NOUN'), ('want', 'VERB'), ('goodness', 'NOUN'), ('forever', 'ADV'), ('too', 'ADV'), ('quite', 'ADV'), ('man', 'NOUN'), ('indeed', 'ADV'), ('understand', 'VERB'), ('christmas', 'PROPN'), ('an', 'DET'), ('gentleman', 'NOUN'), ('remember', 'VERB'), ('honey', 'NOUN'), ('doctor', 'PROPN'), ('afternoon', 'NOUN'), ('but', 'CCONJ'), ('release', 'VERB'), ('pleasure', 'NOUN'), ('today', 'NOUN'), ('pregna