# Story Generation
We remember things better as stories. The plan here is to pick a subset of our phrases, extract the vocabulary, and generate a story based on it. We can then pull in more flashcards / phrases to ensure more complete phrase coverage.

In [2]:
# Enable IPython's autoreload extension in mode 2, which reloads imported modules
# before each cell runs, so edits to the project's modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from dotenv import load_dotenv
import sys
import os
import pickle
from pathlib import Path
load_dotenv()
# Put the directory one level above the current working directory on sys.path
# (once only); the later `from src...` imports rely on it being importable.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
from src.utils import load_text_file, save_json, load_json
from src.nlp import get_vocab_dictionary_from_phrases, get_vocab_dict_from_dialogue, compare_vocab_overlap
from src.config_loader import config
from pprint import pprint
import random

# Load the phrase list that the rest of the notebook works from.
filepath = "../data/longman_1000_phrases.txt"
phrases = load_text_file(filepath)

# Preview the first ten phrases. pprint is given the list itself rather than an
# f-string: pretty-printing a string only re-wraps its repr (escaped quotes,
# line breaks in the middle of a phrase), whereas a long list is laid out one
# item per line.
print("First few phrases:")
pprint(phrases[:10])

# Flashcards have already been generated for some of these phrases. A flashcard
# index lets us select the flashcards that cover a specific vocabulary range.
# Building it is quite computationally expensive; it is generated using
# create_flashcard_index.



("First few phrases ['Do you want to become a famous writer?', 'Let me show "
 "you around the city', 'We need to handle this situation carefully', 'Stop "
 'wasting time on this\', \'Do you like playing the guitar at night?\', "I\'m '
 'taking a vacation next month", "Don\'t forget to wear a helmet while '
 'cycling", "Let\'s cut unnecessary expenses this year", "We\'re producing a '
 'new product soon", \'Did you remember to turn off the stove?\']')


## create the flashcard index

In [None]:



# Location of the flashcard index JSON (the same file the next cell loads).
# Path() with no arguments is just the current directory, so name the file explicitly.
flashcard_index_path = Path("../data/longman_1000_phrase_index.json")

In [None]:


# Load the previously generated flashcard index from disk instead of rebuilding it.
flashcard_index = load_json("../data/longman_1000_phrase_index.json")

In [7]:
# Extract the vocabulary from the first 50 phrases only; this subset is what the
# story is generated from.
vocab_dict_flashcards = get_vocab_dictionary_from_phrases(phrases[:50])

In [None]:
from src.dialogue_generation import generate_story
# Generate a story from the vocabulary of the 50-phrase subset.
# NOTE(review): a later cell rebinds story_50_phrases to a story loaded from JSON,
# so on a top-to-bottom run this result is replaced — confirm whether this call
# should be skipped or its result cached.
story_50_phrases = generate_story(vocab_dict_flashcards)

Now that we have a story, it has probably introduced additional vocabulary, so we need to check that our flashcard sample covers the story.

In [17]:
# Load a previously saved story from disk. This rebinds story_50_phrases,
# replacing whatever generate_story returned in the earlier cell.

story_50_phrases = load_json("../data/stories/test_story/story_community_park.json")

In [39]:
from src.nlp import get_vocab_dict_from_dialogue

# Build the story's vocabulary dictionary. limit_story_parts is set to
# ["introduction"] — presumably only that part of the story is analysed; confirm
# against get_vocab_dict_from_dialogue.
vocab_dict_story = get_vocab_dict_from_dialogue(
    story_50_phrases,
    limit_story_parts=["introduction"],
)

In [40]:
# Compare the vocabulary of the 50 seed phrases with the story's vocabulary.
# The call prints a verb / vocabulary overlap report (see the cell output).
vocab_overlap = compare_vocab_overlap(vocab_dict_flashcards, vocab_dict_story)

=== VERB ANALYSIS ===
Original verbs: 77
Verbs used in story: 33
Verbs from original used: 18 (23.4%)
New verbs introduced: 15
Examples of new verbs: ['plan', 'could', 'cycle', 'ride', 'love']

=== VOCABULARY ANALYSIS ===
Original vocabulary: 148
Vocabulary used in story: 86
Vocabulary from original used: 37 (25.0%)
New vocabulary introduced: 49
Examples of new vocabulary: ['when', 'work', 'forward', 'free', 'definitely']


In [41]:
from src.nlp import get_matching_flashcards_indexed
# Use the flashcard index to pull the existing phrases needed to cover the
# vocabulary in our story.
results = get_matching_flashcards_indexed(vocab_dict_story, flashcard_index)

verb matches: 100%|██████████| 33/33 [00:00<?, ?it/s]
vocab matches: 100%|██████████| 86/86 [00:00<?, ?it/s]
ranking cards: 100%|██████████| 803/803 [00:00<00:00, 267515.97it/s]
verb matches: 100%|██████████| 29/29 [00:00<?, ?it/s]
vocab matches: 100%|██████████| 81/81 [00:00<00:00, 81025.19it/s]
ranking cards: 100%|██████████| 749/749 [00:00<00:00, 149853.73it/s]
verb matches: 100%|██████████| 28/28 [00:00<00:00, 28042.15it/s]
vocab matches: 100%|██████████| 75/75 [00:00<00:00, 74987.56it/s]
ranking cards: 100%|██████████| 726/726 [00:00<00:00, 359426.90it/s]
verb matches: 100%|██████████| 26/26 [00:00<00:00, 25689.49it/s]
vocab matches: 100%|██████████| 71/71 [00:00<?, ?it/s]
ranking cards: 100%|██████████| 667/667 [00:00<00:00, 166682.60it/s]
verb matches: 100%|██████████| 24/24 [00:00<?, ?it/s]
vocab matches: 100%|██████████| 68/68 [00:00<?, ?it/s]
ranking cards: 100%|██████████| 641/641 [00:00<00:00, 213478.55it/s]
verb matches: 100%|██████████| 23/23 [00:00<?, ?it/s]
vocab matche

In [42]:
# Collect the phrase of every selected card, then extract the vocabulary those
# phrases cover so it can be compared against the story.
# NOTE(review): if the cards are dicts, card.get('phrase') yields None for a card
# without a 'phrase' key — confirm that cannot happen before passing the list on.
proposed_flashcard_phrases = [card.get('phrase') for card in results['selected_cards']]
vocab_from_new_flashcards = get_vocab_dictionary_from_phrases(proposed_flashcard_phrases)

In [43]:
# Re-run the overlap report, this time with the vocabulary of the selected
# flashcards in place of the original 50-phrase vocabulary.
new_overlap = compare_vocab_overlap(vocab_from_new_flashcards, vocab_dict_story)

=== VERB ANALYSIS ===
Original verbs: 65
Verbs used in story: 33
Verbs from original used: 30 (46.2%)
New verbs introduced: 3
Examples of new verbs: ['ride', 'cycle', 'create']

=== VOCABULARY ANALYSIS ===
Original vocabulary: 148
Vocabulary used in story: 86
Vocabulary from original used: 75 (50.7%)
New vocabulary introduced: 11
Examples of new vocabulary: ['snack', 'call', 'outdoors', 'alright', 'perfect']


In [45]:
# Display the selected phrases for manual inspection.
proposed_flashcard_phrases

["Don't worry, you'll get used to the new system.",
 'Bye for now, see you later this evening!',
 "I'm planning to organize the office party soon",
 "Don't forget to wear a helmet while cycling",
 'Could the best solution be right in front of us?',
 'Look at that beautiful sunset over there',
 "Did you hear they're building a new community center?",
 'Make sure to double-check your work',
 'Oh no, I dropped the glass and it broke!',
 'I should have chosen a different career path',
 "Don't you think they worry too much about small things?",
 'How do you manage your time so efficiently?',
 'Remember when we met at the coffee shop last week?',
 'I really want to achieve more in my career',
 'We need to cut back on spending this month',
 'Do you like playing the guitar at night?',
 'Shall we go to the park to feed the ducks?',
 'I hope the weather will be nice tomorrow',
 "We must ensure everyone's safety first.",
 'Can you hang the wet clothes out to dry?',
 'Should we start from the begi