In [None]:
%load_ext autoreload
%autoreload 2

from setup_imports import *  # noqa: F401,F403

from src.phrases.generation import generate_phrases_from_vocab_dict
from src.utils import (
    save_text_file,
    load_text_file,
)
from src.phrases.phrase_model import Phrase
from src.phrases.search import get_verbs_from_phrases, get_vocab_from_phrases

# Phrase Generation

We will generate short, simple verb phrases, plus associated vocab-only phrases, from a `vocab_dict`. Verbs are deliberately left out of the vocab phrases: this makes the cards easier and keeps the two tasks (practising verbs and practising vocabulary) separate.

In [None]:
# Collection and deck identifiers: used below to build the data file paths
# and to tag every Phrase that gets created.
COLLECTION, DECK = "SURVIVAL", "Pack01"

In [None]:
# Load the verb and vocab word lists for the current collection and report
# how many entries each one holds.
verbs_path = f"../data/{COLLECTION}_verbs.txt"
vocab_path = f"../data/{COLLECTION}_vocab.txt"
all_verbs = load_text_file(verbs_path)
all_vocab = load_text_file(vocab_path)
print(f"num verbs: {len(all_verbs)}, num vocab: {len(all_vocab)}")

In [None]:
# Select the slice of verbs and vocab that feeds this deck.
# Each deck consumes a fixed-size batch from each word list.
VERBS_PER_DECK = 10
VOCAB_PER_DECK = 50

FROM_INDEX = 0  # <--- we start by 0 now as we remove used up words
TO_INDEX = FROM_INDEX + VERBS_PER_DECK
# Scale the verb offset to the vocab list using integer arithmetic only.
# Going through floats (FROM_INDEX / 10 * 50, then int()) can truncate one
# short, e.g. FROM_INDEX = 23 yields 114 instead of 115.
VOCAB_FROM_INDEX = FROM_INDEX * VOCAB_PER_DECK // VERBS_PER_DECK
VOCAB_TO_INDEX = VOCAB_FROM_INDEX + VOCAB_PER_DECK

some_verbs = all_verbs[FROM_INDEX:TO_INDEX]
some_vocab = all_vocab[VOCAB_FROM_INDEX:VOCAB_TO_INDEX]

# Input for the phrase generator: copies of the two selected slices.
current_dict = {
    "verbs": list(some_verbs),
    "vocab": list(some_vocab),
}

print(
    f"FROM_INDEX: {FROM_INDEX}, TO_INDEX: {TO_INDEX}, VOCAB_FROM_INDEX: {VOCAB_FROM_INDEX}, VOCAB_TO_INDEX: {VOCAB_TO_INDEX}, len verbs: {len(some_verbs)}, len vocab: {len(some_vocab)}"
)
print(current_dict)

In [None]:
# Generate phrases from the selected verbs and vocab; max_iterations=20 is
# handed straight to the generator.
generated_phrases = generate_phrases_from_vocab_dict(current_dict, max_iterations=20)

In [None]:
# Save the first element of the generation result for this collection/deck.
# Forward slashes are used on purpose: they work on every OS, whereas
# backslashes are parsed as (invalid) string escape sequences and only act
# as path separators on Windows.
save_text_file(generated_phrases[0], f"../data/{COLLECTION}-{DECK}.txt")

In [None]:
# Rebind generated_phrases to phrases read from disk. The commented-out line
# loads the per-deck file instead of the shared survival phrase file.
# Forward slashes keep the path portable and avoid invalid escape sequences.
# generated_phrases = load_text_file(f"../data/{COLLECTION}-{DECK}.txt")
generated_phrases = load_text_file("../data/phrases/survival.txt")

In [None]:
# Sanity check: number of phrases that the next cell will process.
len(generated_phrases)

In [None]:
# Turn every generated phrase into a Phrase with image, translation and
# audio, uploading as we go. Successfully processed phrases are collected
# in ALL_PHRASES.
ALL_PHRASES = []
target_language = "sv-SE"
for phrase in generated_phrases:
    try:
        p = Phrase.create(phrase)
    except ValueError as e:
        # Creation failed: print the error and skip this phrase.
        print(e)
    else:
        p.collection = COLLECTION
        p.deck = DECK
        p.generate_image()
        p.translate(target_language, refine=True)
        p.upload()  # first upload: after image and translation
        p.generate_audio(context="flashcard", language=target_language)
        p.upload()  # second upload: after audio generation
        ALL_PHRASES.append(p)

## Remove words from original list

In [None]:
# Collect the verbs and vocab that the processed phrases used.
# _phrases = get_phrases_by_collection(COLLECTION, DECK)
_verbs = get_verbs_from_phrases(ALL_PHRASES)
_vocab = get_vocab_from_phrases(ALL_PHRASES)
# Print counts, as the "num ..." labels say and as the other summary prints
# in this notebook do, rather than dumping the collections themselves.
print(f"num verbs: {len(_verbs)}, num vocab: {len(_vocab)}")

In [None]:
# Drop the words that have been used, keeping the original list order.
# These results are written back to the word-list files, which are later
# sliced by index, so a plain set difference (arbitrary iteration order)
# would shuffle the lists on every run. dict.fromkeys de-duplicates while
# preserving first-seen order.
used_verbs = set(_verbs)
used_vocab = set(_vocab)
remaining_verbs = [word for word in dict.fromkeys(all_verbs) if word not in used_verbs]
remaining_vocab = [word for word in dict.fromkeys(all_vocab) if word not in used_vocab]

print(f"num verbs: {len(remaining_verbs)}, num vocab: {len(remaining_vocab)}")

In [None]:
# Write the remaining words back to the same files the word lists were
# loaded from, overwriting their previous contents.
verbs_path = f"../data/{COLLECTION}_verbs.txt"
vocab_path = f"../data/{COLLECTION}_vocab.txt"
save_text_file(remaining_verbs, verbs_path)
save_text_file(remaining_vocab, vocab_path)

# Create bespoke phrases

In [9]:
# Hand-written phrases to process in the next cell.
new_phrases = ["A table for four"]


In [10]:
# Run each bespoke phrase through the same steps as the generated ones:
# tag it, generate an image, translate, upload, generate audio, upload again.
# NOTE(review): the main generation loop uses "sv-SE"; confirm that "de-DE"
# is the intended language here.
target_language = "de-DE"
for p in new_phrases:
    phrase = Phrase.create(p)
    phrase.collection, phrase.deck = COLLECTION, DECK
    phrase.generate_image()
    phrase.translate(target_language, refine=True)
    phrase.upload()  # first upload: after image and translation
    phrase.generate_audio(context="flashcard", language=target_language)
    phrase.upload()  # second upload: after audio generation

🎨 Starting image generation process
   Prompt: A restaurant table set with exactly four place settings (four plates, four sets of silverware, four glasses, four napkins), with four empty chairs positioned around it, warm inviting lighting highlighting the number four throughout the scene in the style of digital painting style, soft brush blending, subtle gradient shading, realistic human proportions, semi-realistic character design, muted warm color palette with peachy tones, atmospheric lighting, detailed environmental backgrounds, smooth color transitions, contemporary illustration style, gentle line work, painterly texture, expressive facial features, emotional character acting, slightly simplified facial anatomy, clear readable expressions, animated character emotion
   Will try providers in order: ['imagen', 'stability', 'deepai']
🔄 Attempting image generation with imagen...
(y) Successfully generated image with imagen
2026-02-01 18:40:11 - audio-language-trainer - INFO - ph