In [5]:
from inference.config import PromptBuilderConfig  # settings dataclasses live here
from inference.utils.wiki_reader import WikipediaReader
from inference.utils.spacy_extractor import SpacyAnchorExtractor
from inference.utils.prompt_builder import PromptBuilder

cfg = PromptBuilderConfig()  # IMPORTANT: parentheses create an instance

print("Wiki cache path:", cfg.paths.wiki_cache_path)
print("Data dir:", cfg.paths.data_dir)
print("Output dir:", cfg.paths.output_dir)



Wiki cache path: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/data/wiki_cache.json
Data dir: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/data
Output dir: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/outputs


In [6]:
reader = WikipediaReader(settings=cfg.wiki)  # IMPORTANT: pass cfg.wiki (an instance), not WikiSettings (the class)

# Load cache if present
try:
    reader.load_cache(cfg.paths.wiki_cache_path)
    print("Cache loaded:", cfg.paths.wiki_cache_path)
except FileNotFoundError:
    print("No cache yet, will create one:", cfg.paths.wiki_cache_path)

w1, w2 = "Bulls", "Bears"

e1 = reader.get_wikipedia_extract(w1)
e2 = reader.get_wikipedia_extract(w2)

print("\n--- Extract 1 ---\n", e1[:400])
print("\n--- Extract 2 ---\n", e2[:400])

# Save cache
cfg.paths.wiki_cache_path.parent.mkdir(parents=True, exist_ok=True)
reader.save_cache(cfg.paths.wiki_cache_path)
print("\nCache saved:", cfg.paths.wiki_cache_path)


Cache loaded: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/data/wiki_cache.json

--- Extract 1 ---
 A bull is an intact adult male of the species Bos taurus (cattle). More muscular and aggressive than the females of the same species, bulls have long been an important symbol in many religions, including for sacrifices. These animals play a significant role in beef ranching, dairy farming, and a variety of sporting and cultural activities, including bullfighting and bull riding.

--- Extract 2 ---
 Bears are carnivoran mammals of the family Ursidae. They are classified as caniforms, or doglike carnivorans. Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere. Bears are found on the continents of North America, South America, and Eurasia. Common characte

Cache saved: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Compet

In [7]:
spacy_extractor = SpacyAnchorExtractor(settings=cfg.spacy)

headline = "Local scientist teaches pigeons to use a vending machine"
doc = spacy_extractor.parse(headline)

# Show extracted noun candidates (simple preview)
candidates = [t.text for t in doc if t.pos_ in ("NOUN", "PROPN")]
print("Headline:", headline)
print("Noun and proper noun tokens:", candidates)

# If your extractor exposes a pair picking method, use it; otherwise keep the tokens.
try:
    n1, n2 = spacy_extractor.pick_two_anchors(headline, seed=123)
    print("Picked anchors:", (n1, n2))
except Exception as e:
    print("No pick_two_anchors method (that is fine). Error:", e)


Headline: Local scientist teaches pigeons to use a vending machine
Noun and proper noun tokens: ['scientist', 'pigeons', 'machine']
No pick_two_anchors method (that is fine). Error: SpacyAnchorExtractor.pick_two_anchors() got an unexpected keyword argument 'seed'


In [8]:
builder = PromptBuilder(config=cfg)

facts_block = builder.format_facts_block("Bulls", "Bears")
print(facts_block)

text_ok = "My bulls joined the bears’ book club; now every chapter ends in a market crash."
print("\nText:", text_ok)
print("Required words present:", builder.required_words_present(text_ok, "Bulls", "Bears"))


FACTS:
- Bulls: WHAT: A bull is an intact adult male of the species Bos taurus (cattle). | DOMAIN: biology | KEYWORDS: intact, adult, species, taurus, cattle, muscular
- Bears: WHAT: Bears are carnivoran mammals of the family Ursidae. | DOMAIN: biology | KEYWORDS: bears, carnivoran, mammals, family, ursidae, classified

Text: My bulls joined the bears’ book club; now every chapter ends in a market crash.
Required words present: True
