In [1]:
from inference.config import PromptBuilderConfig
from inference.utils.wiki_reader import WikipediaReader
from inference.utils.spacy_extractor import SpacyAnchorExtractor
from inference.utils.prompt_builder import PromptBuilder
from inference.utils.response_evaluator import ResponseEvaluator

cfg = PromptBuilderConfig()  # IMPORTANT: parentheses create an instance

print("Wiki cache path:", cfg.paths.wiki_cache_path)
print("Data dir:", cfg.paths.data_dir)
print("Output dir:", cfg.paths.output_dir)



Wiki cache path: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/data/wiki_cache.json
Data dir: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/data
Output dir: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/outputs


In [2]:
reader = WikipediaReader(settings=cfg.wiki)  # IMPORTANT: pass cfg.wiki (an instance), not WikiSettings (the class)

# Load cache if present
try:
    reader.load_cache(cfg.paths.wiki_cache_path)
    print("Cache loaded:", cfg.paths.wiki_cache_path)
except FileNotFoundError:
    print("No cache yet, will create one:", cfg.paths.wiki_cache_path)

w1, w2 = "Bulls", "Bears"

e1 = reader.get_wikipedia_extract(w1)
e2 = reader.get_wikipedia_extract(w2)

print("\n--- Extract 1 ---\n", e1[:400])
print("\n--- Extract 2 ---\n", e2[:400])

# Save cache
cfg.paths.wiki_cache_path.parent.mkdir(parents=True, exist_ok=True)
reader.save_cache(cfg.paths.wiki_cache_path)
print("\nCache saved:", cfg.paths.wiki_cache_path)


Cache loaded: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Competition/data/wiki_cache.json

--- Extract 1 ---
 A bull is an intact adult male of the species Bos taurus (cattle). More muscular and aggressive than the females of the same species, bulls have long been an important symbol in many religions, including for sacrifices. These animals play a significant role in beef ranching, dairy farming, and a variety of sporting and cultural activities, including bullfighting and bull riding.

--- Extract 2 ---
 Bears are carnivoran mammals of the family Ursidae. They are classified as caniforms, or doglike carnivorans. Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere. Bears are found on the continents of North America, South America, and Eurasia. Common characte

Cache saved: /Users/andrey/Documents/_Artemis_tum/Semester5/MWAHAHA_Compet

In [3]:
spacy_extractor = SpacyAnchorExtractor(settings=cfg.spacy)

headline = "Local scientist teaches pigeons to use a vending machine"
doc = spacy_extractor.parse(headline)

# Show extracted noun candidates (simple preview)
candidates = [t.text for t in doc if t.pos_ in ("NOUN", "PROPN")]
print("Headline:", headline)
print("Noun and proper noun tokens:", candidates)

# If your extractor exposes a pair picking method, use it; otherwise keep the tokens.
try:
    n1, n2 = spacy_extractor.pick_two_anchors(headline, seed=123)
    print("Picked anchors:", (n1, n2))
except Exception as e:
    print("No pick_two_anchors method (that is fine). Error:", e)


Headline: Local scientist teaches pigeons to use a vending machine
Noun and proper noun tokens: ['scientist', 'pigeons', 'machine']
No pick_two_anchors method (that is fine). Error: SpacyAnchorExtractor.pick_two_anchors() got an unexpected keyword argument 'seed'


In [5]:
builder = PromptBuilder(config=cfg)

facts_block = builder.format_facts_block("Bulls", "Bears")
print(facts_block)



FACTS:
- Bulls: WHAT: A bull is an intact adult male of the species Bos taurus (cattle). | DOMAIN: biology | KEYWORDS: intact, adult, species, taurus, cattle, muscular
- Bears: WHAT: Bears are carnivoran mammals of the family Ursidae. | DOMAIN: biology | KEYWORDS: bears, carnivoran, mammals, family, ursidae, classified


In [6]:
import time


word1, word2 = "spray", "eggs"

tests = [
    ("good_example", f"I tried to {word1} my confidence on, but {word2} still cracked me up."),
    ("missing_one_word", f"I tried to {word1} my confidence on, but it did not help."),
    ("missing_both", "This sentence contains neither of the required words."),
]

t0 = time.time()
evaluator = ResponseEvaluator()
print(f"Initialized ResponseEvaluator in {time.time() - t0:.2f} seconds")

for name, text in tests:
    required_ok = evaluator.required_words_present(text, word1, word2)

    # Humor classifier can be slow on first run because it loads a separate model.
    try:
        humorous_ok = evaluator.is_humorous(text)
    except Exception as exc:
        humorous_ok = None
        print(f"[{name}] Humor classifier failed: {type(exc).__name__}: {exc}")

    is_good = (required_ok and bool(humorous_ok)) if humorous_ok is not None else None

    print("\n---")
    print(f"case: {name}")
    print(f"text: {text}")
    print(f"required_words_present: {required_ok}")
    print(f"is_humorous: {humorous_ok}")
    print(f"is_good: {is_good}")


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/752 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Initialized ResponseEvaluator in 23.15 seconds

---
case: good_example
text: I tried to spray my confidence on, but eggs still cracked me up.
required_words_present: True
is_humorous: True
is_good: True

---
case: missing_one_word
text: I tried to spray my confidence on, but it did not help.
required_words_present: False
is_humorous: False
is_good: False

---
case: missing_both
text: This sentence contains neither of the required words.
required_words_present: False
is_humorous: False
is_good: False
