# Intent LLM Playground

Use this notebook to exercise the intent classifier and slot filler end to end. Configure your Ollama endpoint, enter some artifact text, and capture the raw responses before wiring them into the worker.

> Tip: keep Ollama running (`ollama serve`) and ensure the requested model is already pulled. You can also swap in a mock responder if you want deterministic outputs (see the optional section below).



In [1]:
from __future__ import annotations

import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, Optional
from uuid import uuid4

import httpx

# ---------------------------------------------------------------------------
# Ensure the Haven project root (and src/) are importable before using modules
# ---------------------------------------------------------------------------
def resolve_project_root() -> Path:
    """Locate the Haven repository root, i.e. a directory containing ``src/haven``.

    Lookup order:

    1. ``HAVEN_PROJECT_ROOT``, when it is set to a non-empty value and points at a
       directory that contains ``src/haven``.
    2. The current working directory, then each of its ancestors (nearest first).

    Returns:
        The absolute, resolved path of the project root.

    Raises:
        RuntimeError: If no candidate directory contains ``src/haven``.
    """
    marker = Path("src") / "haven"

    # An unset/empty variable must be skipped explicitly: Path("") is Path("."), and
    # Path objects are always truthy, so a truthiness check on the Path never fails and
    # would hand back the relative path "." whenever the kernel starts in the repo root.
    env_value = os.getenv("HAVEN_PROJECT_ROOT", "").strip()
    if env_value:
        env_root = Path(env_value).expanduser().resolve()
        if (env_root / marker).exists():
            return env_root

    cwd = Path.cwd().resolve()
    # Walk upwards so the notebook can live at any depth below the repo root.
    for candidate in (cwd, *cwd.parents):
        if (candidate / marker).exists():
            return candidate

    raise RuntimeError(
        "Unable to locate Haven project root. Set HAVEN_PROJECT_ROOT or launch the notebook from the repo root."
    )


PROJECT_ROOT = resolve_project_root()
SRC_PATH = PROJECT_ROOT / "src"

# Put the repo root and src/ at the front of sys.path (src/ ends up first) so the
# `haven` and `shared` imports below resolve; entries already present are left alone.
for import_dir in map(str, (PROJECT_ROOT, SRC_PATH)):
    if import_dir in sys.path:
        continue
    sys.path.insert(0, import_dir)

from haven.intents.classifier.classifier import ClassifierSettings, classify_artifact
from haven.intents.classifier.taxonomy import IntentTaxonomy, load_taxonomy
from haven.intents.models import ClassificationResult
from haven.intents.slots import SlotFiller, SlotFillerResult, SlotFillerSettings
from shared.logging import setup_logging

# ---------------------------------------------------------------------------
# Configure runtime knobs here
# ---------------------------------------------------------------------------
# Every knob can be overridden from the environment before the kernel starts.
# For the endpoint, a value that is unset, empty, or only whitespace falls back to
# the local default.
OLLAMA_BASE_URL = (os.getenv("OLLAMA_BASE_URL") or "").strip() or "http://localhost:11434"
INTENT_MODEL = os.getenv("INTENT_MODEL", "llama3.2")
DEFAULT_TAXONOMY_PATH = PROJECT_ROOT / "services/worker_service/taxonomies/taxonomy_v1.0.0.yaml"

# Optional: point to an alternate taxonomy payload if you're testing new definitions
CUSTOM_TAXONOMY_PATH = os.getenv("INTENT_TAXONOMY_PATH")

setup_logging()
print(f"Project root: {PROJECT_ROOT}")
print(f"Using Ollama endpoint: {OLLAMA_BASE_URL}")
print(f"Model: {INTENT_MODEL}")
print(f"Taxonomy: {CUSTOM_TAXONOMY_PATH or DEFAULT_TAXONOMY_PATH}")



Project root: /Users/chrispatten/workspace/haven
Using Ollama endpoint: http://localhost:11434
Model: llama3.2
Taxonomy: /Users/chrispatten/workspace/haven/services/worker_service/taxonomies/taxonomy_v1.0.0.yaml


In [2]:
def load_intent_taxonomy(path: Optional[Path] = None) -> IntentTaxonomy:
    """Load the intent taxonomy used by the classifier and slot filler.

    The file is chosen by precedence: the explicit ``path`` argument, then
    ``CUSTOM_TAXONOMY_PATH``, then ``DEFAULT_TAXONOMY_PATH``. A leading ``~`` is expanded.

    Raises:
        FileNotFoundError: If the chosen file does not exist.
    """
    chosen = path or CUSTOM_TAXONOMY_PATH or DEFAULT_TAXONOMY_PATH
    taxonomy_file = Path(chosen).expanduser()
    if taxonomy_file.exists():
        return load_taxonomy(taxonomy_file)
    raise FileNotFoundError(f"Taxonomy file not found: {taxonomy_file}")


def build_slot_filler() -> SlotFiller:
    """Create a ``SlotFiller`` configured with the notebook's Ollama endpoint and model.

    Uses ``OLLAMA_BASE_URL`` and ``INTENT_MODEL`` from the configuration cell and a
    30-second request timeout.
    """
    filler_settings = SlotFillerSettings(
        ollama_base_url=OLLAMA_BASE_URL,
        slot_model=INTENT_MODEL,
        request_timeout=30.0,
    )
    return SlotFiller(filler_settings)


def run_intent_pipeline(
    *,
    text: str,
    entities: Dict[str, Any],
    source_type: str = "imessage",
    artifact_id: Optional[str] = None,
    client: Optional[httpx.Client] = None,
    slot_filler: Optional[SlotFiller] = None,
    taxonomy: Optional[IntentTaxonomy] = None,
) -> tuple[ClassificationResult, SlotFillerResult]:
    """Classify ``text`` and fill slots for the detected intents.

    Args:
        text: Artifact text to analyse.
        entities: Entity payload handed to both the classifier and the slot filler.
        source_type: Source label forwarded to the slot filler.
        artifact_id: Identifier forwarded to the slot filler; a random UUID4 string is
            generated when omitted or empty.
        client: HTTP client for the classifier call. When omitted, a client is created
            for this call and closed before returning; a caller-supplied client is
            never closed here.
        slot_filler: Slot filler to use; defaults to ``build_slot_filler()``.
        taxonomy: Taxonomy to use; defaults to ``load_intent_taxonomy()``.

    Returns:
        ``(classification, slot_result)``.

    Raises:
        FileNotFoundError: If no taxonomy is supplied and the default file is missing.
    """
    if taxonomy is None:
        taxonomy = load_intent_taxonomy()
    artifact_id = artifact_id or str(uuid4())
    if slot_filler is None:
        slot_filler = build_slot_filler()

    classifier_settings = ClassifierSettings(
        base_url=OLLAMA_BASE_URL,
        model=INTENT_MODEL,
        timeout=30.0,
        min_confidence=0.35,
    )

    # Create the owned client last, immediately before the try/finally that closes it,
    # so a failure while building the collaborators above cannot leak an open client.
    owns_client = client is None
    if owns_client:
        client = httpx.Client(base_url=OLLAMA_BASE_URL, timeout=30.0)

    try:
        classification = classify_artifact(
            text=text,
            taxonomy=taxonomy,
            entities=entities,
            settings=classifier_settings,
            client=client,
        )
        slot_result = slot_filler.fill_slots(
            job_text=text,
            classification=classification,
            taxonomy=taxonomy,
            entity_payload=entities,
            artifact_id=artifact_id,
            source_type=source_type,
        )
    finally:
        if owns_client:
            client.close()

    return classification, slot_result



## Sample artifact + entities

Adjust the text/entities below to mirror the document you want to test. Including `channel_context.from` / `channel_context.to` helps the model resolve pronouns for messaging/email content.



In [3]:
# Sample artifact text. It contains an em dash (U+2014), so non-ASCII input is exercised.
sample_text = """
Hey Chris — can you remember to pick up eggs for me before tomorrow night? 
I'll be tied up with meetings until late Wednesday.
""".strip()

# Entity payload passed to the pipeline alongside the text: extracted people and dates
# plus the sender/recipient channel context.
sample_entities = {
    "people": [
        {
            "normalizedValue": "Chris",
            "identifier": "imessage:+15551234567",
            "role": "recipient",
        }
    ],
    "dates": [
        {
            "normalizedValue": "2025-11-12T21:00:00-05:00",
            "entity": {"text": "tomorrow night"},
        }
    ],
    "channel_context": {
        "from": {
            "display_name": "Alex",
            "identifier": "imessage:+15550987654",
        },
        "to": [
            {
                "display_name": "Chris",
                "identifier": "imessage:+15551234567",
            }
        ],
    },
}

print(sample_text)
print(json.dumps(sample_entities, indent=2))



Hey Chris — can you remember to pick up eggs for me before tomorrow night? 
I'll be tied up with meetings until late Wednesday.
{
  "people": [
    {
      "normalizedValue": "Chris",
      "identifier": "imessage:+15551234567",
      "role": "recipient"
    }
  ],
  "dates": [
    {
      "normalizedValue": "2025-11-12T21:00:00-05:00",
      "entity": {
        "text": "tomorrow night"
      }
    }
  ],
  "channel_context": {
    "from": {
      "display_name": "Alex",
      "identifier": "imessage:+15550987654"
    },
    "to": [
      {
        "display_name": "Chris",
        "identifier": "imessage:+15551234567"
      }
    ]
  }
}


In [4]:
# Run the sample artifact through the classifier and slot filler, then print a
# human-readable report of both results.
sample_source_type = "imessage"
classification, slot_result = run_intent_pipeline(
    text=sample_text,
    entities=sample_entities,
    source_type=sample_source_type,
)

banner = "=" * 80

print(banner)
print("CLASSIFIER OUTPUT")
print(banner)
for intent in classification.intents:
    print(f"\n✓ Intent: {intent.intent_name}")
    print(
        f"  Confidence: {intent.confidence:.2%} "
        f"(base: {intent.base_confidence:.2%}, prior: {intent.prior_applied:.2f}x)"
    )
    if intent.reasons:
        print("  Reasons:")
        for reason in intent.reasons:
            print(f"    • {reason}")

print("\n" + banner)
print("SLOT FILLING OUTPUT")
print(banner)
for assignment in slot_result.assignments:
    print(f"\n📋 Intent: {assignment.intent_name} ({assignment.confidence:.2%})")

    if assignment.slots:
        print("  ✓ Resolved Slots:")
        for slot_name, value in assignment.slots.items():
            source = assignment.slot_sources.get(slot_name, "?")
            # Nested values are JSON-encoded so each slot prints on a single line.
            rendered = json.dumps(value) if isinstance(value, (dict, list)) else value
            print(f"    • {slot_name} (from {source}): {rendered}")

    if assignment.missing_slots:
        print(f"  ⚠ Missing Required Slots: {', '.join(assignment.missing_slots)}")

    if assignment.notes:
        print("  ℹ Notes:")
        for note in assignment.notes:
            print(f"    • {note}")

print("\n" + banner)
print("SUMMARY")
print(banner)

# Only append an ellipsis when the text was actually truncated.
preview = sample_text[:60] + ("..." if len(sample_text) > 60 else "")
print(f"Text analyzed: '{preview}'")
print(f"Source type: {sample_source_type}")

# channel_context is optional in the entity payload, so read it defensively.
channel_context = sample_entities.get("channel_context", {})
sender_name = channel_context.get("from", {}).get("display_name")
recipient_names = [party.get("display_name") for party in channel_context.get("to", [])]
print(f"Channel context: from={sender_name}, to={recipient_names}")

print(f"Intents detected: {len(classification.intents)}")
print(f"Top intent: {classification.intents[0].intent_name if classification.intents else 'none'}")
if slot_result.assignments:
    top_assignment = slot_result.assignments[0]
    resolved_count = len(top_assignment.slots)
    print(f"Resolved slots: {resolved_count} / {resolved_count + len(top_assignment.missing_slots)}")



HTTP Request: POST http://localhost:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/generate "HTTP/1.1 200 OK"
CLASSIFIER OUTPUT

✓ Intent: reminder.create
  Confidence: 75.00% (base: 75.00%, prior: 1.00x)
  Reasons:
    • 'remember to' suggests reminder.create
    • The text contains a specific request for a follow-up action ('pick up eggs')

SLOT FILLING OUTPUT

📋 Intent: reminder.create (75.00%)
  ✓ Resolved Slots:
    • source_ref (from default): d1178bdb-55a5-402b-82ba-5dc6607d9c20
    • remind_at (from entity): 2025-11-12T21:00:00-05:00
    • person (from entity): {"name": "Chris", "identifier": "imessage:+15551234567", "role": "recipient"}
    • what (from llm): pick up eggs
  ℹ Notes:
    • Although the conversation also mentions a task (picking up eggs), the 'reminder.create' intent is more strongly supported by the language and context, as it explicitly requests a follow-up action.

SUMMARY
Text analyzed: 'Hey Chris â€

In [5]:
# Debug cell: call the classifier's building blocks one at a time so the raw model
# output can be inspected before any validation is applied.
print(f"OLLAMA_BASE_URL: {OLLAMA_BASE_URL!r}")
print(f"INTENT_MODEL: {INTENT_MODEL!r}")

# Private helpers of the classifier module; imported here because only this debugging
# cell needs them. ClassifierSettings is already imported in the setup cell.
from haven.intents.classifier.classifier import _build_prompt, _invoke_ollama, _parse_response

debug_min_confidence = 0.35  # same threshold run_intent_pipeline passes to the classifier

prompt = _build_prompt(
    text=sample_text,
    taxonomy=load_intent_taxonomy(),
    entities=sample_entities,
    min_confidence=debug_min_confidence,
)
settings = ClassifierSettings(
    base_url=OLLAMA_BASE_URL,
    model=INTENT_MODEL,
    timeout=30.0,
    min_confidence=debug_min_confidence,
)

print("\n=== RAW OLLAMA RESPONSE ===")
raw_text = _invoke_ollama(
    payload={"model": INTENT_MODEL, "prompt": prompt, "format": "json", "stream": False},
    settings=settings,
    client=None,
)
print(raw_text)

print("\n=== PARSED JSON ===")
parsed = _parse_response(raw_text)
print(json.dumps(parsed, indent=2))

print("\n=== INTENTS ARRAY ===")
print(json.dumps(parsed.get("intents", []), indent=2))



OLLAMA_BASE_URL: 'http://localhost:11434'
INTENT_MODEL: 'llama3.2'

=== RAW OLLAMA RESPONSE ===
HTTP Request: POST http://localhost:11434/api/generate "HTTP/1.1 200 OK"
{
  "intents": [
    {
      "name": "reminder.create",
      "base_confidence": 0.87,
      "reasons": ["Hey Chris — can you remember to pick up eggs for me before tomorrow night?"]
    }
  ],
  "notes": ["The intent 'reminder.create' is supported because of the request to 'remember to' pick up eggs. The temporal context provided also suggests a reminder is being asked for."]
}

=== PARSED JSON ===
{
  "intents": [
    {
      "name": "reminder.create",
      "base_confidence": 0.87,
      "reasons": [
        "Hey Chris \u2014 can you remember to pick up eggs for me before tomorrow night?"
      ]
    }
  ],
  "notes": [
    "The intent 'reminder.create' is supported because of the request to 'remember to' pick up eggs. The temporal context provided also suggests a reminder is being asked for."
  ]
}

=== INTENTS AR

In [6]:
# Re-run the pipeline and print a compact, plain-text view of the same results.
# Model output is not deterministic, so reasons and notes can differ between runs.
classification, slot_result = run_intent_pipeline(
    text=sample_text,
    entities=sample_entities,
    source_type="imessage",
)

print("Classifier intents:")
for intent in classification.intents:
    print(f"- {intent.intent_name}: confidence={intent.confidence:.2f} (base={intent.base_confidence:.2f})")
    if intent.reasons:
        for reason in intent.reasons:
            print(f"    reason: {reason}")

print("\nSlot assignments:")
for assignment in slot_result.assignments:
    print(f"\nIntent: {assignment.intent_name} (confidence={assignment.confidence:.2f})")
    print("Resolved slots:")
    for name, value in assignment.slots.items():
        print(f"  - {name}: {value}")
    if assignment.missing_slots:
        print("Missing required:", assignment.missing_slots)
    if assignment.slot_sources:
        print("Sources:", assignment.slot_sources)
    if assignment.notes:
        print("Notes:")
        for note in assignment.notes:
            print(f"  • {note}")



HTTP Request: POST http://localhost:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/generate "HTTP/1.1 200 OK"
Classifier intents:
- reminder.create: confidence=0.75 (base=0.75)
    reason: Hey Chris — can you remember to pick up eggs for me before tomorrow night?
    reason: I'll be tied up with meetings until late Wednesday.

Slot assignments:

Intent: reminder.create (confidence=0.75)
Resolved slots:
  - source_ref: 24ae8f8c-b8cb-415f-adf9-78cbfcd5824d
  - remind_at: 2025-11-12T21:00:00-05:00
  - person: {'name': 'Chris', 'identifier': 'imessage:+15551234567', 'role': 'recipient'}
  - what: pick up eggs
Sources: {'source_ref': 'default', 'remind_at': 'entity', 'person': 'entity', 'what': 'llm'}
Notes:
  • Entity 'people' contains information about the person involved (Chris), and channel context provides display names and identifiers for both parties.


### Optional: offline stub

If you want predictable responses without calling Ollama, you can monkeypatch `httpx.Client.post` to return canned JSON. The snippet below shows one way to do it; uncomment and adjust as needed.



In [7]:
# Example stub for deterministic testing (uncomment to use).
#
# Notes on the stub:
#   * `fake_post` takes the request body through **kwargs instead of a parameter named
#     `json`. A `json` parameter would shadow the `json` module inside the function, so
#     `json.dumps(payload)` would be called on the request body and fail. The canned
#     body is therefore serialized once, outside `fake_post`.
#   * The fake response carries a `request`, which httpx requires before
#     `response.raise_for_status()` can be called (presumably the classifier calls it;
#     confirm against the classifier source).
#   * The URL is matched by suffix so both relative and absolute URLs are intercepted.
#   * The pipeline logs two `/api/generate` calls per run, so the slot filler will
#     presumably receive this same canned payload; verify that is acceptable.
#
# from contextlib import contextmanager
#
# @contextmanager
# def canned_llm_response(payload: Dict[str, Any]):
#     original_post = httpx.Client.post
#     canned_body = {"response": json.dumps(payload)}
#
#     def fake_post(self, url, *args, **kwargs):
#         # Only intercept Ollama generation calls
#         if str(url).endswith("/api/generate"):
#             return httpx.Response(
#                 status_code=200,
#                 json=canned_body,
#                 request=self.build_request("POST", url),
#             )
#         return original_post(self, url, *args, **kwargs)
#
#     httpx.Client.post = fake_post
#     try:
#         yield
#     finally:
#         # Always restore the real method, even if the body of the `with` block raises.
#         httpx.Client.post = original_post
#
# canned_response = {
#     "intents": [
#        {
#            "name": "reminder.create",
#            "base_confidence": 0.82,
#            "confidence": 0.86,
#            "reasons": [
#                "Follow-up request detected",
#                "Contains explicit reminder phrasing",
#            ],
#        }
#     ],
#     "notes": ["Mocked classifier output"],
# }
#
# with canned_llm_response(canned_response):
#     classification, slot_result = run_intent_pipeline(
#         text=sample_text,
#         entities=sample_entities,
#         source_type="imessage",
#     )
#     print(classification)

