In [1]:
import dataclasses
import functools
import inspect
import logging
import sys
from collections.abc import Callable

from effectful.handlers.llm import Template, Tool
from effectful.handlers.llm.providers import (
    CacheLLMRequestHandler,
    LiteLLMProvider,
    LLMLoggingHandler,
    RetryLLMHandler,
    completion,
)
from effectful.handlers.llm.synthesis import ProgramSynthesis
from effectful.ops.semantics import NotHandled, fwd, handler
from effectful.ops.syntax import defop

# Provider interpretation shared by the examples below; it is installed around
# template calls with `handler(provider)`.
provider = LiteLLMProvider()

## Interface

The `effectful.handlers.llm` module provides a simplified LLM interface that uses algebraic effects to provide modularity. The module interface consists of:

- A decorator `Template.define` which creates a prompt template from a callable. We should think of the prompt template as an LLM-implemented function with behavior specified by a template string. When a templated function is called, an LLM is invoked to produce the specified behavior. The `__call__` method of a template is a handleable operation.
- An operation `decode` which parses LLM output. `decode(t: type, c: str)` converts an LLM response `c` to the type `t`. It can be handled to provide decoding logic for particular types.
- Interpretations for LLM providers (`LiteLLMProvider`) and callable decoding (`ProgramSynthesis`). These interpretations can be composed to handle a variety of template behaviors.

## Prompt Templates

This template function writes (bad) poetry on a given theme. While difficult to implement in Python, an LLM can provide a reasonable implementation.

In [2]:
# The docstring is the prompt template: `{theme}` is filled in from the call
# argument (compare the logged prompt text later in this notebook). The body
# itself only raises `NotHandled`; results come from the handler installed at
# call time.
@Template.define
def limerick(theme: str) -> str:
    """Write a limerick on the theme of {theme}. Do not use any tools."""
    raise NotHandled

If we call the template with a provider interpretation installed, we get reasonable behavior. The LLM is nondeterministic by default, so calling the template twice with the same arguments gives us different results.

Templates are regular callables, so can be converted to operations with `defop` if we want to override the LLM implementation in some cases.

In [3]:
# Install the provider for the duration of the block and call the template
# twice with the same argument; the two printed limericks differ (see output).
with handler(provider):
    print(limerick("fish"))
    print("-" * 40)  # visual separator between the two results
    print(limerick("fish"))

In the sea, where the wild fish do roam,  
A small fish called Nemo left home.  
With a flick of his fin,  
He did spin and did grin,  
In the ocean, he'd freely now comb.
----------------------------------------
In the sea where the fish like to dash,  
Swam a trout with a silvery flash.  
With a flip and a spin,  
Through the water so thin,  
It avoided becoming a splash!


If we want deterministic behavior, we can cache the template call. We can either cache it with the default `@functools.cache` or using `CacheLLMRequestHandler`:

In [4]:
# `functools.cache` wraps the template object, so a repeated call with the same
# `theme` returns the memoized result instead of calling the template again.
# The wrapper also provides `haiku.cache_clear()`, used in later cells.
@functools.cache
@Template.define
def haiku(theme: str) -> str:
    """Write a haiku on the theme of {theme}. Do not use any tools."""
    raise NotHandled


# Same prompt as `haiku` but without `functools.cache`; used below to show
# caching through `CacheLLMRequestHandler` instead.
@Template.define
def haiku_no_cache(theme: str) -> str:
    """Write a haiku on the theme of {theme}. Do not use any tools."""
    raise NotHandled


def _print_twice(template, theme):
    """Call ``template(theme)`` two times, printing each result with a rule in between."""
    print(template(theme))
    print("-" * 40)
    print(template(theme))


# Run 1: `haiku` is memoized by functools.cache.
print()
with handler(provider):
    _print_twice(haiku, "fish")

# Run 2: uncached template, with a CacheLLMRequestHandler installed.
print()
cache_handler1 = CacheLLMRequestHandler()
with handler(provider), handler(cache_handler1):
    _print_twice(haiku_no_cache, "fish2")

# Run 3: same as run 2 but with a fresh cache handler and a different theme.
print()
cache_handler2 = CacheLLMRequestHandler()
with handler(provider), handler(cache_handler2):
    _print_twice(haiku_no_cache, "fish3")


Silver fins that dance,  
In the depths, whispers of waves,  
Peace in the water.
----------------------------------------
Silver fins that dance,  
In the depths, whispers of waves,  
Peace in the water.

In the tranquil stream,  
Silver scales glint in sunlight,  
Fish dance with the waves.  
----------------------------------------
In the tranquil stream,  
Silver scales glint in sunlight,  
Fish dance with the waves.  

In the silent stream,  
Silver scales shimmer with light,  
Fish dance through currents.
----------------------------------------
In the silent stream,  
Silver scales shimmer with light,  
Fish dance through currents.


## Converting LLM Results to Python Objects

Type conversion is handled by `decode`. By default, primitive types are converted. `DecodeError` is raised if a response cannot be converted.

In [5]:
# The `-> int` return annotation means the LLM response is converted to an
# `int` before being returned to the caller.
@Template.define
def primes(first_digit: int) -> int:
    """Give a prime number with {first_digit} as the first digit. Do not use any tools."""
    raise NotHandled


with handler(provider):
    # Only the decoded type is checked here, not whether the value is prime.
    assert type(primes(6)) is int

More complex types can be converted by providing handlers for `decode`. `ProgramSynthesis` provides a `decode` handler that parses Python callables.

In [6]:
# The return annotation is a callable type; the `ProgramSynthesis` handler
# installed below is what turns the LLM response into a Python function.
@Template.define
def count_char(char: str) -> Callable[[str], int]:
    """Write a function which takes a string and counts the occurrances of '{char}'. Do not use any tools."""
    raise NotHandled


with handler(provider), handler(ProgramSynthesis()):
    count_a = count_char("a")
    # Exercise the synthesized function on inputs with known answers.
    assert callable(count_a)
    assert count_a("banana") == 3
    assert count_a("cherry") == 0
    # Print the source code of the generated function
    print(inspect.getsource(count_a))

def count_a(s: str) -> int:
    return s.count('a')



## Tool Calling

`Operation`s defined in the lexical scope of a `Template` are automatically available for the LLM to call as tools. The description of these operations is inferred from their type annotations and docstrings.

Tool calls are mediated by the `Tool.__apply__` operation. Handling this operation allows tool use to be tracked or logged.

In [9]:
# Tool exposed to the LLM. The docstring doubles as the tool description, so it
# tells the model how this tool relates to `weather`.
@Tool.define
def cities() -> list[str]:
    """Return a list of cities that can be passed to `weather`."""
    return ["Chicago", "New York", "Barcelona"]


@Tool.define
def weather(city: str) -> str:
    """Given a city name, return a description of the weather in that city."""
    # Canned conditions for the demo; any other city is reported as "unknown".
    conditions = {"Chicago": "cold", "New York": "wet", "Barcelona": "sunny"}
    if city in conditions:
        return conditions[city]
    return "unknown"


# No parameters: the prompt is fixed and the answer depends on what the LLM
# learns by calling the `cities` and `weather` tools.
@Template.define  # cities and weather auto-captured from lexical scope
def vacation() -> str:
    """Use the provided tools to suggest a city that has good weather. Use only the `cities` and `weather` tools provided."""
    raise NotHandled


def log_tool_call(tool, *args, **kwargs):
    """Handler body for ``Tool.__apply__``: forward the call, print it, return the result.

    ``fwd()`` delegates to the next handler with the original arguments, so the
    tool's result is passed through unchanged.
    """
    result = fwd()
    summary = f"Tool call: {tool}(*{args}, **{kwargs}) -> {result}"
    print(summary)
    return result


# Intercept `Tool.__apply__` so that every tool invocation made while
# `vacation()` runs is printed by `log_tool_call`.
with handler(provider), handler({Tool.__apply__: log_tool_call}):
    print(vacation())

Tool call: cities(*(), **{}) -> ['Chicago', 'New York', 'Barcelona']
Tool call: weather(*(), **{'city': 'Chicago'}) -> cold
Tool call: weather(*(), **{'city': 'New York'}) -> wet
Tool call: weather(*(), **{'city': 'Barcelona'}) -> sunny
Barcelona has sunny weather, making it a good choice for a city with pleasant weather.


## Structured Output Generation

Constrained generation is used for any type that is convertible to a Pydantic model.

In [10]:
# Structured return type of `write_joke`; `do_comedy` reads both fields to
# print the knock-knock exchange.
@dataclasses.dataclass
class KnockKnockJoke:
    whos_there: str  # the answer to "Who's there?"
    punchline: str  # the final line of the joke


# Returns a `KnockKnockJoke`, so the LLM output is produced as structured data
# rather than free text.
@Template.define
def write_joke(theme: str) -> KnockKnockJoke:
    """Write a knock-knock joke on the theme of {theme}. Do not use any tools."""
    raise NotHandled


# Takes the structured joke as input; it is interpolated into the prompt via
# `{joke}` and the verdict comes back as a `bool`.
@Template.define
def rate_joke(joke: KnockKnockJoke) -> bool:
    """Decide if {joke} is funny or not. Do not use any tools."""
    raise NotHandled


def do_comedy():
    """Tell an LLM-written lizard joke and print the crowd's reaction.

    The joke comes from `write_joke`; after it has been printed, `rate_joke`
    decides which of the two reaction lines is shown.
    """
    joke = write_joke("lizards")
    print("> You are onstage at a comedy club. You tell the following joke:")
    script = f"Knock knock.\nWho's there?\n{joke.whos_there}.\n{joke.whos_there} who?\n{joke.punchline}"
    print(script)
    reaction = (
        "> The crowd laughs politely."
        if rate_joke(joke)
        else "> The crowd stares in stony silence."
    )
    print(reaction)


# Both template calls inside `do_comedy` run under the same provider handler.
with handler(provider):
    do_comedy()

> You are onstage at a comedy club. You tell the following joke:
Knock knock.
Who's there?
Lizard.
Lizard who?
Lizard who? Lizard you heard the best knock-knock joke, 'cause this one's tongue-in-cheek!
> The crowd laughs politely.


### Logging LLM requests
To intercept the lower-level LLM requests, we can write a handler for `completion`:

In [17]:
def log_llm(*args, **kwargs):
    """Handler body for ``completion``: forward the request, then print prompt and reply.

    Prints the text of the first content part of the first message, followed by
    the content of the first choice in the response, and returns the response
    unchanged.
    """
    result = fwd()

    first_message = kwargs["messages"][0]
    prompt_text = first_message["content"][0]["text"]
    print(f'> {prompt_text}')
    reply = result.choices[0].message
    print(reply.content)
    return result


# Avoid cache: `haiku` is wrapped in `functools.cache`, so `cache_clear()` is
# always available and needs no try/except around it.
haiku.cache_clear()

# Put completion handler innermost so it has highest precedence during the call
with handler(provider), handler({completion: log_llm}):
    _ = haiku("fish2")
    _ = limerick("fish")  # `limerick` is not memoized, so this always issues a request

> Write a haiku on the theme of fish2. Do not use any tools.
None
> Write a haiku on the theme of fish2. Do not use any tools.
In gentle rivers,  
Fish dance beneath the moonlight,  
Secrets below waves.  
> Write a haiku on the theme of fish2. Do not use any tools.
In gentle rivers,  
Fish dance beneath the moonlight,  
Secrets below waves.  
> Write a limerick on the theme of fish. Do not use any tools.
There once was a fish in the sea,  
Who longed to be wild and free.  
It swam with great flair,  
In water so clear,  
And danced with the waves in glee.  


### Python logging for LLM requests and tool calls
We can also use Python's `logging` module through `LLMLoggingHandler` to log both low-level LLM requests (`completion`) and model-initiated tool use (`tool_call`):


In [12]:
# 1. Create a logger
logger = logging.getLogger("effectful.llm")
logger.setLevel(logging.INFO)
# `getLogger` returns the same logger object each time this cell runs, so
# detach stdout handlers left by a previous run; otherwise every record would
# be printed once per execution of the cell.
for attached_handler in list(logger.handlers):
    if (
        isinstance(attached_handler, logging.StreamHandler)
        and attached_handler.stream is sys.stdout
    ):
        logger.removeHandler(attached_handler)
log_handler = logging.StreamHandler(sys.stdout)
# NOTE(review): the format string needs a `payload` attribute on each record --
# presumably supplied by `LLMLoggingHandler`; confirm.
log_handler.setFormatter(logging.Formatter("%(levelname)s %(payload)s"))
logger.addHandler(log_handler)
# 2. Pass it to the handler
llm_logger = LLMLoggingHandler(logger=logger)  # can also be LLMLoggingHandler()

# Avoid cache for demonstration. Only `haiku` is wrapped in `functools.cache`;
# `limerick` is a plain template with no memo to clear, so it needs no call here.
haiku.cache_clear()

with handler(provider), handler(llm_logger):
    _ = haiku("fish3")
    _ = limerick("fish4")

INFO {'args': (), 'kwargs': {'messages': [{'type': 'message', 'content': [{'type': 'text', 'text': 'Write a haiku on the theme of fish3. Do not use any tools.'}], 'role': 'user'}], 'response_format': None, 'tools': [{'type': 'function', 'function': {'name': 'rate_joke', 'description': 'Decide if {joke} is funny or not. Do not use any tools.', 'parameters': {'$defs': {'KnockKnockJoke': {'properties': {'whos_there': {'title': 'Whos There', 'type': 'string'}, 'punchline': {'title': 'Punchline', 'type': 'string'}}, 'required': ['whos_there', 'punchline'], 'title': 'KnockKnockJoke', 'type': 'object', 'additionalProperties': False}}, 'additionalProperties': False, 'properties': {'joke': {'$ref': '#/$defs/KnockKnockJoke'}}, 'required': ['joke'], 'title': 'Params', 'type': 'object'}, 'strict': True}}, {'type': 'function', 'function': {'name': 'cities', 'description': 'Return a list of cities that can be passed to `weather`.', 'parameters': {'additionalProperties': False, 'properties': {}, 'tit

## Template Composition

Templates defined in the lexical scope are also captured, enabling template composition. One template can use the result of another template in a pipeline:


In [13]:
# Sub-templates for different story styles
# Each one is a complete template on its own and is also offered to
# `write_story` as a callable tool.
@Template.define
def story_with_moral(topic: str) -> str:
    """Write a short story about {topic} and end with a moral lesson. Do not use any tools."""
    raise NotHandled


@Template.define
def story_funny(topic: str) -> str:
    """Write a funny, humorous story about {topic}. Do not use any tools."""
    raise NotHandled


# Main orchestrator template - has access to sub-templates
# The prompt names the two sub-templates explicitly so the LLM knows which one
# to call for each style.
@Template.define
def write_story(topic: str, style: str) -> str:
    """Write a story about {topic} in the style: {style}.
    Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson."""
    raise NotHandled


# Verify sub-templates are captured in write_story's lexical context
assert story_with_moral in write_story.tools
assert story_funny in write_story.tools
# NOTE: `write_story.tools` is not limited to the two story templates; the
# printed list also contains the other templates and tools defined earlier in
# this notebook (see the output).
print("Sub-templates available to write_story:", [t.__name__ for t in write_story.tools])

# Reuses `llm_logger` from the logging section, so each request is logged.
with handler(provider), handler(llm_logger):
    print("=== Story with moral ===")
    print(write_story("a curious cat", "moral"))
    print()
    print("=== Funny story ===")
    print(write_story("a curious cat", "funny"))

Sub-templates available to write_story: ['rate_joke', 'cities', 'vacation', 'primes', 'write_story', 'write_joke', 'story_with_moral', 'haiku_no_cache', 'limerick', 'count_char', 'weather', 'story_funny']
=== Story with moral ===
INFO {'args': (), 'kwargs': {'messages': [{'type': 'message', 'content': [{'type': 'text', 'text': "Write a story about a curious cat in the style: moral.\n    Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson."}], 'role': 'user'}], 'response_format': None, 'tools': [{'type': 'function', 'function': {'name': 'rate_joke', 'description': 'Decide if {joke} is funny or not. Do not use any tools.', 'parameters': {'$defs': {'KnockKnockJoke': {'properties': {'whos_there': {'title': 'Whos There', 'type': 'string'}, 'punchline': {'title': 'Punchline', 'type': 'string'}}, 'required': ['whos_there', 'punchline'], 'title': 'KnockKnockJoke', 'type': 'object', 'additionalProperties':

### Retrying LLM Requests
LLM calls can sometimes fail due to transient errors or produce invalid outputs. The `RetryLLMHandler` automatically retries failed template calls:

- `max_retries`: Maximum number of retry attempts (default: 3)
- `add_error_feedback`: When `True`, appends the error message to the prompt on retry, helping the LLM correct its output.
- `exception_cls`: `RetryLLMHandler` only retries when an exception of this type is raised.


Example usage: calling an unstable service that fails intermittently.

In [16]:
# Number of calls made to `unstable_service` so far; reset whenever this cell runs.
call_count = 0
# The service starts succeeding on the call with this ordinal.
REQUIRED_RETRIES = 3


@defop
def unstable_service() -> str:
    """Fetch data from an unstable external service. May require retries."""
    global call_count
    call_count += 1
    # Succeed once enough calls have been made; every earlier call fails.
    if call_count >= REQUIRED_RETRIES:
        return "{ 'status': 'ok', 'data': [1, 2, 3] }"
    raise ConnectionError(
        f"Service unavailable! Attempt {call_count}/{REQUIRED_RETRIES}. Please retry."
    )


# The prompt only tells the LLM to use the tool; the failures are raised by
# `unstable_service` itself.
@Template.define  # unstable_service auto-captured from lexical scope
def fetch_data() -> str:
    """Use the unstable_service tool to fetch data."""
    raise NotHandled


# Allow more retries than the service needs before it succeeds, and feed each
# error message back to the LLM on retry.
retry_handler = RetryLLMHandler(max_retries=5, add_error_feedback=True)

with handler(provider), handler(retry_handler), handler(llm_logger):
    result = fetch_data()
    # `call_count` counts every call to `unstable_service`, including the one
    # that succeeded, so it is a number of attempts rather than of retries.
    print(f"Result: {result}", "Attempts:", call_count)

INFO {'args': (), 'kwargs': {'messages': [{'type': 'message', 'content': [{'type': 'text', 'text': 'Use the unstable_service tool to fetch data.'}], 'role': 'user'}], 'response_format': None, 'tools': [{'type': 'function', 'function': {'name': 'limerick', 'description': 'Write a limerick on the theme of {theme}. Do not use any tools.', 'parameters': {'additionalProperties': False, 'properties': {'theme': {'title': 'Theme', 'type': 'string'}}, 'required': ['theme'], 'title': 'Params', 'type': 'object'}, 'strict': True}}, {'type': 'function', 'function': {'name': 'haiku_no_cache', 'description': 'Write a haiku on the theme of {theme}. Do not use any tools.', 'parameters': {'additionalProperties': False, 'properties': {'theme': {'title': 'Theme', 'type': 'string'}}, 'required': ['theme'], 'title': 'Params', 'type': 'object'}, 'strict': True}}, {'type': 'function', 'function': {'name': 'primes', 'description': 'Give a prime number with {first_digit} as the first digit. Do not use any tools

### Retrying with Validation Errors
As noted above, the `RetryLLMHandler` can also be used to retry on runtime/validation errors:

In [None]:
import pydantic
from pydantic import ValidationError, field_validator
from pydantic_core import PydanticCustomError


# Structured return type of `give_rating_for_movie`. Both validators raise
# `PydanticCustomError` with a message describing what the value got wrong.
@pydantic.dataclasses.dataclass
class Rating:
    score: int
    explanation: str

    @field_validator("score")
    @classmethod
    def check_score(cls, v):
        """Accept `v` only when it lies in the inclusive range 1-5."""
        out_of_range = v < 1 or v > 5
        if not out_of_range:
            return v
        raise PydanticCustomError(
            "invalid_score",
            "score must be 1–5, got {v}",
            {"v": v},
        )

    @field_validator("explanation")
    @classmethod
    def check_explanation_contains_score(cls, v, info):
        """Require the explanation text to contain the score as a substring.

        The check is skipped when `info.data` holds no `score` entry.
        """
        score = info.data.get("score")
        mentions_score = score is None or str(score) in v
        if mentions_score:
            return v
        raise PydanticCustomError(
            "invalid_explanation",
            "explanation must mention the score {score}, got '{explanation}'",
            {"score": score, "explanation": v},
        )


# Returns a `Rating`, so the LLM output has to pass the `Rating` validators;
# the prompt restates the rule that the explanation must contain the score.
@Template.define
def give_rating_for_movie(movie_name: str) -> Rating:
    """Give a rating for {movie_name}. The explanation MUST include the numeric score. Do not use any tools."""
    raise NotHandled


# RetryLLMHandler with error feedback - the traceback helps LLM correct validation errors
# Note: Pydantic wraps PydanticCustomError inside ValidationError, so we catch ValidationError instead
# This rebinds `retry_handler`, replacing the handler configured in the
# previous retry example.
retry_handler = RetryLLMHandler(
    max_retries=3,
    add_error_feedback=True,
    exception_cls=ValidationError,  # Catch validation errors
)

with handler(provider), handler(retry_handler), handler(llm_logger):
    rating = give_rating_for_movie("Die Hard")
    # Reaching these lines means the returned `Rating` passed both validators.
    print(f"Score: {rating.score}/5")
    print(f"Explanation: {rating.explanation}")

INFO {'args': (), 'kwargs': {'messages': [{'type': 'message', 'content': [{'type': 'text', 'text': 'Give a rating for Die Hard. The explanation MUST include the numeric score.'}], 'role': 'user'}], 'response_format': <class 'effectful.handlers.llm.providers.Response'>, 'tools': []}, 'response': ModelResponse(id='chatcmpl-ClKlfxy7G2JCHEWA97nDGMi5WQIfB', created=1765397283, model='gpt-4o-2024-08-06', object='chat.completion', system_fingerprint='fp_e819e3438b', choices=[Choices(finish_reason='stop', index=0, message=Message(content='{"value":{"score":9,"explanation":"Die Hard is widely regarded as a classic in the action film genre, offering a perfect blend of intense action sequences, clever plot, and memorable performances, particularly by Bruce Willis as the iconic John McClane. The film\'s strong pace, witty dialogue, and exceptional direction by John McTiernan make it a standout. It set a new standard for action movies and has a lasting impact that is still felt today, which merits 