examples/advanced_selection_example.py (84 changes: 60 additions & 24 deletions)

@@ -24,28 +24,43 @@
# Agent, dataset, and eval_fn (same as custom_agent_example.py)
# ---------------------------------------------------------------------------

+
class MyAgent:
    def __init__(self, models):
        self.client = OpenAI()
        self.planner_model = models["planner"]
        self.solver_model = models["solver"]

    def run(self, input_data):
-        plan = self.client.chat.completions.create(
-            model=self.planner_model,
-            messages=[
-                {"role": "system", "content": "Create a brief plan to answer the question."},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
-
-        answer = self.client.chat.completions.create(
-            model=self.solver_model,
-            messages=[
-                {"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        plan = (
+            self.client.chat.completions.create(
+                model=self.planner_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "Create a brief plan to answer the question.",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
+
+        answer = (
+            self.client.chat.completions.create(
+                model=self.solver_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{plan}",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
        return answer


@@ -71,19 +86,22 @@ def eval_fn(expected, actual):
# Selection algorithms
# ---------------------------------------------------------------------------

+
def run_auto():
    """method="auto" — automatically picks the best algorithm (default)."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
-        method="auto",
+        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset, method="auto",
    )
    return selector.select_best(parallel=True)


def run_random():
    """method="random" — evaluate a random subset of combinations."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
        method="random",
        sample_fraction=0.5,  # evaluate 50% of all combinations
    )
@@ -93,7 +111,10 @@ def run_random():
def run_hill_climbing():
    """method="hill_climbing" — greedy search using model quality/speed rankings."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
        method="hill_climbing",
        batch_size=4,  # number of neighbors to evaluate per step
    )
@@ -103,7 +124,10 @@ def run_hill_climbing():
def run_arm_elimination():
    """method="arm_elimination" — eliminates statistically dominated combinations early."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
        method="arm_elimination",
    )
    return selector.select_best(parallel=True)
@@ -112,7 +136,10 @@ def run_arm_elimination():
def run_epsilon_lucb():
    """method="epsilon_lucb" — stops when the best arm is identified within epsilon."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
        method="epsilon_lucb",
        epsilon=0.05,  # acceptable gap from the true best
    )
@@ -122,7 +149,10 @@ def run_epsilon_lucb():
def run_threshold():
    """method="threshold" — classify combinations as above/below a quality threshold."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
        method="threshold",
        threshold=0.8,  # minimum acceptable accuracy
    )
@@ -132,7 +162,10 @@ def run_threshold():
def run_lm_proposal():
    """method="lm_proposal" — use a proposer LLM to shortlist promising combinations."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
        method="lm_proposal",
    )
    return selector.select_best(parallel=True)
@@ -141,7 +174,10 @@ def run_lm_proposal():
def run_bayesian():
    """method="bayesian" — GP-based Bayesian optimization (requires agentopt[bayesian])."""
    selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
        method="bayesian",
        batch_size=4,
    )
examples/ag2_example.py (3 changes: 3 additions & 0 deletions)

@@ -25,6 +25,7 @@
# run(input_data) runs the agent on a single datapoint and returns the output.
# ---------------------------------------------------------------------------

+
class MyAgent:
    """AG2 planner+solver agent pair."""

@@ -76,6 +77,7 @@ def run(self, input_data):
# Step 3: Evaluation function — score agent output against expected answer.
# ---------------------------------------------------------------------------

+
def eval_fn(expected, actual):
    return 1.0 if expected.lower() in str(actual).lower() else 0.0

@@ -99,6 +101,7 @@ def eval_fn(expected, actual):

    results = selector.select_best(parallel=True)
    results.print_summary()
+    results.plot_pareto()

    best = results.get_best_combo()
    if best:
examples/crewai_example.py (3 changes: 3 additions & 0 deletions)

@@ -21,6 +21,7 @@
# run(input_data) runs the agent on a single datapoint and returns the output.
# ---------------------------------------------------------------------------

+
class MyAgent:
    """CrewAI crew with researcher + writer agents."""

@@ -90,6 +91,7 @@ def run(self, input_data):
# Step 3: Evaluation function — score agent output against expected answer.
# ---------------------------------------------------------------------------

+
def eval_fn(expected, actual):
    return 1.0 if expected.lower() in str(actual).lower() else 0.0

@@ -113,6 +115,7 @@ def eval_fn(expected, actual):

    results = selector.select_best(parallel=True)
    results.print_summary()
+    results.plot_pareto()

    best = results.get_best_combo()
    if best:
examples/custom_agent_example.py (45 changes: 31 additions & 14 deletions)

@@ -26,6 +26,7 @@
# run() takes a single datapoint and returns the agent's output.
# ---------------------------------------------------------------------------

+
class MyAgent:
    """A simple planner+solver agent using the OpenAI SDK."""

@@ -36,22 +37,36 @@ def __init__(self, models):

    def run(self, input_data):
        # Step 1: Planner generates a plan
-        plan = self.client.chat.completions.create(
-            model=self.planner_model,
-            messages=[
-                {"role": "system", "content": "You are a planning assistant. Create a brief plan to answer the question."},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        plan = (
+            self.client.chat.completions.create(
+                model=self.planner_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are a planning assistant. Create a brief plan to answer the question.",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )

        # Step 2: Solver executes the plan
-        answer = self.client.chat.completions.create(
-            model=self.solver_model,
-            messages=[
-                {"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        answer = (
+            self.client.chat.completions.create(
+                model=self.solver_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{plan}",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
        return answer


@@ -75,6 +90,7 @@ def run(self, input_data):
# It compares agent output against expected output and returns a score.
# ---------------------------------------------------------------------------

+
def eval_fn(expected, actual):
    return 1.0 if expected.lower() in str(actual).lower() else 0.0

@@ -99,6 +115,7 @@ def eval_fn(expected, actual):

    results = selector.select_best(parallel=True)
    results.print_summary()
+    results.plot_pareto()

    best = results.get_best_combo()
    if best:
examples/langchain_example.py (12 changes: 8 additions & 4 deletions)

@@ -28,7 +28,10 @@ def search(query: str) -> str:

PROMPT = ChatPromptTemplate.from_messages(
    [
-        ("system", "You are a helpful assistant. Use tools when needed to answer questions concisely."),
+        (
+            "system",
+            "You are a helpful assistant. Use tools when needed to answer questions concisely.",
+        ),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
@@ -41,6 +44,7 @@
# run(input_data) runs the agent on a single datapoint and returns the output.
# ---------------------------------------------------------------------------

+
class MyAgent:
    """LangChain tool-calling agent."""

@@ -71,6 +75,7 @@ def run(self, input_data):
# Step 3: Evaluation function — score agent output against expected answer.
# ---------------------------------------------------------------------------

+
def eval_fn(expected, actual):
    return 1.0 if expected.lower() in str(actual).lower() else 0.0

@@ -83,16 +88,15 @@ def eval_fn(expected, actual):
if __name__ == "__main__":
    selector = ModelSelector(
        agent=MyAgent,
-        models={
-            "agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],
-        },
+        models={"agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],},
        eval_fn=eval_fn,
        dataset=dataset,
        method="brute_force",  # or "auto" for smarter selection algorithms
    )

    results = selector.select_best(parallel=True)
    results.print_summary()
+    results.plot_pareto()

    best = results.get_best_combo()
    if best:
examples/langgraph_example.py (19 changes: 16 additions & 3 deletions)

@@ -31,6 +31,7 @@ class AgentState(TypedDict):
# run(input_data) runs the agent on a single datapoint and returns the output.
# ---------------------------------------------------------------------------

+
class MyAgent:
    """LangGraph planner+solver agent."""

@@ -40,15 +41,23 @@ def __init__(self, models):

        def planner_node(state: AgentState) -> dict:
            response = planner_llm.invoke(
-                [{"role": "system", "content": "Create a brief plan to answer the question."}]
+                [
+                    {
+                        "role": "system",
+                        "content": "Create a brief plan to answer the question.",
+                    }
+                ]
                + state["messages"]
            )
            return {"plan": response.content}

        def solver_node(state: AgentState) -> dict:
            response = solver_llm.invoke(
                [
-                    {"role": "system", "content": f"Follow this plan and answer concisely:\n{state['plan']}"},
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{state['plan']}",
+                    },
                    state["messages"][-1],
                ]
            )
@@ -63,7 +72,9 @@ def solver_node(state: AgentState) -> dict:
        self._app = graph.compile()

    def run(self, input_data):
-        result = self._app.invoke({"messages": [{"role": "user", "content": input_data}]})
+        result = self._app.invoke(
+            {"messages": [{"role": "user", "content": input_data}]}
+        )
        return result["answer"]


@@ -82,6 +93,7 @@ def run(self, input_data):
# Step 3: Evaluation function — score agent output against expected answer.
# ---------------------------------------------------------------------------

+
def eval_fn(expected, actual):
    return 1.0 if expected.lower() in str(actual).lower() else 0.0

@@ -105,6 +117,7 @@ def eval_fn(expected, actual):

    results = selector.select_best(parallel=True)
    results.print_summary()
+    results.plot_pareto()

    best = results.get_best_combo()
    if best: