From 947634de8455d75471cc1d6c5b277dc54b48cd0c Mon Sep 17 00:00:00 2001
From: Wenyueh <norahua1996@outlook.com>
Date: Sun, 22 Mar 2026 14:29:29 -0400
Subject: [PATCH 1/7] pareto graph

---
 examples/ag2_example.py              |   1 +
 examples/crewai_example.py           |   1 +
 examples/custom_agent_example.py     |   1 +
 examples/langchain_example.py        |   1 +
 examples/langgraph_example.py        |   1 +
 examples/llamaindex_example.py       |   1 +
 examples/openai_sdk_example.py       |   1 +
 pyproject.toml                       |   1 +
 src/agentopt/model_selection/base.py | 157 +++++++++++++++++++++++++++
 9 files changed, 165 insertions(+)

diff --git a/examples/ag2_example.py b/examples/ag2_example.py
index 6477bd6..b673694 100644
--- a/examples/ag2_example.py
+++ b/examples/ag2_example.py
@@ -187,6 +187,7 @@ def main():
         parallel=args.parallel, max_concurrent=args.max_concurrent
     )
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
diff --git a/examples/crewai_example.py b/examples/crewai_example.py
index 31e0e81..1841c0a 100644
--- a/examples/crewai_example.py
+++ b/examples/crewai_example.py
@@ -195,6 +195,7 @@ def main():
         parallel=args.parallel, max_concurrent=args.max_concurrent
     )
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
diff --git a/examples/custom_agent_example.py b/examples/custom_agent_example.py
index 33c5c47..8191f41 100644
--- a/examples/custom_agent_example.py
+++ b/examples/custom_agent_example.py
@@ -160,6 +160,7 @@ def main():
         parallel=args.parallel, max_concurrent=args.max_concurrent
     )
     results.print_summary()
+    results.plot_pareto()
 
     # Export optimized config
     best = results.get_best_combo()
diff --git a/examples/langchain_example.py b/examples/langchain_example.py
index 1855c89..1323f7d 100644
--- a/examples/langchain_example.py
+++ b/examples/langchain_example.py
@@ -175,6 +175,7 @@ def main():
         parallel=args.parallel, max_concurrent=args.max_concurrent
     )
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
diff --git a/examples/langgraph_example.py b/examples/langgraph_example.py
index 1c31265..0d10cf6 100644
--- a/examples/langgraph_example.py
+++ b/examples/langgraph_example.py
@@ -195,6 +195,7 @@ def main():
         parallel=args.parallel, max_concurrent=args.max_concurrent
     )
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
diff --git a/examples/llamaindex_example.py b/examples/llamaindex_example.py
index 8d885f6..8a21d69 100644
--- a/examples/llamaindex_example.py
+++ b/examples/llamaindex_example.py
@@ -190,6 +190,7 @@ def main():
         parallel=args.parallel, max_concurrent=args.max_concurrent
     )
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
diff --git a/examples/openai_sdk_example.py b/examples/openai_sdk_example.py
index 406f9c5..0691dfb 100644
--- a/examples/openai_sdk_example.py
+++ b/examples/openai_sdk_example.py
@@ -179,6 +179,7 @@ def main():
         parallel=args.parallel, max_concurrent=args.max_concurrent
     )
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
diff --git a/pyproject.toml b/pyproject.toml
index 1104e5f..e496f4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,7 @@ examples = [
     "openai-agents",
     "python-dotenv",
 ]
+plot = ["matplotlib>=3.5"]
 dev = ["pytest>=8.0"]
 docs = ["mkdocs-material", "mkdocstrings[python]"]
 
diff --git a/src/agentopt/model_selection/base.py b/src/agentopt/model_selection/base.py
index 722554a..5d0329f 100644
--- a/src/agentopt/model_selection/base.py
+++ b/src/agentopt/model_selection/base.py
@@ -504,6 +504,163 @@ def print_summary(self) -> None:
         """Print the formatted summary table of all results."""
         print(self)
 
+    # ------------------------------------------------------------------
+    # Pareto frontier visualisation
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _pareto_mask(
+        xs: List[float], ys: List[float], x_minimize: bool, y_minimize: bool,
+    ) -> List[bool]:
+        """Return a boolean mask marking Pareto-optimal points.
+
+        A point is Pareto-optimal if no other point is at least as good on both
+        objectives and strictly better on at least one.
+        """
+        n = len(xs)
+        mask = [True] * n
+        for i in range(n):
+            if not mask[i]:
+                continue
+            for j in range(n):
+                if i == j or not mask[j]:
+                    continue
+                xi, yi, xj, yj = xs[i], ys[i], xs[j], ys[j]
+                # Is j at least as good as i on both, and strictly better on one?
+                x_ok = (xj <= xi) if x_minimize else (xj >= xi)
+                y_ok = (yj <= yi) if y_minimize else (yj >= yi)
+                x_strict = (xj < xi) if x_minimize else (xj > xi)
+                y_strict = (yj < yi) if y_minimize else (yj > yi)
+                if x_ok and y_ok and (x_strict or y_strict):
+                    mask[i] = False
+                    break
+        return mask
+
+    def plot_pareto(self, path: Optional[str] = None) -> None:
+        """Generate three pairwise Pareto frontier plots.
+
+        Subplots: Accuracy vs Latency, Accuracy vs Price, Latency vs Price.
+
+        Requires ``matplotlib`` (install with ``pip install agentopt[plot]``).
+        If *path* is given the figure is saved to that file, otherwise
+        ``plt.show()`` is called.
+        """
+        try:
+            import matplotlib.pyplot as plt
+        except ImportError:
+            raise ImportError(
+                "matplotlib is required for plot_pareto. "
+                "Install it with: pip install agentopt[plot]"
+            )
+
+        # Deduplicate (same logic as __str__).
+        seen: Dict[str, "ModelResult"] = {}
+        for r in self.results:
+            if r.model_name not in seen or (
+                r.is_best and not seen[r.model_name].is_best
+            ):
+                seen[r.model_name] = r
+        unique = [r for r in seen.values() if r.price is not None]
+
+        if len(unique) < 2:
+            print("Not enough results with pricing data to plot.")
+            return
+
+        names = [r.model_name for r in unique]
+        accs = [r.accuracy for r in unique]
+        lats = [r.latency_seconds for r in unique]
+        prices = [r.price for r in unique]  # type: ignore[misc]
+        is_best = [r.is_best for r in unique]
+
+        pairs = [
+            (accs, lats, "Accuracy", "Latency (s)", False, True),
+            (accs, prices, "Accuracy", "Price ($)", False, True),
+            (lats, prices, "Latency (s)", "Price ($)", True, True),
+        ]
+
+        fig, axes = plt.subplots(1, 3, figsize=(18, 5))
+        fig.suptitle("Pareto Frontiers", fontsize=14, fontweight="bold")
+
+        for ax, (xs, ys, xlabel, ylabel, x_min, y_min) in zip(axes, pairs):
+            mask = self._pareto_mask(xs, ys, x_min, y_min)
+
+            # Non-Pareto points.
+            np_x = [x for x, m in zip(xs, mask) if not m]
+            np_y = [y for y, m in zip(ys, mask) if not m]
+            ax.scatter(
+                np_x,
+                np_y,
+                c="lightgray",
+                edgecolors="gray",
+                s=60,
+                zorder=2,
+                label="Dominated",
+            )
+
+            # Pareto-optimal points.
+            p_x = [x for x, m in zip(xs, mask) if m]
+            p_y = [y for y, m in zip(ys, mask) if m]
+            ax.scatter(
+                p_x,
+                p_y,
+                c="steelblue",
+                edgecolors="navy",
+                s=80,
+                zorder=3,
+                label="Pareto-optimal",
+            )
+
+            # Connect frontier with a line (sorted by x).
+            if p_x:
+                order = sorted(range(len(p_x)), key=lambda i: p_x[i])
+                ax.plot(
+                    [p_x[i] for i in order],
+                    [p_y[i] for i in order],
+                    c="steelblue",
+                    linewidth=1.5,
+                    alpha=0.6,
+                    zorder=2,
+                )
+
+            # Highlight best combo.
+            for x, y, b, name in zip(xs, ys, is_best, names):
+                if b:
+                    ax.scatter(
+                        [x],
+                        [y],
+                        c="gold",
+                        edgecolors="darkorange",
+                        s=140,
+                        zorder=4,
+                        marker="*",
+                        label="Best",
+                    )
+
+            # Labels for all points.
+            for x, y, name in zip(xs, ys, names):
+                short = name if len(name) <= 30 else name[:27] + "..."
+                ax.annotate(
+                    short,
+                    (x, y),
+                    textcoords="offset points",
+                    xytext=(5, 5),
+                    fontsize=6,
+                    alpha=0.8,
+                )
+
+            ax.set_xlabel(xlabel)
+            ax.set_ylabel(ylabel)
+            ax.legend(fontsize=7, loc="best")
+            ax.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+        if path:
+            fig.savefig(path, dpi=150, bbox_inches="tight")
+            print(f"Pareto plot saved to {path}")
+        else:
+            plt.show()
+        plt.close(fig)
+
 
 class BaseModelSelector(ABC):
     """Abstract base class for model selectors.

From 7e3bbf5e458bcc057a02179dcf6beb47ab9f2ec2 Mon Sep 17 00:00:00 2001
From: Wenyueh <norahua1996@outlook.com>
Date: Sun, 22 Mar 2026 16:08:58 -0400
Subject: [PATCH 2/7] clean graph

---
 src/agentopt/model_selection/base.py | 46 ++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/src/agentopt/model_selection/base.py b/src/agentopt/model_selection/base.py
index 5d0329f..676318d 100644
--- a/src/agentopt/model_selection/base.py
+++ b/src/agentopt/model_selection/base.py
@@ -572,13 +572,19 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
         prices = [r.price for r in unique]  # type: ignore[misc]
         is_best = [r.is_best for r in unique]
 
+        # Build numbered labels: (1), (2), ...
+        num_labels = [f"({i})" for i in range(1, len(unique) + 1)]
+
         pairs = [
             (accs, lats, "Accuracy", "Latency (s)", False, True),
             (accs, prices, "Accuracy", "Price ($)", False, True),
             (lats, prices, "Latency (s)", "Price ($)", True, True),
         ]
 
-        fig, axes = plt.subplots(1, 3, figsize=(18, 5))
+        fig = plt.figure(figsize=(20, 5))
+        # Reserve right margin for the legend.
+        gs = fig.add_gridspec(1, 3, left=0.04, right=0.75, wspace=0.3)
+        axes = [fig.add_subplot(gs[0, i]) for i in range(3)]
         fig.suptitle("Pareto Frontiers", fontsize=14, fontweight="bold")
 
         for ax, (xs, ys, xlabel, ylabel, x_min, y_min) in zip(axes, pairs):
@@ -623,7 +629,7 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
                 )
 
             # Highlight best combo.
-            for x, y, b, name in zip(xs, ys, is_best, names):
+            for x, y, b in zip(xs, ys, is_best):
                 if b:
                     ax.scatter(
                         [x],
@@ -636,24 +642,46 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
                         label="Best",
                     )
 
-            # Labels for all points.
-            for x, y, name in zip(xs, ys, names):
-                short = name if len(name) <= 30 else name[:27] + "..."
+            # Number labels on points.
+            for x, y, lbl in zip(xs, ys, num_labels):
                 ax.annotate(
-                    short,
+                    lbl,
                     (x, y),
                     textcoords="offset points",
                     xytext=(5, 5),
-                    fontsize=6,
-                    alpha=0.8,
+                    fontsize=7,
+                    fontweight="bold",
                 )
 
+            # Invert "lower is better" axes so better is always top-right,
+            # producing a concave frontier.
+            if x_min:
+                ax.invert_xaxis()
+            if y_min:
+                ax.invert_yaxis()
+
             ax.set_xlabel(xlabel)
             ax.set_ylabel(ylabel)
             ax.legend(fontsize=7, loc="best")
             ax.grid(True, alpha=0.3)
 
-        plt.tight_layout()
+        # External legend mapping numbers to combo names.
+        legend_lines = [f"({i}) {name}" for i, name in enumerate(names, 1)]
+        fig.text(
+            0.77,
+            0.5,
+            "\n".join(legend_lines),
+            fontsize=8,
+            verticalalignment="center",
+            fontfamily="monospace",
+            bbox=dict(
+                boxstyle="round,pad=0.5",
+                facecolor="lightyellow",
+                edgecolor="gray",
+                alpha=0.9,
+            ),
+        )
+
         if path:
             fig.savefig(path, dpi=150, bbox_inches="tight")
             print(f"Pareto plot saved to {path}")

From 754a55defd0c7e1bb807467f2265d5c3b48dd550 Mon Sep 17 00:00:00 2001
From: Wenyueh <norahua1996@outlook.com>
Date: Sun, 22 Mar 2026 16:21:07 -0400
Subject: [PATCH 3/7] clean graph for bandit

---
 src/agentopt/model_selection/base.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/agentopt/model_selection/base.py b/src/agentopt/model_selection/base.py
index 676318d..177323f 100644
--- a/src/agentopt/model_selection/base.py
+++ b/src/agentopt/model_selection/base.py
@@ -560,7 +560,15 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
                 r.is_best and not seen[r.model_name].is_best
             ):
                 seen[r.model_name] = r
-        unique = [r for r in seen.values() if r.price is not None]
+        all_unique = [r for r in seen.values() if r.price is not None]
+
+        # For bandit algorithms, only plot the final layer (combos with the
+        # most datapoints) so all plotted combos are directly comparable.
+        if all_unique:
+            max_samples = max(r.num_samples for r in all_unique)
+            unique = [r for r in all_unique if r.num_samples == max_samples]
+        else:
+            unique = all_unique
 
         if len(unique) < 2:
             print("Not enough results with pricing data to plot.")

From 9e4d114de21720d81d7153c873e386d5146b8707 Mon Sep 17 00:00:00 2001
From: Wenyueh <norahua1996@outlook.com>
Date: Sun, 22 Mar 2026 16:55:31 -0400
Subject: [PATCH 4/7] order

---
 src/agentopt/model_selection/base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/agentopt/model_selection/base.py b/src/agentopt/model_selection/base.py
index 177323f..d853dd0 100644
--- a/src/agentopt/model_selection/base.py
+++ b/src/agentopt/model_selection/base.py
@@ -570,6 +570,9 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
         else:
             unique = all_unique
 
+        # Sort so numbering matches the final results table rank order.
+        unique.sort(key=lambda r: (-r.accuracy, r.latency_seconds))
+
         if len(unique) < 2:
             print("Not enough results with pricing data to plot.")
             return

From 7913277f8fd2ed4668c943bc5e64f6db6f283b03 Mon Sep 17 00:00:00 2001
From: Wenyueh <norahua1996@outlook.com>
Date: Sun, 22 Mar 2026 22:03:12 -0400
Subject: [PATCH 5/7] merge main and remove latency vs. cost

---
 examples/advanced_selection_example.py | 84 ++++++++++++++++++--------
 examples/ag2_example.py                |  2 +
 examples/crewai_example.py             |  2 +
 examples/custom_agent_example.py       | 44 +++++++++-----
 examples/langchain_example.py          | 11 ++--
 examples/langgraph_example.py          | 18 +++++-
 examples/llamaindex_example.py         |  6 +-
 examples/openai_sdk_example.py         |  2 +
 src/agentopt/__init__.py               | 10 +--
 src/agentopt/model_selection/base.py   | 11 ++--
 10 files changed, 128 insertions(+), 62 deletions(-)

diff --git a/examples/advanced_selection_example.py b/examples/advanced_selection_example.py
index 799646d..95b842c 100644
--- a/examples/advanced_selection_example.py
+++ b/examples/advanced_selection_example.py
@@ -24,6 +24,7 @@
 # Agent, dataset, and eval_fn (same as custom_agent_example.py)
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     def __init__(self, models):
         self.client = OpenAI()
@@ -31,21 +32,35 @@ def __init__(self, models):
         self.solver_model = models["solver"]
 
     def run(self, input_data):
-        plan = self.client.chat.completions.create(
-            model=self.planner_model,
-            messages=[
-                {"role": "system", "content": "Create a brief plan to answer the question."},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
-
-        answer = self.client.chat.completions.create(
-            model=self.solver_model,
-            messages=[
-                {"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        plan = (
+            self.client.chat.completions.create(
+                model=self.planner_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "Create a brief plan to answer the question.",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
+
+        answer = (
+            self.client.chat.completions.create(
+                model=self.solver_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{plan}",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
         return answer
 
 
@@ -71,11 +86,11 @@ def eval_fn(expected, actual):
 # Selection algorithms
 # ---------------------------------------------------------------------------
 
+
 def run_auto():
     """method="auto" — automatically picks the best algorithm (default)."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
-        method="auto",
+        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset, method="auto",
     )
     return selector.select_best(parallel=True)
 
@@ -83,7 +98,10 @@ def run_auto():
 def run_random():
     """method="random" — evaluate a random subset of combinations."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="random",
         sample_fraction=0.5,  # evaluate 50% of all combinations
     )
@@ -93,7 +111,10 @@ def run_random():
 def run_hill_climbing():
     """method="hill_climbing" — greedy search using model quality/speed rankings."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="hill_climbing",
         batch_size=4,  # number of neighbors to evaluate per step
     )
@@ -103,7 +124,10 @@ def run_hill_climbing():
 def run_arm_elimination():
     """method="arm_elimination" — eliminates statistically dominated combinations early."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="arm_elimination",
     )
     return selector.select_best(parallel=True)
@@ -112,7 +136,10 @@ def run_arm_elimination():
 def run_epsilon_lucb():
     """method="epsilon_lucb" — stops when the best arm is identified within epsilon."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="epsilon_lucb",
         epsilon=0.05,  # acceptable gap from the true best
     )
@@ -122,7 +149,10 @@ def run_epsilon_lucb():
 def run_threshold():
     """method="threshold" — classify combinations as above/below a quality threshold."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="threshold",
         threshold=0.8,  # minimum acceptable accuracy
     )
@@ -132,7 +162,10 @@ def run_threshold():
 def run_lm_proposal():
     """method="lm_proposal" — use a proposer LLM to shortlist promising combinations."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="lm_proposal",
     )
     return selector.select_best(parallel=True)
@@ -141,7 +174,10 @@ def run_lm_proposal():
 def run_bayesian():
     """method="bayesian" — GP-based Bayesian optimization (requires agentopt[bayesian])."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="bayesian",
         batch_size=4,
     )
diff --git a/examples/ag2_example.py b/examples/ag2_example.py
index 2878fa8..2f54ca6 100644
--- a/examples/ag2_example.py
+++ b/examples/ag2_example.py
@@ -25,6 +25,7 @@
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """AG2 planner+solver agent pair."""
 
@@ -76,6 +77,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
diff --git a/examples/crewai_example.py b/examples/crewai_example.py
index bac0892..5dcb50d 100644
--- a/examples/crewai_example.py
+++ b/examples/crewai_example.py
@@ -21,6 +21,7 @@
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """CrewAI crew with researcher + writer agents."""
 
@@ -90,6 +91,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
diff --git a/examples/custom_agent_example.py b/examples/custom_agent_example.py
index efb2c7b..0f5f8f3 100644
--- a/examples/custom_agent_example.py
+++ b/examples/custom_agent_example.py
@@ -26,6 +26,7 @@
 # run() takes a single datapoint and returns the agent's output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """A simple planner+solver agent using the OpenAI SDK."""
 
@@ -36,22 +37,36 @@ def __init__(self, models):
 
     def run(self, input_data):
         # Step 1: Planner generates a plan
-        plan = self.client.chat.completions.create(
-            model=self.planner_model,
-            messages=[
-                {"role": "system", "content": "You are a planning assistant. Create a brief plan to answer the question."},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        plan = (
+            self.client.chat.completions.create(
+                model=self.planner_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are a planning assistant. Create a brief plan to answer the question.",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
 
         # Step 2: Solver executes the plan
-        answer = self.client.chat.completions.create(
-            model=self.solver_model,
-            messages=[
-                {"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        answer = (
+            self.client.chat.completions.create(
+                model=self.solver_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{plan}",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
         return answer
 
 
@@ -75,6 +90,7 @@ def run(self, input_data):
 # It compares agent output against expected output and returns a score.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
diff --git a/examples/langchain_example.py b/examples/langchain_example.py
index 2bbcc65..3979bf8 100644
--- a/examples/langchain_example.py
+++ b/examples/langchain_example.py
@@ -28,7 +28,10 @@ def search(query: str) -> str:
 
 PROMPT = ChatPromptTemplate.from_messages(
     [
-        ("system", "You are a helpful assistant. Use tools when needed to answer questions concisely."),
+        (
+            "system",
+            "You are a helpful assistant. Use tools when needed to answer questions concisely.",
+        ),
         ("human", "{input}"),
         ("placeholder", "{agent_scratchpad}"),
     ]
@@ -41,6 +44,7 @@ def search(query: str) -> str:
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """LangChain tool-calling agent."""
 
@@ -71,6 +75,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
@@ -83,9 +88,7 @@ def eval_fn(expected, actual):
 if __name__ == "__main__":
     selector = ModelSelector(
         agent=MyAgent,
-        models={
-            "agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],
-        },
+        models={"agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],},
         eval_fn=eval_fn,
         dataset=dataset,
         method="brute_force",  # or "auto" for smarter selection algorithms
diff --git a/examples/langgraph_example.py b/examples/langgraph_example.py
index ff602f0..e99f08c 100644
--- a/examples/langgraph_example.py
+++ b/examples/langgraph_example.py
@@ -31,6 +31,7 @@ class AgentState(TypedDict):
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """LangGraph planner+solver agent."""
 
@@ -40,7 +41,12 @@ def __init__(self, models):
 
         def planner_node(state: AgentState) -> dict:
             response = planner_llm.invoke(
-                [{"role": "system", "content": "Create a brief plan to answer the question."}]
+                [
+                    {
+                        "role": "system",
+                        "content": "Create a brief plan to answer the question.",
+                    }
+                ]
                 + state["messages"]
             )
             return {"plan": response.content}
@@ -48,7 +54,10 @@ def planner_node(state: AgentState) -> dict:
         def solver_node(state: AgentState) -> dict:
             response = solver_llm.invoke(
                 [
-                    {"role": "system", "content": f"Follow this plan and answer concisely:\n{state['plan']}"},
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{state['plan']}",
+                    },
                     state["messages"][-1],
                 ]
             )
@@ -63,7 +72,9 @@ def solver_node(state: AgentState) -> dict:
         self._app = graph.compile()
 
     def run(self, input_data):
-        result = self._app.invoke({"messages": [{"role": "user", "content": input_data}]})
+        result = self._app.invoke(
+            {"messages": [{"role": "user", "content": input_data}]}
+        )
         return result["answer"]
 
 
@@ -82,6 +93,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
diff --git a/examples/llamaindex_example.py b/examples/llamaindex_example.py
index 5eed0bb..8093a59 100644
--- a/examples/llamaindex_example.py
+++ b/examples/llamaindex_example.py
@@ -46,6 +46,7 @@ def divide(a: float, b: float) -> float:
 # Note: run() can be async — AgentOpt detects this automatically.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """LlamaIndex math agent with calculator tools."""
 
@@ -85,6 +86,7 @@ async def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
@@ -97,9 +99,7 @@ def eval_fn(expected, actual):
 if __name__ == "__main__":
     selector = ModelSelector(
         agent=MyAgent,
-        models={
-            "agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],
-        },
+        models={"agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],},
         eval_fn=eval_fn,
         dataset=dataset,
         method="brute_force",  # or "auto" for smarter selection algorithms
diff --git a/examples/openai_sdk_example.py b/examples/openai_sdk_example.py
index 11bd4f0..ccb7808 100644
--- a/examples/openai_sdk_example.py
+++ b/examples/openai_sdk_example.py
@@ -27,6 +27,7 @@ def search(query: str) -> str:
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """OpenAI Agents SDK planner+solver agent pair."""
 
@@ -67,6 +68,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
diff --git a/src/agentopt/__init__.py b/src/agentopt/__init__.py
index 5d7cb93..5b09b15 100644
--- a/src/agentopt/__init__.py
+++ b/src/agentopt/__init__.py
@@ -45,12 +45,7 @@
 
 
 def ModelSelector(
-    agent=None,
-    models=None,
-    eval_fn=None,
-    dataset=None,
-    method="auto",
-    **kwargs,
+    agent=None, models=None, eval_fn=None, dataset=None, method="auto", **kwargs,
 ):
     """Create a model selector.
 
@@ -82,8 +77,7 @@ def ModelSelector(
                 'install with `pip install "agentopt[bayesian]"`'
             )
         raise ValueError(
-            f"Unknown method {method!r}. "
-            f"Choose from: {', '.join(_METHODS)}"
+            f"Unknown method {method!r}. " f"Choose from: {', '.join(_METHODS)}"
         )
     return cls(agent=agent, models=models, eval_fn=eval_fn, dataset=dataset, **kwargs)
 
diff --git a/src/agentopt/model_selection/base.py b/src/agentopt/model_selection/base.py
index d08376a..2ed4256 100644
--- a/src/agentopt/model_selection/base.py
+++ b/src/agentopt/model_selection/base.py
@@ -537,9 +537,9 @@ def _pareto_mask(
         return mask
 
     def plot_pareto(self, path: Optional[str] = None) -> None:
-        """Generate three pairwise Pareto frontier plots.
+        """Generate two pairwise Pareto frontier plots.
 
-        Subplots: Accuracy vs Latency, Accuracy vs Price, Latency vs Price.
+        Subplots: Accuracy vs Latency, Accuracy vs Price.
 
         Requires ``matplotlib`` (install with ``pip install agentopt[plot]``).
         If *path* is given the figure is saved to that file, otherwise
@@ -589,13 +589,12 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
         pairs = [
             (accs, lats, "Accuracy", "Latency (s)", False, True),
             (accs, prices, "Accuracy", "Price ($)", False, True),
-            (lats, prices, "Latency (s)", "Price ($)", True, True),
         ]
 
-        fig = plt.figure(figsize=(20, 5))
+        fig = plt.figure(figsize=(14, 5))
         # Reserve right margin for the legend.
-        gs = fig.add_gridspec(1, 3, left=0.04, right=0.75, wspace=0.3)
-        axes = [fig.add_subplot(gs[0, i]) for i in range(3)]
+        gs = fig.add_gridspec(1, 2, left=0.06, right=0.72, wspace=0.3)
+        axes = [fig.add_subplot(gs[0, i]) for i in range(2)]
         fig.suptitle("Pareto Frontiers", fontsize=14, fontweight="bold")
 
         for ax, (xs, ys, xlabel, ylabel, x_min, y_min) in zip(axes, pairs):

From ce30e9286496af9b628cde54fa25630e6f309ec9 Mon Sep 17 00:00:00 2001
From: Wenyueh <norahua1996@outlook.com>
Date: Sun, 22 Mar 2026 22:23:59 -0400
Subject: [PATCH 6/7] Plot accuracy on the y-axis and drop axis inversion

---
 src/agentopt/model_selection/base.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/agentopt/model_selection/base.py b/src/agentopt/model_selection/base.py
index 2ed4256..a477881 100644
--- a/src/agentopt/model_selection/base.py
+++ b/src/agentopt/model_selection/base.py
@@ -587,8 +587,8 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
         num_labels = [f"({i})" for i in range(1, len(unique) + 1)]
 
         pairs = [
-            (accs, lats, "Accuracy", "Latency (s)", False, True),
-            (accs, prices, "Accuracy", "Price ($)", False, True),
+            (lats, accs, "Latency (s)", "Accuracy", True, False),
+            (prices, accs, "Price ($)", "Accuracy", True, False),
         ]
 
         fig = plt.figure(figsize=(14, 5))
@@ -663,13 +663,6 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
                     fontweight="bold",
                 )
 
-            # Invert "lower is better" axes so better is always top-right,
-            # producing a concave frontier.
-            if x_min:
-                ax.invert_xaxis()
-            if y_min:
-                ax.invert_yaxis()
-
             ax.set_xlabel(xlabel)
             ax.set_ylabel(ylabel)
             ax.legend(fontsize=7, loc="best")

From aad735c940c76ec07ccc92f1bf436f3b4b18710b Mon Sep 17 00:00:00 2001
From: Wenyueh <norahua1996@outlook.com>
Date: Sun, 22 Mar 2026 22:26:51 -0400
Subject: [PATCH 7/7] Shift Pareto plot grid and legend left

---
 src/agentopt/model_selection/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/agentopt/model_selection/base.py b/src/agentopt/model_selection/base.py
index a477881..85db0d2 100644
--- a/src/agentopt/model_selection/base.py
+++ b/src/agentopt/model_selection/base.py
@@ -593,7 +593,7 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
 
         fig = plt.figure(figsize=(14, 5))
         # Reserve right margin for the legend.
-        gs = fig.add_gridspec(1, 2, left=0.06, right=0.72, wspace=0.3)
+        gs = fig.add_gridspec(1, 2, left=0.06, right=0.68, wspace=0.3)
         axes = [fig.add_subplot(gs[0, i]) for i in range(2)]
         fig.suptitle("Pareto Frontiers", fontsize=14, fontweight="bold")
 
@@ -671,7 +671,7 @@ def plot_pareto(self, path: Optional[str] = None) -> None:
         # External legend mapping numbers to combo names.
         legend_lines = [f"({i}) {name}" for i, name in enumerate(names, 1)]
         fig.text(
-            0.77,
+            0.72,
             0.5,
             "\n".join(legend_lines),
             fontsize=8,