Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions cmd/obol/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func modelCommand(cfg *config.Config) *cli.Command {
modelSyncCommand(cfg),
modelPullCommand(),
modelListCommand(cfg),
modelPreferCommand(cfg),
modelRemoveCommand(cfg),
},
}
Expand Down Expand Up @@ -194,12 +195,33 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
}

if len(models) == 0 {
// Sensible defaults
// Per-provider defaults — kept in sync with what the providers
// document as their current chat-tuned flagship. Bumping these is a
// small follow-up PR when frontier models drop, and it isolates the
// "what's good today" maintenance to one place.
var defaultModel string
switch provider {
case "anthropic":
models = []string{"claude-sonnet-4-6"}
defaultModel = "claude-sonnet-4-6"
case "openai":
models = []string{"gpt-4.1"}
defaultModel = "gpt-5.5"
}

// Interactive: let the user override the default with a free-text
// entry. Non-interactive (no TTY): silently use the default — the
// caller can always pass --model to be explicit.
chosen := defaultModel
if defaultModel != "" && u.IsTTY() && !u.IsJSON() {
input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel)
if err != nil {
return err
}
if strings.TrimSpace(input) != "" {
chosen = strings.TrimSpace(input)
}
}
if chosen != "" {
models = []string{chosen}
}
}

Expand Down Expand Up @@ -493,6 +515,34 @@ func modelListCommand(cfg *config.Config) *cli.Command {
}
}

// modelPreferCommand builds the `obol model prefer` subcommand, which moves
// the named models to the head of the LiteLLM model_list; the head entry is
// what the agent treats as its primary model.
func modelPreferCommand(cfg *config.Config) *cli.Command {
	action := func(ctx context.Context, cmd *cli.Command) error {
		u := getUI(cmd)

		requested := cmd.Args().Slice()
		if len(requested) == 0 {
			// Embed full usage in the error so the CLI prints actionable help.
			return errors.New("at least one model name is required\n\nUsage: obol model prefer <model-name> [<model-name> ...]\n\nList configured models with: obol model list")
		}

		if err := model.PreferModels(cfg, u, requested); err != nil {
			return err
		}

		// --no-sync lets callers batch several model commands and run a
		// single `obol model sync` at the end instead of syncing each time.
		if !cmd.Bool("no-sync") {
			return syncAgentModels(cfg, u)
		}
		return nil
	}

	return &cli.Command{
		Name:      "prefer",
		Usage:     "Pull one or more models to the head of the LiteLLM model_list (the head becomes the agent's primary)",
		ArgsUsage: "<model-name> [<model-name> ...]",
		Flags: []cli.Flag{
			&cli.BoolFlag{Name: "no-sync", Usage: "Skip the agent model sync (batch with other model commands, then run `obol model sync` once)"},
		},
		Action: action,
	}
}

func modelRemoveCommand(cfg *config.Config) *cli.Command {
return &cli.Command{
Name: "remove",
Expand Down
18 changes: 18 additions & 0 deletions internal/embed/infrastructure/helmfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@
# Orchestrates core infrastructure components deployed with every stack
# Uses Traefik with Gateway API for routing (replaces nginx-ingress)

# Force helm to use Server-Side Apply with --force-conflicts on every release.
# Without this, every `obol stack down`/`up` cycle hits a different SSA
# ownership conflict on resources helm shares with other writers — for example
# litellm-config.data["config.yaml"] (touched by `obol model setup` patches),
# remote-signer-keystore-password.metadata.labels (helm 3 pre-SSA default
# manager was kubectl-client-side-apply), or fields the apiserver synthesises
# under "before-first-apply" when an Update-managed object first sees an
# Apply call. --server-side=true switches helm to SSA semantics so it can
# reason about field ownership at all; --force-conflicts tells it to take
# ownership when it disagrees with another manager rather than aborting the
# upgrade. The `=true` form is required: helm's --server-side flag takes a
# value (auto|true|false), so a bare --server-side would consume the next
# arg as its value and reject --force-conflicts as an unknown apply method.
helmDefaults:
args:
- "--server-side=true"
- "--force-conflicts"

repositories:
- name: traefik
url: https://traefik.github.io/charts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ image:

repository: obolnetwork/obol-stack-front-end
pullPolicy: IfNotPresent
tag: "v0.1.17-rc.5"
tag: "v0.1.18"

service:
type: ClusterIP
Expand Down
28 changes: 20 additions & 8 deletions internal/hermes/hermes.go
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,19 @@ func writeDeploymentFiles(cfg *config.Config, id, deploymentDir, agentBaseURL st
func generateHelmfile(namespace string) string {
return fmt.Sprintf(`# Managed by obol agent

# Force helm to use Server-Side Apply with --force-conflicts on every
# release. Without this, every Hermes resync after a fresh install hits SSA
# ownership conflicts on shared fields — most often
# remote-signer-keystore-password.metadata.labels, where helm's pre-SSA
# install left field ownership under "kubectl-client-side-apply" and a
# subsequent helm upgrade with SSA can't reclaim them. =true is required
# because helm's --server-side flag takes a value (auto|true|false); a bare
# --server-side would consume the next arg and reject --force-conflicts.
helmDefaults:
args:
- "--server-side=true"
- "--force-conflicts"

repositories:
- name: obol
url: https://obolnetwork.github.io/helm-charts/
Expand Down Expand Up @@ -1260,15 +1273,14 @@ func litellmMasterKey(cfg *config.Config) string {
return "sk-obol-" + strings.TrimSpace(string(data))
}

// rankModels delegates to model.Rank, which knows how to prefer larger local
// models and frontier cloud models. Kept as a thin wrapper so call sites
// don't need to import internal/model directly.
// rankModels is a thin wrapper over model.Rank so Hermes call sites don't
// have to import internal/model directly. model.Rank preserves the configured
// LiteLLM model order and keeps known embedding-only models behind
// chat-capable ones.
//
// IMPORTANT: provider prefixes must NOT be stripped at this layer. model.Rank
// hands back the ORIGINAL strings so the agent can round-trip them to LiteLLM
// unmodified; stripping here would reintroduce the double-strip bug for
// custom endpoints that ca820c9 worked around.
func rankModels(models []string) (primary string, fallbacks []string) {
	primary, fallbacks = model.Rank(models)
	return primary, fallbacks
}
Expand Down
46 changes: 12 additions & 34 deletions internal/hermes/rankmodels_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,20 @@ package hermes

import "testing"

// TestRankModels_HermesWrapper_PrefersLargerLocalModel encodes the regression
// from the colleague's screenshot: Hermes was deploying with `llama3.2:1b` as
// the default model, which then parroted its own tool list back on every
// "hello" prompt. The fix moved capability ranking into model.Rank; this test
// just confirms the Hermes-side wrapper still calls into it correctly.
//
// Contract: bare LiteLLM model_name strings come in, the SAME bare strings
// come back out — no provider-prefix stripping at this layer. The agent must
// be able to round-trip the returned primary back to LiteLLM without
// modification.
func TestRankModels_HermesWrapper_PrefersLargerLocalModel(t *testing.T) {
// Contract: LiteLLM model_name strings come in, the SAME strings come back
// out in configured order. The agent must be able to round-trip the returned
// primary back to LiteLLM without modification.
func TestRankModels_HermesWrapper_PreservesConfiguredOrder(t *testing.T) {
primary, fallbacks := rankModels([]string{
"llama3.2:1b",
"llama3.1:8b",
"llama3.2:3b",
})
if primary != "llama3.1:8b" {
t.Fatalf("primary: got %q, want llama3.1:8b", primary)
}
if len(fallbacks) != 2 || fallbacks[0] != "llama3.2:3b" || fallbacks[1] != "llama3.2:1b" {
t.Fatalf("fallbacks: got %v, want [llama3.2:3b llama3.2:1b]", fallbacks)
}
}

// TestRankModels_HermesWrapper_PrefersClaudeOverLocal exercises the cloud
// tier. Cloud entries written by buildModelEntries are bare (e.g.
// `claude-opus-4-7`, not `anthropic/claude-opus-4-7`), and the wrapper must
// preserve that.
func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) {
primary, _ := rankModels([]string{
"llama3.1:8b",
"claude-opus-4-7",
"llama3.2:1b",
})
if primary != "claude-opus-4-7" {
t.Fatalf("primary: got %q, want claude-opus-4-7", primary)
if primary != "llama3.2:1b" {
t.Fatalf("primary: got %q, want llama3.2:1b", primary)
}
if len(fallbacks) != 2 || fallbacks[0] != "llama3.1:8b" || fallbacks[1] != "claude-opus-4-7" {
t.Fatalf("fallbacks: got %v, want [llama3.1:8b claude-opus-4-7]", fallbacks)
}
}

Expand All @@ -50,12 +28,12 @@ func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) {
// reintroducing it.
func TestRankModels_HermesWrapper_PreservesProviderPrefixIfPresent(t *testing.T) {
primary, _ := rankModels([]string{
"llama3.1:8b",
"anthropic/claude-opus-4-7",
"openai/gpt-4o",
"llama3.1:8b",
})
if primary != "anthropic/claude-opus-4-7" {
t.Fatalf("primary: got %q, want anthropic/claude-opus-4-7 (unstripped)", primary)
if primary != "llama3.1:8b" {
t.Fatalf("primary: got %q, want llama3.1:8b", primary)
}
}

Expand Down
Loading
Loading