diff --git a/PossibleRefactoring.md b/PossibleRefactoring.md index 3114b628..e5882754 100644 --- a/PossibleRefactoring.md +++ b/PossibleRefactoring.md @@ -86,6 +86,110 @@ Suggested action: - Introduce a small shared internal package for cache semantics. - Do it only if it can be done without creating import cycles. +## 6. Centralize fallback-mode semantics in `config` + +Effort: low +Risk: low + +Why: +- `config.ResolveFallbackDefaultMode()` now owns the blank-to-`auto` defaulting rule. +- `internal/app/app.go` still re-implements fallback-mode parsing in: + - `dashboardFallbackModeValue()` + - `fallbackFeatureEnabledGlobally()` + - `fallbackModeEnabled()` +- Those helpers currently perform their own `TrimSpace` / case-folding instead of reusing config-owned semantics. + +Suggested action: +- Add small config-owned helpers for: + - "is fallback enabled for this mode?" + - "what dashboard mode should be exposed for this config?" +- Remove the ad hoc mode parsing from `internal/app/app.go`. +- This keeps blank, mixed-case, and future mode handling in one place. + +## 7. Collapse the duplicated translated fallback attempt loops + +Effort: medium +Risk: medium + +Why: +- `internal/server/translated_inference_service.go` has two near-identical fallback loops: + - `tryFallbackResponse()` + - `tryFallbackStream()` +- Both: + - fetch selectors + - gate on `shouldAttemptFallback()` + - derive `providerType` + - log attempt/success messages + - walk candidates while preserving the last error + +Suggested action: +- Extract one shared iterator that owns: + - selector traversal + - provider-type lookup + - attempt logging + - last-error handling +- Keep the typed wrappers only for the response/stream result shapes. + +## 8. Precompute fallback source identity once per resolution + +Effort: medium +Risk: low to medium + +Why: +- `internal/fallback/resolver.go` recomputes trimmed selector identity several times per request: + - `sourceModelInfo()` + - `modeFor()` + - `manualSelectorsFor()` + - `matchKeys()` + - `sourceKey()` +- `modeFor()` and `manualSelectorsFor()` each rebuild the same ordered match-key list. + +Suggested action: +- Introduce a small internal struct for one fallback resolution pass, containing: + - source model info + - canonical source key + - ordered match keys +- Build it once in `ResolveFallbacks()` and pass it through helper calls. +- This would trim repeated string cleanup and make precedence rules easier to inspect. + +## 9. Extract manual fallback-rule file parsing from `loadFallbackConfig` + +Effort: medium +Risk: low to medium + +Why: +- `config.loadFallbackConfig()` currently owns both: + - fallback-mode validation/defaulting + - the custom JSON loader for `manual_rules_path` +- The manual loader includes: + - duplicate raw JSON key detection + - `null` rejection + - trailing-content validation + - whitespace normalization +- That makes the config loader harder to scan than the rest of the config pipeline. + +Suggested action: +- Move the manual-rule JSON parsing into a dedicated helper or file, for example `loadFallbackManualRules(path string)`. +- Keep `loadFallbackConfig()` focused on policy validation and wiring. +- Preserve the current strict error messages and test coverage while isolating the parser. + +## 10. Pick one owner for execution-plan fallback defaults + +Effort: medium +Risk: medium + +Why: +- The managed backend default is set in `internal/app/app.go:defaultExecutionPlanInput()`. +- The dashboard draft default is separately hardcoded in `internal/admin/dashboard/static/js/modules/execution-plans.js:defaultExecutionPlanForm()`. +- We already changed both once to keep them aligned, which confirms the drift risk is real. + +Suggested action: +- Prefer a single server-owned default surface for execution-plan authoring defaults. +- Options: + - expose default feature flags from the admin config endpoint + - derive the initial dashboard form from the active managed default workflow +- This reduces UI/backend drift for fallback and other execution-plan features. + ## Recommended order 1. Remove `CacheTypeBoth`. @@ -93,3 +197,8 @@ Suggested action: 3. Keep cached-only policy in one layer. 4. Remove the legacy middleware path. 5. Centralize cache semantics in a shared package. +6. Centralize fallback-mode semantics in `config`. +7. Collapse the duplicated translated fallback attempt loops. +8. Precompute fallback source identity once per resolution. +9. Extract manual fallback-rule file parsing from `loadFallbackConfig`. +10. Pick one owner for execution-plan fallback defaults. diff --git a/config/config.example.yaml b/config/config.example.yaml index 6a40b3a4..530ced24 100644 --- a/config/config.example.yaml +++ b/config/config.example.yaml @@ -139,7 +139,7 @@ guardrails: # content: "Follow safety guidelines." fallback: - default_mode: "off" # "off", "manual", or "auto" + default_mode: "auto" # "off", "manual", or "auto" manual_rules_path: "config/fallback.example.json" # optional JSON map: {"model": ["fallback-1", "provider/model"]}; required when fallback.default_mode or any fallback.overrides.*.mode is "manual" overrides: "gpt-4o": diff --git a/config/config.go b/config/config.go index 47a0b04b..a391abf3 100644 --- a/config/config.go +++ b/config/config.go @@ -114,6 +114,16 @@ func normalizeFallbackMode(mode FallbackMode) FallbackMode { return FallbackMode(strings.ToLower(strings.TrimSpace(string(mode)))) } +// ResolveFallbackDefaultMode canonicalizes the global fallback default mode and +// applies the process default when unset. +func ResolveFallbackDefaultMode(mode FallbackMode) FallbackMode { + mode = normalizeFallbackMode(mode) + if mode == "" { + return FallbackModeAuto + } + return mode +} + // FallbackModelOverride holds per-model mode overrides. type FallbackModelOverride struct { Mode FallbackMode `yaml:"mode" json:"mode"` @@ -122,7 +132,7 @@ type FallbackModelOverride struct { // FallbackConfig holds translated-route model fallback policy. type FallbackConfig struct { // DefaultMode controls the fallback behavior when no per-model override exists. - // Supported values: "auto", "manual", "off". Default: "off". + // Supported values: "auto", "manual", "off". Default: "auto". DefaultMode FallbackMode `yaml:"default_mode" env:"FEATURE_FALLBACK_MODE"` // ManualRulesPath points to a JSON file that maps source model selectors to @@ -426,11 +436,11 @@ type EmbedderConfig struct { // VectorStoreConfig selects the vector DB backend. // Type must be set when semantic caching is enabled: qdrant, pgvector, pinecone, weaviate. type VectorStoreConfig struct { - Type string `yaml:"type"` - Qdrant QdrantConfig `yaml:"qdrant"` - PGVector PGVectorConfig `yaml:"pgvector"` - Pinecone PineconeConfig `yaml:"pinecone"` - Weaviate WeaviateConfig `yaml:"weaviate"` + Type string `yaml:"type"` + Qdrant QdrantConfig `yaml:"qdrant"` + PGVector PGVectorConfig `yaml:"pgvector"` + Pinecone PineconeConfig `yaml:"pinecone"` + Weaviate WeaviateConfig `yaml:"weaviate"` } // QdrantConfig holds connection configuration for the Qdrant vector store. @@ -868,7 +878,7 @@ func buildDefaultConfig() *Config { ResponseHeaderTimeout: 600, }, Fallback: FallbackConfig{ - DefaultMode: FallbackModeOff, + DefaultMode: FallbackModeAuto, }, ExecutionPlans: ExecutionPlansConfig{ RefreshInterval: time.Minute, @@ -986,10 +996,7 @@ func loadFallbackConfig(cfg *FallbackConfig) error { return nil } - cfg.DefaultMode = normalizeFallbackMode(cfg.DefaultMode) - if cfg.DefaultMode == "" { - cfg.DefaultMode = FallbackModeOff - } + cfg.DefaultMode = ResolveFallbackDefaultMode(cfg.DefaultMode) if !cfg.DefaultMode.Valid() { return fmt.Errorf("fallback.default_mode must be one of: auto, manual, off") } diff --git a/config/config_test.go b/config/config_test.go index e3afe953..dbd4284c 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -168,8 +168,8 @@ func TestBuildDefaultConfig(t *testing.T) { if cfg.Guardrails.EnableForBatchProcessing { t.Error("expected Guardrails.EnableForBatchProcessing=false") } - if cfg.Fallback.DefaultMode != FallbackModeOff { - t.Errorf("expected Fallback.DefaultMode=off, got %q", cfg.Fallback.DefaultMode) + if cfg.Fallback.DefaultMode != FallbackModeAuto { + t.Errorf("expected Fallback.DefaultMode=auto, got %q", cfg.Fallback.DefaultMode) } if cfg.Cache.Response.Simple != nil { t.Errorf("expected Cache.Response.Simple=nil in defaults, got %+v", cfg.Cache.Response.Simple) @@ -570,6 +570,28 @@ func TestLoad_FeatureFallbackModeEnvOverridesFallbackDefaultMode(t *testing.T) { }) } +func TestLoad_BlankFallbackDefaultModeResolvesToAuto(t *testing.T) { + clearAllConfigEnvVars(t) + + withTempDir(t, func(dir string) { + yaml := ` +fallback: + default_mode: "" +` + if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yaml), 0644); err != nil { + t.Fatalf("Failed to write config.yaml: %v", err) + } + + result, err := Load() + if err != nil { + t.Fatalf("Load() failed: %v", err) + } + if result.Config.Fallback.DefaultMode != FallbackModeAuto { + t.Fatalf("Fallback.DefaultMode = %q, want %q", result.Config.Fallback.DefaultMode, FallbackModeAuto) + } + }) +} + func TestLoad_PassthroughFlags_EnvOverridesYAML(t *testing.T) { tests := []struct { name string diff --git a/internal/admin/dashboard/static/css/dashboard.css b/internal/admin/dashboard/static/css/dashboard.css index 8185c9d3..3d02b0a8 100644 --- a/internal/admin/dashboard/static/css/dashboard.css +++ b/internal/admin/dashboard/static/css/dashboard.css @@ -3138,14 +3138,6 @@ body.conversation-drawer-open { font-weight: 700; } -.ep-node-async-usage { - --accent: var(--success); -} - -.ep-node-async-audit { - --accent: var(--warning); -} - /* "ASYNC" label inline on the right of the branch */ .ep-async-label { display: inline-flex; diff --git a/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js b/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js index eb798715..b4e5e1ab 100644 --- a/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js +++ b/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js @@ -62,17 +62,17 @@ test('async label stays inline on the right side of the branch', () => { ); assert.match( template, - /