diff --git a/PossibleRefactoring.md b/PossibleRefactoring.md index 3114b628..e5882754 100644 --- a/PossibleRefactoring.md +++ b/PossibleRefactoring.md @@ -86,6 +86,110 @@ Suggested action: - Introduce a small shared internal package for cache semantics. - Do it only if it can be done without creating import cycles. +## 6. Centralize fallback-mode semantics in `config` + +Effort: low +Risk: low + +Why: +- `config.ResolveFallbackDefaultMode()` now owns the blank-to-`auto` defaulting rule. +- `internal/app/app.go` still re-implements fallback-mode parsing in: + - `dashboardFallbackModeValue()` + - `fallbackFeatureEnabledGlobally()` + - `fallbackModeEnabled()` +- Those helpers currently perform their own `TrimSpace` / case-folding instead of reusing config-owned semantics. + +Suggested action: +- Add small config-owned helpers for: + - "is fallback enabled for this mode?" + - "what dashboard mode should be exposed for this config?" +- Remove the ad hoc mode parsing from `internal/app/app.go`. +- This keeps blank, mixed-case, and future mode handling in one place. + +## 7. Collapse the duplicated translated fallback attempt loops + +Effort: medium +Risk: medium + +Why: +- `internal/server/translated_inference_service.go` has two near-identical fallback loops: + - `tryFallbackResponse()` + - `tryFallbackStream()` +- Both: + - fetch selectors + - gate on `shouldAttemptFallback()` + - derive `providerType` + - log attempt/success messages + - walk candidates while preserving the last error + +Suggested action: +- Extract one shared iterator that owns: + - selector traversal + - provider-type lookup + - attempt logging + - last-error handling +- Keep the typed wrappers only for the response/stream result shapes. + +## 8. Precompute fallback source identity once per resolution + +Effort: medium +Risk: low to medium + +Why: +- `internal/fallback/resolver.go` recomputes trimmed selector identity several times per request: + - `sourceModelInfo()` + - `modeFor()` + - `manualSelectorsFor()` + - `matchKeys()` + - `sourceKey()` +- `modeFor()` and `manualSelectorsFor()` each rebuild the same ordered match-key list. + +Suggested action: +- Introduce a small internal struct for one fallback resolution pass, containing: + - source model info + - canonical source key + - ordered match keys +- Build it once in `ResolveFallbacks()` and pass it through helper calls. +- This would trim repeated string cleanup and make precedence rules easier to inspect. + +## 9. Extract manual fallback-rule file parsing from `loadFallbackConfig` + +Effort: medium +Risk: low to medium + +Why: +- `config.loadFallbackConfig()` currently owns both: + - fallback-mode validation/defaulting + - the custom JSON loader for `manual_rules_path` +- The manual loader includes: + - duplicate raw JSON key detection + - `null` rejection + - trailing-content validation + - whitespace normalization +- That makes the config loader harder to scan than the rest of the config pipeline. + +Suggested action: +- Move the manual-rule JSON parsing into a dedicated helper or file, for example `loadFallbackManualRules(path string)`. +- Keep `loadFallbackConfig()` focused on policy validation and wiring. +- Preserve the current strict error messages and test coverage while isolating the parser. + +## 10. Pick one owner for execution-plan fallback defaults + +Effort: medium +Risk: medium + +Why: +- The managed backend default is set in `internal/app/app.go:defaultExecutionPlanInput()`. +- The dashboard draft default is separately hardcoded in `internal/admin/dashboard/static/js/modules/execution-plans.js:defaultExecutionPlanForm()`. +- We already changed both once to keep them aligned, which confirms the drift risk is real. + +Suggested action: +- Prefer a single server-owned default surface for execution-plan authoring defaults. +- Options: + - expose default feature flags from the admin config endpoint + - derive the initial dashboard form from the active managed default workflow +- This reduces UI/backend drift for fallback and other execution-plan features. + ## Recommended order 1. Remove `CacheTypeBoth`. @@ -93,3 +197,8 @@ Suggested action: 3. Keep cached-only policy in one layer. 4. Remove the legacy middleware path. 5. Centralize cache semantics in a shared package. +6. Centralize fallback-mode semantics in `config`. +7. Collapse the duplicated translated fallback attempt loops. +8. Precompute fallback source identity once per resolution. +9. Extract manual fallback-rule file parsing from `loadFallbackConfig`. +10. Pick one owner for execution-plan fallback defaults. diff --git a/config/config.example.yaml b/config/config.example.yaml index 6a40b3a4..530ced24 100644 --- a/config/config.example.yaml +++ b/config/config.example.yaml @@ -139,7 +139,7 @@ guardrails: # content: "Follow safety guidelines." fallback: - default_mode: "off" # "off", "manual", or "auto" + default_mode: "auto" # "off", "manual", or "auto" manual_rules_path: "config/fallback.example.json" # optional JSON map: {"model": ["fallback-1", "provider/model"]}; required when fallback.default_mode or any fallback.overrides.*.mode is "manual" overrides: "gpt-4o": diff --git a/config/config.go b/config/config.go index 47a0b04b..a391abf3 100644 --- a/config/config.go +++ b/config/config.go @@ -114,6 +114,16 @@ func normalizeFallbackMode(mode FallbackMode) FallbackMode { return FallbackMode(strings.ToLower(strings.TrimSpace(string(mode)))) } +// ResolveFallbackDefaultMode canonicalizes the global fallback default mode and +// applies the process default when unset. +func ResolveFallbackDefaultMode(mode FallbackMode) FallbackMode { + mode = normalizeFallbackMode(mode) + if mode == "" { + return FallbackModeAuto + } + return mode +} + // FallbackModelOverride holds per-model mode overrides. type FallbackModelOverride struct { Mode FallbackMode `yaml:"mode" json:"mode"` @@ -122,7 +132,7 @@ type FallbackModelOverride struct { // FallbackConfig holds translated-route model fallback policy. type FallbackConfig struct { // DefaultMode controls the fallback behavior when no per-model override exists. - // Supported values: "auto", "manual", "off". Default: "off". + // Supported values: "auto", "manual", "off". Default: "auto". DefaultMode FallbackMode `yaml:"default_mode" env:"FEATURE_FALLBACK_MODE"` // ManualRulesPath points to a JSON file that maps source model selectors to @@ -426,11 +436,11 @@ type EmbedderConfig struct { // VectorStoreConfig selects the vector DB backend. // Type must be set when semantic caching is enabled: qdrant, pgvector, pinecone, weaviate. type VectorStoreConfig struct { - Type string `yaml:"type"` - Qdrant QdrantConfig `yaml:"qdrant"` - PGVector PGVectorConfig `yaml:"pgvector"` - Pinecone PineconeConfig `yaml:"pinecone"` - Weaviate WeaviateConfig `yaml:"weaviate"` + Type string `yaml:"type"` + Qdrant QdrantConfig `yaml:"qdrant"` + PGVector PGVectorConfig `yaml:"pgvector"` + Pinecone PineconeConfig `yaml:"pinecone"` + Weaviate WeaviateConfig `yaml:"weaviate"` } // QdrantConfig holds connection configuration for the Qdrant vector store. @@ -868,7 +878,7 @@ func buildDefaultConfig() *Config { ResponseHeaderTimeout: 600, }, Fallback: FallbackConfig{ - DefaultMode: FallbackModeOff, + DefaultMode: FallbackModeAuto, }, ExecutionPlans: ExecutionPlansConfig{ RefreshInterval: time.Minute, @@ -986,10 +996,7 @@ func loadFallbackConfig(cfg *FallbackConfig) error { return nil } - cfg.DefaultMode = normalizeFallbackMode(cfg.DefaultMode) - if cfg.DefaultMode == "" { - cfg.DefaultMode = FallbackModeOff - } + cfg.DefaultMode = ResolveFallbackDefaultMode(cfg.DefaultMode) if !cfg.DefaultMode.Valid() { return fmt.Errorf("fallback.default_mode must be one of: auto, manual, off") } diff --git a/config/config_test.go b/config/config_test.go index e3afe953..dbd4284c 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -168,8 +168,8 @@ func TestBuildDefaultConfig(t *testing.T) { if cfg.Guardrails.EnableForBatchProcessing { t.Error("expected Guardrails.EnableForBatchProcessing=false") } - if cfg.Fallback.DefaultMode != FallbackModeOff { - t.Errorf("expected Fallback.DefaultMode=off, got %q", cfg.Fallback.DefaultMode) + if cfg.Fallback.DefaultMode != FallbackModeAuto { + t.Errorf("expected Fallback.DefaultMode=auto, got %q", cfg.Fallback.DefaultMode) } if cfg.Cache.Response.Simple != nil { t.Errorf("expected Cache.Response.Simple=nil in defaults, got %+v", cfg.Cache.Response.Simple) @@ -570,6 +570,28 @@ func TestLoad_FeatureFallbackModeEnvOverridesFallbackDefaultMode(t *testing.T) { }) } +func TestLoad_BlankFallbackDefaultModeResolvesToAuto(t *testing.T) { + clearAllConfigEnvVars(t) + + withTempDir(t, func(dir string) { + yaml := ` +fallback: + default_mode: "" +` + if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yaml), 0644); err != nil { + t.Fatalf("Failed to write config.yaml: %v", err) + } + + result, err := Load() + if err != nil { + t.Fatalf("Load() failed: %v", err) + } + if result.Config.Fallback.DefaultMode != FallbackModeAuto { + t.Fatalf("Fallback.DefaultMode = %q, want %q", result.Config.Fallback.DefaultMode, FallbackModeAuto) + } + }) +} + func TestLoad_PassthroughFlags_EnvOverridesYAML(t *testing.T) { tests := []struct { name string diff --git a/internal/admin/dashboard/static/css/dashboard.css b/internal/admin/dashboard/static/css/dashboard.css index 8185c9d3..3d02b0a8 100644 --- a/internal/admin/dashboard/static/css/dashboard.css +++ b/internal/admin/dashboard/static/css/dashboard.css @@ -3138,14 +3138,6 @@ body.conversation-drawer-open { font-weight: 700; } -.ep-node-async-usage { - --accent: var(--success); -} - -.ep-node-async-audit { - --accent: var(--warning); -} - /* "ASYNC" label inline on the right of the branch */ .ep-async-label { display: inline-flex; diff --git a/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js b/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js index eb798715..b4e5e1ab 100644 --- a/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js +++ b/internal/admin/dashboard/static/js/modules/execution-plans-layout.test.js @@ -62,17 +62,17 @@ test('async label stays inline on the right side of the branch', () => { ); assert.match( template, - /
<\/div>\s*
/ + /
<\/div>\s*
/ + ); + assert.match( + template, + /
/ ); const asyncLabelRule = readCSSRule(css, '.ep-async-label'); assert.doesNotMatch(asyncLabelRule, /position:\s*absolute/); - - const asyncUsageRule = readCSSRule(css, '.ep-node-async-usage'); - assert.match(asyncUsageRule, /--accent:\s*var\(--success\)/); - - const asyncAuditRule = readCSSRule(css, '.ep-node-async-audit'); - assert.match(asyncAuditRule, /--accent:\s*var\(--warning\)/); + assert.doesNotMatch(css, /\.ep-node-async-usage\s*\{/); + assert.doesNotMatch(css, /\.ep-node-async-audit\s*\{/); }); test('workflow nodes use endpoint and feature color groups consistently', () => { diff --git a/internal/admin/dashboard/static/js/modules/execution-plans.js b/internal/admin/dashboard/static/js/modules/execution-plans.js index 402f2658..924fd152 100644 --- a/internal/admin/dashboard/static/js/modules/execution-plans.js +++ b/internal/admin/dashboard/static/js/modules/execution-plans.js @@ -34,7 +34,7 @@ audit: true, usage: true, guardrails: false, - fallback: false + fallback: true }, guardrails: [] }, @@ -51,7 +51,7 @@ audit: true, usage: true, guardrails: false, - fallback: false + fallback: true }, guardrails: [] }; @@ -1091,6 +1091,7 @@ ? this.executionPlanNormalizedFeatures(config.features) : this.executionPlanSourceFeatures(source); const forceAudit = !!config.forceAudit; + const highlightAsyncPresent = !!config.highlightAsyncPresent; const showGuardrails = !!features.guardrails; const showUsage = !!features.usage; const showAudit = forceAudit || !!features.audit; @@ -1111,6 +1112,8 @@ responseNodeClass: this.epResponseNodeClass(runtime), authNodeClass: this.epAuthNodeClass(runtime), authNodeSublabel: this.epAuthNodeSublabel(runtime), + usageNodeClass: this.epAsyncNodeClass(showUsage, highlightAsyncPresent), + auditNodeClass: this.epAsyncNodeClass(showAudit, highlightAsyncPresent), showAsync, showUsage, showAudit, @@ -1130,7 +1133,8 @@ entry, features, forceAudit: true, - forceAsync: true + forceAsync: true, + highlightAsyncPresent: true }); }, @@ -1200,6 +1204,10 @@ return runtime.authMethod; }, + epAsyncNodeClass(visible, highlightPresent) { + return visible && highlightPresent ? 'ep-node-success' : ''; + }, + epRuntimeFromEntry(entry) { if (!entry) return null; const normalizedCacheType = (() => { diff --git a/internal/admin/dashboard/static/js/modules/execution-plans.test.js b/internal/admin/dashboard/static/js/modules/execution-plans.test.js index b2d687a3..0f5fde69 100644 --- a/internal/admin/dashboard/static/js/modules/execution-plans.test.js +++ b/internal/admin/dashboard/static/js/modules/execution-plans.test.js @@ -38,11 +38,11 @@ test('executionPlanProviderOptions returns unique sorted provider types', () => ); }); -test('defaultExecutionPlanForm starts fallback disabled for new workflows', () => { +test('defaultExecutionPlanForm starts fallback enabled for new workflows', () => { const module = createExecutionPlansModule(); - assert.equal(module.executionPlanForm.features.fallback, false); - assert.equal(module.defaultExecutionPlanForm().features.fallback, false); + assert.equal(module.executionPlanForm.features.fallback, true); + assert.equal(module.defaultExecutionPlanForm().features.fallback, true); }); test('executionPlanPreview mirrors the draft workflow card state from the editor form', () => { @@ -203,6 +203,8 @@ test('executionPlanWorkflowChart returns the shared chart contract for workflow responseNodeClass: '', authNodeClass: '', authNodeSublabel: null, + usageNodeClass: '', + auditNodeClass: '', showAsync: true, showUsage: false, showAudit: true, @@ -256,6 +258,8 @@ test('executionPlanWorkflowChart masks globally disabled workflow features from responseNodeClass: '', authNodeClass: '', authNodeSublabel: null, + usageNodeClass: '', + auditNodeClass: '', showAsync: false, showUsage: false, showAudit: false, @@ -330,6 +334,8 @@ test('executionPlanAuditChart returns the shared chart contract for audit runtim responseNodeClass: 'ep-node-success', authNodeClass: '', authNodeSublabel: null, + usageNodeClass: 'ep-node-success', + auditNodeClass: 'ep-node-success', showAsync: true, showUsage: true, showAudit: true, @@ -364,6 +370,8 @@ test('executionPlanAuditChart forces audit nodes even when the workflow version responseNodeClass: 'ep-node-success', authNodeClass: '', authNodeSublabel: null, + usageNodeClass: '', + auditNodeClass: 'ep-node-success', showAsync: true, showUsage: false, showAudit: true, @@ -427,6 +435,8 @@ test('executionPlanAuditChart prefers request-time execution features over curre responseNodeClass: 'ep-node-success', authNodeClass: '', authNodeSublabel: null, + usageNodeClass: '', + auditNodeClass: 'ep-node-success', showAsync: true, showUsage: false, showAudit: true, @@ -435,6 +445,14 @@ test('executionPlanAuditChart prefers request-time execution features over curre ); }); +test('epAsyncNodeClass only marks async nodes green when the audit-log override is enabled', () => { + const module = createExecutionPlansModule(); + + assert.equal(module.epAsyncNodeClass(true, false), ''); + assert.equal(module.epAsyncNodeClass(false, true), ''); + assert.equal(module.epAsyncNodeClass(true, true), 'ep-node-success'); +}); + test('executionPlanSubmitMode switches to save when an active workflow already matches the selected scope', () => { const module = createExecutionPlansModule(); module.executionPlans = [ diff --git a/internal/admin/dashboard/templates/execution-plan-chart.html b/internal/admin/dashboard/templates/execution-plan-chart.html index 837ab55d..0e83376e 100644 --- a/internal/admin/dashboard/templates/execution-plan-chart.html +++ b/internal/admin/dashboard/templates/execution-plan-chart.html @@ -56,14 +56,14 @@
-
+
Usage
-
+
diff --git a/internal/fallback/resolver.go b/internal/fallback/resolver.go index b100f925..b2f127ad 100644 --- a/internal/fallback/resolver.go +++ b/internal/fallback/resolver.go @@ -42,10 +42,7 @@ func NewResolver(cfg config.FallbackConfig, registry Registry) *Resolver { return nil } - mode := cfg.DefaultMode - if mode == "" { - mode = config.FallbackModeOff - } + mode := config.ResolveFallbackDefaultMode(cfg.DefaultMode) if mode == config.FallbackModeOff && len(cfg.Manual) == 0 && len(cfg.Overrides) == 0 { return nil } diff --git a/internal/fallback/resolver_test.go b/internal/fallback/resolver_test.go index 2b665128..6aa4aecd 100644 --- a/internal/fallback/resolver_test.go +++ b/internal/fallback/resolver_test.go @@ -90,6 +90,32 @@ func TestResolverAutoModeAppendsRankingCandidates(t *testing.T) { } } +func TestResolverBlankDefaultModeUsesAutoFallback(t *testing.T) { + registry := newFakeRegistry( + modelInfo("gpt-4o", "openai", "openai", 1287, "gpt-4o"), + modelInfo("gpt-4o", "azure", "azure", 1287, "gpt-4o"), + modelInfo("gemini-2.5-pro", "gemini", "gemini", 1290, "gemini-2.5-pro"), + ) + + resolver := NewResolver(config.FallbackConfig{}, registry) + if resolver == nil { + t.Fatal("NewResolver() = nil, want auto-enabled resolver") + } + + got := resolver.ResolveFallbacks(&core.RequestModelResolution{ + Requested: core.NewRequestedModelSelector("gpt-4o", ""), + ResolvedSelector: core.ModelSelector{Model: "gpt-4o"}, + ProviderType: "openai", + }, core.OperationChatCompletions) + + if len(got) == 0 { + t.Fatal("len(got) = 0, want auto fallback candidates") + } + if got[0].QualifiedModel() != "azure/gpt-4o" { + t.Fatalf("got[0] = %q, want %q", got[0].QualifiedModel(), "azure/gpt-4o") + } +} + func TestResolverOverrideOffDisablesFallbacks(t *testing.T) { registry := newFakeRegistry( modelInfo("gpt-4o", "openai", "openai", 1287, "gpt-4o"),