From 4b972b4091c3017710811def37b90b948beb010e Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Thu, 14 May 2026 19:12:45 -0400 Subject: [PATCH 1/4] feat: grpcKubernetesBackend adapter for plugin-served gke (per ADR 0037) --- module/platform_kubernetes_grpc.go | 280 ++++++++++++++++++++++++ module/platform_kubernetes_grpc_test.go | 253 +++++++++++++++++++++ 2 files changed, 533 insertions(+) create mode 100644 module/platform_kubernetes_grpc.go create mode 100644 module/platform_kubernetes_grpc_test.go diff --git a/module/platform_kubernetes_grpc.go b/module/platform_kubernetes_grpc.go new file mode 100644 index 00000000..61a41c04 --- /dev/null +++ b/module/platform_kubernetes_grpc.go @@ -0,0 +1,280 @@ +// platform_kubernetes_grpc.go — the host-side kubernetesBackend that dispatches +// the `gke` cluster type to a plugin-served ResourceDriver gRPC client. +// +// Per ADR 0037 (decisions/0037-gke-cross-process-contract.md), `gke` folds into +// the existing ResourceDriver contract — ZERO new proto surface. A GKE cluster +// is a managed resource served by workflow-plugin-gcp under the resource type +// `infra.k8s_cluster`: +// +// kubernetesBackend → ResourceDriver RPC +// plan → Read (probe existence, synthesize create|noop plan) +// apply → Create (AlreadyExists resolves to success) +// status → Read (project outputs_json onto KubernetesClusterState) +// destroy → Delete (NotFound resolves to success) +// +// Precedent: the Phase A grpcIaCStateStore adapter (module/iac_state_grpc_client.go). +package module + +import ( + "context" + "encoding/json" + "fmt" + + pb "github.com/GoCodeAlone/workflow/plugin/external/proto" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// gkeResourceType is the ResourceDriver resource type workflow-plugin-gcp serves +// GKE clusters under (provider/provider.go registers GKEDriver here). ADR 0037 +// Option 1 dispatches the in-core `gke` kubernetesBackend to this driver. +const gkeResourceType = "infra.k8s_cluster" + +// KubernetesClusterState outputs_json key contract. ADR 0037 makes the host +// adapter the owner of these key names; workflow-plugin-gcp's GKEDriver.Read +// (Task 22) conforms its output to them. Keys mirror the KubernetesClusterState +// JSON field tags so the projection is a direct re-marshal. +const ( + k8sOutputKeyStatus = "status" + k8sOutputKeyEndpoint = "endpoint" + k8sOutputKeyVersion = "version" + k8sOutputKeyNodeGroups = "nodeGroups" +) + +// grpcKubernetesBackend adapts a pb.ResourceDriverClient (resource type +// infra.k8s_cluster) to the in-core kubernetesBackend interface, so a +// plugin-served `gke` backend is dispatched exactly like the deleted in-core +// gkeBackend was. +type grpcKubernetesBackend struct { + client pb.ResourceDriverClient +} + +// newGRPCKubernetesBackend wraps a ResourceDriverClient as a kubernetesBackend. +func newGRPCKubernetesBackend(c pb.ResourceDriverClient) *grpcKubernetesBackend { + return &grpcKubernetesBackend{client: c} +} + +// Compile-time guard: the gRPC adapter MUST satisfy the in-core contract so the +// engine seam (Task 26) can register it like any other kubernetesBackend. +var _ kubernetesBackend = (*grpcKubernetesBackend)(nil) + +// plan probes the cluster's existence via ResourceDriver.Read and synthesizes a +// PlatformPlan — a single `create` action when the cluster is absent +// (codes.NotFound), a `noop` action when it already exists. This mirrors the +// deleted in-core gkeBackend.plan, whose own logic was a Get-or-create check. +func (b *grpcKubernetesBackend) plan(k *PlatformKubernetes) (*PlatformPlan, error) { + plan := &PlatformPlan{Provider: "gke", Resource: k.clusterName()} + resp, err := b.client.Read(context.Background(), &pb.ResourceReadRequest{ + ResourceType: gkeResourceType, + Ref: b.buildResourceRef(k), + }) + if err != nil { + if status.Code(err) == codes.NotFound { + plan.Actions = []PlatformAction{{ + Type: "create", + Resource: k.clusterName(), + Detail: fmt.Sprintf("create GKE cluster %q", k.clusterName()), + }} + return plan, nil + } + return nil, fmt.Errorf("gke plan: Read %q: %w", k.clusterName(), err) + } + if resp.GetOutput() != nil { + plan.Actions = []PlatformAction{{ + Type: "noop", + Resource: k.clusterName(), + Detail: fmt.Sprintf("GKE cluster %q exists (status: %s)", k.clusterName(), resp.GetOutput().GetStatus()), + }} + return plan, nil + } + plan.Actions = []PlatformAction{{ + Type: "create", + Resource: k.clusterName(), + Detail: fmt.Sprintf("create GKE cluster %q", k.clusterName()), + }} + return plan, nil +} + +// apply creates the cluster via ResourceDriver.Create. Per ADR 0037 a +// codes.AlreadyExists response resolves to success — preserving the in-core +// gkeBackend.apply behavior that swallowed ALREADY_EXISTS. +func (b *grpcKubernetesBackend) apply(k *PlatformKubernetes) (*PlatformResult, error) { + spec, err := b.buildResourceSpec(k) + if err != nil { + return nil, err + } + resp, err := b.client.Create(context.Background(), &pb.ResourceCreateRequest{ + ResourceType: gkeResourceType, + Spec: spec, + }) + if err != nil { + if status.Code(err) == codes.AlreadyExists { + return &PlatformResult{ + Success: true, + Message: fmt.Sprintf("GKE cluster %q already exists", k.clusterName()), + State: k.state, + }, nil + } + return nil, fmt.Errorf("gke apply: Create %q: %w", k.clusterName(), err) + } + clusterState, err := kubernetesClusterStateFromOutput(k.clusterName(), resp.GetOutput()) + if err != nil { + return nil, fmt.Errorf("gke apply: %w", err) + } + return &PlatformResult{ + Success: true, + Message: fmt.Sprintf("GKE cluster %q creation initiated", k.clusterName()), + State: clusterState, + }, nil +} + +// status reads the cluster via ResourceDriver.Read and projects the +// outputs_json map onto the typed KubernetesClusterState. A codes.NotFound +// response yields a clean not-found state rather than an error — matching the +// in-core gkeBackend.status, which set Status="not-found" on a failed Get. +func (b *grpcKubernetesBackend) status(k *PlatformKubernetes) (*KubernetesClusterState, error) { + resp, err := b.client.Read(context.Background(), &pb.ResourceReadRequest{ + ResourceType: gkeResourceType, + Ref: b.buildResourceRef(k), + }) + if err != nil { + if status.Code(err) == codes.NotFound { + return &KubernetesClusterState{Name: k.clusterName(), Provider: "gke", Status: "not-found"}, nil + } + return nil, fmt.Errorf("gke status: Read %q: %w", k.clusterName(), err) + } + st, err := kubernetesClusterStateFromOutput(k.clusterName(), resp.GetOutput()) + if err != nil { + return nil, fmt.Errorf("gke status: %w", err) + } + return st, nil +} + +// destroy deletes the cluster via ResourceDriver.Delete. Per ADR 0037 a +// codes.NotFound response resolves to success — preserving the in-core +// gkeBackend.destroy behavior that swallowed NOT_FOUND. +func (b *grpcKubernetesBackend) destroy(k *PlatformKubernetes) error { + _, err := b.client.Delete(context.Background(), &pb.ResourceDeleteRequest{ + ResourceType: gkeResourceType, + Ref: b.buildResourceRef(k), + }) + if err != nil { + if status.Code(err) == codes.NotFound { + return nil + } + return fmt.Errorf("gke destroy: Delete %q: %w", k.clusterName(), err) + } + return nil +} + +// buildResourceRef builds the ResourceRef the ResourceDriver RPCs address the +// cluster by — keyed on the cluster name and the infra.k8s_cluster type. +func (b *grpcKubernetesBackend) buildResourceRef(k *PlatformKubernetes) *pb.ResourceRef { + return &pb.ResourceRef{ + Name: k.clusterName(), + Type: gkeResourceType, + } +} + +// buildResourceSpec builds the ResourceSpec for a Create RPC: the +// platform.kubernetes module config carried through as config_json (the plugin +// GKEDriver reads project_id/location/zone/version/nodeGroups from it), plus +// resolved cloud credentials serialized in when a cloud account is wired. +func (b *grpcKubernetesBackend) buildResourceSpec(k *PlatformKubernetes) (*pb.ResourceSpec, error) { + cfg := make(map[string]any, len(k.config)+2) + for key, val := range k.config { + cfg[key] = val + } + if v, _ := cfg["version"].(string); v == "" { + cfg["version"] = "1.29" + } + if err := b.injectCredentials(k, cfg); err != nil { + return nil, err + } + configJSON, err := json.Marshal(cfg) + if err != nil { + return nil, fmt.Errorf("gke: encode resource spec config: %w", err) + } + return &pb.ResourceSpec{ + Name: k.clusterName(), + Type: gkeResourceType, + ConfigJson: configJSON, + }, nil +} + +// injectCredentials resolves the cloud account (when one is wired) and folds the +// GCP project ID + service-account JSON into the spec config — the cross-process +// equivalent of the in-core containerService building the SDK client from +// CloudCredentials.ServiceAccountJSON. +func (b *grpcKubernetesBackend) injectCredentials(k *PlatformKubernetes, cfg map[string]any) error { + if k.provider == nil { + return nil + } + creds, err := k.provider.GetCredentials(context.Background()) + if err != nil { + return fmt.Errorf("gke: resolve cloud credentials: %w", err) + } + if creds == nil { + return nil + } + if creds.ProjectID != "" { + cfg["projectId"] = creds.ProjectID + } + if len(creds.ServiceAccountJSON) > 0 { + cfg["serviceAccountJSON"] = string(creds.ServiceAccountJSON) + } + return nil +} + +// kubernetesClusterStateFromOutput projects a ResourceDriver ResourceOutput onto +// the typed KubernetesClusterState. The free-form outputs_json map crosses the +// wire as JSON bytes (the iac.proto invariant); this is the host-owned +// map→struct projection ADR 0037 assigns to Tasks 25/26. The adapter sets +// Provider="gke" itself and tolerates a missing/empty outputs_json. +func kubernetesClusterStateFromOutput(name string, out *pb.ResourceOutput) (*KubernetesClusterState, error) { + st := &KubernetesClusterState{Name: name, Provider: "gke", Status: "not-found"} + if out == nil { + return st, nil + } + if out.GetStatus() != "" { + st.Status = out.GetStatus() + } + outputs, err := jsonBytesToMap(out.GetOutputsJson()) + if err != nil { + return nil, fmt.Errorf("decode outputs_json: %w", err) + } + if outputs == nil { + return st, nil + } + if s, ok := outputs[k8sOutputKeyStatus].(string); ok && s != "" { + st.Status = s + } + if e, ok := outputs[k8sOutputKeyEndpoint].(string); ok { + st.Endpoint = e + } + if v, ok := outputs[k8sOutputKeyVersion].(string); ok { + st.Version = v + } + if ngRaw, ok := outputs[k8sOutputKeyNodeGroups]; ok && ngRaw != nil { + groups, err := nodeGroupsFromAny(ngRaw) + if err != nil { + return nil, fmt.Errorf("decode %s: %w", k8sOutputKeyNodeGroups, err) + } + st.NodeGroups = groups + } + return st, nil +} + +// nodeGroupsFromAny re-marshals the free-form nodeGroups value (an []any of +// map[string]any decoded from JSON) into typed NodeGroupState slices. +func nodeGroupsFromAny(v any) ([]NodeGroupState, error) { + b, err := json.Marshal(v) + if err != nil { + return nil, err + } + var groups []NodeGroupState + if err := json.Unmarshal(b, &groups); err != nil { + return nil, err + } + return groups, nil +} diff --git a/module/platform_kubernetes_grpc_test.go b/module/platform_kubernetes_grpc_test.go new file mode 100644 index 00000000..e70e8358 --- /dev/null +++ b/module/platform_kubernetes_grpc_test.go @@ -0,0 +1,253 @@ +package module + +import ( + "context" + "encoding/json" + "testing" + + pb "github.com/GoCodeAlone/workflow/plugin/external/proto" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// fakeResourceDriverClient is a pb.ResourceDriverClient stub for the +// grpcKubernetesBackend adapter tests. It records the requests it received and +// returns canned responses/errors per RPC. The 6 RPCs the adapter never calls +// (Update/Diff/Scale/HealthCheck/SensitiveKeys/Troubleshoot) are no-ops. +type fakeResourceDriverClient struct { + createReq *pb.ResourceCreateRequest + createResp *pb.ResourceCreateResponse + createErr error + + readReq *pb.ResourceReadRequest + readResp *pb.ResourceReadResponse + readErr error + + deleteReq *pb.ResourceDeleteRequest + deleteErr error +} + +func (f *fakeResourceDriverClient) Create(_ context.Context, in *pb.ResourceCreateRequest, _ ...grpc.CallOption) (*pb.ResourceCreateResponse, error) { + f.createReq = in + return f.createResp, f.createErr +} +func (f *fakeResourceDriverClient) Read(_ context.Context, in *pb.ResourceReadRequest, _ ...grpc.CallOption) (*pb.ResourceReadResponse, error) { + f.readReq = in + return f.readResp, f.readErr +} +func (f *fakeResourceDriverClient) Delete(_ context.Context, in *pb.ResourceDeleteRequest, _ ...grpc.CallOption) (*pb.ResourceDeleteResponse, error) { + f.deleteReq = in + if f.deleteErr != nil { + return nil, f.deleteErr + } + return &pb.ResourceDeleteResponse{}, nil +} +func (*fakeResourceDriverClient) Update(context.Context, *pb.ResourceUpdateRequest, ...grpc.CallOption) (*pb.ResourceUpdateResponse, error) { + return nil, nil +} +func (*fakeResourceDriverClient) Diff(context.Context, *pb.ResourceDiffRequest, ...grpc.CallOption) (*pb.ResourceDiffResponse, error) { + return nil, nil +} +func (*fakeResourceDriverClient) Scale(context.Context, *pb.ResourceScaleRequest, ...grpc.CallOption) (*pb.ResourceScaleResponse, error) { + return nil, nil +} +func (*fakeResourceDriverClient) HealthCheck(context.Context, *pb.ResourceHealthCheckRequest, ...grpc.CallOption) (*pb.ResourceHealthCheckResponse, error) { + return nil, nil +} +func (*fakeResourceDriverClient) SensitiveKeys(context.Context, *pb.SensitiveKeysRequest, ...grpc.CallOption) (*pb.SensitiveKeysResponse, error) { + return nil, nil +} +func (*fakeResourceDriverClient) Troubleshoot(context.Context, *pb.TroubleshootRequest, ...grpc.CallOption) (*pb.TroubleshootResponse, error) { + return nil, nil +} + +func newGKETestModule() *PlatformKubernetes { + return NewPlatformKubernetes("my-cluster", map[string]any{ + "type": "gke", + "clusterName": "my-cluster", + "version": "1.29", + }) +} + +func TestGRPCKubernetesBackend_Plan(t *testing.T) { + t.Run("not found → create action", func(t *testing.T) { + fake := &fakeResourceDriverClient{readErr: status.Error(codes.NotFound, "no such cluster")} + b := newGRPCKubernetesBackend(fake) + plan, err := b.plan(newGKETestModule()) + if err != nil { + t.Fatalf("plan: %v", err) + } + if plan.Provider != "gke" || plan.Resource != "my-cluster" { + t.Fatalf("plan header mismatch: %+v", plan) + } + if len(plan.Actions) != 1 || plan.Actions[0].Type != "create" { + t.Fatalf("expected one create action, got %+v", plan.Actions) + } + if fake.readReq.GetResourceType() != gkeResourceType { + t.Fatalf("Read resource_type = %q, want %q", fake.readReq.GetResourceType(), gkeResourceType) + } + }) + + t.Run("exists → noop action", func(t *testing.T) { + fake := &fakeResourceDriverClient{readResp: &pb.ResourceReadResponse{ + Output: &pb.ResourceOutput{Name: "my-cluster", Type: gkeResourceType, Status: "running"}, + }} + b := newGRPCKubernetesBackend(fake) + plan, err := b.plan(newGKETestModule()) + if err != nil { + t.Fatalf("plan: %v", err) + } + if len(plan.Actions) != 1 || plan.Actions[0].Type != "noop" { + t.Fatalf("expected one noop action, got %+v", plan.Actions) + } + }) + + t.Run("transport error propagates", func(t *testing.T) { + fake := &fakeResourceDriverClient{readErr: status.Error(codes.Unavailable, "boom")} + b := newGRPCKubernetesBackend(fake) + if _, err := b.plan(newGKETestModule()); err == nil { + t.Fatal("plan must propagate a non-NotFound transport error") + } + }) +} + +func TestGRPCKubernetesBackend_Apply(t *testing.T) { + t.Run("create success", func(t *testing.T) { + fake := &fakeResourceDriverClient{createResp: &pb.ResourceCreateResponse{ + Output: &pb.ResourceOutput{Name: "my-cluster", Type: gkeResourceType, Status: "creating"}, + }} + b := newGRPCKubernetesBackend(fake) + res, err := b.apply(newGKETestModule()) + if err != nil { + t.Fatalf("apply: %v", err) + } + if !res.Success { + t.Fatalf("apply Success = false: %+v", res) + } + if fake.createReq.GetResourceType() != gkeResourceType { + t.Fatalf("Create resource_type = %q, want %q", fake.createReq.GetResourceType(), gkeResourceType) + } + spec := fake.createReq.GetSpec() + if spec.GetName() != "my-cluster" || spec.GetType() != gkeResourceType { + t.Fatalf("Create spec mismatch: name=%q type=%q", spec.GetName(), spec.GetType()) + } + if len(spec.GetConfigJson()) == 0 { + t.Fatal("Create spec config_json must carry the platform.kubernetes config") + } + }) + + t.Run("already exists resolves to success", func(t *testing.T) { + fake := &fakeResourceDriverClient{createErr: status.Error(codes.AlreadyExists, "exists")} + b := newGRPCKubernetesBackend(fake) + res, err := b.apply(newGKETestModule()) + if err != nil { + t.Fatalf("apply: %v", err) + } + if !res.Success { + t.Fatalf("apply on AlreadyExists must be Success=true, got %+v", res) + } + }) + + t.Run("transport error propagates", func(t *testing.T) { + fake := &fakeResourceDriverClient{createErr: status.Error(codes.Internal, "boom")} + b := newGRPCKubernetesBackend(fake) + if _, err := b.apply(newGKETestModule()); err == nil { + t.Fatal("apply must propagate a non-AlreadyExists error") + } + }) +} + +func TestGRPCKubernetesBackend_Status(t *testing.T) { + t.Run("outputs_json projects onto KubernetesClusterState", func(t *testing.T) { + outputs := map[string]any{ + "status": "running", + "endpoint": "https://1.2.3.4", + "version": "1.29.1", + "nodeGroups": []any{ + map[string]any{ + "name": "default-pool", + "instanceType": "e2-medium", + "min": 1, + "max": 3, + "current": 2, + }, + }, + } + outJSON, err := json.Marshal(outputs) + if err != nil { + t.Fatalf("marshal outputs: %v", err) + } + fake := &fakeResourceDriverClient{readResp: &pb.ResourceReadResponse{ + Output: &pb.ResourceOutput{ + Name: "my-cluster", + Type: gkeResourceType, + ProviderId: "projects/p/locations/l/clusters/my-cluster", + OutputsJson: outJSON, + Status: "running", + }, + }} + b := newGRPCKubernetesBackend(fake) + st, err := b.status(newGKETestModule()) + if err != nil { + t.Fatalf("status: %v", err) + } + if st.Name != "my-cluster" || st.Provider != "gke" { + t.Fatalf("status identity mismatch: %+v", st) + } + if st.Status != "running" || st.Endpoint != "https://1.2.3.4" || st.Version != "1.29.1" { + t.Fatalf("status fields mismatch: %+v", st) + } + if len(st.NodeGroups) != 1 { + t.Fatalf("expected 1 node group, got %d", len(st.NodeGroups)) + } + ng := st.NodeGroups[0] + if ng.Name != "default-pool" || ng.InstanceType != "e2-medium" || ng.Min != 1 || ng.Max != 3 || ng.Current != 2 { + t.Fatalf("node group did not survive the JSON-bytes round-trip: %+v", ng) + } + }) + + t.Run("not found → not-found state", func(t *testing.T) { + fake := &fakeResourceDriverClient{readErr: status.Error(codes.NotFound, "no such cluster")} + b := newGRPCKubernetesBackend(fake) + st, err := b.status(newGKETestModule()) + if err != nil { + t.Fatalf("status: %v", err) + } + if st.Status != "not-found" || st.Provider != "gke" { + t.Fatalf("expected not-found gke state, got %+v", st) + } + }) +} + +func TestGRPCKubernetesBackend_Destroy(t *testing.T) { + t.Run("delete success", func(t *testing.T) { + fake := &fakeResourceDriverClient{} + b := newGRPCKubernetesBackend(fake) + if err := b.destroy(newGKETestModule()); err != nil { + t.Fatalf("destroy: %v", err) + } + if fake.deleteReq.GetResourceType() != gkeResourceType { + t.Fatalf("Delete resource_type = %q, want %q", fake.deleteReq.GetResourceType(), gkeResourceType) + } + if fake.deleteReq.GetRef().GetName() != "my-cluster" { + t.Fatalf("Delete ref name = %q, want my-cluster", fake.deleteReq.GetRef().GetName()) + } + }) + + t.Run("not found resolves to success", func(t *testing.T) { + fake := &fakeResourceDriverClient{deleteErr: status.Error(codes.NotFound, "gone")} + b := newGRPCKubernetesBackend(fake) + if err := b.destroy(newGKETestModule()); err != nil { + t.Fatalf("destroy on NotFound must succeed, got %v", err) + } + }) + + t.Run("transport error propagates", func(t *testing.T) { + fake := &fakeResourceDriverClient{deleteErr: status.Error(codes.Internal, "boom")} + b := newGRPCKubernetesBackend(fake) + if err := b.destroy(newGKETestModule()); err == nil { + t.Fatal("destroy must propagate a non-NotFound error") + } + }) +} From e681beb8ea61d32660e5c3d01863e7f2242a9983 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Thu, 14 May 2026 19:23:33 -0400 Subject: [PATCH 2/4] feat: engine seam + registry for plugin-served kubernetes backends --- engine.go | 16 +++ module/platform_kubernetes.go | 34 ++++-- module/platform_kubernetes_plugin_registry.go | 92 ++++++++++++++ ...latform_kubernetes_plugin_registry_test.go | 112 ++++++++++++++++++ plugin/external/adapter.go | 85 +++++++++++++ plugin/kubernetes_backend_provider.go | 16 +++ 6 files changed, 347 insertions(+), 8 deletions(-) create mode 100644 module/platform_kubernetes_plugin_registry.go create mode 100644 module/platform_kubernetes_plugin_registry_test.go create mode 100644 plugin/kubernetes_backend_provider.go diff --git a/engine.go b/engine.go index d9e855a9..0998cda1 100644 --- a/engine.go +++ b/engine.go @@ -339,6 +339,22 @@ func (e *StdEngine) loadPluginInternal(p plugin.EnginePlugin, allowOverride bool } } } + // Register any platform.kubernetes backends the plugin serves into module's + // package-level registry, so `platform.kubernetes` configs with a + // plugin-provided `type:` (e.g. gke) dispatch to the plugin-served + // ResourceDriver-backed backend. Per ADR 0037 — folds into the existing + // ResourceDriver contract, no new proto surface. + if kb, ok := p.(plugin.KubernetesBackendProvider); ok { + clients, err := kb.KubernetesBackendClients() + if err != nil { + return fmt.Errorf("load plugin %q: kubernetes backends: %w", p.EngineManifest().Name, err) + } + for name, client := range clients { + if err := module.RegisterKubernetesBackendClient(name, client); err != nil { + return fmt.Errorf("load plugin %q: %w", p.EngineManifest().Name, err) + } + } + } e.enginePlugins = append(e.enginePlugins, p) return nil } diff --git a/module/platform_kubernetes.go b/module/platform_kubernetes.go index 3d355aaf..d630718c 100644 --- a/module/platform_kubernetes.go +++ b/module/platform_kubernetes.go @@ -89,15 +89,33 @@ func (m *PlatformKubernetes) Init(app modular.Application) error { clusterType = "kind" } - factory, ok := kubernetesBackendRegistry[clusterType] - if !ok { - return fmt.Errorf("platform.kubernetes %q: unsupported type %q", m.name, clusterType) - } - backend, err := factory(m.config) - if err != nil { - return fmt.Errorf("platform.kubernetes %q: creating backend: %w", m.name, err) + if _, isCore := reservedKubernetesBackendTypes[clusterType]; isCore { + // SDK-free in-core backend (kind/k3s/eks/aks) — use the in-process + // factory map unchanged. + factory, ok := kubernetesBackendRegistry[clusterType] + if !ok { + return fmt.Errorf("platform.kubernetes %q: unsupported type %q", m.name, clusterType) + } + backend, err := factory(m.config) + if err != nil { + return fmt.Errorf("platform.kubernetes %q: creating backend: %w", m.name, err) + } + m.backend = backend + } else { + // Not an in-core cluster type — consult the plugin-backend registry. + // The engine populates kubernetesBackendClientRegistryInstance at + // plugin-load time; a resolved type (e.g. gke) is served over gRPC via + // the ResourceDriver contract, wrapped in grpcKubernetesBackend. Per + // ADR 0037. + client, ok := kubernetesBackendClientRegistryInstance.resolve(clusterType) + if !ok { + return fmt.Errorf("platform.kubernetes %q: cluster type %q is not built into workflow core "+ + "(in-core types: 'kind', 'k3s', 'eks', 'aks'). If %q is a plugin-provided backend "+ + "(e.g. 'gke' via workflow-plugin-gcp), install and load that plugin", + m.name, clusterType, clusterType) + } + m.backend = newGRPCKubernetesBackend(client) } - m.backend = backend version, _ := m.config["version"].(string) m.state = &KubernetesClusterState{ diff --git a/module/platform_kubernetes_plugin_registry.go b/module/platform_kubernetes_plugin_registry.go new file mode 100644 index 00000000..b7538c22 --- /dev/null +++ b/module/platform_kubernetes_plugin_registry.go @@ -0,0 +1,92 @@ +package module + +import ( + "fmt" + "strings" + "sync" + + pb "github.com/GoCodeAlone/workflow/plugin/external/proto" +) + +// ───────────────────────────────────────────────────────────────────────────── +// kubernetesBackendClientRegistry — engine-side registry mapping a +// platform.kubernetes cluster type name to a plugin-served ResourceDriver gRPC +// client. +// +// The engine populates the package-level singleton at plugin-load time; +// PlatformKubernetes.Init consults it for any cluster type not handled by an +// in-core backend. Reserved in-core type names (kind/k3s/eks/aks) — the +// SDK-free backends that stay in core — cannot be claimed by a plugin. +// +// Structurally identical to iacStateBackendRegistry +// (module/iac_state_plugin_registry.go); per ADR 0037 a kubernetes backend is +// served over the existing ResourceDriver contract — no new proto surface. +// ───────────────────────────────────────────────────────────────────────────── + +// reservedKubernetesBackendTypes are the in-core cluster type names a plugin may +// never claim — the backends registered in platform_kubernetes_core.go. `gke` +// is intentionally absent: it is the cloud-SDK-bearing backend that moves to +// workflow-plugin-gcp and is therefore plugin-served. +var reservedKubernetesBackendTypes = map[string]struct{}{ + "kind": {}, + "k3s": {}, + "eks": {}, + "aks": {}, +} + +// kubernetesBackendClientRegistry maps a cluster type name to a plugin gRPC +// client. +type kubernetesBackendClientRegistry struct { + mu sync.RWMutex + clients map[string]pb.ResourceDriverClient +} + +// newKubernetesBackendClientRegistry constructs an empty registry. +func newKubernetesBackendClientRegistry() *kubernetesBackendClientRegistry { + return &kubernetesBackendClientRegistry{clients: make(map[string]pb.ResourceDriverClient)} +} + +// register associates a cluster type name with a plugin client. The name must +// be non-empty (after trimming) and the client must be non-nil. Reserved +// in-core type names are rejected. Re-registering a non-reserved name +// overwrites the previous client (last plugin loaded wins). +func (r *kubernetesBackendClientRegistry) register(name string, client pb.ResourceDriverClient) error { + name = strings.TrimSpace(name) + if name == "" { + return fmt.Errorf("kubernetes backend registration: name must not be empty") + } + if client == nil { + return fmt.Errorf("kubernetes backend registration %q: client must not be nil", name) + } + if _, reserved := reservedKubernetesBackendTypes[name]; reserved { + return fmt.Errorf("plugin registered reserved kubernetes backend type %q", name) + } + r.mu.Lock() + defer r.mu.Unlock() + r.clients[name] = client + return nil +} + +// resolve returns the plugin client for a cluster type name, and whether one is +// registered. +func (r *kubernetesBackendClientRegistry) resolve(name string) (pb.ResourceDriverClient, bool) { + r.mu.RLock() + defer r.mu.RUnlock() + c, ok := r.clients[name] + return c, ok +} + +// kubernetesBackendClientRegistryInstance is the package-level singleton the +// engine populates and PlatformKubernetes.Init consults. +var kubernetesBackendClientRegistryInstance = newKubernetesBackendClientRegistry() + +// RegisterKubernetesBackendClient associates a platform.kubernetes cluster type +// with a plugin-served ResourceDriver gRPC client in the package-level +// registry. The engine calls this at plugin-load for each kubernetes backend a +// loaded plugin serves (e.g. `gke` via workflow-plugin-gcp); +// PlatformKubernetes.Init then resolves `type: ` configs against it. +// Reserved in-core type names (kind/k3s/eks/aks) and empty names / nil clients +// are rejected — see kubernetesBackendClientRegistry.register. Per ADR 0037. +func RegisterKubernetesBackendClient(name string, client pb.ResourceDriverClient) error { + return kubernetesBackendClientRegistryInstance.register(name, client) +} diff --git a/module/platform_kubernetes_plugin_registry_test.go b/module/platform_kubernetes_plugin_registry_test.go new file mode 100644 index 00000000..801d9cea --- /dev/null +++ b/module/platform_kubernetes_plugin_registry_test.go @@ -0,0 +1,112 @@ +package module + +import ( + "strings" + "testing" +) + +// TestKubernetesBackendClientRegistry exercises the engine-side registry that +// maps a plugin-served platform.kubernetes cluster type to a ResourceDriver +// gRPC client. Mirrors TestIaCStateBackendRegistry. +func TestKubernetesBackendClientRegistry(t *testing.T) { + reg := newKubernetesBackendClientRegistry() + if _, ok := reg.resolve("gke"); ok { + t.Fatal("empty registry should not resolve gke") + } + fake := &fakeResourceDriverClient{} + if err := reg.register("gke", fake); err != nil { + t.Fatalf("register: %v", err) + } + got, ok := reg.resolve("gke") + if !ok || got != fake { + t.Fatalf("resolve gke: ok=%v got=%v", ok, got) + } + // Reserved in-core cluster types cannot be claimed by a plugin. + for _, reserved := range []string{"kind", "k3s", "eks", "aks"} { + if err := reg.register(reserved, fake); err == nil { + t.Fatalf("register(%q) must fail — reserved in-core cluster type", reserved) + } + } + // Empty / whitespace-only names are rejected. + for _, bad := range []string{"", " "} { + if err := reg.register(bad, fake); err == nil { + t.Fatalf("register(%q) must fail — empty name", bad) + } + } + // A nil client is rejected. + if err := reg.register("nilclient_type", nil); err == nil { + t.Fatal("register with nil client must fail") + } + // A name surrounded by whitespace is trimmed and registers under the + // trimmed key. + if err := reg.register(" spaced_type ", fake); err != nil { + t.Fatalf("register trimmed name: %v", err) + } + if _, ok := reg.resolve("spaced_type"); !ok { + t.Fatal("trimmed name must resolve under its trimmed key") + } +} + +// TestRegisterKubernetesBackendClient exercises the exported wrapper the engine +// calls at plugin-load: a non-reserved name registers into the package-level +// singleton and resolves; a reserved in-core type name is rejected. +func TestRegisterKubernetesBackendClient(t *testing.T) { + const backend = "gke_wrapper_test" + fake := &fakeResourceDriverClient{} + if err := RegisterKubernetesBackendClient(backend, fake); err != nil { + t.Fatalf("RegisterKubernetesBackendClient(%q): %v", backend, err) + } + defer func() { + kubernetesBackendClientRegistryInstance.mu.Lock() + delete(kubernetesBackendClientRegistryInstance.clients, backend) + kubernetesBackendClientRegistryInstance.mu.Unlock() + }() + if got, ok := kubernetesBackendClientRegistryInstance.resolve(backend); !ok || got != fake { + t.Fatalf("resolve(%q): ok=%v got=%v", backend, ok, got) + } + if err := RegisterKubernetesBackendClient("kind", fake); err == nil { + t.Fatal(`RegisterKubernetesBackendClient("kind") must fail — reserved in-core cluster type`) + } +} + +// TestPlatformKubernetes_GKEDispatchToPluginClient exercises the real +// PlatformKubernetes.Init resolution path: a `type: gke` config — a type no +// in-core backend owns — resolves from the package-level client registry, +// yielding a *grpcKubernetesBackend. +func TestPlatformKubernetes_GKEDispatchToPluginClient(t *testing.T) { + const clusterType = "gke" + fake := &fakeResourceDriverClient{} + if err := kubernetesBackendClientRegistryInstance.register(clusterType, fake); err != nil { + t.Fatalf("register: %v", err) + } + defer func() { + kubernetesBackendClientRegistryInstance.mu.Lock() + delete(kubernetesBackendClientRegistryInstance.clients, clusterType) + kubernetesBackendClientRegistryInstance.mu.Unlock() + }() + + m := NewPlatformKubernetes("gke-cluster", map[string]any{"type": "gke"}) + if err := m.Init(NewMockApplication()); err != nil { + t.Fatalf("Init: %v", err) + } + if _, ok := m.backend.(*grpcKubernetesBackend); !ok { + t.Fatalf("m.backend is %T, want *grpcKubernetesBackend", m.backend) + } +} + +// TestPlatformKubernetes_GKEWithoutPluginErrors verifies that `type: gke` with +// no plugin client registered fails Init with a clean error pointing the +// operator at workflow-plugin-gcp. +func TestPlatformKubernetes_GKEWithoutPluginErrors(t *testing.T) { + if _, ok := kubernetesBackendClientRegistryInstance.resolve("gke"); ok { + t.Skip("a gke client is registered by a concurrent test; skipping the negative case") + } + m := NewPlatformKubernetes("gke-cluster", map[string]any{"type": "gke"}) + err := m.Init(NewMockApplication()) + if err == nil { + t.Fatal("Init must fail when no gke plugin client is registered") + } + if !strings.Contains(err.Error(), "workflow-plugin-gcp") { + t.Fatalf("error must point the operator to workflow-plugin-gcp, got: %v", err) + } +} diff --git a/plugin/external/adapter.go b/plugin/external/adapter.go index 4daffb4e..6b0dcb89 100644 --- a/plugin/external/adapter.go +++ b/plugin/external/adapter.go @@ -720,9 +720,94 @@ func sameStringSet(a, b []string) bool { return len(seen) == len(set) } +// ───────────────────────────────────────────────────────────────────────────── +// KubernetesBackendProvider — plugin-served platform.kubernetes backends. +// +// Per ADR 0037 a kubernetes backend (gke) folds into the existing +// ResourceDriver contract — no new proto surface. A plugin serves the `gke` +// platform.kubernetes backend when it advertises the ResourceDriver service AND +// its live Capabilities RPC declares the infra.k8s_cluster resource type. +// ───────────────────────────────────────────────────────────────────────────── + +// resourceDriverServiceName is the fully-qualified gRPC service a plugin's +// ContractRegistry must advertise for the adapter to be a potential +// kubernetes-backend provider. Sourced from the generated proto's ServiceDesc +// so it cannot drift if the proto package path/service name ever changes. +var resourceDriverServiceName = pb.ResourceDriver_ServiceDesc.ServiceName + +// k8sClusterResourceType is the ResourceDriver resource type a plugin must +// declare (via the Capabilities RPC) for the adapter to register it as the +// platform.kubernetes `gke` backend. Mirrors module's gkeResourceType — kept +// local so the plugin/external package takes no dependency on module. +const k8sClusterResourceType = "infra.k8s_cluster" + +// gkeKubernetesBackendType is the platform.kubernetes cluster type name the +// infra.k8s_cluster ResourceDriver is registered under in core. +const gkeKubernetesBackendType = "gke" + +// advertisesResourceDriverService reports whether the adapter's ContractRegistry +// carries a CONTRACT_KIND_SERVICE descriptor for the ResourceDriver service. +func (a *ExternalPluginAdapter) advertisesResourceDriverService() bool { + if a.contractRegistry == nil { + return false + } + for _, d := range a.contractRegistry.Contracts { + if d == nil { + continue + } + if d.Kind == pb.ContractKind_CONTRACT_KIND_SERVICE && d.ServiceName == resourceDriverServiceName { + return true + } + } + return false +} + +// KubernetesBackendClients implements plugin.KubernetesBackendProvider. At +// plugin-load the engine type-asserts the adapter against that interface and +// registers each returned (cluster-type → ResourceDriver client) pair into +// module's kubernetes backend registry. Per ADR 0037. +// +// Behaviour: +// - If the plugin does not advertise the ResourceDriver service it serves no +// kubernetes backend — return (nil, nil); the engine type-assert still +// succeeds and just registers nothing. +// - Otherwise the live Capabilities RPC is the source of truth (mirroring how +// IaCStateBackendClients trusts the ListBackendNames RPC): when it declares +// the infra.k8s_cluster resource type, the plugin serves the `gke` +// kubernetes backend and a ResourceDriver client is registered under that +// name. +func (a *ExternalPluginAdapter) KubernetesBackendClients() (map[string]pb.ResourceDriverClient, error) { + if !a.advertisesResourceDriverService() { + return nil, nil + } + conn := a.Conn() + if conn == nil { + return nil, fmt.Errorf("plugin %s advertises the ResourceDriver service but has no gRPC connection", a.name) + } + provider := pb.NewIaCProviderRequiredClient(conn) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + caps, err := provider.Capabilities(ctx, &pb.CapabilitiesRequest{}) + if err != nil { + return nil, fmt.Errorf("plugin %s: Capabilities RPC: %w", a.name, err) + } + for _, decl := range caps.GetCapabilities() { + if decl.GetResourceType() == k8sClusterResourceType { + return map[string]pb.ResourceDriverClient{ + gkeKubernetesBackendType: pb.NewResourceDriverClient(conn), + }, nil + } + } + return nil, nil +} + // Ensure ExternalPluginAdapter satisfies plugin.EnginePlugin at compile time. var _ plugin.EnginePlugin = (*ExternalPluginAdapter)(nil) +// Ensure ExternalPluginAdapter satisfies plugin.KubernetesBackendProvider — the +// engine type-asserts loaded plugins against it at plugin-load. +var _ plugin.KubernetesBackendProvider = (*ExternalPluginAdapter)(nil) + // Ensure ExternalPluginAdapter satisfies plugin.IaCStateBackendProvider at // compile time — the engine type-asserts loaded plugins against it. var _ plugin.IaCStateBackendProvider = (*ExternalPluginAdapter)(nil) diff --git a/plugin/kubernetes_backend_provider.go b/plugin/kubernetes_backend_provider.go new file mode 100644 index 00000000..2daa3b5b --- /dev/null +++ b/plugin/kubernetes_backend_provider.go @@ -0,0 +1,16 @@ +package plugin + +import proto "github.com/GoCodeAlone/workflow/plugin/external/proto" + +// KubernetesBackendProvider is the optional interface an external-plugin adapter +// implements when its plugin serves one or more platform.kubernetes cluster-type +// backends (e.g. "gke"). The engine type-asserts loaded plugins against it (same +// pattern as IaCStateBackendProvider) and populates module's kubernetes backend +// registry. +// +// Per ADR 0037 a kubernetes backend is served over the existing ResourceDriver +// contract — no new proto surface — so the returned clients are +// proto.ResourceDriverClient values keyed by cluster type name. +type KubernetesBackendProvider interface { + KubernetesBackendClients() (map[string]proto.ResourceDriverClient, error) +} From 65f319c3345ef8158d8f88ee2310a8e7d9ad0ff4 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Thu, 14 May 2026 19:37:05 -0400 Subject: [PATCH 3/4] fix: pin config_json write-key contract to snake_case (code-review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the code-reviewer findings on the grpcKubernetesBackend adapter: - Critical: injectCredentials wrote camelCase config_json keys (projectId/serviceAccountJSON) but the deleted in-core gkeBackend and buildResourceSpec's own doc-comment use snake_case. Resolved credentials now land under project_id/service_account_json — the keys workflow-plugin-gcp's GKEDriver (Task 22) actually reads. - Important: pinned the config_json WRITE-key contract as k8sConfigKey* consts, symmetric with the already-pinned outputs_json READ-key consts (k8sOutputKey*). Per ADR 0037 the host adapter owns both halves of the key contract. - Important: dropped the "1.29" GKE-version default from the host adapter — version defaulting is GKE-domain knowledge that belongs in the plugin's GKEDriver, not this generic adapter. The user-supplied version flows through verbatim. Added a test asserting resolved credentials use the pinned snake_case keys and guarding against a camelCase regression. Co-Authored-By: Claude Opus 4.7 --- module/platform_kubernetes_grpc.go | 49 +++++++++++++++++-------- module/platform_kubernetes_grpc_test.go | 45 +++++++++++++++++++++++ 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/module/platform_kubernetes_grpc.go b/module/platform_kubernetes_grpc.go index 61a41c04..13f97d20 100644 --- a/module/platform_kubernetes_grpc.go +++ b/module/platform_kubernetes_grpc.go @@ -30,10 +30,11 @@ import ( // Option 1 dispatches the in-core `gke` kubernetesBackend to this driver. const gkeResourceType = "infra.k8s_cluster" -// KubernetesClusterState outputs_json key contract. ADR 0037 makes the host -// adapter the owner of these key names; workflow-plugin-gcp's GKEDriver.Read -// (Task 22) conforms its output to them. Keys mirror the KubernetesClusterState -// JSON field tags so the projection is a direct re-marshal. +// KubernetesClusterState outputs_json key contract — the READ side. ADR 0037 +// makes the host adapter the owner of these key names; workflow-plugin-gcp's +// GKEDriver.Read (Task 22) conforms its output to them. Keys mirror the +// KubernetesClusterState JSON field tags so the projection is a direct +// re-marshal. const ( k8sOutputKeyStatus = "status" k8sOutputKeyEndpoint = "endpoint" @@ -41,6 +42,19 @@ const ( k8sOutputKeyNodeGroups = "nodeGroups" ) +// ResourceSpec.config_json key contract — the WRITE side. These are the keys +// buildResourceSpec injects into the spec config_json that workflow-plugin-gcp's +// GKEDriver (Task 22) reads to resolve project + credentials. snake_case to +// match the deleted in-core gkeBackend's config keys (it read +// k.config["project_id"]) — see ADR 0037: "the host adapter define the key +// contract and Task 22 conform." The user-supplied platform.kubernetes config +// (e.g. `version`, `zone`, `nodeGroups`) is copied through verbatim; only these +// resolved-credential keys are host-adapter-owned. +const ( + k8sConfigKeyProjectID = "project_id" + k8sConfigKeyServiceAccountJSON = "service_account_json" //nolint:gosec // G101: config map key name, not a credential +) + // grpcKubernetesBackend adapts a pb.ResourceDriverClient (resource type // infra.k8s_cluster) to the in-core kubernetesBackend interface, so a // plugin-served `gke` backend is dispatched exactly like the deleted in-core @@ -176,18 +190,19 @@ func (b *grpcKubernetesBackend) buildResourceRef(k *PlatformKubernetes) *pb.Reso } } -// buildResourceSpec builds the ResourceSpec for a Create RPC: the -// platform.kubernetes module config carried through as config_json (the plugin -// GKEDriver reads project_id/location/zone/version/nodeGroups from it), plus -// resolved cloud credentials serialized in when a cloud account is wired. +// buildResourceSpec builds the ResourceSpec for a Create RPC. The user-supplied +// platform.kubernetes module config is carried through as config_json verbatim +// (the plugin GKEDriver reads location/zone/version/nodeGroups from it — those +// keys stay exactly as the user authored them); buildResourceSpec then folds in +// the host-adapter-owned resolved-credential keys (k8sConfigKeyProjectID / +// k8sConfigKeyServiceAccountJSON) when a cloud account is wired. No GKE-version +// default is injected here — version defaulting is GKE-domain knowledge that +// belongs in the plugin's GKEDriver, not this generic host adapter. func (b *grpcKubernetesBackend) buildResourceSpec(k *PlatformKubernetes) (*pb.ResourceSpec, error) { cfg := make(map[string]any, len(k.config)+2) for key, val := range k.config { cfg[key] = val } - if v, _ := cfg["version"].(string); v == "" { - cfg["version"] = "1.29" - } if err := b.injectCredentials(k, cfg); err != nil { return nil, err } @@ -203,9 +218,11 @@ func (b *grpcKubernetesBackend) buildResourceSpec(k *PlatformKubernetes) (*pb.Re } // injectCredentials resolves the cloud account (when one is wired) and folds the -// GCP project ID + service-account JSON into the spec config — the cross-process -// equivalent of the in-core containerService building the SDK client from -// CloudCredentials.ServiceAccountJSON. +// GCP project ID + service-account JSON into the spec config under the pinned +// k8sConfigKey* names — the cross-process equivalent of the in-core +// containerService building the SDK client from CloudCredentials. The key names +// (snake_case) match the deleted in-core gkeBackend's config keys and are the +// contract workflow-plugin-gcp's GKEDriver (Task 22) reads. func (b *grpcKubernetesBackend) injectCredentials(k *PlatformKubernetes, cfg map[string]any) error { if k.provider == nil { return nil @@ -218,10 +235,10 @@ func (b *grpcKubernetesBackend) injectCredentials(k *PlatformKubernetes, cfg map return nil } if creds.ProjectID != "" { - cfg["projectId"] = creds.ProjectID + cfg[k8sConfigKeyProjectID] = creds.ProjectID } if len(creds.ServiceAccountJSON) > 0 { - cfg["serviceAccountJSON"] = string(creds.ServiceAccountJSON) + cfg[k8sConfigKeyServiceAccountJSON] = string(creds.ServiceAccountJSON) } return nil } diff --git a/module/platform_kubernetes_grpc_test.go b/module/platform_kubernetes_grpc_test.go index e70e8358..ac9f3184 100644 --- a/module/platform_kubernetes_grpc_test.go +++ b/module/platform_kubernetes_grpc_test.go @@ -112,6 +112,16 @@ func TestGRPCKubernetesBackend_Plan(t *testing.T) { }) } +// fakeCredProvider is a minimal CloudCredentialProvider for exercising the +// credential-injection path of buildResourceSpec. +type fakeCredProvider struct{ creds *CloudCredentials } + +func (f *fakeCredProvider) Provider() string { return "gcp" } +func (f *fakeCredProvider) Region() string { return "us-central1" } +func (f *fakeCredProvider) GetCredentials(context.Context) (*CloudCredentials, error) { + return f.creds, nil +} + func TestGRPCKubernetesBackend_Apply(t *testing.T) { t.Run("create success", func(t *testing.T) { fake := &fakeResourceDriverClient{createResp: &pb.ResourceCreateResponse{ @@ -137,6 +147,41 @@ func TestGRPCKubernetesBackend_Apply(t *testing.T) { } }) + t.Run("resolved credentials use the pinned snake_case config keys", func(t *testing.T) { + fake := &fakeResourceDriverClient{createResp: &pb.ResourceCreateResponse{ + Output: &pb.ResourceOutput{Name: "my-cluster", Type: gkeResourceType, Status: "creating"}, + }} + b := newGRPCKubernetesBackend(fake) + m := newGKETestModule() + m.provider = &fakeCredProvider{creds: &CloudCredentials{ + Provider: "gcp", + ProjectID: "my-gcp-project", + ServiceAccountJSON: []byte(`{"type":"service_account"}`), + }} + if _, err := b.apply(m); err != nil { + t.Fatalf("apply: %v", err) + } + cfg, err := jsonBytesToMap(fake.createReq.GetSpec().GetConfigJson()) + if err != nil { + t.Fatalf("config_json decode: %v", err) + } + // The host-adapter-owned credential keys are snake_case — the contract + // workflow-plugin-gcp's GKEDriver (Task 22) reads. + if cfg[k8sConfigKeyProjectID] != "my-gcp-project" { + t.Errorf("config_json[%q] = %v, want my-gcp-project", k8sConfigKeyProjectID, cfg[k8sConfigKeyProjectID]) + } + if cfg[k8sConfigKeyServiceAccountJSON] != `{"type":"service_account"}` { + t.Errorf("config_json[%q] = %v, want the service-account JSON", k8sConfigKeyServiceAccountJSON, cfg[k8sConfigKeyServiceAccountJSON]) + } + // Guard against a camelCase regression — the GKEDriver reads snake_case. + if _, bad := cfg["projectId"]; bad { + t.Error("config_json must not use camelCase 'projectId' — the GKEDriver reads snake_case 'project_id'") + } + if _, bad := cfg["serviceAccountJSON"]; bad { + t.Error("config_json must not use camelCase 'serviceAccountJSON' — the GKEDriver reads snake_case 'service_account_json'") + } + }) + t.Run("already exists resolves to success", func(t *testing.T) { fake := &fakeResourceDriverClient{createErr: status.Error(codes.AlreadyExists, "exists")} b := newGRPCKubernetesBackend(fake) From b94bea7166cdd48fa64e94ddb416f7d352e3754d Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Fri, 15 May 2026 01:29:14 -0400 Subject: [PATCH 4/4] fix: Task 25/26 Copilot findings on PR 9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5 substantive Copilot comments on module/platform_kubernetes_grpc.go, all addressed: 1. injectCredentials precedence — module config now wins; cloud-account creds only fill missing project_id / service_account_json keys. Mirrors the in-core gkeBackend's config-first precedence. 2. buildResourceRef.ProviderId — now populated with the fully-qualified GKE path projects//locations//clusters/ (GKE cluster names are not globally unique). gkeProject / gkeLocation helpers port the in-core resolution: config-first, cloud-account fallback. 3. kubernetesClusterStateFromOutput — call sites now pass k.name (the module name) instead of k.clusterName(); matches the in-core PlatformKubernetes.Init semantics where state.Name = m.name and is stable when clusterName differs. 4. TestPlatformKubernetes_GKEWithoutPluginErrors — replaced the nondeterministic t.Skip with deterministic mutex-guarded clear-and-restore of the registry entry. Added tests locking each new behavior (module-config-first precedence, fully-qualified ProviderId in all 3 resolution paths, state.Name=module name when clusterName differs). PR description (item 5) updated separately to match the implementation: plan→Read / apply→Create-only per ADR 0037. Co-Authored-By: Claude Opus 4.7 --- module/platform_kubernetes_grpc.go | 87 +++++++++++---- module/platform_kubernetes_grpc_test.go | 105 ++++++++++++++++++ ...latform_kubernetes_plugin_registry_test.go | 22 +++- 3 files changed, 191 insertions(+), 23 deletions(-) diff --git a/module/platform_kubernetes_grpc.go b/module/platform_kubernetes_grpc.go index 13f97d20..b845afd3 100644 --- a/module/platform_kubernetes_grpc.go +++ b/module/platform_kubernetes_grpc.go @@ -131,7 +131,7 @@ func (b *grpcKubernetesBackend) apply(k *PlatformKubernetes) (*PlatformResult, e } return nil, fmt.Errorf("gke apply: Create %q: %w", k.clusterName(), err) } - clusterState, err := kubernetesClusterStateFromOutput(k.clusterName(), resp.GetOutput()) + clusterState, err := kubernetesClusterStateFromOutput(k.name, resp.GetOutput()) if err != nil { return nil, fmt.Errorf("gke apply: %w", err) } @@ -153,11 +153,11 @@ func (b *grpcKubernetesBackend) status(k *PlatformKubernetes) (*KubernetesCluste }) if err != nil { if status.Code(err) == codes.NotFound { - return &KubernetesClusterState{Name: k.clusterName(), Provider: "gke", Status: "not-found"}, nil + return &KubernetesClusterState{Name: k.name, Provider: "gke", Status: "not-found"}, nil } return nil, fmt.Errorf("gke status: Read %q: %w", k.clusterName(), err) } - st, err := kubernetesClusterStateFromOutput(k.clusterName(), resp.GetOutput()) + st, err := kubernetesClusterStateFromOutput(k.name, resp.GetOutput()) if err != nil { return nil, fmt.Errorf("gke status: %w", err) } @@ -182,12 +182,53 @@ func (b *grpcKubernetesBackend) destroy(k *PlatformKubernetes) error { } // buildResourceRef builds the ResourceRef the ResourceDriver RPCs address the -// cluster by — keyed on the cluster name and the infra.k8s_cluster type. +// cluster by. ProviderID is the fully-qualified GKE resource path +// `projects//locations//clusters/` when both project +// and location are resolvable — GKE cluster names alone are not globally +// unique, and the in-core gkeBackend addressed clusters by this same FQN. +// When project or location is unresolvable (e.g. a plan probe before any +// cloud-account wiring), ProviderID is left empty. func (b *grpcKubernetesBackend) buildResourceRef(k *PlatformKubernetes) *pb.ResourceRef { - return &pb.ResourceRef{ + ref := &pb.ResourceRef{ Name: k.clusterName(), Type: gkeResourceType, } + project, location := b.gkeProject(k), b.gkeLocation(k) + if project != "" && location != "" { + ref.ProviderId = fmt.Sprintf("projects/%s/locations/%s/clusters/%s", project, location, k.clusterName()) + } + return ref +} + +// gkeProject resolves the GCP project ID with module-config-first precedence: +// k.config["project_id"] wins; falls back to the cloud account's ProjectID. +// Mirrors the in-core gkeBackend.gkeProject helper. +func (b *grpcKubernetesBackend) gkeProject(k *PlatformKubernetes) string { + if p, ok := k.config[k8sConfigKeyProjectID].(string); ok && p != "" { + return p + } + if k.provider != nil { + if creds, err := k.provider.GetCredentials(context.Background()); err == nil && creds != nil && creds.ProjectID != "" { + return creds.ProjectID + } + } + return "" +} + +// gkeLocation resolves the GKE location (zone preferred, then region) with +// module-config-first precedence. Mirrors the in-core gkeBackend.gkeLocation +// helper. +func (b *grpcKubernetesBackend) gkeLocation(k *PlatformKubernetes) string { + if z, ok := k.config["zone"].(string); ok && z != "" { + return z + } + if l, ok := k.config["location"].(string); ok && l != "" { + return l + } + if k.provider != nil { + return k.provider.Region() + } + return "" } // buildResourceSpec builds the ResourceSpec for a Create RPC. The user-supplied @@ -217,12 +258,15 @@ func (b *grpcKubernetesBackend) buildResourceSpec(k *PlatformKubernetes) (*pb.Re }, nil } -// injectCredentials resolves the cloud account (when one is wired) and folds the -// GCP project ID + service-account JSON into the spec config under the pinned -// k8sConfigKey* names — the cross-process equivalent of the in-core -// containerService building the SDK client from CloudCredentials. The key names -// (snake_case) match the deleted in-core gkeBackend's config keys and are the -// contract workflow-plugin-gcp's GKEDriver (Task 22) reads. +// injectCredentials resolves the cloud account (when one is wired) and folds +// the GCP project ID + service-account JSON into the spec config under the +// pinned k8sConfigKey* names — the cross-process equivalent of the in-core +// containerService building the SDK client from CloudCredentials. +// +// Precedence is module-config-first, mirroring the in-core gkeBackend: +// explicit project_id / service_account_json in k.config win, and the cloud +// account only fills the gaps. This keeps the user's escape hatch (e.g. +// per-module credential overrides) functional. func (b *grpcKubernetesBackend) injectCredentials(k *PlatformKubernetes, cfg map[string]any) error { if k.provider == nil { return nil @@ -234,22 +278,25 @@ func (b *grpcKubernetesBackend) injectCredentials(k *PlatformKubernetes, cfg map if creds == nil { return nil } - if creds.ProjectID != "" { + if _, present := cfg[k8sConfigKeyProjectID]; !present && creds.ProjectID != "" { cfg[k8sConfigKeyProjectID] = creds.ProjectID } - if len(creds.ServiceAccountJSON) > 0 { + if _, present := cfg[k8sConfigKeyServiceAccountJSON]; !present && len(creds.ServiceAccountJSON) > 0 { cfg[k8sConfigKeyServiceAccountJSON] = string(creds.ServiceAccountJSON) } return nil } -// kubernetesClusterStateFromOutput projects a ResourceDriver ResourceOutput onto -// the typed KubernetesClusterState. The free-form outputs_json map crosses the -// wire as JSON bytes (the iac.proto invariant); this is the host-owned -// map→struct projection ADR 0037 assigns to Tasks 25/26. The adapter sets -// Provider="gke" itself and tolerates a missing/empty outputs_json. -func kubernetesClusterStateFromOutput(name string, out *pb.ResourceOutput) (*KubernetesClusterState, error) { - st := &KubernetesClusterState{Name: name, Provider: "gke", Status: "not-found"} +// kubernetesClusterStateFromOutput projects a ResourceDriver ResourceOutput +// onto the typed KubernetesClusterState. The free-form outputs_json map +// crosses the wire as JSON bytes (the iac.proto invariant); this is the +// host-owned map→struct projection ADR 0037 assigns to Tasks 25/26. The +// adapter sets Provider="gke" itself and tolerates a missing/empty +// outputs_json. `moduleName` is the platform.kubernetes module name (NOT the +// `clusterName` config override) — it matches the in-core +// PlatformKubernetes.Init semantics where `state.Name = m.name`. +func kubernetesClusterStateFromOutput(moduleName string, out *pb.ResourceOutput) (*KubernetesClusterState, error) { + st := &KubernetesClusterState{Name: moduleName, Provider: "gke", Status: "not-found"} if out == nil { return st, nil } diff --git a/module/platform_kubernetes_grpc_test.go b/module/platform_kubernetes_grpc_test.go index ac9f3184..c806c5c2 100644 --- a/module/platform_kubernetes_grpc_test.go +++ b/module/platform_kubernetes_grpc_test.go @@ -70,6 +70,78 @@ func newGKETestModule() *PlatformKubernetes { }) } +// TestGRPCKubernetesBackend_BuildResourceRef_FullyQualifiedProviderID locks the +// fully-qualified GKE resource path the adapter must put in ResourceRef.ProviderId +// (GKE cluster names alone are not globally unique). +func TestGRPCKubernetesBackend_BuildResourceRef_FullyQualifiedProviderID(t *testing.T) { + b := newGRPCKubernetesBackend(&fakeResourceDriverClient{}) + + t.Run("project + zone from module config", func(t *testing.T) { + m := NewPlatformKubernetes("mod-name", map[string]any{ + "type": "gke", + "clusterName": "my-cluster", + "project_id": "p", + "zone": "us-central1-a", + }) + ref := b.buildResourceRef(m) + want := "projects/p/locations/us-central1-a/clusters/my-cluster" + if ref.GetProviderId() != want { + t.Errorf("ProviderId = %q, want %q", ref.GetProviderId(), want) + } + if ref.GetName() != "my-cluster" || ref.GetType() != gkeResourceType { + t.Errorf("ref name/type mismatch: name=%q type=%q", ref.GetName(), ref.GetType()) + } + }) + + t.Run("project + location from cloud account fallback", func(t *testing.T) { + m := NewPlatformKubernetes("mod-name", map[string]any{ + "type": "gke", "clusterName": "my-cluster", + }) + // fakeCredProvider.Region() returns "us-central1" — the cloud account + // is the fallback when module config omits both zone and location. + m.provider = &fakeCredProvider{creds: &CloudCredentials{ + Provider: "gcp", ProjectID: "creds-project", + }} + ref := b.buildResourceRef(m) + want := "projects/creds-project/locations/us-central1/clusters/my-cluster" + if ref.GetProviderId() != want { + t.Errorf("ProviderId = %q, want %q", ref.GetProviderId(), want) + } + }) + + t.Run("no project or location → empty ProviderId", func(t *testing.T) { + m := NewPlatformKubernetes("mod-name", map[string]any{ + "type": "gke", "clusterName": "my-cluster", + }) + ref := b.buildResourceRef(m) + if ref.GetProviderId() != "" { + t.Errorf("ProviderId = %q, want empty when project/location unresolvable", ref.GetProviderId()) + } + }) +} + +// TestGRPCKubernetesBackend_Status_StateNameIsModuleName locks the in-core +// PlatformKubernetes.Init semantics: KubernetesClusterState.Name is the +// module name, NOT the `clusterName` config override. +func TestGRPCKubernetesBackend_Status_StateNameIsModuleName(t *testing.T) { + fake := &fakeResourceDriverClient{readResp: &pb.ResourceReadResponse{ + Output: &pb.ResourceOutput{Name: "cluster-x", Type: gkeResourceType, Status: "running"}, + }} + b := newGRPCKubernetesBackend(fake) + // module name "iac-gke" intentionally differs from clusterName "cluster-x". + m := NewPlatformKubernetes("iac-gke", map[string]any{ + "type": "gke", + "clusterName": "cluster-x", + }) + st, err := b.status(m) + if err != nil { + t.Fatalf("status: %v", err) + } + if st.Name != "iac-gke" { + t.Errorf("state.Name = %q, want module name %q (per PlatformKubernetes.Init semantics)", st.Name, "iac-gke") + } +} + func TestGRPCKubernetesBackend_Plan(t *testing.T) { t.Run("not found → create action", func(t *testing.T) { fake := &fakeResourceDriverClient{readErr: status.Error(codes.NotFound, "no such cluster")} @@ -147,6 +219,39 @@ func TestGRPCKubernetesBackend_Apply(t *testing.T) { } }) + t.Run("module config takes precedence over cloud account credentials", func(t *testing.T) { + // In-core gkeBackend honored explicit module-config keys over the + // cloud-account fallback; the adapter must preserve that precedence. + fake := &fakeResourceDriverClient{createResp: &pb.ResourceCreateResponse{ + Output: &pb.ResourceOutput{Name: "my-cluster", Type: gkeResourceType, Status: "creating"}, + }} + b := newGRPCKubernetesBackend(fake) + m := NewPlatformKubernetes("my-cluster", map[string]any{ + "type": "gke", + "clusterName": "my-cluster", + "project_id": "config-project", + "service_account_json": `{"type":"from-config"}`, + }) + m.provider = &fakeCredProvider{creds: &CloudCredentials{ + Provider: "gcp", + ProjectID: "creds-project", + ServiceAccountJSON: []byte(`{"type":"from-creds"}`), + }} + if _, err := b.apply(m); err != nil { + t.Fatalf("apply: %v", err) + } + cfg, err := jsonBytesToMap(fake.createReq.GetSpec().GetConfigJson()) + if err != nil { + t.Fatalf("decode: %v", err) + } + if cfg[k8sConfigKeyProjectID] != "config-project" { + t.Errorf("project_id = %v, want config-project (module config must win over cloud account)", cfg[k8sConfigKeyProjectID]) + } + if cfg[k8sConfigKeyServiceAccountJSON] != `{"type":"from-config"}` { + t.Errorf("service_account_json = %v, want from-config (module config must win over cloud account)", cfg[k8sConfigKeyServiceAccountJSON]) + } + }) + t.Run("resolved credentials use the pinned snake_case config keys", func(t *testing.T) { fake := &fakeResourceDriverClient{createResp: &pb.ResourceCreateResponse{ Output: &pb.ResourceOutput{Name: "my-cluster", Type: gkeResourceType, Status: "creating"}, diff --git a/module/platform_kubernetes_plugin_registry_test.go b/module/platform_kubernetes_plugin_registry_test.go index 801d9cea..d59bcc16 100644 --- a/module/platform_kubernetes_plugin_registry_test.go +++ b/module/platform_kubernetes_plugin_registry_test.go @@ -98,9 +98,25 @@ func TestPlatformKubernetes_GKEDispatchToPluginClient(t *testing.T) { // no plugin client registered fails Init with a clean error pointing the // operator at workflow-plugin-gcp. func TestPlatformKubernetes_GKEWithoutPluginErrors(t *testing.T) { - if _, ok := kubernetesBackendClientRegistryInstance.resolve("gke"); ok { - t.Skip("a gke client is registered by a concurrent test; skipping the negative case") - } + const clusterType = "gke" + // Clear any registration left by a sibling test (Go test order within a + // package is not guaranteed across files), then restore on cleanup. Doing + // this under the registry mutex keeps the test deterministic instead of + // skipping when a concurrent registration is present. + kubernetesBackendClientRegistryInstance.mu.Lock() + prev, hadPrev := kubernetesBackendClientRegistryInstance.clients[clusterType] + delete(kubernetesBackendClientRegistryInstance.clients, clusterType) + kubernetesBackendClientRegistryInstance.mu.Unlock() + defer func() { + kubernetesBackendClientRegistryInstance.mu.Lock() + if hadPrev { + kubernetesBackendClientRegistryInstance.clients[clusterType] = prev + } else { + delete(kubernetesBackendClientRegistryInstance.clients, clusterType) + } + kubernetesBackendClientRegistryInstance.mu.Unlock() + }() + m := NewPlatformKubernetes("gke-cluster", map[string]any{"type": "gke"}) err := m.Init(NewMockApplication()) if err == nil {