From 900f80dbf40fa1b8a7ee6c1fe10670213f2d5cd3 Mon Sep 17 00:00:00 2001 From: alz Date: Fri, 24 Apr 2026 11:07:56 +0300 Subject: [PATCH] Fix secret-backed env upgrade rollout --- pkg/controller/chi/worker-reconciler-chi.go | 26 +++++++-- .../chi/test-011-4-secrets-upgrade-1.yaml | 16 ++++++ .../chi/test-011-4-secrets-upgrade-2.yaml | 33 +++++++++++ .../manifests/secret/test-011-4-secret.yaml | 7 +++ tests/e2e/test_operator.py | 56 +++++++++++++++++++ 5 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 tests/e2e/manifests/chi/test-011-4-secrets-upgrade-1.yaml create mode 100644 tests/e2e/manifests/chi/test-011-4-secrets-upgrade-2.yaml create mode 100644 tests/e2e/manifests/secret/test-011-4-secret.yaml diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go index 2551a9ee3..b8ead9259 100644 --- a/pkg/controller/chi/worker-reconciler-chi.go +++ b/pkg/controller/chi/worker-reconciler-chi.go @@ -21,6 +21,7 @@ import ( "time" meta "k8s.io/apimachinery/pkg/apis/meta/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" log "github.com/altinity/clickhouse-operator/pkg/announcer" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" @@ -406,15 +407,19 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o w.a.V(1).M(host).F().Info("Reconcile host STS: %s. 
App version: %s", host.GetName(), host.Runtime.Version.Render()) + w.stsReconciler.PrepareHostStatefulSetWithStatus(ctx, host, host.IsStopped()) + opts = w.prepareStsReconcileOptsWaitSection(host, opts) + // Start with force-restart host if w.shouldForceRestartHost(ctx, host) { - w.a.V(1).M(host).F().Info("Reconcile host STS force restart: %s", host.GetName()) - _ = w.hostForceRestart(ctx, host, opts) + if w.hostRequiresStatefulSetRollout(host) { + w.a.V(1).M(host).F().Info("Reconcile host STS skip software restart and roll out StatefulSet: %s", host.GetName()) + } else { + w.a.V(1).M(host).F().Info("Reconcile host STS force restart: %s", host.GetName()) + _ = w.hostForceRestart(ctx, host, opts) + } } - w.stsReconciler.PrepareHostStatefulSetWithStatus(ctx, host, host.IsStopped()) - opts = w.prepareStsReconcileOptsWaitSection(host, opts) - // We are in place, where we can reconcile StatefulSet to desired configuration. w.a.V(1).M(host).F().Info("Reconcile host STS: %s. Reconcile StatefulSet", host.GetName()) err := w.stsReconciler.ReconcileStatefulSet(ctx, host, true, opts) @@ -438,6 +443,17 @@ func (w *worker) reconcileHostStatefulSet(ctx context.Context, host *api.Host, o return err } +func (w *worker) hostRequiresStatefulSetRollout(host *api.Host) bool { + cur := host.Runtime.CurStatefulSet + desired := host.Runtime.DesiredStatefulSet + + if cur == nil || desired == nil { + return true + } + + return !apiequality.Semantic.DeepEqual(cur.Spec.Template, desired.Spec.Template) +} + func (w *worker) hostForceRestart(ctx context.Context, host *api.Host, opts *statefulset.ReconcileOptions) error { w.a.V(1).M(host).F().Info("Reconcile host. 
Force restart: %s", host.GetName()) diff --git a/tests/e2e/manifests/chi/test-011-4-secrets-upgrade-1.yaml b/tests/e2e/manifests/chi/test-011-4-secrets-upgrade-1.yaml new file mode 100644 index 000000000..ce560ef0c --- /dev/null +++ b/tests/e2e/manifests/chi/test-011-4-secrets-upgrade-1.yaml @@ -0,0 +1,16 @@ +apiVersion: clickhouse.altinity.com/v1 +kind: ClickHouseInstallation +metadata: + name: test-011-4-secrets-upgrade +spec: + useTemplates: + - name: clickhouse-version + defaults: + templates: + podTemplate: default + configuration: + clusters: + - name: default + layout: + shardsCount: 1 + replicasCount: 1 diff --git a/tests/e2e/manifests/chi/test-011-4-secrets-upgrade-2.yaml b/tests/e2e/manifests/chi/test-011-4-secrets-upgrade-2.yaml new file mode 100644 index 000000000..740aafd33 --- /dev/null +++ b/tests/e2e/manifests/chi/test-011-4-secrets-upgrade-2.yaml @@ -0,0 +1,33 @@ +apiVersion: clickhouse.altinity.com/v1 +kind: ClickHouseInstallation +metadata: + name: test-011-4-secrets-upgrade +spec: + useTemplates: + - name: clickhouse-version + templates: + podTemplates: + - name: default + spec: + containers: + - name: clickhouse-pod + env: + - name: TEST_010011_4_MARK_CACHE_SIZE + valueFrom: + secretKeyRef: + name: test-011-4-secret + key: mark_cache_size + defaults: + templates: + podTemplate: default + configuration: + files: + config.d/test-010011-4-secret.xml: | + <clickhouse> + <mark_cache_size from_env="TEST_010011_4_MARK_CACHE_SIZE"/> + </clickhouse> + clusters: + - name: default + layout: + shardsCount: 1 + replicasCount: 1 diff --git a/tests/e2e/manifests/secret/test-011-4-secret.yaml b/tests/e2e/manifests/secret/test-011-4-secret.yaml new file mode 100644 index 000000000..84e7706c7 --- /dev/null +++ b/tests/e2e/manifests/secret/test-011-4-secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: test-011-4-secret +type: Opaque +stringData: + mark_cache_size: "10485760" diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index ed6e97e62..bc5db7b5d 100644 --- a/tests/e2e/test_operator.py +++ 
b/tests/e2e/test_operator.py @@ -1085,6 +1085,62 @@ def test_010011_3(self): delete_test_namespace() +@TestScenario +@Name("test_010011_4. Test secret-backed env rollout during upgrade") +@Requirements(RQ_SRS_026_ClickHouseOperator_Secrets("1.0")) +def test_010011_4(self): + create_shell_namespace_clickhouse_template() + + with Given("a single-node ClickHouseInstallation with no secret-backed env or settings"): + kubectl.apply( + util.get_full_path("manifests/secret/test-011-4-secret.yaml"), + ) + + kubectl.create_and_check( + manifest="manifests/chi/test-011-4-secrets-upgrade-1.yaml", + check={ + "pod_count": 1, + "do_not_delete": 1, + }, + ) + + chi = "test-011-4-secrets-upgrade" + pod = f"chi-{chi}-default-0-0-0" + + with When("the CHI is updated to add a secret-backed env var and a server setting that reads it via from_env"): + kubectl.create_and_check( + manifest="manifests/chi/test-011-4-secrets-upgrade-2.yaml", + check={ + "chi_status": "InProgress", + "do_not_delete": 1, + }, + ) + + with Then("the pod should not enter CrashLoopBackOff while the CHI is reconciling"): + chi_status = "" + container_status = "" + for i in range(75): + chi_status = kubectl.get_field("chi", chi, ".status.status") + if chi_status in ("Aborted", "Completed"): + break + container_status = kubectl.get_field("pod", pod, ".status.containerStatuses[0].state.waiting.reason") + print(f"{chi} status={chi_status} pod={pod} waiting_reason={container_status}") + assert container_status not in ["CrashLoopBackOff", "Error"], error( + f"{pod} entered {container_status} during secret-backed env rollout" + ) + + time.sleep(5) + + assert chi_status == "Completed", error(f"{chi} did not complete successfully") + + with And("the secret-backed setting should be applied successfully"): + out = clickhouse.query(chi, "select value from system.server_settings where name = 'mark_cache_size'") + assert out == "10485760" + + with Finally("I clean up"): + delete_test_namespace() + + @TestScenario 
@Name("test_010012. Test service templates") @Requirements(