diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index a82dc3e..05758ee 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -1,11 +1,6 @@ -name: Update Changelog +name: Update Changelog (disabled) on: - push: - branches: [main] - paths-ignore: - - 'CHANGELOG.md' - - '.github/**' workflow_dispatch: inputs: since_tag: diff --git a/.github/workflows/publish-oci.yml b/.github/workflows/publish-oci.yml index 20c8faf..2ddf08a 100644 --- a/.github/workflows/publish-oci.yml +++ b/.github/workflows/publish-oci.yml @@ -30,6 +30,15 @@ jobs: with: version: v3.17.0 + - name: Build chart dependencies + run: | + for chart in charts/*/; do + if grep -q '^dependencies:' "${chart}Chart.yaml" 2>/dev/null; then + echo "Building dependencies for $(basename "${chart}")..." + helm dependency build "${chart}" + fi + done + - name: Lint all charts run: | for chart in charts/*/; do @@ -68,6 +77,15 @@ jobs: echo "${GITHUB_TOKEN}" | helm registry login ${REGISTRY} -u "${GH_ACTOR}" --password-stdin echo "${GITHUB_TOKEN}" | cosign login ${REGISTRY} -u "${GH_ACTOR}" --password-stdin + - name: Build chart dependencies + run: | + for chart in charts/*/; do + if grep -q '^dependencies:' "${chart}Chart.yaml" 2>/dev/null; then + echo "Building dependencies for $(basename "${chart}")..." + helm dependency build "${chart}" + fi + done + - name: Package, push, sign, and attach SBOM env: RELEASE_TAG: ${{ github.event.release.tag_name }} diff --git a/.github/workflows/validate-charts.yml b/.github/workflows/validate-charts.yml index a05ea7c..9a64848 100644 --- a/.github/workflows/validate-charts.yml +++ b/.github/workflows/validate-charts.yml @@ -33,6 +33,15 @@ jobs: with: version: v3.17.0 + - name: Build chart dependencies + run: | + for chart in charts/*/; do + if grep -q '^dependencies:' "${chart}Chart.yaml" 2>/dev/null; then + echo "Building dependencies for $(basename "${chart}")..." 
+ helm dependency build "${chart}" + fi + done + - name: Lint all charts run: | exit_code=0 @@ -57,6 +66,15 @@ jobs: with: version: v3.17.0 + - name: Build chart dependencies + run: | + for chart in charts/*/; do + if grep -q '^dependencies:' "${chart}Chart.yaml" 2>/dev/null; then + echo "Building dependencies for $(basename "${chart}")..." + helm dependency build "${chart}" + fi + done + - name: Template render all charts run: | exit_code=0 @@ -90,6 +108,12 @@ jobs: --set users.metrics.password=test \ > /dev/null || exit_code=1 ;; + countly-migration) + helm template test-release "${chart}" \ + --set backingServices.mongodb.password=test \ + --set backingServices.clickhouse.password=test \ + > /dev/null || exit_code=1 + ;; *) helm template test-release "${chart}" > /dev/null || exit_code=1 ;; diff --git a/README.md b/README.md index a9f5a97..bda7fae 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Helm charts for deploying Countly analytics on Kubernetes. ## Architecture -Five charts, each in its own namespace: +Seven charts, each in its own namespace: | Chart | Namespace | Purpose | |-------|-----------|---------| @@ -13,6 +13,8 @@ Five charts, each in its own namespace: | `countly-clickhouse` | clickhouse | ClickHouse via ClickHouse Operator | | `countly-kafka` | kafka | Kafka via Strimzi Operator | | `countly-observability` | observability | Prometheus, Grafana, Loki, Tempo, Pyroscope | +| `countly-migration` | countly-migration | MongoDB to ClickHouse batch migration (with bundled Redis) | +| `countly-argocd` | argocd | ArgoCD app-of-apps (AppProject + Applications) | ### Architecture Overview @@ -46,6 +48,11 @@ flowchart TB mongo["MongoDB\n:27017"] end + subgraph mig-ns["countly-migration"] + migsvc["Migration Service\n:8080"] + redis["Redis\n:6379"] + end + subgraph obs-ns["observability"] prom["Prometheus"] grafana["Grafana"] @@ -66,6 +73,10 @@ flowchart TB brokers --> connect --> chserver keeper -.-> chserver + migsvc -->|read batches| mongo + migsvc 
-->|insert rows| chserver + migsvc <-.->|hot state| redis + alloy -.-> prom & loki & tempo & pyroscope prom & loki & tempo & pyroscope --> grafana ``` @@ -164,43 +175,69 @@ Install required operators before deploying Countly. See [docs/PREREQUISITES.md] ### Manual Installation (without Helmfile) +Substitute your profile choices from `global.yaml` into the commands below. +The value file order must match the layering: global → sizing → dimension profiles → security → environment → secrets. + ```bash +# Shorthand — substitute these from your environments//global.yaml +ENV=my-deployment +SIZING=local # local | small | production +SECURITY=open # open | hardened +TLS=selfSigned # none | selfSigned | letsencrypt | provided +OBS=full # disabled | full | external-grafana | external +KC=balanced # throughput | balanced | low-latency + helm install countly-mongodb ./charts/countly-mongodb -n mongodb --create-namespace \ --wait --timeout 10m \ - -f environments/my-deployment/global.yaml \ - -f profiles/sizing/production/mongodb.yaml \ - -f environments/my-deployment/mongodb.yaml \ - -f environments/my-deployment/secrets-mongodb.yaml + -f environments/$ENV/global.yaml \ + -f profiles/sizing/$SIZING/mongodb.yaml \ + -f profiles/security/$SECURITY/mongodb.yaml \ + -f environments/$ENV/mongodb.yaml \ + -f environments/$ENV/secrets-mongodb.yaml helm install countly-clickhouse ./charts/countly-clickhouse -n clickhouse --create-namespace \ --wait --timeout 10m \ - -f environments/my-deployment/global.yaml \ - -f profiles/sizing/production/clickhouse.yaml \ - -f environments/my-deployment/clickhouse.yaml \ - -f environments/my-deployment/secrets-clickhouse.yaml + -f environments/$ENV/global.yaml \ + -f profiles/sizing/$SIZING/clickhouse.yaml \ + -f profiles/security/$SECURITY/clickhouse.yaml \ + -f environments/$ENV/clickhouse.yaml \ + -f environments/$ENV/secrets-clickhouse.yaml helm install countly-kafka ./charts/countly-kafka -n kafka --create-namespace \ --wait --timeout 10m \ - 
-f environments/my-deployment/global.yaml \ - -f profiles/sizing/production/kafka.yaml \ - -f profiles/kafka-connect/balanced/kafka.yaml \ - -f environments/my-deployment/kafka.yaml \ - -f environments/my-deployment/secrets-kafka.yaml + -f environments/$ENV/global.yaml \ + -f profiles/sizing/$SIZING/kafka.yaml \ + -f profiles/kafka-connect/$KC/kafka.yaml \ + -f profiles/observability/$OBS/kafka.yaml \ + -f profiles/security/$SECURITY/kafka.yaml \ + -f environments/$ENV/kafka.yaml \ + -f environments/$ENV/secrets-kafka.yaml helm install countly ./charts/countly -n countly --create-namespace \ --wait --timeout 10m \ - -f environments/my-deployment/global.yaml \ - -f profiles/sizing/production/countly.yaml \ - -f profiles/tls/letsencrypt/countly.yaml \ - -f environments/my-deployment/countly.yaml \ - -f environments/my-deployment/secrets-countly.yaml + -f environments/$ENV/global.yaml \ + -f profiles/sizing/$SIZING/countly.yaml \ + -f profiles/tls/$TLS/countly.yaml \ + -f profiles/observability/$OBS/countly.yaml \ + -f profiles/security/$SECURITY/countly.yaml \ + -f environments/$ENV/countly.yaml \ + -f environments/$ENV/secrets-countly.yaml helm install countly-observability ./charts/countly-observability -n observability --create-namespace \ --wait --timeout 10m \ - -f environments/my-deployment/global.yaml \ - -f profiles/sizing/production/observability.yaml \ - -f profiles/observability/full/observability.yaml \ - -f environments/my-deployment/observability.yaml + -f environments/$ENV/global.yaml \ + -f profiles/sizing/$SIZING/observability.yaml \ + -f profiles/observability/$OBS/observability.yaml \ + -f profiles/security/$SECURITY/observability.yaml \ + -f environments/$ENV/observability.yaml \ + -f environments/$ENV/secrets-observability.yaml + +# Optional: MongoDB to ClickHouse batch migration (includes bundled Redis) +helm install countly-migration ./charts/countly-migration -n countly-migration --create-namespace \ + --wait --timeout 5m \ + -f 
environments/$ENV/global.yaml \ + -f environments/$ENV/migration.yaml \ + -f environments/$ENV/secrets-migration.yaml ``` ## Configuration Model @@ -247,6 +284,7 @@ Environments contain deployment-specific choices: - [VERIFICATION.md](docs/VERIFICATION.md) — Chart signature verification, SBOM, provenance - [TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) — Common issues and fixes - [VERSION-MATRIX.md](docs/VERSION-MATRIX.md) — Pinned operator and image versions +- [ARGOCD.md](docs/ARGOCD.md) — ArgoCD deployment, sync waves, custom health checks ## Repository Structure @@ -258,6 +296,8 @@ helm/ countly-clickhouse/ countly-kafka/ countly-observability/ + countly-migration/ + countly-argocd/ profiles/ # Composable profile dimensions sizing/ # local | small | production observability/ # disabled | full | external-grafana | external diff --git a/charts/countly-argocd/Chart.yaml b/charts/countly-argocd/Chart.yaml new file mode 100644 index 0000000..9ccc535 --- /dev/null +++ b/charts/countly-argocd/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: countly-argocd +description: ArgoCD app-of-apps for deploying Countly to one or more clusters +type: application +version: 0.1.0 +appVersion: "1.0.0" +home: https://countly.com +icon: https://count.ly/images/logos/countly-logo.svg +sources: + - https://github.com/Countly/countly-server +keywords: + - argocd + - gitops + - countly + - multi-cluster +maintainers: + - name: Countly + url: https://countly.com +annotations: + artifacthub.io/license: AGPL-3.0 diff --git a/charts/countly-argocd/README.md b/charts/countly-argocd/README.md new file mode 100644 index 0000000..dd6ffa8 --- /dev/null +++ b/charts/countly-argocd/README.md @@ -0,0 +1,154 @@ +# Countly ArgoCD Helm Chart + +App-of-apps pattern for deploying the full Countly stack to one or more Kubernetes clusters via ArgoCD. Creates an ArgoCD Project and one Application per component, each pointing to the appropriate chart and environment values. 
+ +**Chart version:** 0.1.0 +**App version:** 1.0.0 + +--- + +## Architecture + +```mermaid +flowchart TD + project["ArgoCD Project\n(per-customer isolation)"] + + project --> app_mongo["App: MongoDB\nnamespace: mongodb"] + project --> app_ch["App: ClickHouse\nnamespace: clickhouse"] + project --> app_kafka["App: Kafka\nnamespace: kafka"] + project --> app_countly["App: Countly\nnamespace: countly"] + project --> app_obs["App: Observability\nnamespace: observability"] + project --> app_mig["App: Migration\nnamespace: countly-migration"] +``` + +Each child Application: +- Points to a specific chart under `charts/` in the Git repo +- Pulls environment-specific values from `environments//` +- Has its own sync policy, retry, and self-heal configuration +- Deploys into an isolated namespace with `CreateNamespace=true` + +--- + +## Quick Start + +```bash +helm install countly-stack ./charts/countly-argocd \ + -n argocd \ + --set environment=my-production \ + --set destination.server="https://kubernetes.default.svc" +``` + +This creates an ArgoCD Project and 5-6 Applications (depending on which components are enabled). 
+ +--- + +## Prerequisites + +- **ArgoCD** installed and running on the management cluster +- **Git repository** accessible from ArgoCD (configured via `repoURL`) +- **Environment directory** at `environments//` with per-chart value overrides +- **Operators** pre-installed on target cluster: ClickHouse Operator, Strimzi, MongoDB Community Operator + +--- + +## Configuration + +### Component Toggles + +Each component can be independently enabled or disabled: + +```yaml +mongodb: + enabled: true + namespace: mongodb + +clickhouse: + enabled: true + namespace: clickhouse + +kafka: + enabled: true + namespace: kafka + +countly: + enabled: true + namespace: countly + +observability: + enabled: true + namespace: observability + +migration: + enabled: false # Opt-in: enable when migrating data + namespace: countly-migration +``` + +### Sync Policy + +```yaml +syncPolicy: + automated: true # Auto-sync on git push + selfHeal: true # Revert manual cluster changes + prune: true # Remove resources deleted from git + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m +``` + +### Multi-Cluster + +To deploy to a remote cluster, set `destination.server` to the target cluster's API endpoint (must be registered in ArgoCD): + +```yaml +destination: + server: "https://remote-cluster-api:6443" +``` + +See `examples/multi-cluster.yaml` for a complete example. 
+ +### Environment Structure + +The chart expects environment-specific values at: + +``` +environments// + countly.yaml + countly-clickhouse.yaml + countly-kafka.yaml + countly-mongodb.yaml + countly-observability.yaml + countly-migration.yaml # optional +``` + +--- + +## Configuration Reference + +| Key | Default | Description | +|-----|---------|-------------| +| `repoURL` | `https://github.com/Countly/helm.git` | Git repo URL for chart source | +| `targetRevision` | `main` | Git branch/tag/commit | +| `environment` | `example-production` | Environment name (maps to `environments//`) | +| `destination.server` | `https://kubernetes.default.svc` | Target cluster API server | +| `project` | `""` | ArgoCD project name (defaults to release name) | +| `mongodb.enabled` | `true` | Deploy MongoDB | +| `clickhouse.enabled` | `true` | Deploy ClickHouse | +| `kafka.enabled` | `true` | Deploy Kafka | +| `countly.enabled` | `true` | Deploy Countly application | +| `observability.enabled` | `true` | Deploy observability stack | +| `migration.enabled` | `false` | Deploy migration service | +| `syncPolicy.automated` | `true` | Enable auto-sync | +| `syncPolicy.selfHeal` | `true` | Enable self-heal | +| `syncPolicy.prune` | `true` | Enable resource pruning | + +--- + +## Examples + +See the `examples/` directory: + +- **`applicationset.yaml`** — ArgoCD ApplicationSet for multi-environment deployments +- **`multi-cluster.yaml`** — Deploy Countly across multiple clusters diff --git a/charts/countly-argocd/examples/applicationset.yaml b/charts/countly-argocd/examples/applicationset.yaml new file mode 100644 index 0000000..47e3ead --- /dev/null +++ b/charts/countly-argocd/examples/applicationset.yaml @@ -0,0 +1,142 @@ +# Alternative to the app-of-apps chart for 100+ customers. +# +# Instead of running `helm install` per customer, this single ApplicationSet +# generates all Applications from a list of customers. 
Add a new customer +# by adding an entry to the list — no new Helm release needed. +# +# Prerequisites: +# 1. ArgoCD ApplicationSet controller installed +# 2. Target clusters registered with ArgoCD +# 3. Environment directories exist per customer in the helm repo +# 4. Custom health checks in argocd-cm (see chart NOTES.txt) +# +# Apply: kubectl apply -f applicationset.yaml -n argocd +# Add customer: add entry to generators[].list.elements, re-apply + +# One ApplicationSet per component, each with the correct sync-wave. +# ArgoCD processes waves within a parent sync — use an app-of-apps +# root Application that points to a directory containing these files. + +--- +# Wave 0: MongoDB +apiVersion: argoproj.io/v1alpha1 +kind: ApplicationSet +metadata: + name: countly-mongodb + namespace: argocd +spec: + generators: + - list: + elements: + - customer: customer-a + server: https://cluster-a.example.com + sizing: production + security: hardened + - customer: customer-b + server: https://cluster-b.example.com + sizing: small + security: open + # Add more customers here... 
+ template: + metadata: + name: "{{customer}}-mongodb" + annotations: + argocd.argoproj.io/sync-wave: "0" + spec: + project: "{{customer}}" + source: + repoURL: https://github.com/Countly/helm.git + targetRevision: main + path: charts/countly-mongodb + helm: + releaseName: countly-mongodb + valueFiles: + - "../../environments/{{customer}}/global.yaml" + - "../../profiles/sizing/{{sizing}}/mongodb.yaml" + - "../../profiles/security/{{security}}/mongodb.yaml" + - "../../environments/{{customer}}/mongodb.yaml" + parameters: + - name: argocd.enabled + value: "true" + destination: + server: "{{server}}" + namespace: mongodb + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m + +--- +# Wave 0: ClickHouse (same pattern as MongoDB) +apiVersion: argoproj.io/v1alpha1 +kind: ApplicationSet +metadata: + name: countly-clickhouse + namespace: argocd +spec: + generators: + - list: + elements: + - customer: customer-a + server: https://cluster-a.example.com + sizing: production + security: hardened + - customer: customer-b + server: https://cluster-b.example.com + sizing: small + security: open + template: + metadata: + name: "{{customer}}-clickhouse" + annotations: + argocd.argoproj.io/sync-wave: "0" + spec: + project: "{{customer}}" + source: + repoURL: https://github.com/Countly/helm.git + targetRevision: main + path: charts/countly-clickhouse + helm: + releaseName: countly-clickhouse + valueFiles: + - "../../environments/{{customer}}/global.yaml" + - "../../profiles/sizing/{{sizing}}/clickhouse.yaml" + - "../../profiles/security/{{security}}/clickhouse.yaml" + - "../../environments/{{customer}}/clickhouse.yaml" + parameters: + - name: argocd.enabled + value: "true" + destination: + server: "{{server}}" + namespace: clickhouse + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + - 
ServerSideApply=true + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m + +# Repeat the same pattern for: +# - countly-kafka (wave 5) +# - countly (wave 10) +# - countly-observability (wave 15) +# - countly-migration (wave 10, optional) +# +# For a DRY approach, use a Matrix generator combining the customer list +# with a component list to generate all Applications from a single spec. diff --git a/charts/countly-argocd/examples/multi-cluster.yaml b/charts/countly-argocd/examples/multi-cluster.yaml new file mode 100644 index 0000000..f356ee2 --- /dev/null +++ b/charts/countly-argocd/examples/multi-cluster.yaml @@ -0,0 +1,49 @@ +# Example: Deploy Countly to two clusters from a single ArgoCD instance. +# +# Prerequisites: +# 1. ArgoCD installed on a management cluster +# 2. Target clusters registered with ArgoCD: +# argocd cluster add cluster-a-context +# argocd cluster add cluster-b-context +# 3. Environment directories exist: +# environments/customer-a/ (global.yaml, mongodb.yaml, etc.) +# environments/customer-b/ (global.yaml, mongodb.yaml, etc.) +# 4. 
Custom health checks configured in argocd-cm (see NOTES.txt) +# +# Deploy: +# helm install customer-a charts/countly-argocd -f examples/multi-cluster.yaml -n argocd +# helm install customer-b charts/countly-argocd -f examples/multi-cluster.yaml --set environment=customer-b -n argocd + +# --- Customer A: Production, large, with migrations --- +repoURL: "https://github.com/Countly/helm.git" +targetRevision: main +environment: customer-a +project: countly-customer-a + +destination: + server: "https://cluster-a.example.com" + +global: + sizing: production + security: hardened + tls: letsencrypt + observability: full + kafkaConnect: throughput + +mongodb: + enabled: true +clickhouse: + enabled: true +kafka: + enabled: true +countly: + enabled: true +observability: + enabled: true +migration: + enabled: true # This customer needs data migration + +syncPolicy: + automated: true + selfHeal: true + prune: true diff --git a/charts/countly-argocd/templates/NOTES.txt b/charts/countly-argocd/templates/NOTES.txt new file mode 100644 index 0000000..67de586 --- /dev/null +++ b/charts/countly-argocd/templates/NOTES.txt @@ -0,0 +1,72 @@ +=== Countly ArgoCD Deployment === + +Environment: {{ .Values.environment }} +Cluster: {{ .Values.destination.server }} +Project: {{ include "countly-argocd.projectName" . 
}} + +Applications deployed (sync wave order): + Wave 0: {{ if .Values.mongodb.enabled }}mongodb{{ end }} {{ if .Values.clickhouse.enabled }}clickhouse{{ end }} + Wave 5: {{ if .Values.kafka.enabled }}kafka{{ end }} + Wave 10: {{ if .Values.countly.enabled }}countly{{ end }} + Wave 15: {{ if .Values.observability.enabled }}observability{{ end }} + Wave 20: {{ if .Values.migration.enabled }}migration{{ end }} + +--- Status --- + + # List all Countly applications + kubectl get applications -n argocd -l app.kubernetes.io/instance={{ .Release.Name }} + + # Sync all + argocd app sync -l app.kubernetes.io/instance={{ .Release.Name }} + +--- Multi-Cluster --- + + # Deploy to another cluster + helm install countly- charts/countly-argocd \ + --set environment= \ + --set destination.server=https:// \ + -n argocd + +--- Required: ArgoCD Custom Health Checks --- + + Add these to your argocd-cm ConfigMap for sync waves to + block on actual readiness: + + resource.customizations.health.kafka.strimzi.io_Kafka: | + hs = {} + if obj.status ~= nil and obj.status.conditions ~= nil then + for _, c in ipairs(obj.status.conditions) do + if c.type == "Ready" and c.status == "True" then + hs.status = "Healthy"; hs.message = c.message or "Ready"; return hs + end + if c.type == "NotReady" then + hs.status = "Progressing"; hs.message = c.message or "Not ready"; return hs + end + end + end + hs.status = "Progressing"; hs.message = "Waiting for status"; return hs + + # Same pattern for: KafkaConnect, KafkaNodePool, KafkaConnector + + resource.customizations.health.clickhouse.com_ClickHouseCluster: | + hs = {} + if obj.status ~= nil and obj.status.status ~= nil then + if obj.status.status == "Completed" then + hs.status = "Healthy"; hs.message = "Completed"; return hs + end + end + hs.status = "Progressing"; hs.message = "Provisioning"; return hs + + resource.customizations.health.mongodbcommunity.mongodb.com_MongoDBCommunity: | + hs = {} + if obj.status ~= nil and obj.status.phase ~= nil then 
+ if obj.status.phase == "Running" then + hs.status = "Healthy"; hs.message = "Running"; return hs + end + end + hs.status = "Progressing"; hs.message = "Provisioning"; return hs + +--- Teardown --- + + helm uninstall {{ .Release.Name }} -n argocd + # Cascading finalizers will delete all child Applications diff --git a/charts/countly-argocd/templates/_helpers.tpl b/charts/countly-argocd/templates/_helpers.tpl new file mode 100644 index 0000000..9aee842 --- /dev/null +++ b/charts/countly-argocd/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "countly-argocd.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "countly-argocd.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "countly-argocd.labels" -}} +helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +app.kubernetes.io/name: {{ include "countly-argocd.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +ArgoCD project name — unique per release to prevent multi-tenant collisions. +*/}} +{{- define "countly-argocd.projectName" -}} +{{- .Values.project | default (include "countly-argocd.fullname" .) }} +{{- end -}} + +{{/* +Sync policy block — reused by all Application templates. +Includes retry policy for resilience at scale (100+ customers = 600+ Applications). 
+*/}} +{{- define "countly-argocd.syncPolicy" -}} +syncPolicy: + {{- if .Values.syncPolicy.automated }} + automated: + prune: {{ .Values.syncPolicy.prune }} + selfHeal: {{ .Values.syncPolicy.selfHeal }} + {{- end }} + syncOptions: + - CreateNamespace=true + - ServerSideApply=true + - RespectIgnoreDifferences=true + retry: + limit: {{ .Values.syncPolicy.retry.limit }} + backoff: + duration: {{ .Values.syncPolicy.retry.backoff.duration }} + factor: {{ .Values.syncPolicy.retry.backoff.factor }} + maxDuration: {{ .Values.syncPolicy.retry.backoff.maxDuration }} +{{- end -}} diff --git a/charts/countly-argocd/templates/app-clickhouse.yaml b/charts/countly-argocd/templates/app-clickhouse.yaml new file mode 100644 index 0000000..b164050 --- /dev/null +++ b/charts/countly-argocd/templates/app-clickhouse.yaml @@ -0,0 +1,43 @@ +{{- if .Values.clickhouse.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: {{ include "countly-argocd.fullname" . }}-clickhouse + namespace: argocd + labels: + {{- include "countly-argocd.labels" . | nindent 4 }} + annotations: + argocd.argoproj.io/sync-wave: "0" + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: {{ include "countly-argocd.projectName" . }} + source: + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.targetRevision }} + path: charts/countly-clickhouse + helm: + releaseName: countly-clickhouse + valueFiles: + - ../../environments/{{ .Values.environment }}/global.yaml + - ../../profiles/sizing/{{ .Values.global.sizing }}/clickhouse.yaml + - ../../profiles/security/{{ .Values.global.security }}/clickhouse.yaml + - ../../environments/{{ .Values.environment }}/clickhouse.yaml + - ../../environments/{{ .Values.environment }}/secrets-clickhouse.yaml + parameters: + - name: argocd.enabled + value: "true" + destination: + server: {{ .Values.destination.server }} + namespace: {{ .Values.clickhouse.namespace }} + {{- include "countly-argocd.syncPolicy" . 
| nindent 2 }} + ignoreDifferences: + - group: clickhouse.com + kind: ClickHouseCluster + jsonPointers: + - /status + - group: clickhouse.com + kind: KeeperCluster + jsonPointers: + - /status +{{- end }} diff --git a/charts/countly-argocd/templates/app-countly.yaml b/charts/countly-argocd/templates/app-countly.yaml new file mode 100644 index 0000000..54c8592 --- /dev/null +++ b/charts/countly-argocd/templates/app-countly.yaml @@ -0,0 +1,41 @@ +{{- if .Values.countly.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: {{ include "countly-argocd.fullname" . }}-countly + namespace: argocd + labels: + {{- include "countly-argocd.labels" . | nindent 4 }} + annotations: + argocd.argoproj.io/sync-wave: "10" + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: {{ include "countly-argocd.projectName" . }} + source: + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.targetRevision }} + path: charts/countly + helm: + releaseName: countly + valueFiles: + - ../../environments/{{ .Values.environment }}/global.yaml + - ../../profiles/sizing/{{ .Values.global.sizing }}/countly.yaml + - ../../profiles/tls/{{ .Values.global.tls }}/countly.yaml + - ../../profiles/observability/{{ .Values.global.observability }}/countly.yaml + - ../../profiles/security/{{ .Values.global.security }}/countly.yaml + - ../../environments/{{ .Values.environment }}/countly.yaml + - ../../environments/{{ .Values.environment }}/secrets-countly.yaml + parameters: + - name: argocd.enabled + value: "true" + destination: + server: {{ .Values.destination.server }} + namespace: {{ .Values.countly.namespace }} + {{- include "countly-argocd.syncPolicy" . 
| nindent 2 }} + ignoreDifferences: + - group: networking.k8s.io + kind: Ingress + jsonPointers: + - /status +{{- end }} diff --git a/charts/countly-argocd/templates/app-kafka.yaml b/charts/countly-argocd/templates/app-kafka.yaml new file mode 100644 index 0000000..b373087 --- /dev/null +++ b/charts/countly-argocd/templates/app-kafka.yaml @@ -0,0 +1,53 @@ +{{- if .Values.kafka.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: {{ include "countly-argocd.fullname" . }}-kafka + namespace: argocd + labels: + {{- include "countly-argocd.labels" . | nindent 4 }} + annotations: + argocd.argoproj.io/sync-wave: "5" + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: {{ include "countly-argocd.projectName" . }} + source: + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.targetRevision }} + path: charts/countly-kafka + helm: + releaseName: countly-kafka + valueFiles: + - ../../environments/{{ .Values.environment }}/global.yaml + - ../../profiles/sizing/{{ .Values.global.sizing }}/kafka.yaml + - ../../profiles/kafka-connect/{{ .Values.global.kafkaConnect }}/kafka.yaml + - ../../profiles/observability/{{ .Values.global.observability }}/kafka.yaml + - ../../profiles/security/{{ .Values.global.security }}/kafka.yaml + - ../../environments/{{ .Values.environment }}/kafka.yaml + - ../../environments/{{ .Values.environment }}/secrets-kafka.yaml + parameters: + - name: argocd.enabled + value: "true" + destination: + server: {{ .Values.destination.server }} + namespace: {{ .Values.kafka.namespace }} + {{- include "countly-argocd.syncPolicy" . 
| nindent 2 }} + ignoreDifferences: + - group: kafka.strimzi.io + kind: Kafka + jsonPointers: + - /status + - group: kafka.strimzi.io + kind: KafkaConnect + jsonPointers: + - /status + - group: kafka.strimzi.io + kind: KafkaConnector + jsonPointers: + - /status + - group: kafka.strimzi.io + kind: KafkaNodePool + jsonPointers: + - /status +{{- end }} diff --git a/charts/countly-argocd/templates/app-migration.yaml b/charts/countly-argocd/templates/app-migration.yaml new file mode 100644 index 0000000..ab30a27 --- /dev/null +++ b/charts/countly-argocd/templates/app-migration.yaml @@ -0,0 +1,32 @@ +{{- if .Values.migration.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: {{ include "countly-argocd.fullname" . }}-migration + namespace: argocd + labels: + {{- include "countly-argocd.labels" . | nindent 4 }} + annotations: + argocd.argoproj.io/sync-wave: "20" + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: {{ include "countly-argocd.projectName" . }} + source: + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.targetRevision }} + path: charts/countly-migration + helm: + releaseName: countly-migration + valueFiles: + - ../../environments/{{ .Values.environment }}/global.yaml + - ../../environments/{{ .Values.environment }}/migration.yaml + - ../../environments/{{ .Values.environment }}/secrets-migration.yaml + parameters: + - name: argocd.enabled + value: "true" + destination: + server: {{ .Values.destination.server }} + namespace: {{ .Values.migration.namespace }} + {{- include "countly-argocd.syncPolicy" . 
| nindent 2 }} +{{- end }} diff --git a/charts/countly-argocd/templates/app-mongodb.yaml b/charts/countly-argocd/templates/app-mongodb.yaml new file mode 100644 index 0000000..ce470a3 --- /dev/null +++ b/charts/countly-argocd/templates/app-mongodb.yaml @@ -0,0 +1,39 @@ +{{- if .Values.mongodb.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: {{ include "countly-argocd.fullname" . }}-mongodb + namespace: argocd + labels: + {{- include "countly-argocd.labels" . | nindent 4 }} + annotations: + argocd.argoproj.io/sync-wave: "0" + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: {{ include "countly-argocd.projectName" . }} + source: + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.targetRevision }} + path: charts/countly-mongodb + helm: + releaseName: countly-mongodb + valueFiles: + - ../../environments/{{ .Values.environment }}/global.yaml + - ../../profiles/sizing/{{ .Values.global.sizing }}/mongodb.yaml + - ../../profiles/security/{{ .Values.global.security }}/mongodb.yaml + - ../../environments/{{ .Values.environment }}/mongodb.yaml + - ../../environments/{{ .Values.environment }}/secrets-mongodb.yaml + parameters: + - name: argocd.enabled + value: "true" + destination: + server: {{ .Values.destination.server }} + namespace: {{ .Values.mongodb.namespace }} + {{- include "countly-argocd.syncPolicy" . | nindent 2 }} + ignoreDifferences: + - group: mongodbcommunity.mongodb.com + kind: MongoDBCommunity + jsonPointers: + - /status +{{- end }} diff --git a/charts/countly-argocd/templates/app-observability.yaml b/charts/countly-argocd/templates/app-observability.yaml new file mode 100644 index 0000000..27276d7 --- /dev/null +++ b/charts/countly-argocd/templates/app-observability.yaml @@ -0,0 +1,35 @@ +{{- if .Values.observability.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: {{ include "countly-argocd.fullname" . 
}}-observability + namespace: argocd + labels: + {{- include "countly-argocd.labels" . | nindent 4 }} + annotations: + argocd.argoproj.io/sync-wave: "15" + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: {{ include "countly-argocd.projectName" . }} + source: + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.targetRevision }} + path: charts/countly-observability + helm: + releaseName: countly-observability + valueFiles: + - ../../environments/{{ .Values.environment }}/global.yaml + - ../../profiles/sizing/{{ .Values.global.sizing }}/observability.yaml + - ../../profiles/observability/{{ .Values.global.observability }}/observability.yaml + - ../../profiles/security/{{ .Values.global.security }}/observability.yaml + - ../../environments/{{ .Values.environment }}/observability.yaml + - ../../environments/{{ .Values.environment }}/secrets-observability.yaml + parameters: + - name: argocd.enabled + value: "true" + destination: + server: {{ .Values.destination.server }} + namespace: {{ .Values.observability.namespace }} + {{- include "countly-argocd.syncPolicy" . | nindent 2 }} +{{- end }} diff --git a/charts/countly-argocd/templates/project.yaml b/charts/countly-argocd/templates/project.yaml new file mode 100644 index 0000000..5cbad53 --- /dev/null +++ b/charts/countly-argocd/templates/project.yaml @@ -0,0 +1,28 @@ +apiVersion: argoproj.io/v1alpha1 +kind: AppProject +metadata: + name: {{ include "countly-argocd.projectName" . }} + namespace: argocd + labels: + {{- include "countly-argocd.labels" . 
| nindent 4 }} +spec: + description: "Countly analytics platform ({{ .Values.environment }})" + sourceRepos: + - {{ .Values.repoURL | quote }} + destinations: + - namespace: "*" + server: {{ .Values.destination.server }} + clusterResourceWhitelist: + - group: storage.k8s.io + kind: StorageClass + - group: rbac.authorization.k8s.io + kind: ClusterRole + - group: rbac.authorization.k8s.io + kind: ClusterRoleBinding + - group: cert-manager.io + kind: ClusterIssuer + namespaceResourceWhitelist: + - group: "*" + kind: "*" + orphanedResources: + warn: true diff --git a/charts/countly-argocd/values.schema.json b/charts/countly-argocd/values.schema.json new file mode 100644 index 0000000..03c0c44 --- /dev/null +++ b/charts/countly-argocd/values.schema.json @@ -0,0 +1,111 @@ +{ + "$schema": "https://json-schema.org/draft-07/schema#", + "title": "Countly ArgoCD Chart Values", + "type": "object", + "required": ["repoURL", "environment", "destination"], + "properties": { + "repoURL": { + "type": "string", + "description": "Git repo containing the Helm charts", + "minLength": 1 + }, + "targetRevision": { + "type": "string", + "description": "Git branch, tag, or commit SHA" + }, + "environment": { + "type": "string", + "description": "Environment name (maps to environments// directory)", + "minLength": 1 + }, + "destination": { + "type": "object", + "required": ["server"], + "properties": { + "server": { + "type": "string", + "description": "Target cluster API server URL", + "minLength": 1 + } + } + }, + "project": { + "type": "string", + "description": "ArgoCD project name (defaults to release name if empty)" + }, + "global": { + "type": "object", + "properties": { + "sizing": { "type": "string" }, + "security": { "type": "string" }, + "tls": { "type": "string" }, + "observability": { "type": "string" }, + "kafkaConnect": { "type": "string" } + } + }, + "mongodb": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "namespace": { "type": "string" } + } 
+ }, + "clickhouse": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "namespace": { "type": "string" } + } + }, + "kafka": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "namespace": { "type": "string" } + } + }, + "countly": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "namespace": { "type": "string" } + } + }, + "observability": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "namespace": { "type": "string" } + } + }, + "migration": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "namespace": { "type": "string" } + } + }, + "syncPolicy": { + "type": "object", + "properties": { + "automated": { "type": "boolean" }, + "selfHeal": { "type": "boolean" }, + "prune": { "type": "boolean" }, + "retry": { + "type": "object", + "properties": { + "limit": { "type": "integer", "minimum": 0 }, + "backoff": { + "type": "object", + "properties": { + "duration": { "type": "string" }, + "factor": { "type": "integer" }, + "maxDuration": { "type": "string" } + } + } + } + } + } + } + } +} diff --git a/charts/countly-argocd/values.yaml b/charts/countly-argocd/values.yaml new file mode 100644 index 0000000..489ead7 --- /dev/null +++ b/charts/countly-argocd/values.yaml @@ -0,0 +1,59 @@ +# -- Git repo containing the Helm charts +repoURL: "https://github.com/Countly/helm.git" +targetRevision: main + +# -- Environment name (maps to environments// directory) +environment: example-production + +# -- Target cluster +destination: + server: "https://kubernetes.default.svc" + +# -- ArgoCD project name (defaults to release name if empty) +# Each customer MUST have a unique project to avoid collisions. 
+project: "" + +# -- Profile selections (passed to child charts via valueFiles) +global: + sizing: production + security: hardened + tls: letsencrypt + observability: full + kafkaConnect: balanced + +# -- Component toggles +mongodb: + enabled: true + namespace: mongodb + +clickhouse: + enabled: true + namespace: clickhouse + +kafka: + enabled: true + namespace: kafka + +countly: + enabled: true + namespace: countly + +observability: + enabled: true + namespace: observability + +migration: + enabled: false + namespace: countly-migration + +# -- Sync policy for child Applications +syncPolicy: + automated: true + selfHeal: true + prune: true + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m diff --git a/charts/countly-clickhouse/.helmignore b/charts/countly-clickhouse/.helmignore index 24676c2..3b73490 100644 --- a/charts/countly-clickhouse/.helmignore +++ b/charts/countly-clickhouse/.helmignore @@ -15,4 +15,3 @@ docs/ examples/ environments/ ci/ -tests/ diff --git a/charts/countly-clickhouse/README.md b/charts/countly-clickhouse/README.md new file mode 100644 index 0000000..3f57c0e --- /dev/null +++ b/charts/countly-clickhouse/README.md @@ -0,0 +1,157 @@ +# Countly ClickHouse Helm Chart + +Deploys a ClickHouse cluster for Countly analytics via the ClickHouse Operator. Includes ClickHouse server replicas, ClickHouse Keeper for coordination, and optional Prometheus monitoring. 
+ +**Chart version:** 0.1.0 +**App version:** 26.2 + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph clickhouse["clickhouse namespace"] + server0["ClickHouse Server\nShard 0, Replica 0\n:8123 (HTTP) :9000 (Native)"] + server1["ClickHouse Server\nShard 0, Replica 1\n:8123 (HTTP) :9000 (Native)"] + keeper["ClickHouse Keeper\n:2181"] + end + + server0 <-->|replication| server1 + server0 --> keeper + server1 --> keeper + + countly["Countly App"] -->|HTTP queries| server0 + countly -->|HTTP queries| server1 + kafka["Kafka Connect"] -->|sink inserts| server0 +``` + +The chart creates a `ClickHouseCluster` custom resource managed by the ClickHouse Operator. Keeper provides distributed coordination for replication and distributed DDL. + +--- + +## Quick Start + +```bash +helm install countly-clickhouse ./charts/countly-clickhouse \ + -n clickhouse --create-namespace \ + --set auth.defaultUserPassword.password="YOUR_PASSWORD" +``` + +> **Production deployment:** Use the profile-based approach from the [root README](../../README.md#manual-installation-without-helmfile) instead of `--set` flags. This chart supports sizing and security profile layers. 
+ +--- + +## Prerequisites + +- **ClickHouse Operator** installed in the cluster (`clickhouse.com/v1alpha1` CRDs) +- **StorageClass** available for persistent volumes + +--- + +## Configuration + +### Cluster Sizing + +```yaml +shards: 1 # Number of shards +replicas: 2 # Replicas per shard +version: "26.2" # ClickHouse server version +``` + +### Server Resources + +```yaml +server: + resources: + requests: { cpu: "1", memory: "4Gi" } + limits: { cpu: "2", memory: "8Gi" } + persistence: + storageClass: "" # Uses cluster default if empty + size: 50Gi +``` + +### Keeper Resources + +```yaml +keeper: + replicas: 1 + resources: + requests: { cpu: "250m", memory: "512Mi" } + limits: { cpu: "500m", memory: "1Gi" } + persistence: + size: 5Gi +``` + +### Authentication + +```yaml +auth: + defaultUserPassword: + password: "" # Set on install (or use existingSecret) + existingSecret: "" # Name of pre-created Secret + secretName: clickhouse-default-password + key: password + adminUser: + enabled: false + passwordSha256Hex: "" # echo -n 'password' | sha256sum | cut -d' ' -f1 +``` + +### OpenTelemetry Server-Side Tracing + +```yaml +opentelemetry: + enabled: false + spanLog: + ttlDays: 7 + flushIntervalMs: 1000 +``` + +When enabled, ClickHouse logs query spans to `system.opentelemetry_span_log` for queries arriving with W3C `traceparent` headers. + +### ArgoCD Integration + +```yaml +argocd: + enabled: true +``` + +--- + +## Verifying the Deployment + +```bash +# 1. Check the ClickHouseCluster resource +kubectl get clickhousecluster -n clickhouse + +# 2. Check pods are running +kubectl get pods -n clickhouse + +# 3. Test ClickHouse connectivity +kubectl exec -n clickhouse countly-clickhouse-clickhouse-0-0-0 -- \ + clickhouse-client --password YOUR_PASSWORD --query "SELECT 1" + +# 4. 
Check database exists +kubectl exec -n clickhouse countly-clickhouse-clickhouse-0-0-0 -- \ + clickhouse-client --password YOUR_PASSWORD --query "SHOW DATABASES" +``` + +--- + +## Configuration Reference + +| Key | Default | Description | +|-----|---------|-------------| +| `version` | `26.2` | ClickHouse server version | +| `shards` | `1` | Number of shards | +| `replicas` | `2` | Replicas per shard | +| `database` | `countly_drill` | Default database name | +| `server.resources.requests.cpu` | `1` | Server CPU request | +| `server.resources.requests.memory` | `4Gi` | Server memory request | +| `server.persistence.size` | `50Gi` | Server data volume size | +| `keeper.replicas` | `1` | Number of Keeper nodes | +| `keeper.persistence.size` | `5Gi` | Keeper data volume size | +| `auth.defaultUserPassword.password` | `""` | Default user password | +| `podDisruptionBudget.server.enabled` | `false` | Server PDB | +| `serviceMonitor.enabled` | `false` | Prometheus ServiceMonitor | +| `networkPolicy.enabled` | `false` | NetworkPolicy | diff --git a/charts/countly-clickhouse/examples/values-production.yaml b/charts/countly-clickhouse/examples/values-production.yaml new file mode 100644 index 0000000..b83493f --- /dev/null +++ b/charts/countly-clickhouse/examples/values-production.yaml @@ -0,0 +1,51 @@ +# Production ClickHouse cluster. +# Single shard, 2 replicas, Keeper quorum, PDB enabled. 
+ +argocd: + enabled: true + +shards: 1 +replicas: 2 + +auth: + defaultUserPassword: + existingSecret: clickhouse-default-password + +server: + resources: + requests: + cpu: "4" + memory: "16Gi" + limits: + cpu: "8" + memory: "32Gi" + persistence: + size: 500Gi + +keeper: + replicas: 3 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1" + memory: "2Gi" + persistence: + size: 10Gi + +podDisruptionBudget: + server: + enabled: true + maxUnavailable: 1 + keeper: + enabled: true + maxUnavailable: 1 + +networkPolicy: + enabled: true + allowMonitoring: true + +serviceMonitor: + enabled: true + interval: "15s" diff --git a/charts/countly-clickhouse/examples/values-small.yaml b/charts/countly-clickhouse/examples/values-small.yaml new file mode 100644 index 0000000..a38a2f8 --- /dev/null +++ b/charts/countly-clickhouse/examples/values-small.yaml @@ -0,0 +1,32 @@ +# Small / development ClickHouse cluster. +# Single shard, single replica, minimal resources. + +shards: 1 +replicas: 1 + +auth: + defaultUserPassword: + password: "dev-password" + +server: + resources: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1" + memory: "4Gi" + persistence: + size: 20Gi + +keeper: + replicas: 1 + resources: + requests: + cpu: "100m" + memory: "256Mi" + limits: + cpu: "250m" + memory: "512Mi" + persistence: + size: 2Gi diff --git a/charts/countly-clickhouse/templates/_helpers.tpl b/charts/countly-clickhouse/templates/_helpers.tpl index 73c06a9..52b053d 100644 --- a/charts/countly-clickhouse/templates/_helpers.tpl +++ b/charts/countly-clickhouse/templates/_helpers.tpl @@ -116,3 +116,12 @@ Password secret name for ClickHouse default user {{ .Values.auth.defaultUserPassword.secretName }} {{- end -}} {{- end -}} + +{{/* +ArgoCD sync-wave annotation (only when argocd.enabled). 
+*/}} +{{- define "countly-clickhouse.syncWave" -}} +{{- if ((.root.Values.argocd).enabled) }} +argocd.argoproj.io/sync-wave: {{ .wave | quote }} +{{- end }} +{{- end -}} diff --git a/charts/countly-clickhouse/templates/clickhousecluster.yaml b/charts/countly-clickhouse/templates/clickhousecluster.yaml index 037612e..befc0a8 100644 --- a/charts/countly-clickhouse/templates/clickhousecluster.yaml +++ b/charts/countly-clickhouse/templates/clickhousecluster.yaml @@ -4,6 +4,7 @@ metadata: name: {{ include "countly-clickhouse.fullname" . }} annotations: "helm.sh/resource-policy": keep + {{- include "countly-clickhouse.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} spec: diff --git a/charts/countly-clickhouse/templates/keepercluster.yaml b/charts/countly-clickhouse/templates/keepercluster.yaml index 8b92a05..19771e1 100644 --- a/charts/countly-clickhouse/templates/keepercluster.yaml +++ b/charts/countly-clickhouse/templates/keepercluster.yaml @@ -4,6 +4,7 @@ metadata: name: {{ include "countly-clickhouse.fullname" . }}-keeper annotations: "helm.sh/resource-policy": keep + {{- include "countly-clickhouse.syncWave" (dict "wave" "3" "root" .) | nindent 4 }} labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} spec: diff --git a/charts/countly-clickhouse/templates/namespace.yaml b/charts/countly-clickhouse/templates/namespace.yaml index a372fc8..f09110e 100644 --- a/charts/countly-clickhouse/templates/namespace.yaml +++ b/charts/countly-clickhouse/templates/namespace.yaml @@ -5,4 +5,8 @@ metadata: name: {{ .Release.Namespace }} labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "0" "root" .) 
| nindent 4 }} + {{- end }} {{- end }} diff --git a/charts/countly-clickhouse/templates/networkpolicy.yaml b/charts/countly-clickhouse/templates/networkpolicy.yaml index 0de5fb8..40cca70 100644 --- a/charts/countly-clickhouse/templates/networkpolicy.yaml +++ b/charts/countly-clickhouse/templates/networkpolicy.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-clickhouse.fullname" . }}-default-deny labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} spec: podSelector: {} policyTypes: diff --git a/charts/countly-clickhouse/templates/pdb-keeper.yaml b/charts/countly-clickhouse/templates/pdb-keeper.yaml index 1c83556..2b783e5 100644 --- a/charts/countly-clickhouse/templates/pdb-keeper.yaml +++ b/charts/countly-clickhouse/templates/pdb-keeper.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-clickhouse.fullname" . }}-keeper-pdb labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "3" "root" .) | nindent 4 }} + {{- end }} spec: maxUnavailable: {{ .Values.podDisruptionBudget.keeper.maxUnavailable | default 1 }} selector: diff --git a/charts/countly-clickhouse/templates/pdb-server.yaml b/charts/countly-clickhouse/templates/pdb-server.yaml index 20aef77..cf95153 100644 --- a/charts/countly-clickhouse/templates/pdb-server.yaml +++ b/charts/countly-clickhouse/templates/pdb-server.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-clickhouse.fullname" . }}-server-pdb labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "5" "root" .) 
| nindent 4 }} + {{- end }} spec: maxUnavailable: {{ .Values.podDisruptionBudget.server.maxUnavailable | default 1 }} selector: diff --git a/charts/countly-clickhouse/templates/secret-default-password.yaml b/charts/countly-clickhouse/templates/secret-default-password.yaml index b5cd2cf..cbeaa37 100644 --- a/charts/countly-clickhouse/templates/secret-default-password.yaml +++ b/charts/countly-clickhouse/templates/secret-default-password.yaml @@ -9,6 +9,7 @@ metadata: {{- if .Values.secrets.keep }} helm.sh/resource-policy: keep {{- end }} + {{- include "countly-clickhouse.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} type: Opaque data: {{- $existing := lookup "v1" "Secret" .Release.Namespace .Values.auth.defaultUserPassword.secretName }} diff --git a/charts/countly-clickhouse/templates/service-metrics.yaml b/charts/countly-clickhouse/templates/service-metrics.yaml index b5f420e..608e54e 100644 --- a/charts/countly-clickhouse/templates/service-metrics.yaml +++ b/charts/countly-clickhouse/templates/service-metrics.yaml @@ -7,6 +7,10 @@ metadata: {{- include "countly-clickhouse.labels" . | nindent 4 }} countly.io/component: clickhouse-server countly.io/metrics: "true" + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- end }} spec: type: ClusterIP {{- if eq (.Values.serviceMonitor.serviceType | default "headless") "headless" }} @@ -30,6 +34,10 @@ metadata: {{- include "countly-clickhouse.labels" . | nindent 4 }} countly.io/component: clickhouse-keeper countly.io/metrics: "true" + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "10" "root" .) 
| nindent 4 }} + {{- end }} spec: type: ClusterIP {{- if eq (.Values.serviceMonitor.serviceType | default "headless") "headless" }} diff --git a/charts/countly-clickhouse/templates/servicemonitor.yaml b/charts/countly-clickhouse/templates/servicemonitor.yaml index 8aace64..a2200fd 100644 --- a/charts/countly-clickhouse/templates/servicemonitor.yaml +++ b/charts/countly-clickhouse/templates/servicemonitor.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-clickhouse.fullname" . }}-server labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- end }} spec: namespaceSelector: matchNames: @@ -24,6 +28,10 @@ metadata: name: {{ include "countly-clickhouse.fullname" . }}-keeper labels: {{- include "countly-clickhouse.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-clickhouse.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- end }} spec: namespaceSelector: matchNames: diff --git a/charts/countly-clickhouse/templates/tests/test-connection.yaml b/charts/countly-clickhouse/templates/tests/test-connection.yaml new file mode 100644 index 0000000..e635654 --- /dev/null +++ b/charts/countly-clickhouse/templates/tests/test-connection.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "countly-clickhouse.fullname" . }}-test-connection + namespace: {{ .Release.Namespace }} + labels: + {{- include "countly-clickhouse.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: test + image: busybox:1.35 + command: ['sh', '-c', 'nc -z {{ include "countly-clickhouse.fullname" . 
}}-clickhouse-headless 8123'] diff --git a/charts/countly-clickhouse/values.yaml b/charts/countly-clickhouse/values.yaml index 3affe4c..9051065 100644 --- a/charts/countly-clickhouse/values.yaml +++ b/charts/countly-clickhouse/values.yaml @@ -1,37 +1,66 @@ +# -- Global settings shared across all Countly charts global: + # -- Override container image registry for all images imageRegistry: "" + # -- Global image pull secrets imagePullSecrets: [] + # -- Default StorageClass for PVCs (empty = cluster default) storageClass: "" + # -- Sizing profile name (informational, used by environment overlays) sizing: small + # -- Global scheduling defaults (merged with per-component settings) scheduling: nodeSelector: {} tolerations: [] +# -- Override the chart name used in resource names nameOverride: "" +# -- Override the full resource name fullnameOverride: "" +# -- Create the target namespace (disable when ArgoCD manages namespace creation) createNamespace: false +# -- ArgoCD integration (adds sync-wave annotations when enabled) +argocd: + enabled: false + +# -- ClickHouse Operator CRD API version clickhouseOperator: apiVersion: clickhouse.com/v1alpha1 +# -- ClickHouse server version version: "26.2" +# -- Number of shards in the cluster shards: 1 +# -- Number of replicas per shard replicas: 2 +# -- Container images for ClickHouse components image: + # -- ClickHouse server image server: clickhouse/clickhouse-server + # -- ClickHouse Keeper image keeper: clickhouse/clickhouse-keeper +# -- Default database created on cluster initialization database: countly_drill +# -- Authentication configuration auth: + # -- Default user password (required on first install) defaultUserPassword: + # -- Use a pre-created Secret instead of creating one from values existingSecret: "" + # -- Name of the Secret to create (when existingSecret is empty) secretName: clickhouse-default-password + # -- Key within the Secret containing the password key: password + # -- Password value (REQUIRED on first 
install unless existingSecret is set) password: "" + # -- Optional admin user with SHA256 password hash adminUser: + # -- Create the admin user enabled: false # Precomputed SHA256 hex of the admin password (64 hex chars). # Generate: echo -n 'your_password' | sha256sum | cut -d' ' -f1 @@ -44,17 +73,23 @@ auth: # that arrive with W3C traceparent headers (injected by ClickhouseClient.js). # A separate export bridge is needed to ship spans from this table to Tempo. opentelemetry: + # -- Enable server-side OpenTelemetry span logging enabled: false spanLog: + # -- TTL in days for span log entries ttlDays: 7 + # -- Flush interval for the span log buffer flushIntervalMs: 1000 +# -- ClickHouse server configuration server: + # -- Pod security context for server pods securityContext: runAsNonRoot: true runAsUser: 101 runAsGroup: 101 fsGroup: 101 + # -- Resource requests and limits per server replica resources: requests: cpu: "1" @@ -62,35 +97,50 @@ server: limits: cpu: "2" memory: "8Gi" + # -- Persistent volume configuration for server data persistence: + # -- StorageClass override (empty = global or cluster default) storageClass: "" + # -- Data volume size per server replica size: 50Gi + # -- ClickHouse server settings settings: + # -- Maximum concurrent connections maxConnections: 4096 + # -- Additional XML config merged into config.xml extraConfig: "" + # -- Additional XML config merged into users.xml extraUsersConfig: "" + # -- Prometheus metrics endpoint on each server pod prometheus: enabled: true port: 9363 endpoint: /metrics + # -- Scheduling constraints for server pods scheduling: nodeSelector: {} tolerations: [] affinity: {} topologySpreadConstraints: [] + # -- Pod anti-affinity to spread server replicas across nodes antiAffinity: enabled: true + # -- preferred or required type: preferred topologyKey: kubernetes.io/hostname weight: 100 +# -- ClickHouse Keeper configuration (distributed coordination) keeper: + # -- Number of Keeper nodes (use odd numbers: 1, 3, 5) 
replicas: 1 + # -- Pod security context for Keeper pods securityContext: runAsNonRoot: true runAsUser: 101 runAsGroup: 101 fsGroup: 101 + # -- Resource requests and limits per Keeper node resources: requests: cpu: "250m" @@ -98,46 +148,68 @@ keeper: limits: cpu: "500m" memory: "1Gi" + # -- Persistent volume configuration for Keeper data persistence: + # -- StorageClass override (empty = global or cluster default) storageClass: "" + # -- Data volume size per Keeper node size: 5Gi + # -- Keeper settings settings: + # -- Prometheus metrics endpoint on each Keeper pod prometheus: enabled: true port: 9090 endpoint: /metrics + # -- Scheduling constraints for Keeper pods scheduling: nodeSelector: {} tolerations: [] affinity: {} + # -- Pod anti-affinity to spread Keeper nodes across nodes antiAffinity: enabled: true type: preferred topologyKey: kubernetes.io/hostname weight: 100 +# -- Pod disruption budgets podDisruptionBudget: + # -- PDB for ClickHouse server pods server: enabled: false maxUnavailable: 1 + # -- PDB for Keeper pods keeper: enabled: false maxUnavailable: 1 +# -- Network policy restricting ingress to ClickHouse pods networkPolicy: + # -- Enable the NetworkPolicy enabled: false + # -- Namespaces allowed to connect to ClickHouse allowedNamespaces: - countly - kafka + # -- Allow ingress from the monitoring namespace (for metrics scraping) allowMonitoring: false + # -- Label selector for the monitoring namespace monitoringNamespaceSelector: kubernetes.io/metadata.name: monitoring + # -- Additional custom ingress rules additionalIngress: [] +# -- Prometheus ServiceMonitor for ClickHouse metrics serviceMonitor: + # -- Deploy a ServiceMonitor resource enabled: false + # -- Scrape interval interval: "15s" - serviceType: headless # "headless" for per-pod scraping, "clusterIP" for any-pod + # -- "headless" for per-pod scraping, "clusterIP" for any-pod + serviceType: headless +# -- Secret retention settings secrets: + # -- Preserve secrets on helm uninstall/upgrade 
keep: true diff --git a/charts/countly-kafka/.helmignore b/charts/countly-kafka/.helmignore index 24676c2..3b73490 100644 --- a/charts/countly-kafka/.helmignore +++ b/charts/countly-kafka/.helmignore @@ -15,4 +15,3 @@ docs/ examples/ environments/ ci/ -tests/ diff --git a/charts/countly-kafka/README.md b/charts/countly-kafka/README.md new file mode 100644 index 0000000..1f8b22f --- /dev/null +++ b/charts/countly-kafka/README.md @@ -0,0 +1,191 @@ +# Countly Kafka Helm Chart + +Deploys Apache Kafka for Countly event streaming via the Strimzi Operator. Includes KRaft-mode brokers, controllers, Kafka Connect with the ClickHouse sink connector, and optional Cruise Control for partition rebalancing. + +**Chart version:** 0.1.0 +**App version:** 4.2.0 + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph kafka["kafka namespace"] + broker0["Broker 0\n:9092"] + broker1["Broker 1\n:9092"] + broker2["Broker 2\n:9092"] + ctrl["Controllers (3)\nKRaft metadata"] + connect["Kafka Connect\nClickHouse Sink"] + cc["Cruise Control"] + end + + ingestor["Countly Ingestor"] -->|produce events| broker0 + broker0 --> ctrl + broker1 --> ctrl + broker2 --> ctrl + connect -->|consume & sink| ch["ClickHouse"] + cc -.->|rebalance| broker0 +``` + +The chart creates Strimzi `Kafka`, `KafkaConnect`, and `KafkaConnector` custom resources. Brokers run in KRaft mode (no ZooKeeper). The ClickHouse sink connector reads from the `drill-events` topic and inserts into the `drill_events` table. + +--- + +## Quick Start + +```bash +helm install countly-kafka ./charts/countly-kafka \ + -n kafka --create-namespace \ + --set kafkaConnect.clickhouse.password="YOUR_CLICKHOUSE_PASSWORD" +``` + +> **Production deployment:** Use the profile-based approach from the [root README](../../README.md#manual-installation-without-helmfile) instead of `--set` flags. This chart supports sizing, kafka-connect, observability, and security profile layers. 
+ +--- + +## Prerequisites + +- **Strimzi Kafka Operator** installed in the cluster (`kafka.strimzi.io/v1` CRDs) +- **StorageClass** available for persistent volumes +- **ClickHouse** accessible from the kafka namespace (for Kafka Connect sink) + +--- + +## Configuration + +### Brokers + +```yaml +brokers: + replicas: 3 + resources: + requests: { cpu: "1", memory: "4Gi" } + limits: { cpu: "1", memory: "4Gi" } + jvmOptions: + xms: "2g" + xmx: "2g" + persistence: + volumes: + - id: 0 + size: 100Gi + config: + default.replication.factor: 2 + min.insync.replicas: 2 + auto.create.topics.enable: false +``` + +### Controllers + +```yaml +controllers: + replicas: 3 + resources: + requests: { cpu: "500m", memory: "2Gi" } + limits: { cpu: "1", memory: "2Gi" } + persistence: + size: 20Gi +``` + +### Kafka Connect + +```yaml +kafkaConnect: + enabled: true + name: connect-ch + replicas: 2 + resources: + requests: { cpu: "2", memory: "8Gi" } + limits: { cpu: "2", memory: "8Gi" } + clickhouse: + host: "" # Auto-resolved from clickhouseNamespace if empty + port: "8123" + password: "" # Required + database: "countly_drill" + hpa: + enabled: false + minReplicas: 1 + maxReplicas: 3 +``` + +### Connectors + +Connectors are defined as a list and rendered as `KafkaConnector` resources: + +```yaml +kafkaConnect: + connectors: + - name: ch-sink-drill-events + enabled: true + state: running + class: com.clickhouse.kafka.connect.ClickHouseSinkConnector + tasksMax: 1 + config: + topics: drill-events + topic2TableMap: "drill-events=drill_events" +``` + +### OpenTelemetry (Kafka Connect) + +```yaml +kafkaConnect: + otel: + enabled: false + serviceName: "kafka-connect" + exporterEndpoint: "http://alloy-otlp:4317" +``` + +### ArgoCD Integration + +```yaml +argocd: + enabled: true +``` + +--- + +## Verifying the Deployment + +```bash +# 1. Check Kafka cluster status +kubectl get kafka -n kafka + +# 2. Check all pods +kubectl get pods -n kafka + +# 3. 
Check Kafka Connect +kubectl get kafkaconnect -n kafka + +# 4. Check connectors +kubectl get kafkaconnectors -n kafka + +# 5. Verify topic exists +kubectl exec -n kafka countly-kafka-countly-kafka-brokers-0 -- \ + bin/kafka-topics.sh --bootstrap-server localhost:9092 --list + +# 6. Check connector status +kubectl exec -n kafka countly-kafka-countly-kafka-brokers-0 -- \ + bin/kafka-topics.sh --bootstrap-server localhost:9092 \ + --describe --topic drill-events +``` + +--- + +## Configuration Reference + +| Key | Default | Description | +|-----|---------|-------------| +| `version` | `4.2.0` | Kafka version | +| `brokers.replicas` | `3` | Number of broker nodes | +| `brokers.persistence.volumes[0].size` | `100Gi` | Broker data volume size | +| `brokers.config.default.replication.factor` | `2` | Default topic replication | +| `brokers.config.min.insync.replicas` | `2` | Minimum in-sync replicas | +| `controllers.replicas` | `3` | Number of KRaft controllers | +| `controllers.persistence.size` | `20Gi` | Controller metadata volume | +| `kafkaConnect.enabled` | `true` | Deploy Kafka Connect | +| `kafkaConnect.replicas` | `2` | Connect worker replicas | +| `kafkaConnect.clickhouse.password` | `""` | ClickHouse password for sink | +| `kafkaConnect.hpa.enabled` | `false` | HPA for Connect workers | +| `cruiseControl.enabled` | `true` | Deploy Cruise Control | +| `metrics.enabled` | `true` | Enable JMX metrics | +| `networkPolicy.enabled` | `false` | NetworkPolicy | diff --git a/charts/countly-kafka/examples/values-production.yaml b/charts/countly-kafka/examples/values-production.yaml new file mode 100644 index 0000000..6555d10 --- /dev/null +++ b/charts/countly-kafka/examples/values-production.yaml @@ -0,0 +1,67 @@ +# Production Kafka cluster. +# 3 brokers, 3 controllers, Cruise Control, Kafka Connect with HPA. 
+ +argocd: + enabled: true + +brokers: + replicas: 3 + resources: + requests: + cpu: "2" + memory: "8Gi" + limits: + cpu: "4" + memory: "8Gi" + jvmOptions: + xms: "4g" + xmx: "4g" + persistence: + volumes: + - id: 0 + size: 500Gi + config: + default.replication.factor: 3 + min.insync.replicas: 2 + num.partitions: 100 + +controllers: + replicas: 3 + resources: + requests: + cpu: "1" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + persistence: + size: 50Gi + +cruiseControl: + enabled: true + +kafkaConnect: + enabled: true + replicas: 3 + resources: + requests: + cpu: "4" + memory: "12Gi" + limits: + cpu: "4" + memory: "12Gi" + jvmOptions: + xms: "8g" + xmx: "8g" + clickhouse: + existingSecret: clickhouse-auth + hpa: + enabled: true + minReplicas: 2 + maxReplicas: 6 + otel: + enabled: true + +networkPolicy: + enabled: true + allowMonitoring: true diff --git a/charts/countly-kafka/examples/values-small.yaml b/charts/countly-kafka/examples/values-small.yaml new file mode 100644 index 0000000..70fdc73 --- /dev/null +++ b/charts/countly-kafka/examples/values-small.yaml @@ -0,0 +1,60 @@ +# Small / development Kafka cluster. +# Single broker, minimal resources, no Kafka Connect HPA. 
+ +brokers: + replicas: 1 + resources: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1" + memory: "2Gi" + jvmOptions: + xms: "1g" + xmx: "1g" + persistence: + volumes: + - id: 0 + size: 20Gi + config: + default.replication.factor: 1 + min.insync.replicas: 1 + offsets.topic.replication.factor: 1 + transaction.state.log.replication.factor: 1 + num.partitions: 12 + +controllers: + replicas: 1 + resources: + requests: + cpu: "250m" + memory: "1Gi" + limits: + cpu: "500m" + memory: "1Gi" + persistence: + size: 5Gi + +cruiseControl: + enabled: false + +kafkaConnect: + enabled: true + replicas: 1 + resources: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1" + memory: "2Gi" + jvmOptions: + xms: "1g" + xmx: "1g" + workerConfig: + config.storage.replication.factor: 1 + offset.storage.replication.factor: 1 + status.storage.replication.factor: 1 + clickhouse: + password: "dev-password" diff --git a/charts/countly-kafka/templates/_helpers.tpl b/charts/countly-kafka/templates/_helpers.tpl index ee8a001..a4f35c3 100644 --- a/charts/countly-kafka/templates/_helpers.tpl +++ b/charts/countly-kafka/templates/_helpers.tpl @@ -77,6 +77,16 @@ countly-clickhouse-clickhouse-headless.{{ .Values.clickhouseNamespace | default {{- end -}} {{- end -}} +{{/* +ArgoCD sync-wave annotation (only when argocd.enabled). +Usage: {{- include "countly-kafka.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} +*/}} +{{- define "countly-kafka.syncWave" -}} +{{- if ((.root.Values.argocd).enabled) }} +argocd.argoproj.io/sync-wave: {{ .wave | quote }} +{{- end }} +{{- end -}} + {{/* ClickHouse Connect secret name */}} diff --git a/charts/countly-kafka/templates/configmap-connect-env.yaml b/charts/countly-kafka/templates/configmap-connect-env.yaml index d250887..3cd42b1 100644 --- a/charts/countly-kafka/templates/configmap-connect-env.yaml +++ b/charts/countly-kafka/templates/configmap-connect-env.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-kafka.fullname" . 
}}-connect-env labels: {{- include "countly-kafka.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} data: CLICKHOUSE_HOST: {{ include "countly-kafka.clickhouseHost" . | quote }} CLICKHOUSE_PORT: {{ .Values.kafkaConnect.clickhouse.port | quote }} diff --git a/charts/countly-kafka/templates/configmap-metrics.yaml b/charts/countly-kafka/templates/configmap-metrics.yaml index f8613cd..e947f45 100644 --- a/charts/countly-kafka/templates/configmap-metrics.yaml +++ b/charts/countly-kafka/templates/configmap-metrics.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-kafka.fullname" . }}-metrics labels: {{- include "countly-kafka.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} data: kafka-metrics-config.yml: | lowercaseOutputName: true diff --git a/charts/countly-kafka/templates/hpa-connect.yaml b/charts/countly-kafka/templates/hpa-connect.yaml index 5173819..c8b8ab1 100644 --- a/charts/countly-kafka/templates/hpa-connect.yaml +++ b/charts/countly-kafka/templates/hpa-connect.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-kafka.connectName" . }}-hpa labels: {{- include "countly-kafka.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- end }} spec: scaleTargetRef: apiVersion: {{ .Values.strimzi.apiVersion }} diff --git a/charts/countly-kafka/templates/kafka.yaml b/charts/countly-kafka/templates/kafka.yaml index 821a4c5..1b174f5 100644 --- a/charts/countly-kafka/templates/kafka.yaml +++ b/charts/countly-kafka/templates/kafka.yaml @@ -43,6 +43,7 @@ metadata: name: {{ include "countly-kafka.fullname" . 
}}-brokers annotations: "helm.sh/resource-policy": keep + {{- include "countly-kafka.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} labels: strimzi.io/cluster: {{ include "countly-kafka.fullname" . }} {{- include "countly-kafka.labels" . | nindent 4 }} @@ -100,6 +101,7 @@ metadata: name: {{ include "countly-kafka.fullname" . }}-controllers annotations: "helm.sh/resource-policy": keep + {{- include "countly-kafka.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} labels: strimzi.io/cluster: {{ include "countly-kafka.fullname" . }} {{- include "countly-kafka.labels" . | nindent 4 }} @@ -152,6 +154,7 @@ metadata: "helm.sh/resource-policy": keep strimzi.io/kraft: enabled strimzi.io/node-pools: enabled + {{- include "countly-kafka.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} spec: kafka: version: {{ .Values.version | quote }} diff --git a/charts/countly-kafka/templates/kafkaconnect.yaml b/charts/countly-kafka/templates/kafkaconnect.yaml index e28384d..fc6994c 100644 --- a/charts/countly-kafka/templates/kafkaconnect.yaml +++ b/charts/countly-kafka/templates/kafkaconnect.yaml @@ -7,6 +7,7 @@ metadata: {{- include "countly-kafka.labels" . | nindent 4 }} annotations: strimzi.io/use-connector-resources: "true" + {{- include "countly-kafka.syncWave" (dict "wave" "10" "root" .) 
| nindent 4 }} spec: version: {{ .Values.version | quote }} replicas: {{ .Values.kafkaConnect.replicas }} diff --git a/charts/countly-kafka/templates/kafkaconnectors.yaml b/charts/countly-kafka/templates/kafkaconnectors.yaml index 21fb798..bef1dd6 100644 --- a/charts/countly-kafka/templates/kafkaconnectors.yaml +++ b/charts/countly-kafka/templates/kafkaconnectors.yaml @@ -9,6 +9,10 @@ metadata: labels: strimzi.io/cluster: {{ include "countly-kafka.connectName" $ }} {{- include "countly-kafka.labels" $ | nindent 4 }} + {{- if (($.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "15" "root" $) | nindent 4 }} + {{- end }} spec: class: {{ $connector.class }} tasksMax: {{ $connector.tasksMax }} diff --git a/charts/countly-kafka/templates/namespace.yaml b/charts/countly-kafka/templates/namespace.yaml index 9d6596a..f60f74c 100644 --- a/charts/countly-kafka/templates/namespace.yaml +++ b/charts/countly-kafka/templates/namespace.yaml @@ -5,4 +5,8 @@ metadata: name: {{ .Release.Namespace }} labels: {{- include "countly-kafka.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} {{- end }} diff --git a/charts/countly-kafka/templates/networkpolicy.yaml b/charts/countly-kafka/templates/networkpolicy.yaml index 7168a09..dc9aa25 100644 --- a/charts/countly-kafka/templates/networkpolicy.yaml +++ b/charts/countly-kafka/templates/networkpolicy.yaml @@ -10,6 +10,10 @@ metadata: name: {{ include "countly-kafka.fullname" . }}-connect-api labels: {{- include "countly-kafka.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} spec: podSelector: matchLabels: @@ -36,6 +40,10 @@ metadata: name: {{ include "countly-kafka.fullname" . }}-default-deny labels: {{- include "countly-kafka.labels" . 
| nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} spec: podSelector: {} policyTypes: @@ -48,6 +56,10 @@ metadata: name: {{ include "countly-kafka.fullname" . }}-allow-kafka labels: {{- include "countly-kafka.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} spec: podSelector: {} policyTypes: @@ -74,6 +86,10 @@ metadata: name: {{ include "countly-kafka.fullname" . }}-allow-monitoring labels: {{- include "countly-kafka.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} spec: podSelector: {} policyTypes: diff --git a/charts/countly-kafka/templates/secret-clickhouse-connect.yaml b/charts/countly-kafka/templates/secret-clickhouse-connect.yaml index 6e4ac86..6039fca 100644 --- a/charts/countly-kafka/templates/secret-clickhouse-connect.yaml +++ b/charts/countly-kafka/templates/secret-clickhouse-connect.yaml @@ -9,6 +9,7 @@ metadata: {{- if .Values.secrets.keep }} helm.sh/resource-policy: keep {{- end }} + {{- include "countly-kafka.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} type: Opaque data: {{- $secretName := .Values.kafkaConnect.clickhouse.secretName }} diff --git a/charts/countly-kafka/templates/tests/test-connection.yaml b/charts/countly-kafka/templates/tests/test-connection.yaml new file mode 100644 index 0000000..f928384 --- /dev/null +++ b/charts/countly-kafka/templates/tests/test-connection.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "countly-kafka.fullname" . }}-test-connection + namespace: {{ .Release.Namespace }} + labels: + {{- include "countly-kafka.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: test + image: busybox:1.35 + command: ['sh', '-c', 'nc -z {{ include "countly-kafka.fullname" . }}-kafka-bootstrap 9092'] diff --git a/charts/countly-kafka/values.yaml b/charts/countly-kafka/values.yaml index 8bfcd0d..94ccff9 100644 --- a/charts/countly-kafka/values.yaml +++ b/charts/countly-kafka/values.yaml @@ -1,24 +1,42 @@ +# -- Global settings shared across all Countly charts global: + # -- Override container image registry for all images imageRegistry: "" + # -- Global image pull secrets imagePullSecrets: [] + # -- Default StorageClass for PVCs (empty = cluster default) storageClass: "" + # -- Sizing profile name (informational, used by environment overlays) sizing: small + # -- Global scheduling defaults (merged with per-component settings) scheduling: nodeSelector: {} tolerations: [] +# -- Override the chart name used in resource names nameOverride: "" +# -- Override the full resource name fullnameOverride: "" +# -- Create the target namespace (disable when ArgoCD manages namespace creation) createNamespace: false +# -- ArgoCD integration (adds sync-wave annotations when enabled) +argocd: + enabled: false + +# -- Strimzi CRD API version strimzi: apiVersion: kafka.strimzi.io/v1 +# -- Kafka version (must match the Strimzi-supported range) version: "4.2.0" +# -- Kafka broker configuration (KRaft mode, no ZooKeeper) brokers: + # -- Number of broker nodes replicas: 3 + # -- Resource requests and limits per broker resources: requests: cpu: "1" @@ -26,15 +44,19 @@ brokers: limits: cpu: "1" memory: "4Gi" + # -- JVM heap settings for brokers jvmOptions: xms: "2g" xmx: "2g" + # -- Persistent storage volumes for broker data persistence: volumes: - id: 0 size: 100Gi storageClass: "" + # -- Delete PVCs when the Kafka resource is deleted deleteClaim: false + # -- Kafka broker config properties 
(passed to server.properties) config: default.replication.factor: 2 min.insync.replicas: 2 # PRODUCTION: Set to 2 for durability @@ -45,17 +67,22 @@ brokers: offsets.topic.replication.factor: 2 num.partitions: 24 transaction.state.log.replication.factor: 2 + # -- Scheduling constraints for broker pods scheduling: nodeSelector: {} tolerations: [] affinity: {} + # -- Topology spread to distribute brokers across nodes topologySpreadConstraints: - maxSkew: 1 topologyKey: kubernetes.io/hostname whenUnsatisfiable: DoNotSchedule +# -- KRaft controller configuration controllers: + # -- Number of KRaft controller nodes replicas: 3 + # -- Resource requests and limits per controller resources: requests: cpu: "500m" @@ -63,9 +90,11 @@ controllers: limits: cpu: "1" memory: "2Gi" + # -- Persistent storage for KRaft metadata persistence: size: 20Gi storageClass: "" + # -- Scheduling constraints for controller pods scheduling: nodeSelector: {} tolerations: [] @@ -75,13 +104,16 @@ controllers: topologyKey: kubernetes.io/hostname whenUnsatisfiable: DoNotSchedule +# -- Kafka listeners configuration listeners: - name: internal port: 9092 type: internal tls: false +# -- Cruise Control for automatic partition rebalancing cruiseControl: + # -- Deploy Cruise Control alongside Kafka enabled: true resources: requests: @@ -93,16 +125,24 @@ cruiseControl: jvmOptions: xms: "1g" xmx: "2g" + # -- Auto-rebalance triggers autoRebalance: - mode: add-brokers - mode: remove-brokers +# -- Kafka Connect with ClickHouse sink connector kafkaConnect: + # -- Deploy Kafka Connect enabled: true + # -- Connect cluster name (used in Strimzi resource name) name: connect-ch + # -- Container image with the ClickHouse sink connector baked in image: "gcr.io/countly-dev-313620/strimzi/kafka-connect-clickhouse:4.2.0-1.3.5-strimzi-amd64" + # -- Number of Connect worker replicas replicas: 2 + # -- Override bootstrap servers (empty = auto-resolve from this Kafka cluster) bootstrapServers: "" + # -- Resource requests and 
limits per Connect worker resources: requests: cpu: "2" @@ -110,9 +150,11 @@ kafkaConnect: limits: cpu: "2" memory: "8Gi" + # -- JVM heap settings for Connect workers jvmOptions: xms: "5g" xmx: "5g" + # -- Kafka Connect worker-level configuration workerConfig: group.id: connect-ch config.storage.topic: connect_ch_configs @@ -129,15 +171,25 @@ kafkaConnect: connector.client.config.override.policy: All config.providers: env config.providers.env.class: org.apache.kafka.common.config.provider.EnvVarConfigProvider + # -- ClickHouse connection details for the sink connector clickhouse: + # -- Use a pre-created Secret instead of creating one from values existingSecret: "" + # -- Name of the Secret to create (when existingSecret is empty) secretName: clickhouse-auth + # -- ClickHouse host (empty = auto-resolve from clickhouseNamespace) host: "" + # -- ClickHouse HTTP port port: "8123" + # -- Enable SSL for ClickHouse connection ssl: "false" + # -- Target database database: "countly_drill" + # -- ClickHouse username username: "default" + # -- ClickHouse password (REQUIRED on first install unless existingSecret is set) password: "" + # -- Environment variables injected into Connect worker pods (connector config) env: EXACTLY_ONCE: "false" ERRORS_RETRY_TIMEOUT: "300" @@ -157,6 +209,7 @@ kafkaConnect: KAFKA_CONSUMER_SESSION_TIMEOUT_MS: "45000" KAFKA_CONSUMER_HEARTBEAT_INTERVAL_MS: "15000" KAFKA_CONSUMER_REQUEST_TIMEOUT_MS: "120000" + # -- HPA for Kafka Connect workers hpa: enabled: false minReplicas: 1 @@ -192,22 +245,32 @@ kafkaConnect: # When enabled, JAVA_TOOL_OPTIONS activates it. Creates spans for Kafka consumer/producer # and outbound HTTP (ClickHouse sink). 
otel: + # -- Enable OpenTelemetry Java agent instrumentation enabled: false + # -- Service name reported in traces serviceName: "kafka-connect" + # -- OTLP exporter endpoint exporterEndpoint: "http://countly-observability-alloy-otlp.observability.svc.cluster.local:4317" + # -- OTLP exporter protocol (grpc or http/protobuf) exporterProtocol: "grpc" + # -- Trace sampling strategy sampler: "parentbased_traceidratio" + # -- Sampling ratio (0.0 - 1.0) samplerArg: "1.0" + # -- Additional OTEL resource attributes resourceAttributes: "" # e.g. "deployment.environment=production,k8s.cluster.name=my-cluster" + # -- Scheduling constraints for Connect worker pods scheduling: nodeSelector: {} tolerations: [] affinity: {} + # -- Pod anti-affinity to spread Connect workers across nodes antiAffinity: enabled: true type: preferred topologyKey: kubernetes.io/hostname weight: 100 + # -- KafkaConnector resources deployed alongside Kafka Connect connectors: - name: ch-sink-drill-events enabled: true @@ -252,18 +315,27 @@ kafkaConnect: healthcheck.interval: "10000" dlq: {} +# -- Enable JMX metrics exposure on Kafka brokers metrics: enabled: true +# -- Namespace where ClickHouse is deployed (for auto-resolving Connect sink host) clickhouseNamespace: clickhouse +# -- Network policy restricting ingress to Kafka pods networkPolicy: + # -- Enable the NetworkPolicy enabled: false + # -- Namespaces allowed to connect to Kafka allowedNamespaces: - countly + # -- Allow ingress from the monitoring namespace allowMonitoring: false + # -- Label selector for the monitoring namespace monitoringNamespaceSelector: kubernetes.io/metadata.name: monitoring +# -- Secret retention settings secrets: + # -- Preserve secrets on helm uninstall/upgrade keep: true diff --git a/charts/countly-migration/Chart.yaml b/charts/countly-migration/Chart.yaml new file mode 100644 index 0000000..3632797 --- /dev/null +++ b/charts/countly-migration/Chart.yaml @@ -0,0 +1,29 @@ +apiVersion: v2 +name: countly-migration 
+description: MongoDB to ClickHouse batch migration service for Countly drill events +type: application +version: 0.1.0 +appVersion: "1.0.0" +home: https://countly.com +icon: https://count.ly/images/logos/countly-logo.svg +sources: + - https://github.com/Countly/countly-server +keywords: + - migration + - clickhouse + - mongodb + - countly + - batch-migration +maintainers: + - name: Countly + url: https://countly.com +dependencies: + - name: redis + version: ">=25.0.0" + repository: https://charts.bitnami.com/bitnami + condition: redis.enabled +annotations: + artifacthub.io/license: AGPL-3.0 + artifacthub.io/links: | + - name: Documentation + url: https://github.com/Countly/helm diff --git a/charts/countly-migration/README.md b/charts/countly-migration/README.md new file mode 100644 index 0000000..f88b56e --- /dev/null +++ b/charts/countly-migration/README.md @@ -0,0 +1,417 @@ +# Countly Migration Helm Chart + +Deploys the MongoDB-to-ClickHouse batch migration service for Countly drill events. Reads `drill_events*` collections from MongoDB, transforms documents, and inserts them into the ClickHouse `drill_events` table. Includes a bundled Redis instance for migration state tracking. + +**Chart version:** 0.1.0 +**App version:** 1.0.0 + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph source["Source"] + mongo["MongoDB\ncountly_drill.drill_events*"] + end + + subgraph migration["countly-migration namespace"] + svc["Migration Service\n:8080"] + redis["Redis\n:6379"] + end + + subgraph target["Target"] + ch["ClickHouse\ncountly_drill.drill_events"] + end + + mongo -->|read batches| svc + svc -->|insert rows| ch + svc <-->|hot state, bitmaps,\nerror buffers| redis + svc -->|run manifests| mongo +``` + +The migration service is a **singleton Deployment** with `Recreate` strategy. It processes collections sequentially in batches, with full crash recovery and resume support. 
State is stored in MongoDB (run manifests) and Redis (hot state, processed document bitmaps, error buffers). + +--- + +## Quick Start + +**Alongside sibling charts** (default — auto-discovers MongoDB and ClickHouse via DNS): + +```bash +helm install countly-migration ./charts/countly-migration \ + -n countly-migration --create-namespace \ + --set backingServices.mongodb.password="YOUR_MONGODB_APP_PASSWORD" \ + --set backingServices.clickhouse.password="YOUR_CLICKHOUSE_PASSWORD" +``` + +Only two values required. Everything else is auto-detected from the sibling `countly-mongodb` and `countly-clickhouse` charts. Redis is bundled automatically. + +**Standalone** (external MongoDB and ClickHouse): + +```bash +helm install countly-migration ./charts/countly-migration \ + -n countly-migration --create-namespace \ + --set backingServices.mongodb.mode=external \ + --set backingServices.mongodb.uri="mongodb://app:PASSWORD@mongodb-host:27017/admin?replicaSet=rs0&ssl=false" \ + --set backingServices.clickhouse.mode=external \ + --set backingServices.clickhouse.url="http://clickhouse-host:8123" \ + --set backingServices.clickhouse.password="PASSWORD" +``` + +> **Production deployment:** Use the profile-based approach from the [root README](../../README.md#manual-installation-without-helmfile) instead of `--set` flags. + +The migration service auto-discovers all `drill_events*` collections in the source MongoDB database and begins migrating. + +--- + +## Prerequisites + +- **MongoDB** — Source database with `drill_events*` collections in `countly_drill` database +- **ClickHouse** — Target with `drill_events` table in `countly_drill` database +- **Redis** — Bundled by default (Bitnami subchart), or provide an external URL + +If deploying alongside other Countly charts, MongoDB and ClickHouse are already available via their respective namespaces. + +--- + +## Configuration + +### Backing Services + +The chart connects to MongoDB, ClickHouse, and Redis. 
Each can be configured in **bundled** or **external** mode. + +#### MongoDB + +| Mode | Description | +|------|-------------| +| `bundled` (default) | Auto-constructs URI from sibling `countly-mongodb` chart using in-cluster DNS | +| `external` | Provide a full connection URI via `backingServices.mongodb.uri` | + +```yaml +# Bundled mode (default — alongside countly-mongodb chart) +backingServices: + mongodb: + password: "app-user-password" # Only required field + +# External mode +backingServices: + mongodb: + mode: external + uri: "mongodb://app:pass@host:27017/admin?replicaSet=rs0&ssl=false" +``` + +In bundled mode, the chart constructs the URI as: +`mongodb://app:{password}@{releaseName}-mongodb-svc.{namespace}.svc.cluster.local:27017/admin?replicaSet={releaseName}-mongodb` + +Override `releaseName` if your sibling charts use a non-standard prefix (default: `"countly"`). + +#### ClickHouse + +| Mode | Description | +|------|-------------| +| `bundled` (default) | Auto-constructs URL from sibling `countly-clickhouse` chart using in-cluster DNS | +| `external` | Provide a full HTTP URL via `backingServices.clickhouse.url` | + +```yaml +# Bundled mode (default — alongside countly-clickhouse chart) +backingServices: + clickhouse: + password: "default-password" # Only required field + +# External mode +backingServices: + clickhouse: + mode: external + url: "http://clickhouse-host:8123" + password: "default-password" +``` + +In bundled mode, the chart constructs the URL as: +`http://{releaseName}-clickhouse-clickhouse-headless.{namespace}.svc:8123` + +#### Redis + +Redis is **enabled by default** as a Bitnami subchart with AOF persistence. 
+ +```yaml +# Default: bundled Redis (already enabled) +redis: + enabled: true + +# External Redis: disable subchart and provide URL +redis: + enabled: false +backingServices: + redis: + url: "redis://my-external-redis:6379" +``` + +### Redis Configuration + +The bundled Redis defaults: + +| Setting | Default | Description | +|---------|---------|-------------| +| `redis.architecture` | `standalone` | Single-node Redis | +| `redis.auth.enabled` | `false` | No password (internal cluster traffic) | +| `redis.master.persistence.enabled` | `true` | Persistent volume for data | +| `redis.master.persistence.size` | `8Gi` | PVC size | +| `redis.commonConfiguration` | AOF + RDB | `appendonly yes`, `appendfsync everysec`, RDB snapshots | +| `redis.master.resources.requests.cpu` | `500m` | CPU request | +| `redis.master.resources.requests.memory` | `2Gi` | Memory request | +| `redis.master.resources.limits.cpu` | `1` | CPU limit | +| `redis.master.resources.limits.memory` | `2Gi` | Memory limit | + +To disable persistence (dev/test only): + +```yaml +redis: + master: + persistence: + enabled: false +``` + +### Secrets + +Three modes for managing credentials: + +| Mode | Description | Use Case | +|------|-------------|----------| +| `values` (default) | Secret created from Helm values | Development, testing | +| `existingSecret` | Reference a pre-created Kubernetes Secret | Production with manual secret management | +| `externalSecret` | External Secrets Operator (AWS SM, Azure KV) | Production with vault integration | + +The Secret must contain these keys: `MONGO_URI`, `CLICKHOUSE_URL`, `CLICKHOUSE_PASSWORD`, `REDIS_URL`. 
+ +```yaml +# Production: use pre-created secret +secrets: + mode: existingSecret + existingSecret: + name: countly-migration-secrets +``` + +### Migration Config + +Key environment variables (set via `config.*`): + +| Variable | Default | Description | +|----------|---------|-------------| +| `RERUN_MODE` | `resume` | `resume` (crash recovery), `new-run`, `clone-run` | +| `LOG_LEVEL` | `info` | `fatal`, `error`, `warn`, `info`, `debug`, `trace` | +| `MONGO_DB` | `countly_drill` | Source MongoDB database | +| `MONGO_COLLECTION_PREFIX` | `drill_events` | Collection name prefix to discover | +| `MONGO_BATCH_ROWS_TARGET` | `10000` | Documents per batch | +| `CLICKHOUSE_DB` | `countly_drill` | Target ClickHouse database | +| `CLICKHOUSE_TABLE` | `drill_events` | Target table | +| `CLICKHOUSE_USE_DEDUP_TOKEN` | `true` | Deduplication on insert | +| `BACKPRESSURE_ENABLED` | `true` | Monitor ClickHouse compaction pressure | +| `GC_ENABLED` | `true` | Automatic garbage collection | +| `GC_RSS_SOFT_LIMIT_MB` | `1536` | Trigger GC at this RSS | +| `GC_RSS_HARD_LIMIT_MB` | `2048` | Force exit at this RSS | + +### ArgoCD Integration + +Enable sync-wave annotations and external progress link: + +```yaml +argocd: + enabled: true + +externalLink: + enabled: true + url: "https://migration.example.internal/runs/current" +``` + +Sync-wave ordering (within this chart): +- Wave 0: Redis subchart resources, ConfigMap +- Wave 1: Secret +- Wave 10: Deployment, Service, Ingress, ServiceMonitor + +At the **stack level** (in `countly-argocd`), migration deploys **last** at wave 20 — after all other charts (databases, Kafka, Countly, observability) are healthy. This ensures the full system is stable before migration begins. + +Namespace is created by the ArgoCD Application (`CreateNamespace=true`), not by the chart. 
+
+---
+
+## Endpoints
+
+| Method | Path | Purpose |
+|--------|------|---------|
+| GET | `/healthz` | Liveness probe — always returns 200 |
+| GET | `/readyz` | Readiness probe — checks MongoDB, ClickHouse, Redis, ManifestStore, BatchRunner |
+| GET | `/stats` | Comprehensive JSON stats (throughput, integrity, memory, backpressure) |
+| GET | `/runs/current` | Current active run details |
+| GET | `/runs` | Paginated list of all runs (`?status=active\|completed\|failed&limit=20`) |
+| GET | `/runs/:id` | Single run details |
+| GET | `/runs/:id/batches` | Batches for a run (`?status=done\|failed&limit=50`) |
+| GET | `/runs/:id/failures` | Failure analysis (errors, mismatches, retries) |
+| GET | `/runs/:id/timeline` | Historical timeline snapshots |
+| GET | `/runs/:id/coverage` | Coverage percentage and batch counts |
+| POST | `/control/pause` | Pause after current batch |
+| POST | `/control/resume` | Resume processing |
+| POST | `/control/stop-after-batch` | Graceful stop |
+| POST | `/control/gc` | Trigger garbage collection (`{"mode":"now\|after-batch\|force"}`) |
+| DELETE | `/runs/:id/cache` | Cleanup Redis cache for a completed run |
+
+---
+
+## Verifying the Deployment
+
+In the commands below, replace `<release>` with your Helm release name (e.g. `countly-migration`).
+
+### 1. Check pods are running
+
+```bash
+kubectl get pods -n countly-migration
+```
+
+Expected: migration pod `1/1 Running`, redis-master pod `1/1 Running`.
+
+### 2. Check health
+
+```bash
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/healthz').then(r=>r.text()).then(console.log)"
+```
+
+Expected: `{"status":"alive"}`
+
+### 3. Check readiness
+
+```bash
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/readyz').then(r=>r.text()).then(console.log)"
+```
+
+Expected: `{"ready":true,"checks":{"mongo":true,"clickhouse":true,"redis":true,"manifestStore":true,"batchRunner":true}}`
+
+### 4. 
Check migration stats
+
+```bash
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/stats').then(r=>r.json()).then(d=>console.log(JSON.stringify(d,null,2)))"
+```
+
+### 5. Check run status
+
+```bash
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/runs?limit=5').then(r=>r.text()).then(console.log)"
+```
+
+### 6. Verify data in ClickHouse
+
+```bash
+kubectl exec -n clickhouse <clickhouse-pod> -- \
+  clickhouse-client --password <clickhouse-password> \
+  --query "SELECT count() FROM countly_drill.drill_events"
+```
+
+### 7. Port-forward for browser access
+
+```bash
+kubectl port-forward -n countly-migration svc/<release>-countly-migration 8080:8080
+# Then open: http://localhost:8080/stats
+```
+
+---
+
+## Operations
+
+Replace `<release>` with your Helm release name and `RUNID` with the target run ID.
+
+### Pause / Resume
+
+```bash
+# Pause after current batch completes
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/control/pause',{method:'POST'}).then(r=>r.text()).then(console.log)"
+
+# Resume
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/control/resume',{method:'POST'}).then(r=>r.text()).then(console.log)"
+```
+
+### Check failures
+
+```bash
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/runs/RUNID/failures').then(r=>r.text()).then(console.log)"
+```
+
+### Cleanup Redis cache after a completed run
+
+```bash
+kubectl exec -n countly-migration deploy/<release>-countly-migration -- \
+  node -e "fetch('http://localhost:8080/runs/RUNID/cache',{method:'DELETE'}).then(r=>r.text()).then(console.log)"
+```
+
+### View logs
+
+```bash
+kubectl logs -n countly-migration -l app.kubernetes.io/name=countly-migration -f
+```
+
+---
+
+## Multi-Pod Mode
+
+Scale the migration across multiple pods for faster throughput. Pods coordinate via Redis-based collection locking and range splitting. 
+
+```yaml
+deployment:
+  replicas: 3
+  strategy:
+    type: RollingUpdate
+
+pdb:
+  enabled: true
+  minAvailable: 1
+```
+
+When `replicas > 1`, the chart automatically:
+- Supports the `RollingUpdate` strategy (set `deployment.strategy.type` explicitly, as shown above)
+- Adds pod anti-affinity (spread across nodes)
+- Injects `POD_ID` from the pod name for coordination
+- Configures a preStop drain hook
+
+### Worker settings
+
+| Value | Default | Description |
+|-------|---------|-------------|
+| `worker.enabled` | `true` | Enable multi-pod coordination |
+| `worker.lockTtlSec` | `300` | Collection lock TTL (seconds) |
+| `worker.lockRenewMs` | `60000` | Lock renewal interval (ms) |
+| `worker.podHeartbeatMs` | `30000` | Heartbeat interval (ms) |
+| `worker.podDeadAfterSec` | `180` | Dead pod threshold (seconds) |
+| `worker.rangeParallelThreshold` | `500000` | Doc count to trigger range splitting |
+| `worker.rangeCount` | `100` | Time ranges per collection |
+| `worker.rangeLeaseTtlSec` | `300` | Range lease TTL (seconds) |
+
+For a comprehensive guide on multi-pod operations, scaling, and troubleshooting, see [docs/migration-guide.md](../../docs/migration-guide.md#multi-pod-mode). 
+ +--- + +## Schema Guardrails + +The chart includes `values.schema.json` that enforces: + +- **`deployment.replicas`** must be `>= 1` — set to 1 for single-pod, or higher for multi-pod +- **`deployment.strategy.type`** must be `Recreate` or `RollingUpdate` — use `RollingUpdate` for multi-pod +- **`secrets.mode`** must be one of: `values`, `existingSecret`, `externalSecret` +- **Worker settings** have minimum value constraints (e.g., `lockTtlSec >= 30`, `podHeartbeatMs >= 1000`) + +--- + +## Examples + +See the `examples/` directory: + +- **`values-development.yaml`** — Minimal development setup with bundled backing services +- **`values-production.yaml`** — Production setup with `existingSecret` mode +- **`values-multipod.yaml`** — Multi-pod setup with 3 replicas, RollingUpdate, PDB +- **`argocd-application.yaml`** — ArgoCD Application manifest with `CreateNamespace=true` + +--- + +## Full Documentation + +For architecture details, configuration reference, operations playbook, API reference, and troubleshooting, see [docs/migration-guide.md](../../docs/migration-guide.md). diff --git a/charts/countly-migration/examples/argocd-application.yaml b/charts/countly-migration/examples/argocd-application.yaml new file mode 100644 index 0000000..6b36665 --- /dev/null +++ b/charts/countly-migration/examples/argocd-application.yaml @@ -0,0 +1,32 @@ +# Example ArgoCD Application for countly-migration. +# Namespace is created by ArgoCD (CreateNamespace=true), not by the chart. 
+apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: countly-migration + namespace: argocd +spec: + project: default + source: + repoURL: https://github.com/Countly/helm.git + targetRevision: main + path: charts/countly-migration + helm: + releaseName: countly-migration + valueFiles: + - ../../environments/prod/migration.yaml + parameters: + - name: argocd.enabled + value: "true" + destination: + server: https://kubernetes.default.svc + namespace: countly-migration + syncPolicy: + syncOptions: + - CreateNamespace=true + - ApplyOutOfSyncOnly=true + managedNamespaceMetadata: + labels: + app.kubernetes.io/part-of: countly + annotations: + owner: data-platform diff --git a/charts/countly-migration/examples/values-development.yaml b/charts/countly-migration/examples/values-development.yaml new file mode 100644 index 0000000..aae85bf --- /dev/null +++ b/charts/countly-migration/examples/values-development.yaml @@ -0,0 +1,23 @@ +# Development values example for countly-migration. +# Bundled mode (default) — auto-discovers sibling chart endpoints via DNS. +# Only passwords are required; everything else uses sane defaults. + +# Backing service passwords (must match sibling charts) +backingServices: + mongodb: + password: "devpassword" + clickhouse: + password: "devpassword" + +# Redis is bundled automatically — no config needed. + +config: + LOG_LEVEL: "debug" + +resources: + requests: + cpu: "250m" + memory: "512Mi" + limits: + cpu: "1" + memory: "2Gi" diff --git a/charts/countly-migration/examples/values-multipod.yaml b/charts/countly-migration/examples/values-multipod.yaml new file mode 100644 index 0000000..b930935 --- /dev/null +++ b/charts/countly-migration/examples/values-multipod.yaml @@ -0,0 +1,37 @@ +# Multi-pod values example for countly-migration. +# Runs 3 replicas with Redis-based collection locking and range splitting. 
+ +deployment: + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + +pdb: + enabled: true + minAvailable: 1 + +# Backing service passwords (must match sibling charts) +backingServices: + mongodb: + password: "your-mongodb-password" + clickhouse: + password: "your-clickhouse-password" + +# Worker coordination settings (defaults are sane for most deployments) +worker: + enabled: true + lockTtlSec: 300 + podDeadAfterSec: 180 + rangeParallelThreshold: 500000 + rangeCount: 100 + +resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "2" + memory: "3Gi" diff --git a/charts/countly-migration/examples/values-production.yaml b/charts/countly-migration/examples/values-production.yaml new file mode 100644 index 0000000..9f97e00 --- /dev/null +++ b/charts/countly-migration/examples/values-production.yaml @@ -0,0 +1,70 @@ +# Production values example for countly-migration. +# Uses existingSecret mode — credentials are pre-created or managed externally. 

image:
  repository: registry.example.com/countly/countly-migration
  tag: "1.0.0"

argocd:
  enabled: true

deployment:
  replicas: 1
  strategy:
    type: Recreate
  terminationGracePeriodSeconds: 90

# For multi-pod mode, CHANGE the deployment block above (do not paste a
# second top-level `deployment:` key — duplicate YAML keys are either
# rejected or silently last-wins, dropping terminationGracePeriodSeconds):
#   replicas: 3
#   strategy:
#     type: RollingUpdate
# and additionally add:
# pdb:
#   enabled: true
#   minAvailable: 1

service:
  port: 8080

ingress:
  enabled: true
  className: nginx
  annotations:
    nginx.ingress.kubernetes.io/whitelist-source-range: "10.0.0.0/8,192.168.0.0/16"
    argocd.argoproj.io/ignore-default-links: "true"
  hosts:
    - host: migration.example.internal
      paths:
        - path: /
          pathType: Prefix

externalLink:
  enabled: true
  url: "https://migration.example.internal/runs/current"

# Reference pre-created secret containing MONGO_URI, CLICKHOUSE_URL, CLICKHOUSE_PASSWORD, REDIS_URL
secrets:
  mode: existingSecret
  keep: true
  existingSecret:
    name: countly-migration-secrets

config:
  SERVICE_NAME: countly-migration
  SERVICE_PORT: "8080"
  RERUN_MODE: "resume"
  LOG_LEVEL: "info"
  MONGO_DB: "countly_drill"
  MONGO_COLLECTION_PREFIX: "drill_events"
  CLICKHOUSE_DB: "countly_drill"
  CLICKHOUSE_TABLE: "drill_events"
  MANIFEST_DB: "countly_drill"
  REDIS_KEY_PREFIX: "mig"

resources:
  requests:
    cpu: "500m"
    memory: "1Gi"
  limits:
    cpu: "2"
    memory: "3Gi"
diff --git a/charts/countly-migration/templates/NOTES.txt b/charts/countly-migration/templates/NOTES.txt
new file mode 100644
index 0000000..0354751
--- /dev/null
+++ b/charts/countly-migration/templates/NOTES.txt
@@ -0,0 +1,87 @@
+Countly Migration service has been deployed!
+ +=== Service Endpoints === + + Health: GET /healthz (liveness probe) + Ready: GET /readyz (readiness — checks mongo, clickhouse, redis) + Stats: GET /stats (throughput, memory, backpressure) + Runs: GET /runs/current (active run details) + Control: POST /control/{pause,resume,stop-after-batch} + +=== Access === +{{- if .Values.ingress.enabled }} +{{- range .Values.ingress.hosts }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ .host }} +{{- end }} +{{- else }} + + kubectl port-forward svc/{{ include "countly-migration.fullname" . }} {{ .Values.service.port }}:{{ .Values.service.port }} -n {{ .Release.Namespace }} + # Then open: http://localhost:{{ .Values.service.port }}/stats +{{- end }} +{{- if and .Values.externalLink.enabled .Values.externalLink.url }} + + ArgoCD progress link: {{ .Values.externalLink.url }} +{{- end }} + +=== Redis === +{{- if .Values.redis.enabled }} + + Bundled Redis (Bitnami subchart): + URL: redis://{{ include "countly-migration.fullname" . }}-redis-master:6379 + Persistence: {{ if .Values.redis.master.persistence.enabled }}enabled ({{ .Values.redis.master.persistence.size }}){{ else }}disabled{{ end }} + Auth: {{ if .Values.redis.auth.enabled }}enabled{{ else }}disabled{{ end }} +{{- else if .Values.backingServices.redis.url }} + + External Redis: {{ .Values.backingServices.redis.url }} +{{- else }} + + WARNING: No Redis configured. The migration service requires Redis for state tracking. + Either enable the bundled Redis (redis.enabled=true) or provide backingServices.redis.url. +{{- end }} + +=== Secrets === + + Mode: {{ .Values.secrets.mode }} +{{- if eq .Values.secrets.mode "existingSecret" }} + Secret: {{ .Values.secrets.existingSecret.name }} +{{- end }} +{{- if eq .Values.secrets.mode "externalSecret" }} + External Secrets Operator will provision the secret. +{{- end }} + +=== Verify Deployment === + + 1. Check pods: + kubectl get pods -n {{ .Release.Namespace }} + + 2. 
Check health: + kubectl exec -n {{ .Release.Namespace }} deploy/{{ include "countly-migration.fullname" . }} -- \ + node -e "fetch('http://localhost:{{ .Values.service.port }}/healthz').then(r=>r.text()).then(console.log)" + + 3. Check readiness (all backing services connected): + kubectl exec -n {{ .Release.Namespace }} deploy/{{ include "countly-migration.fullname" . }} -- \ + node -e "fetch('http://localhost:{{ .Values.service.port }}/readyz').then(r=>r.text()).then(console.log)" + + 4. Check migration progress: + kubectl exec -n {{ .Release.Namespace }} deploy/{{ include "countly-migration.fullname" . }} -- \ + node -e "fetch('http://localhost:{{ .Values.service.port }}/runs?limit=5').then(r=>r.text()).then(console.log)" + + 5. View logs: + kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/name=countly-migration -f +{{- if gt (int .Values.deployment.replicas) 1 }} + +=== Multi-Pod Mode ({{ .Values.deployment.replicas }} replicas) === + + Global control (affects all pods): + POST /control/global/pause Pause all pods + POST /control/global/resume Resume all pods + POST /control/global/stop Stop all pods + + Coordination: + GET /control/locks List collection locks + GET /control/pods List all pods and status + POST /control/drain Graceful drain (preStop calls this) + + Scale up/down: + kubectl scale deploy/{{ include "countly-migration.fullname" . }} -n {{ .Release.Namespace }} --replicas=N +{{- end }} diff --git a/charts/countly-migration/templates/_helpers.tpl b/charts/countly-migration/templates/_helpers.tpl new file mode 100644 index 0000000..8c3da74 --- /dev/null +++ b/charts/countly-migration/templates/_helpers.tpl @@ -0,0 +1,149 @@ +{{/* +Whether multi-pod mode is active (replicas > 1). +*/}} +{{- define "countly-migration.isMultiPod" -}} +{{- if gt (int .Values.deployment.replicas) 1 }}true{{- end }} +{{- end }} + +{{/* +Expand the name of the chart. 
+*/}} +{{- define "countly-migration.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "countly-migration.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "countly-migration.labels" -}} +helm.sh/chart: {{ include "countly-migration.chart" . }} +{{ include "countly-migration.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Chart label +*/}} +{{- define "countly-migration.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "countly-migration.selectorLabels" -}} +app.kubernetes.io/name: {{ include "countly-migration.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Service account name +*/}} +{{- define "countly-migration.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "countly-migration.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +ArgoCD sync-wave annotation (only when argocd.enabled). +Usage: include "countly-migration.syncWave" (dict "wave" "0" "root" .) 
+*/}} +{{- define "countly-migration.syncWave" -}} +{{- if ((.root.Values.argocd).enabled) }} +argocd.argoproj.io/sync-wave: {{ .wave | quote }} +{{- end }} +{{- end -}} + +{{/* +Secret name resolution across three modes. +*/}} +{{- define "countly-migration.secretName" -}} +{{- if eq (.Values.secrets.mode | default "values") "existingSecret" }} +{{- required "secrets.existingSecret.name is required when secrets.mode=existingSecret" .Values.secrets.existingSecret.name }} +{{- else }} +{{- include "countly-migration.fullname" . }} +{{- end }} +{{- end }} + +{{/* +MongoDB URI computation. +External mode: use provided URI directly. +Bundled mode: construct from sibling countly-mongodb chart DNS. +*/}} +{{- define "countly-migration.mongoUri" -}} +{{- $bs := .Values.backingServices.mongodb -}} +{{- if eq ($bs.mode | default "bundled") "external" -}} +{{- required "backingServices.mongodb.uri is required when mode=external" $bs.uri -}} +{{- else -}} +{{- $prefix := $bs.releaseName | default "countly" -}} +{{- $host := $bs.host | default (printf "%s-mongodb-svc.%s.svc.cluster.local" $prefix ($bs.namespace | default "mongodb")) -}} +{{- $port := $bs.port | default "27017" -}} +{{- $user := $bs.username | default "app" -}} +{{- $pass := $bs.password | default "" -}} +{{- $db := $bs.database | default "admin" -}} +{{- $rs := $bs.replicaSet | default (printf "%s-mongodb" $prefix) -}} +mongodb://{{ $user }}:{{ $pass }}@{{ $host }}:{{ $port }}/{{ $db }}?replicaSet={{ $rs }}&ssl=false +{{- end -}} +{{- end -}} + +{{/* +ClickHouse URL computation. +External mode: use provided URL directly. +Bundled mode: construct from sibling countly-clickhouse chart DNS. 
+*/}} +{{- define "countly-migration.clickhouseUrl" -}} +{{- $bs := .Values.backingServices.clickhouse -}} +{{- if eq ($bs.mode | default "bundled") "external" -}} +{{- required "backingServices.clickhouse.url is required when mode=external" $bs.url -}} +{{- else -}} +{{- $prefix := $bs.releaseName | default "countly" -}} +{{- $host := $bs.host | default (printf "%s-clickhouse-clickhouse-headless.%s.svc" $prefix ($bs.namespace | default "clickhouse")) -}} +{{- $port := $bs.port | default "8123" -}} +{{- $tls := $bs.tls | default "false" -}} +{{- $scheme := ternary "https" "http" (eq (toString $tls) "true") -}} +{{- $scheme }}://{{ $host }}:{{ $port }} +{{- end -}} +{{- end -}} + +{{/* +Redis URL computation. +If backingServices.redis.url is set, use it. +If redis subchart is enabled, auto-wire to the subchart service. +*/}} +{{- define "countly-migration.redisUrl" -}} +{{- if .Values.backingServices.redis.url -}} +{{- .Values.backingServices.redis.url -}} +{{- else if .Values.redis.enabled -}} +redis://{{ include "countly-migration.fullname" . }}-redis-master:6379 +{{- end -}} +{{- end -}} + +{{/* +Image reference with tag defaulting to "latest". +*/}} +{{- define "countly-migration.image" -}} +{{- $tag := .Values.image.tag | default "latest" -}} +{{ .Values.image.repository }}:{{ $tag }} +{{- end }} diff --git a/charts/countly-migration/templates/configmap.yaml b/charts/countly-migration/templates/configmap.yaml new file mode 100644 index 0000000..efdad7a --- /dev/null +++ b/charts/countly-migration/templates/configmap.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-migration.syncWave" (dict "wave" "0" "root" .) 
| nindent 4 }} + {{- end }} +data: + {{- range $k, $v := .Values.config }} + {{ $k }}: {{ $v | quote }} + {{- end }} + {{- /* Multi-pod worker configuration */ -}} + {{- with .Values.worker }} + MULTI_POD_ENABLED: {{ .enabled | quote }} + LOCK_TTL_SECONDS: {{ .lockTtlSec | quote }} + LOCK_RENEW_MS: {{ .lockRenewMs | quote }} + POD_HEARTBEAT_MS: {{ .podHeartbeatMs | quote }} + POD_DEAD_AFTER_SEC: {{ .podDeadAfterSec | quote }} + RANGE_PARALLEL_THRESHOLD: {{ .rangeParallelThreshold | quote }} + RANGE_COUNT: {{ .rangeCount | quote }} + RANGE_LEASE_TTL_SEC: {{ .rangeLeaseTtlSec | quote }} + PROGRESS_UPDATE_MS: {{ .progressUpdateMs | quote }} + ASYNC_WRITE_FLUSH_INTERVAL_MS: {{ .asyncWriteFlushIntervalMs | quote }} + ASYNC_WRITE_FLUSH_BATCH_SIZE: {{ .asyncWriteFlushBatchSize | quote }} + {{- end }} diff --git a/charts/countly-migration/templates/deployment.yaml b/charts/countly-migration/templates/deployment.yaml new file mode 100644 index 0000000..8ac7549 --- /dev/null +++ b/charts/countly-migration/templates/deployment.yaml @@ -0,0 +1,117 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if or (and .Values.externalLink.enabled .Values.externalLink.url) ((.Values.argocd).enabled) }} + annotations: + {{- if and .Values.externalLink.enabled .Values.externalLink.url }} + link.argocd.argoproj.io/external-link: {{ .Values.externalLink.url | quote }} + {{- end }} + {{- include "countly-migration.syncWave" (dict "wave" "10" "root" .) 
| nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.deployment.replicas }} + strategy: + type: {{ .Values.deployment.strategy.type }} + {{- if eq .Values.deployment.strategy.type "RollingUpdate" }} + rollingUpdate: + maxSurge: {{ .Values.deployment.strategy.rollingUpdate.maxSurge | default 1 }} + maxUnavailable: {{ .Values.deployment.strategy.rollingUpdate.maxUnavailable | default 0 }} + {{- end }} + selector: + matchLabels: + {{- include "countly-migration.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "countly-migration.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "countly-migration.serviceAccountName" . }} + terminationGracePeriodSeconds: {{ .Values.deployment.terminationGracePeriodSeconds }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.image.pullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "countly-migration.name" . }} + image: {{ include "countly-migration.image" . | quote }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + envFrom: + - configMapRef: + name: {{ include "countly-migration.fullname" . }} + - secretRef: + name: {{ include "countly-migration.secretName" . 
}}
          env:
            # Pod name doubles as the coordination worker ID (heartbeats,
            # collection locks) in multi-pod mode.
            - name: POD_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          lifecycle:
            preStop:
              # NOTE(review): NOTES.txt documents /control/drain as a POST
              # endpoint, but a Kubernetes httpGet lifecycle handler always
              # issues a GET — the drain would never fire. POST explicitly
              # via node/fetch, the same runtime the NOTES health-check
              # examples already assume is present in this image. A failed
              # hook does not block termination; Kubernetes only records a
              # FailedPreStopHook event.
              exec:
                command:
                  - node
                  - -e
                  - "fetch('http://localhost:{{ .Values.service.port }}/control/drain', { method: 'POST' }).catch(() => {})"
          livenessProbe:
            httpGet:
              path: {{ .Values.probes.liveness.path }}
              port: http
            initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }}
            periodSeconds: {{ .Values.probes.liveness.periodSeconds }}
            timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }}
            failureThreshold: {{ .Values.probes.liveness.failureThreshold }}
          readinessProbe:
            httpGet:
              path: {{ .Values.probes.readiness.path }}
              port: http
            initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }}
            periodSeconds: {{ .Values.probes.readiness.periodSeconds }}
            timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }}
            failureThreshold: {{ .Values.probes.readiness.failureThreshold }}
          {{- with .Values.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.containerSecurityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if .Values.affinity }}
      affinity:
        {{- toYaml .Values.affinity | nindent 8 }}
      {{- else if gt (int .Values.deployment.replicas) 1 }}
      # Default for multi-pod mode: prefer spreading replicas across nodes
      # so a single node failure cannot take out coordination.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    {{- include "countly-migration.selectorLabels" . | nindent 20 }}
                topologyKey: kubernetes.io/hostname
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml .
| nindent 8 }} + {{- end }} diff --git a/charts/countly-migration/templates/external-secret.yaml b/charts/countly-migration/templates/external-secret.yaml new file mode 100644 index 0000000..1daeb50 --- /dev/null +++ b/charts/countly-migration/templates/external-secret.yaml @@ -0,0 +1,33 @@ +{{- if eq (.Values.secrets.mode | default "values") "externalSecret" }} +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-migration.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} +spec: + refreshInterval: {{ .Values.secrets.externalSecret.refreshInterval | default "1h" }} + secretStoreRef: + name: {{ required "secrets.externalSecret.secretStoreRef.name is required when secrets.mode=externalSecret" .Values.secrets.externalSecret.secretStoreRef.name }} + kind: {{ .Values.secrets.externalSecret.secretStoreRef.kind | default "ClusterSecretStore" }} + target: + name: {{ include "countly-migration.fullname" . 
}} + creationPolicy: Owner + data: + - secretKey: MONGO_URI + remoteRef: + key: {{ required "secrets.externalSecret.remoteRefs.mongoUri is required" .Values.secrets.externalSecret.remoteRefs.mongoUri }} + - secretKey: CLICKHOUSE_URL + remoteRef: + key: {{ required "secrets.externalSecret.remoteRefs.clickhouseUrl is required" .Values.secrets.externalSecret.remoteRefs.clickhouseUrl }} + - secretKey: CLICKHOUSE_PASSWORD + remoteRef: + key: {{ required "secrets.externalSecret.remoteRefs.clickhousePassword is required" .Values.secrets.externalSecret.remoteRefs.clickhousePassword }} + - secretKey: REDIS_URL + remoteRef: + key: {{ required "secrets.externalSecret.remoteRefs.redisUrl is required" .Values.secrets.externalSecret.remoteRefs.redisUrl }} +{{- end }} diff --git a/charts/countly-migration/templates/ingress.yaml b/charts/countly-migration/templates/ingress.yaml new file mode 100644 index 0000000..aa8a6e3 --- /dev/null +++ b/charts/countly-migration/templates/ingress.yaml @@ -0,0 +1,41 @@ +{{- if .Values.ingress.enabled }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if or (and .Values.externalLink.enabled .Values.externalLink.url) ((.Values.argocd).enabled) .Values.ingress.annotations }} + annotations: + {{- if and .Values.externalLink.enabled .Values.externalLink.url }} + link.argocd.argoproj.io/external-link: {{ .Values.externalLink.url | quote }} + {{- end }} + {{- include "countly-migration.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- with .Values.ingress.annotations }} + {{- toYaml . 
| nindent 4 }} + {{- end }} + {{- end }} +spec: + {{- if .Values.ingress.className }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ include "countly-migration.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} + {{- with .Values.ingress.tls }} + tls: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/countly-migration/templates/networkpolicy.yaml b/charts/countly-migration/templates/networkpolicy.yaml new file mode 100644 index 0000000..5b8959d --- /dev/null +++ b/charts/countly-migration/templates/networkpolicy.yaml @@ -0,0 +1,27 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-migration.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} +spec: + podSelector: + matchLabels: + {{- include "countly-migration.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + {{- with .Values.networkPolicy.ingress }} + ingress: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.networkPolicy.egress }} + egress: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/countly-migration/templates/pdb.yaml b/charts/countly-migration/templates/pdb.yaml new file mode 100644 index 0000000..2196a5f --- /dev/null +++ b/charts/countly-migration/templates/pdb.yaml @@ -0,0 +1,13 @@ +{{- if .Values.pdb.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . 
| nindent 4 }} +spec: + minAvailable: {{ .Values.pdb.minAvailable }} + selector: + matchLabels: + {{- include "countly-migration.selectorLabels" . | nindent 6 }} +{{- end }} diff --git a/charts/countly-migration/templates/secret.yaml b/charts/countly-migration/templates/secret.yaml new file mode 100644 index 0000000..4a8294f --- /dev/null +++ b/charts/countly-migration/templates/secret.yaml @@ -0,0 +1,44 @@ +{{- if eq (.Values.secrets.mode | default "values") "values" }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if or .Values.secrets.keep ((.Values.argocd).enabled) }} + annotations: + {{- if .Values.secrets.keep }} + helm.sh/resource-policy: keep + {{- end }} + {{- include "countly-migration.syncWave" (dict "wave" "1" "root" .) | nindent 4 }} + {{- end }} +type: Opaque +data: + {{- $secretName := include "countly-migration.fullname" . }} + {{- $existing := lookup "v1" "Secret" .Release.Namespace $secretName }} + {{- if and (not .Values.backingServices.mongodb.password) (eq (.Values.backingServices.mongodb.mode | default "bundled") "bundled") }} + {{- if and $existing (index $existing.data "MONGO_URI") }} + MONGO_URI: {{ index $existing.data "MONGO_URI" }} + {{- else }} + {{- fail "backingServices.mongodb.password is required on first install when mode=bundled. Set the value or provide secrets.existingSecret." }} + {{- end }} + {{- else }} + MONGO_URI: {{ include "countly-migration.mongoUri" . | b64enc }} + {{- end }} + CLICKHOUSE_URL: {{ include "countly-migration.clickhouseUrl" . 
| b64enc }} + {{- if .Values.backingServices.clickhouse.password }} + CLICKHOUSE_PASSWORD: {{ .Values.backingServices.clickhouse.password | b64enc }} + {{- else if and $existing (index $existing.data "CLICKHOUSE_PASSWORD") }} + CLICKHOUSE_PASSWORD: {{ index $existing.data "CLICKHOUSE_PASSWORD" }} + {{- else }} + CLICKHOUSE_PASSWORD: {{ "" | b64enc }} + {{- end }} + {{- $redisUrl := include "countly-migration.redisUrl" . }} + {{- if $redisUrl }} + REDIS_URL: {{ $redisUrl | b64enc }} + {{- else if and $existing (index $existing.data "REDIS_URL") }} + REDIS_URL: {{ index $existing.data "REDIS_URL" }} + {{- else }} + REDIS_URL: {{ "" | b64enc }} + {{- end }} +{{- end }} diff --git a/charts/countly-migration/templates/service.yaml b/charts/countly-migration/templates/service.yaml new file mode 100644 index 0000000..d18d7c2 --- /dev/null +++ b/charts/countly-migration/templates/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if or ((.Values.argocd).enabled) .Values.service.annotations }} + annotations: + {{- include "countly-migration.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- with .Values.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} +spec: + type: {{ .Values.service.type }} + selector: + {{- include "countly-migration.selectorLabels" . | nindent 4 }} + ports: + - name: http + port: {{ .Values.service.port }} + targetPort: http + protocol: TCP diff --git a/charts/countly-migration/templates/serviceaccount.yaml b/charts/countly-migration/templates/serviceaccount.yaml new file mode 100644 index 0000000..fc9d0e1 --- /dev/null +++ b/charts/countly-migration/templates/serviceaccount.yaml @@ -0,0 +1,16 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "countly-migration.serviceAccountName" . 
}} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if or ((.Values.argocd).enabled) .Values.serviceAccount.annotations }} + annotations: + {{- include "countly-migration.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} +automountServiceAccountToken: false +{{- end }} diff --git a/charts/countly-migration/templates/servicemonitor.yaml b/charts/countly-migration/templates/servicemonitor.yaml new file mode 100644 index 0000000..6299115 --- /dev/null +++ b/charts/countly-migration/templates/servicemonitor.yaml @@ -0,0 +1,21 @@ +{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "countly-migration.fullname" . }} + labels: + {{- include "countly-migration.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-migration.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "countly-migration.selectorLabels" . | nindent 6 }} + endpoints: + - port: http + path: {{ .Values.serviceMonitor.path }} + interval: {{ .Values.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} +{{- end }} diff --git a/charts/countly-migration/templates/tests/test-health.yaml b/charts/countly-migration/templates/tests/test-health.yaml new file mode 100644 index 0000000..1c4142c --- /dev/null +++ b/charts/countly-migration/templates/tests/test-health.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "countly-migration.fullname" . }}-test-health + namespace: {{ .Release.Namespace }} + labels: + {{- include "countly-migration.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: test + image: busybox:1.35 + command: ['sh', '-c', 'wget -qO- --timeout=10 http://{{ include "countly-migration.fullname" . }}:{{ .Values.service.port }}/healthz'] diff --git a/charts/countly-migration/values.schema.json b/charts/countly-migration/values.schema.json new file mode 100644 index 0000000..a2e6ec3 --- /dev/null +++ b/charts/countly-migration/values.schema.json @@ -0,0 +1,229 @@ +{ + "$schema": "https://json-schema.org/draft-07/schema#", + "title": "countly-migration values", + "type": "object", + "properties": { + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string", + "minLength": 1, + "description": "Container image repository" + }, + "tag": { + "type": "string", + "description": "Image tag (defaults to appVersion)" + }, + "pullPolicy": { + "type": "string", + "enum": ["Always", "IfNotPresent", "Never"] + } + }, + "required": ["repository"] + }, + "deployment": { + "type": "object", + "properties": { + "replicas": { + "type": "integer", + "minimum": 1, + "description": "Number of migration pods (>1 enables multi-pod coordination)" + }, + "strategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["Recreate", "RollingUpdate"], + "description": "Deployment strategy (use RollingUpdate for multi-pod)" + } + }, + "required": ["type"] + }, + "terminationGracePeriodSeconds": { + "type": "integer", + "minimum": 30 + } + }, + "required": ["replicas", "strategy"] + }, + "service": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["ClusterIP", "NodePort", "LoadBalancer"] + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + } + }, + "required": ["port"] + }, + "secrets": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["values", 
"existingSecret", "externalSecret"], + "description": "Secret provisioning mode" + }, + "keep": { + "type": "boolean" + }, + "existingSecret": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + }, + "externalSecret": { + "type": "object", + "properties": { + "refreshInterval": { + "type": "string" + }, + "secretStoreRef": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "kind": { + "type": "string" + } + } + }, + "remoteRefs": { + "type": "object", + "properties": { + "mongoUri": { "type": "string" }, + "clickhouseUrl": { "type": "string" }, + "clickhousePassword": { "type": "string" }, + "redisUrl": { "type": "string" } + } + } + } + } + }, + "required": ["mode"] + }, + "backingServices": { + "type": "object", + "properties": { + "mongodb": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["external", "bundled"] + }, + "releaseName": { + "type": "string", + "description": "Release name prefix of the sibling countly-mongodb chart" + } + } + }, + "clickhouse": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["external", "bundled"] + }, + "releaseName": { + "type": "string", + "description": "Release name prefix of the sibling countly-clickhouse chart" + } + } + }, + "redis": { + "type": "object", + "properties": { + "url": { + "type": "string" + } + } + } + } + }, + "config": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Non-secret environment variables" + }, + "probes": { + "type": "object", + "properties": { + "liveness": { + "type": "object", + "properties": { + "path": { "type": "string" }, + "initialDelaySeconds": { "type": "integer", "minimum": 0 }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 } + } + }, + "readiness": { + "type": "object", + "properties": { + "path": { 
"type": "string" }, + "initialDelaySeconds": { "type": "integer", "minimum": 0 }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 } + } + } + } + }, + "resources": { + "type": "object", + "properties": { + "requests": { "type": "object" }, + "limits": { "type": "object" } + } + }, + "externalLink": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "url": { "type": "string" } + } + }, + "serviceMonitor": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "interval": { "type": "string" }, + "scrapeTimeout": { "type": "string" }, + "path": { "type": "string" } + } + }, + "worker": { + "type": "object", + "description": "Multi-pod worker coordination settings", + "properties": { + "enabled": { "type": "boolean", "description": "Enable multi-pod coordination mode" }, + "lockTtlSec": { "type": "integer", "minimum": 30, "description": "Collection lock TTL in seconds" }, + "lockRenewMs": { "type": "integer", "minimum": 1000, "description": "Lock renewal interval in ms" }, + "podHeartbeatMs": { "type": "integer", "minimum": 1000, "description": "Pod heartbeat interval in ms" }, + "podDeadAfterSec": { "type": "integer", "minimum": 30, "description": "Consider pod dead after N seconds" }, + "rangeParallelThreshold": { "type": "integer", "minimum": 0, "description": "Doc count to trigger range splitting" }, + "rangeCount": { "type": "integer", "minimum": 1, "description": "Number of time ranges" }, + "rangeLeaseTtlSec": { "type": "integer", "minimum": 30, "description": "Range lease TTL in seconds" }, + "progressUpdateMs": { "type": "integer", "minimum": 1000, "description": "Progress report interval in ms" }, + "asyncWriteFlushIntervalMs": { "type": "integer", "minimum": 1000, "description": "Async batch flush interval in ms" }, + "asyncWriteFlushBatchSize": { "type": "integer", "minimum": 1, 
"description": "Async batch flush size" } + } + } + }, + "required": ["image", "deployment", "service", "secrets"] +} diff --git a/charts/countly-migration/values.yaml b/charts/countly-migration/values.yaml new file mode 100644 index 0000000..29cf0d9 --- /dev/null +++ b/charts/countly-migration/values.yaml @@ -0,0 +1,284 @@ +# -- Override the chart name used in resource names +nameOverride: "" +# -- Override the full resource name +fullnameOverride: "" + +# -- Container image configuration +image: + repository: countly/countly-migration + # -- Defaults to "latest" when empty + tag: "" + pullPolicy: IfNotPresent + pullSecrets: [] + +# -- Service account configuration +serviceAccount: + create: true + name: "" + annotations: {} + +# -- ArgoCD integration +argocd: + enabled: false + +# -- Deployment configuration +# For multi-pod mode: set replicas > 1 and strategy.type to RollingUpdate +deployment: + replicas: 1 + strategy: + type: Recreate + # RollingUpdate settings (used when strategy.type is RollingUpdate) + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + terminationGracePeriodSeconds: 90 + +# -- Additional pod annotations +podAnnotations: {} +# -- Additional pod labels +podLabels: {} + +# -- Service configuration +service: + type: ClusterIP + port: 8080 + annotations: {} + +# -- Ingress configuration (optional) +ingress: + enabled: false + className: "" + annotations: {} + hosts: [] + # - host: migration.example.internal + # paths: + # - path: / + # pathType: Prefix + tls: [] + +# -- ArgoCD external link on the Deployment resource +externalLink: + enabled: false + # -- URL shown in ArgoCD UI (e.g. 
https://migration.example.internal/runs/current) + url: "" + +# -- Backing services (credential sources for MongoDB, ClickHouse, Redis) +backingServices: + mongodb: + # -- bundled: auto-discover from sibling countly-mongodb chart; external: provide full URI + mode: bundled + # -- Full MongoDB connection string (external mode only) + uri: "" + # -- Release name prefix of the sibling countly-mongodb chart (bundled mode) + releaseName: "countly" + # -- MongoDB host override (bundled mode; auto-constructed from releaseName if empty) + host: "" + port: "27017" + username: "app" + # -- MongoDB app user password (must match countly-mongodb chart) + password: "" + database: "admin" + replicaSet: "" + # -- Namespace where countly-mongodb chart is deployed + namespace: mongodb + clickhouse: + # -- bundled: auto-discover from sibling countly-clickhouse chart; external: provide full URL + mode: bundled + # -- Full ClickHouse HTTP URL (external mode only) + url: "" + # -- Release name prefix of the sibling countly-clickhouse chart (bundled mode) + releaseName: "countly" + # -- ClickHouse host override (bundled mode; auto-constructed from releaseName if empty) + host: "" + port: "8123" + tls: "false" + username: "default" + # -- ClickHouse default user password (must match countly-clickhouse chart) + password: "" + # -- Namespace where countly-clickhouse chart is deployed + namespace: clickhouse + redis: + # -- Full Redis connection URL (e.g. 
redis://redis:6379) + url: "" + +# -- Secrets management +secrets: + # -- values: create Secret from values; existingSecret: reference pre-created; externalSecret: use ESO + mode: values + # -- Preserve secrets on helm uninstall/upgrade + keep: true + existingSecret: + # -- Name of pre-created Secret containing MONGO_URI, CLICKHOUSE_URL, CLICKHOUSE_PASSWORD, REDIS_URL + name: "" + externalSecret: + refreshInterval: "1h" + secretStoreRef: + name: "" + kind: ClusterSecretStore + remoteRefs: + mongoUri: "" + clickhouseUrl: "" + clickhousePassword: "" + redisUrl: "" + +# -- Application config (non-secret environment variables) +config: + SERVICE_NAME: countly-migration + SERVICE_PORT: "8080" + SERVICE_HOST: "0.0.0.0" + GRACEFUL_SHUTDOWN_TIMEOUT_MS: "60000" + RERUN_MODE: "resume" + LOG_LEVEL: "info" + + # MongoDB source + MONGO_DB: "countly_drill" + MONGO_COLLECTION_PREFIX: "drill_events" + MONGO_READ_PREFERENCE: "primary" + MONGO_READ_CONCERN: "majority" + MONGO_RETRY_READS: "true" + MONGO_APP_NAME: "countly-migration" + MONGO_BATCH_ROWS_TARGET: "10000" + MONGO_CURSOR_BATCH_SIZE: "2000" + MONGO_MAX_TIME_MS: "120000" + + # Transform + TRANSFORM_VERSION: "v1" + + # ClickHouse target + CLICKHOUSE_DB: "countly_drill" + CLICKHOUSE_TABLE: "drill_events" + CLICKHOUSE_USERNAME: "default" + CLICKHOUSE_QUERY_TIMEOUT_MS: "120000" + CLICKHOUSE_MAX_RETRIES: "8" + CLICKHOUSE_RETRY_BASE_DELAY_MS: "1000" + CLICKHOUSE_RETRY_MAX_DELAY_MS: "30000" + CLICKHOUSE_USE_DEDUP_TOKEN: "true" + + # Backpressure + BACKPRESSURE_ENABLED: "true" + BACKPRESSURE_PARTS_TO_THROW_INSERT: "300" + BACKPRESSURE_MAX_PARTS_IN_TOTAL: "500" + BACKPRESSURE_PARTITION_PCT_HIGH: "0.50" + BACKPRESSURE_PARTITION_PCT_LOW: "0.35" + BACKPRESSURE_TOTAL_PCT_HIGH: "0.50" + BACKPRESSURE_TOTAL_PCT_LOW: "0.40" + BACKPRESSURE_POLL_INTERVAL_MS: "15000" + BACKPRESSURE_MAX_PAUSE_EPISODE_MS: "180000" + + # State management + MANIFEST_DB: "countly_drill" + REDIS_KEY_PREFIX: "mig" + TIMELINE_SNAPSHOT_INTERVAL: "10" + + # Garbage 
collection + GC_ENABLED: "true" + GC_RSS_SOFT_LIMIT_MB: "1536" + GC_RSS_HARD_LIMIT_MB: "2048" + GC_HEAP_USED_RATIO: "0.70" + GC_EVERY_N_BATCHES: "10" + +# -- Health probes +probes: + liveness: + path: /healthz + initialDelaySeconds: 20 + periodSeconds: 15 + timeoutSeconds: 5 + failureThreshold: 6 + readiness: + path: /readyz + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + +# -- Resource requests and limits +resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "2" + memory: "3Gi" + +# -- Pod-level security context +podSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + +# -- Container-level security context +containerSecurityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + +# -- Node selector +nodeSelector: {} +# -- Tolerations +tolerations: [] +# -- Affinity rules +affinity: {} + +# -- Network policy (optional) +networkPolicy: + enabled: false + ingress: {} + egress: {} + +# -- Multi-pod worker coordination settings +# These take effect when deployment.replicas > 1. 
+worker: + enabled: true # MULTI_POD_ENABLED + lockTtlSec: 300 # LOCK_TTL_SECONDS — collection lock expiration + lockRenewMs: 60000 # LOCK_RENEW_MS — lock renewal interval + podHeartbeatMs: 30000 # POD_HEARTBEAT_MS — heartbeat interval + podDeadAfterSec: 180 # POD_DEAD_AFTER_SEC — dead pod threshold + rangeParallelThreshold: 500000 # RANGE_PARALLEL_THRESHOLD — split large collections + rangeCount: 100 # RANGE_COUNT — number of time ranges + rangeLeaseTtlSec: 300 # RANGE_LEASE_TTL_SEC — range lease expiration + progressUpdateMs: 5000 # PROGRESS_UPDATE_MS — progress report interval + asyncWriteFlushIntervalMs: 5000 # ASYNC_WRITE_FLUSH_INTERVAL_MS + asyncWriteFlushBatchSize: 10 # ASYNC_WRITE_FLUSH_BATCH_SIZE + +# -- Pod disruption budget (recommended when replicas > 1) +pdb: + enabled: false + minAvailable: 1 + +# -- Prometheus ServiceMonitor (optional) +serviceMonitor: + enabled: false + interval: "30s" + scrapeTimeout: "10s" + path: /stats + +# -- Bundled Redis subchart (Bitnami) +# Deploys Redis alongside the migration service. +# When enabled and backingServices.redis.url is empty, the chart auto-wires the URL. +# Set redis.enabled=false and provide backingServices.redis.url to use an external Redis. 
+redis: + enabled: true + architecture: standalone + # -- Explicit sync-wave ensures Redis is healthy before the migration Deployment (wave 10) + commonAnnotations: + argocd.argoproj.io/sync-wave: "0" + auth: + enabled: false + master: + resources: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1" + memory: "2Gi" + persistence: + enabled: true + size: 8Gi + commonConfiguration: |- + appendonly yes + appendfsync everysec + save 900 1 + save 300 10 + save 60 10000 diff --git a/charts/countly-mongodb/.helmignore b/charts/countly-mongodb/.helmignore index 24676c2..3b73490 100644 --- a/charts/countly-mongodb/.helmignore +++ b/charts/countly-mongodb/.helmignore @@ -15,4 +15,3 @@ docs/ examples/ environments/ ci/ -tests/ diff --git a/charts/countly-mongodb/README.md b/charts/countly-mongodb/README.md new file mode 100644 index 0000000..b13bf56 --- /dev/null +++ b/charts/countly-mongodb/README.md @@ -0,0 +1,164 @@ +# Countly MongoDB Helm Chart + +Deploys a MongoDB replica set for Countly via the MongoDB Community Operator. Includes application and metrics users, an optional Percona exporter for Prometheus scraping, and NetworkPolicy support. + +**Chart version:** 0.1.0 +**App version:** 8.2.5 + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph mongodb["mongodb namespace"] + rs0["MongoDB\nReplica Set Member 0\n:27017"] + rs1["MongoDB\nReplica Set Member 1\n:27017"] + exporter["Percona Exporter\n:9216"] + end + + countly["Countly App"] -->|read/write| rs0 + countly -->|read/write| rs1 + rs0 <-->|replication| rs1 + prometheus["Prometheus"] -->|scrape| exporter + exporter -->|clusterMonitor| rs0 +``` + +The chart creates a `MongoDBCommunity` custom resource managed by the MongoDB Community Operator. The operator handles replica set initialization, user creation, and rolling upgrades. 
+ +--- + +## Quick Start + +```bash +helm install countly-mongodb ./charts/countly-mongodb \ + -n mongodb --create-namespace \ + --set users.app.password="YOUR_APP_PASSWORD" \ + --set users.metrics.password="YOUR_METRICS_PASSWORD" +``` + +> **Production deployment:** Use the profile-based approach from the [root README](../../README.md#manual-installation-without-helmfile) instead of `--set` flags. This chart supports sizing and security profile layers. + +--- + +## Prerequisites + +- **MongoDB Community Operator** installed in the cluster (`mongodbcommunity.mongodb.com/v1` CRDs) +- **StorageClass** available for persistent volumes + +--- + +## Configuration + +### Replica Set + +```yaml +mongodb: + version: "8.2.5" + members: 2 + resources: + requests: { cpu: "500m", memory: "2Gi" } + limits: { cpu: "2", memory: "8Gi" } + persistence: + storageClass: "" # Uses cluster default if empty + size: 100Gi +``` + +### Users + +Two users are created by default: + +```yaml +users: + app: + name: app + database: admin + password: "" # Required on first install + roles: + - { name: readWriteAnyDatabase, db: admin } + - { name: dbAdmin, db: admin } + metrics: + enabled: true + name: metrics + password: "" # Required when exporter is enabled + roles: + - { name: clusterMonitor, db: admin } + - { name: read, db: local } +``` + +The `app` user password must match the password configured in the `countly` chart's `backingServices.mongodb.password` or `secrets.mongodb.password`. 
+ +### Percona Exporter + +```yaml +exporter: + enabled: true + image: percona/mongodb_exporter:0.40.0 + port: 9216 + resources: + requests: { cpu: "50m", memory: "64Mi" } + limits: { cpu: "200m", memory: "256Mi" } +``` + +### Scheduling + +```yaml +mongodb: + scheduling: + nodeSelector: {} + tolerations: [] + antiAffinity: + enabled: true + type: preferred + topologyKey: kubernetes.io/hostname +``` + +### ArgoCD Integration + +```yaml +argocd: + enabled: true +``` + +--- + +## Verifying the Deployment + +```bash +# 1. Check MongoDBCommunity resource +kubectl get mongodbcommunity -n mongodb + +# 2. Check pods +kubectl get pods -n mongodb + +# 3. Get the operator-generated connection string secret +kubectl get secret -n mongodb countly-mongodb-app-mongodb-conn \ + -o jsonpath='{.data.connectionString\.standard}' | base64 -d + +# 4. Test connectivity +kubectl exec -n mongodb countly-mongodb-0 -c mongod -- \ + mongosh --eval "db.runCommand({ping: 1})" + +# 5. Check replica set status +kubectl exec -n mongodb countly-mongodb-0 -c mongod -- \ + mongosh --eval "rs.status().members.map(m => ({name: m.name, state: m.stateStr}))" +``` + +--- + +## Configuration Reference + +| Key | Default | Description | +|-----|---------|-------------| +| `mongodb.version` | `8.2.5` | MongoDB server version | +| `mongodb.members` | `2` | Number of replica set members | +| `mongodb.resources.requests.cpu` | `500m` | CPU request per member | +| `mongodb.resources.requests.memory` | `2Gi` | Memory request per member | +| `mongodb.persistence.size` | `100Gi` | Data volume size per member | +| `users.app.password` | `""` | Application user password (required) | +| `users.metrics.password` | `""` | Metrics user password | +| `exporter.enabled` | `true` | Deploy Percona MongoDB exporter | +| `exporter.port` | `9216` | Exporter metrics port | +| `podDisruptionBudget.enabled` | `false` | PDB for MongoDB members | +| `networkPolicy.enabled` | `false` | NetworkPolicy | +| `secrets.keep` | `true` | 
Preserve secrets on uninstall | diff --git a/charts/countly-mongodb/examples/values-production.yaml b/charts/countly-mongodb/examples/values-production.yaml new file mode 100644 index 0000000..79cacc7 --- /dev/null +++ b/charts/countly-mongodb/examples/values-production.yaml @@ -0,0 +1,40 @@ +# Production MongoDB replica set. +# 3 members, PDB enabled, anti-affinity, monitoring. + +argocd: + enabled: true + +mongodb: + version: "8.2.5" + members: 3 + resources: + requests: + cpu: "2" + memory: "8Gi" + limits: + cpu: "4" + memory: "16Gi" + persistence: + size: 500Gi + scheduling: + antiAffinity: + enabled: true + type: required + topologyKey: kubernetes.io/hostname + +users: + app: + password: "" # Set via --set or existingSecret + metrics: + password: "" # Set via --set or existingSecret + +exporter: + enabled: true + +podDisruptionBudget: + enabled: true + maxUnavailable: 1 + +networkPolicy: + enabled: true + allowMonitoring: true diff --git a/charts/countly-mongodb/examples/values-small.yaml b/charts/countly-mongodb/examples/values-small.yaml new file mode 100644 index 0000000..183d27e --- /dev/null +++ b/charts/countly-mongodb/examples/values-small.yaml @@ -0,0 +1,24 @@ +# Small / development MongoDB replica set. +# 2 members, minimal resources, exporter enabled. + +mongodb: + version: "8.2.5" + members: 2 + resources: + requests: + cpu: "250m" + memory: "1Gi" + limits: + cpu: "1" + memory: "4Gi" + persistence: + size: 20Gi + +users: + app: + password: "dev-app-password" + metrics: + password: "dev-metrics-password" + +exporter: + enabled: true diff --git a/charts/countly-mongodb/templates/_helpers.tpl b/charts/countly-mongodb/templates/_helpers.tpl index f9315e6..34d1a8a 100644 --- a/charts/countly-mongodb/templates/_helpers.tpl +++ b/charts/countly-mongodb/templates/_helpers.tpl @@ -48,6 +48,15 @@ app.kubernetes.io/name: {{ include "countly-mongodb.name" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} +{{/* +ArgoCD sync-wave annotation (only when argocd.enabled). +*/}} +{{- define "countly-mongodb.syncWave" -}} +{{- if ((.root.Values.argocd).enabled) }} +argocd.argoproj.io/sync-wave: {{ .wave | quote }} +{{- end }} +{{- end -}} + {{/* MongoDB connection string secret name (generated by operator) */}} diff --git a/charts/countly-mongodb/templates/deployment-exporter.yaml b/charts/countly-mongodb/templates/deployment-exporter.yaml index 0e29ad9..62633e6 100644 --- a/charts/countly-mongodb/templates/deployment-exporter.yaml +++ b/charts/countly-mongodb/templates/deployment-exporter.yaml @@ -6,6 +6,10 @@ metadata: labels: {{- include "countly-mongodb.labels" . | nindent 4 }} app.kubernetes.io/component: exporter + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-mongodb.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- end }} spec: replicas: 1 selector: diff --git a/charts/countly-mongodb/templates/mongodbcommunity.yaml b/charts/countly-mongodb/templates/mongodbcommunity.yaml index eed5f31..e3ba8d5 100644 --- a/charts/countly-mongodb/templates/mongodbcommunity.yaml +++ b/charts/countly-mongodb/templates/mongodbcommunity.yaml @@ -4,6 +4,7 @@ metadata: name: {{ include "countly-mongodb.fullname" . }} annotations: "helm.sh/resource-policy": keep + {{- include "countly-mongodb.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} labels: {{- include "countly-mongodb.labels" . | nindent 4 }} spec: diff --git a/charts/countly-mongodb/templates/namespace.yaml b/charts/countly-mongodb/templates/namespace.yaml index c18077f..4dc0d13 100644 --- a/charts/countly-mongodb/templates/namespace.yaml +++ b/charts/countly-mongodb/templates/namespace.yaml @@ -5,4 +5,8 @@ metadata: name: {{ .Release.Namespace }} labels: {{- include "countly-mongodb.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-mongodb.syncWave" (dict "wave" "0" "root" .) 
| nindent 4 }} + {{- end }} {{- end }} diff --git a/charts/countly-mongodb/templates/networkpolicy.yaml b/charts/countly-mongodb/templates/networkpolicy.yaml index 585fe2d..c1dece5 100644 --- a/charts/countly-mongodb/templates/networkpolicy.yaml +++ b/charts/countly-mongodb/templates/networkpolicy.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-mongodb.fullname" . }}-default-deny labels: {{- include "countly-mongodb.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-mongodb.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} spec: podSelector: {} policyTypes: diff --git a/charts/countly-mongodb/templates/pdb.yaml b/charts/countly-mongodb/templates/pdb.yaml index b5a239d..ca531b3 100644 --- a/charts/countly-mongodb/templates/pdb.yaml +++ b/charts/countly-mongodb/templates/pdb.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly-mongodb.fullname" . }}-pdb labels: {{- include "countly-mongodb.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-mongodb.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} + {{- end }} spec: {{- if .Values.podDisruptionBudget.minAvailable }} minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} diff --git a/charts/countly-mongodb/templates/secret-passwords.yaml b/charts/countly-mongodb/templates/secret-passwords.yaml index 94d09b7..1200229 100644 --- a/charts/countly-mongodb/templates/secret-passwords.yaml +++ b/charts/countly-mongodb/templates/secret-passwords.yaml @@ -8,6 +8,7 @@ metadata: {{- if .Values.secrets.keep }} helm.sh/resource-policy: keep {{- end }} + {{- include "countly-mongodb.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} type: Opaque data: {{ .Values.users.app.passwordSecretKey }}: |- @@ -31,6 +32,7 @@ metadata: {{- if .Values.secrets.keep }} helm.sh/resource-policy: keep {{- end }} + {{- include "countly-mongodb.syncWave" (dict "wave" "0" "root" .) 
| nindent 4 }} type: Opaque data: {{ .Values.users.metrics.passwordSecretKey }}: |- diff --git a/charts/countly-mongodb/templates/service-metrics.yaml b/charts/countly-mongodb/templates/service-metrics.yaml index f0dc265..2bae70f 100644 --- a/charts/countly-mongodb/templates/service-metrics.yaml +++ b/charts/countly-mongodb/templates/service-metrics.yaml @@ -6,6 +6,10 @@ metadata: labels: {{- include "countly-mongodb.labels" . | nindent 4 }} countly.io/metrics: "true" + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly-mongodb.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} + {{- end }} spec: selector: {{- include "countly-mongodb.selectorLabels" . | nindent 4 }} diff --git a/charts/countly-mongodb/templates/tests/test-connection.yaml b/charts/countly-mongodb/templates/tests/test-connection.yaml new file mode 100644 index 0000000..e5c092e --- /dev/null +++ b/charts/countly-mongodb/templates/tests/test-connection.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "countly-mongodb.fullname" . }}-test-connection + namespace: {{ .Release.Namespace }} + labels: + {{- include "countly-mongodb.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: test + image: busybox:1.35 + command: ['sh', '-c', 'nc -z {{ include "countly-mongodb.fullname" . 
}}-svc 27017'] diff --git a/charts/countly-mongodb/values.yaml b/charts/countly-mongodb/values.yaml index 7a57a4a..d031e69 100644 --- a/charts/countly-mongodb/values.yaml +++ b/charts/countly-mongodb/values.yaml @@ -1,20 +1,37 @@ +# -- Global settings shared across all Countly charts global: + # -- Override container image registry for all images imageRegistry: "" + # -- Global image pull secrets imagePullSecrets: [] + # -- Default StorageClass for PVCs (empty = cluster default) storageClass: "" + # -- Sizing profile name (informational, used by environment overlays) sizing: small + # -- Global scheduling defaults (merged with per-component settings) scheduling: nodeSelector: {} tolerations: [] +# -- Override the chart name used in resource names nameOverride: "" +# -- Override the full resource name fullnameOverride: "" +# -- Create the target namespace (disable when ArgoCD manages namespace creation) createNamespace: false +# -- ArgoCD integration (adds sync-wave annotations when enabled) +argocd: + enabled: false + +# -- MongoDB replica set configuration mongodb: + # -- MongoDB server version version: "8.2.5" + # -- Number of replica set members members: 2 + # -- Resource requests and limits per member resources: requests: cpu: "500m" @@ -23,37 +40,54 @@ mongodb: cpu: "2" memory: "8Gi" + # -- Persistent volume configuration persistence: + # -- StorageClass override (empty = global or cluster default) storageClass: "" + # -- Data volume size per member size: 100Gi + # -- Scheduling constraints for MongoDB pods scheduling: nodeSelector: {} tolerations: [] affinity: {} + # -- Pod anti-affinity to spread members across nodes antiAffinity: enabled: true + # -- preferred or required type: preferred topologyKey: kubernetes.io/hostname weight: 100 + # -- TLS encryption for replica set communication tls: enabled: false +# -- MongoDB users created by the operator users: + # -- Application user (used by Countly services) app: + # -- Username name: app + # -- Authentication 
database database: admin + # -- Roles granted to the app user roles: - name: readWriteAnyDatabase db: admin - name: dbAdmin db: admin + # -- Name of the Secret created by the operator for this user's password passwordSecretName: app-user-password + # -- Key within the password Secret passwordSecretKey: password + # -- Password value (REQUIRED on first install; must match countly chart's backingServices.mongodb.password) password: "" + # -- Metrics user (used by the Percona exporter) metrics: + # -- Deploy the metrics user enabled: true name: metrics database: admin @@ -64,12 +98,18 @@ users: db: local passwordSecretName: metrics-user-password passwordSecretKey: password + # -- Password value (required when exporter.enabled=true) password: "" +# -- Percona MongoDB exporter for Prometheus metrics exporter: + # -- Deploy the exporter sidecar enabled: true + # -- Exporter container image image: percona/mongodb_exporter:0.40.0 + # -- Metrics port exposed by the exporter port: 9216 + # -- Exporter resource requests and limits resources: requests: cpu: "50m" @@ -77,8 +117,10 @@ exporter: limits: cpu: "200m" memory: "256Mi" + # -- Create a Service for the exporter (for ServiceMonitor scraping) service: enabled: true + # -- Exporter CLI arguments args: - --collect-all - --collector.diagnosticdata @@ -88,18 +130,27 @@ exporter: - --collector.indexstats - --collector.collstats +# -- Pod disruption budget for MongoDB members podDisruptionBudget: enabled: false maxUnavailable: 1 +# -- Network policy restricting ingress to MongoDB pods networkPolicy: + # -- Enable the NetworkPolicy enabled: false + # -- Namespaces allowed to connect to MongoDB allowedNamespaces: - countly + # -- Allow ingress from the monitoring namespace (for exporter scraping) allowMonitoring: false + # -- Label selector for the monitoring namespace monitoringNamespaceSelector: kubernetes.io/metadata.name: monitoring + # -- Additional custom ingress rules additionalIngress: [] +# -- Secret retention settings 
secrets: + # -- Preserve secrets on helm uninstall/upgrade keep: true diff --git a/charts/countly-observability/.helmignore b/charts/countly-observability/.helmignore index 24676c2..3b73490 100644 --- a/charts/countly-observability/.helmignore +++ b/charts/countly-observability/.helmignore @@ -15,4 +15,3 @@ docs/ examples/ environments/ ci/ -tests/ diff --git a/charts/countly-observability/README.md b/charts/countly-observability/README.md index a248252..e34001e 100644 --- a/charts/countly-observability/README.md +++ b/charts/countly-observability/README.md @@ -31,6 +31,8 @@ helm install countly-observability ./charts/countly-observability \ -n observability --create-namespace ``` +> **Production deployment:** Use the profile-based approach from the [root README](../../README.md#manual-installation-without-helmfile) instead of `--set` flags. This chart supports sizing, observability, and security profile layers. + Access Grafana: ```bash diff --git a/charts/countly-observability/examples/values-external.yaml b/charts/countly-observability/examples/values-external.yaml new file mode 100644 index 0000000..a67c95b --- /dev/null +++ b/charts/countly-observability/examples/values-external.yaml @@ -0,0 +1,32 @@ +# External mode: only deploy collectors, forward telemetry to external backends. +# Use this when you have Grafana Cloud, Mimir, or other external observability. 
+ +mode: external + +metrics: + enabled: true + +traces: + enabled: true + +logs: + enabled: true + +profiling: + enabled: true + +prometheus: + external: + remoteWriteUrl: "https://mimir.corp.com/api/v1/push" + +loki: + external: + pushUrl: "https://loki.corp.com/loki/api/v1/push" + +tempo: + external: + otlpGrpcEndpoint: "tempo.corp.com:4317" + +pyroscope: + external: + ingestUrl: "https://pyroscope.corp.com" diff --git a/charts/countly-observability/examples/values-minimal.yaml b/charts/countly-observability/examples/values-minimal.yaml new file mode 100644 index 0000000..38e9741 --- /dev/null +++ b/charts/countly-observability/examples/values-minimal.yaml @@ -0,0 +1,19 @@ +# Minimal observability stack for development. +# All signals enabled, default resources, no ingress. + +mode: full +clusterName: dev-cluster + +metrics: + enabled: true + +traces: + enabled: true + sampling: + strategy: "AlwaysOn" + +logs: + enabled: true + +profiling: + enabled: true diff --git a/charts/countly-observability/examples/values-production.yaml b/charts/countly-observability/examples/values-production.yaml new file mode 100644 index 0000000..f109c83 --- /dev/null +++ b/charts/countly-observability/examples/values-production.yaml @@ -0,0 +1,79 @@ +# Production observability stack. +# Full mode with increased resources, tail sampling, ingress for Grafana. 
+ +argocd: + enabled: true + +mode: full +clusterName: production-cluster + +metrics: + enabled: true + sampling: + interval: "15s" + +traces: + enabled: true + sampling: + strategy: "TailBased" + tailSampling: + waitDuration: "10s" + numTraces: 50000 + policies: + keepErrors: true + latencyThresholdMs: 2000 + baselineRatio: 0.1 + +logs: + enabled: true + +profiling: + enabled: true + +prometheus: + resources: + requests: + cpu: "4" + memory: "8Gi" + limits: + cpu: "4" + memory: "12Gi" + retention: + time: "90d" + size: "200GB" + storage: + size: 500Gi + +loki: + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" + storage: + size: 500Gi + +tempo: + resources: + requests: + cpu: "4" + memory: "12Gi" + limits: + cpu: "8" + memory: "16Gi" + storage: + size: 500Gi + +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + hosts: + - host: obs.example.com + tls: + - secretName: obs-tls + hosts: + - obs.example.com diff --git a/charts/countly-observability/templates/_helpers.tpl b/charts/countly-observability/templates/_helpers.tpl index 5e366a4..a130987 100644 --- a/charts/countly-observability/templates/_helpers.tpl +++ b/charts/countly-observability/templates/_helpers.tpl @@ -258,3 +258,12 @@ Args: dict "component" "storage" "default" "al {{- fail (printf "%s.storage.forcePathStyle requires storage.endpoint (used for S3-compatible endpoints like MinIO)" .component) -}} {{- end -}} {{- end }} + +{{/* +ArgoCD sync-wave annotation (only when argocd.enabled). 
+*/}} +{{- define "obs.syncWave" -}} +{{- if ((.root.Values.argocd).enabled) }} +argocd.argoproj.io/sync-wave: {{ .wave | quote }} +{{- end }} +{{- end -}} diff --git a/charts/countly-observability/templates/ingress.yaml b/charts/countly-observability/templates/ingress.yaml index 1c7b14d..87f60c5 100644 --- a/charts/countly-observability/templates/ingress.yaml +++ b/charts/countly-observability/templates/ingress.yaml @@ -7,9 +7,12 @@ metadata: labels: {{- include "obs.labels" . | nindent 4 }} app.kubernetes.io/component: grafana - {{- with .Values.ingress.annotations }} + {{- if or ((.Values.argocd).enabled) .Values.ingress.annotations }} annotations: + {{- include "obs.syncWave" (dict "wave" "5" "root" .) | nindent 4 }} + {{- with .Values.ingress.annotations }} {{- toYaml . | nindent 4 }} + {{- end }} {{- end }} spec: ingressClassName: {{ .Values.ingress.className }} diff --git a/charts/countly-observability/templates/networkpolicy.yaml b/charts/countly-observability/templates/networkpolicy.yaml index b129833..3955412 100644 --- a/charts/countly-observability/templates/networkpolicy.yaml +++ b/charts/countly-observability/templates/networkpolicy.yaml @@ -8,6 +8,10 @@ metadata: namespace: {{ .Release.Namespace }} labels: {{- include "obs.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "obs.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} spec: podSelector: {} policyTypes: diff --git a/charts/countly-observability/templates/serviceaccount.yaml b/charts/countly-observability/templates/serviceaccount.yaml index 394e66a..b7e81df 100644 --- a/charts/countly-observability/templates/serviceaccount.yaml +++ b/charts/countly-observability/templates/serviceaccount.yaml @@ -5,4 +5,8 @@ metadata: labels: {{- include "obs.labels" . | nindent 4 }} app.kubernetes.io/component: prometheus + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "obs.syncWave" (dict "wave" "0" "root" .) 
| nindent 4 }} + {{- end }} automountServiceAccountToken: false diff --git a/charts/countly-observability/values.yaml b/charts/countly-observability/values.yaml index 0522a67..5929a36 100644 --- a/charts/countly-observability/values.yaml +++ b/charts/countly-observability/values.yaml @@ -1,3 +1,7 @@ +# -- ArgoCD integration (optional) +argocd: + enabled: false + # -- Deployment mode: "full" (all in-cluster), "hybrid" (no Grafana), "external" (no backends) mode: full diff --git a/charts/countly/.helmignore b/charts/countly/.helmignore index 24676c2..3b73490 100644 --- a/charts/countly/.helmignore +++ b/charts/countly/.helmignore @@ -15,4 +15,3 @@ docs/ examples/ environments/ ci/ -tests/ diff --git a/charts/countly/README.md b/charts/countly/README.md new file mode 100644 index 0000000..2fbaee1 --- /dev/null +++ b/charts/countly/README.md @@ -0,0 +1,212 @@ +# Countly Helm Chart + +Deploys the Countly web analytics platform as a set of microservices: API, Frontend, Ingestor, Aggregator, and Job Server. Each component runs as an independent Deployment with its own HPA, PDB, and scheduling configuration. + +**Chart version:** 0.1.0 +**App version:** 26.01 + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph countly["countly namespace"] + frontend["Frontend\n:6001"] + api["API\n:3001"] + ingestor["Ingestor\n:3010"] + aggregator["Aggregator"] + jobserver["Job Server\n:3020"] + end + + subgraph backing["Backing Services"] + mongo["MongoDB"] + ch["ClickHouse"] + kafka["Kafka"] + end + + ingress["Ingress"] --> frontend + ingress --> api + ingress --> ingestor + + api --> mongo + api --> ch + frontend --> mongo + ingestor --> kafka + aggregator --> mongo + aggregator --> ch + aggregator --> kafka + jobserver --> mongo +``` + +All five components share a common container image and are differentiated by their startup command (`npm run start:`). Configuration is injected via ConfigMaps and Secrets. 
+ +--- + +## Quick Start + +```bash +helm install countly ./charts/countly \ + -n countly --create-namespace \ + --set backingServices.mongodb.password="YOUR_MONGODB_PASSWORD" \ + --set ingress.hostname="countly.example.com" +``` + +This assumes MongoDB, ClickHouse, and Kafka are deployed in-cluster via the sibling charts (`countly-mongodb`, `countly-clickhouse`, `countly-kafka`). + +> **Production deployment:** Use the profile-based approach from the [root README](../../README.md#manual-installation-without-helmfile) instead of `--set` flags. This chart supports sizing, TLS, observability, and security profile layers. + +--- + +## Prerequisites + +- **MongoDB** — Deployed via `countly-mongodb` chart or provided externally +- **ClickHouse** — Deployed via `countly-clickhouse` chart or provided externally +- **Kafka** — Deployed via `countly-kafka` chart or provided externally +- **Ingress controller** — NGINX Ingress Controller (F5 or community) for external access + +--- + +## Components + +| Component | Purpose | Default Port | Scales With | +|-----------|---------|-------------|-------------| +| API | REST API for data retrieval and management | 3001 | CPU/Memory (1-6 replicas) | +| Frontend | Dashboard UI and web server | 6001 | CPU (1 replica) | +| Ingestor | High-throughput event ingestion endpoint | 3010 | CPU/Memory (1-12 replicas) | +| Aggregator | Background data aggregation from Kafka/CH | None | CPU/Memory (4-8 replicas) | +| Job Server | Scheduled jobs, reports, push notifications | 3020 | CPU (1 replica) | + +Each component can be independently enabled/disabled, scaled, and configured. + +--- + +## Configuration + +### Backing Services + +Each backing service supports **bundled** (in-cluster sibling chart) or **external** (bring your own) mode. 
+ +```yaml +backingServices: + mongodb: + mode: bundled # bundled | external + # External-mode fields (used only when mode=external): + connectionString: "" + host: "" + port: "27017" + username: "app" + password: "" + existingSecret: "" + clickhouse: + mode: bundled # bundled | external + host: "" + port: "8123" + tls: "false" + existingSecret: "" + kafka: + mode: bundled # bundled | external + brokers: "" + existingSecret: "" +``` + +When `mode=bundled`, connection URLs are auto-constructed from in-cluster DNS using the release name and namespace conventions. + +### Secrets + +Three modes for managing credentials: + +| Mode | Description | Use Case | +|------|-------------|----------| +| `values` (default) | Secrets created from Helm values | Development, testing | +| `existingSecret` | Reference pre-created Kubernetes Secrets | Production with manual secret management | +| `externalSecret` | External Secrets Operator (AWS SM, Azure KV, etc.) | Production with vault integration | + +### Ingress + +```yaml +ingress: + enabled: true + className: nginx + hostname: countly.example.com + tls: + mode: http # http | letsencrypt | existingSecret | selfSigned + clusterIssuer: letsencrypt-prod +``` + +### Per-Component Configuration + +Each component supports the same set of overrides: + +```yaml +api: # or: frontend, ingestor, aggregator, jobserver + enabled: true + replicaCount: 1 + resources: + requests: { cpu: "1", memory: "3.5Gi" } + limits: { cpu: "1", memory: "4Gi" } + hpa: + enabled: true + minReplicas: 1 + maxReplicas: 6 + metrics: + cpu: { averageUtilization: 70 } + pdb: + enabled: false + scheduling: + nodeSelector: {} + tolerations: [] + antiAffinity: + enabled: true + type: preferred + topologyKey: kubernetes.io/hostname + extraEnv: [] + extraEnvFrom: [] +``` + +### ArgoCD Integration + +```yaml +argocd: + enabled: true +``` + +When enabled, all resources get sync-wave annotations for ordered deployment. + +--- + +## Verifying the Deployment + +```bash +# 1. 
Check all pods are running
+kubectl get pods -n countly
+
+# 2. Check API health
+kubectl exec -n countly deploy/countly-api -- \
+  node -e "fetch('http://localhost:3001/o/ping').then(r=>r.text()).then(console.log)"
+
+# 3. Check Frontend health
+kubectl exec -n countly deploy/countly-frontend -- \
+  node -e "fetch('http://localhost:6001/ping').then(r=>r.text()).then(console.log)"
+
+# 4. View logs for a component
+kubectl logs -n countly -l app.kubernetes.io/component=api -f
+```
+
+---
+
+## Configuration Reference
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `image.repository` | `gcr.io/countly-dev-313620/countly-unified` | Container image |
+| `image.tag` | `26.01` | Image tag |
+| `backingServices.mongodb.mode` | `bundled` | MongoDB connection mode |
+| `backingServices.clickhouse.mode` | `bundled` | ClickHouse connection mode |
+| `backingServices.kafka.mode` | `bundled` | Kafka connection mode |
+| `secrets.mode` | `values` | Secret management mode |
+| `ingress.hostname` | `countly.example.com` | Ingress hostname |
+| `ingress.tls.mode` | `http` | TLS mode: http, letsencrypt, existingSecret, selfSigned |
+| `config.common.COUNTLY_PLUGINS` | (long list) | Enabled Countly plugins |
+| `nodeOptions.<component>` | Varies | Node.js `--max-old-space-size` per component |
+| `networkPolicy.enabled` | `false` | Enable NetworkPolicy |
diff --git a/charts/countly/examples/values-minimal.yaml b/charts/countly/examples/values-minimal.yaml
new file mode 100644
index 0000000..6ca0d7e
--- /dev/null
+++ b/charts/countly/examples/values-minimal.yaml
@@ -0,0 +1,31 @@
+# Minimal values for local development / testing.
+# Deploys all components with bundled backing services.
+# Requires: countly-mongodb, countly-clickhouse, countly-kafka deployed in-cluster.
+ +backingServices: + mongodb: + mode: bundled + password: "dev-password" + clickhouse: + mode: bundled + kafka: + mode: bundled + +secrets: + mode: values + common: + encryptionReportsKey: "dev-encryption-key" + webSessionSecret: "dev-session-secret" + passwordSecret: "dev-password-secret" + clickhouse: + username: "default" + password: "dev-ch-password" + database: "countly_drill" + mongodb: + password: "dev-password" + +ingress: + enabled: true + hostname: countly.local + tls: + mode: http diff --git a/charts/countly/examples/values-production.yaml b/charts/countly/examples/values-production.yaml new file mode 100644 index 0000000..9d95b39 --- /dev/null +++ b/charts/countly/examples/values-production.yaml @@ -0,0 +1,73 @@ +# Production values example for the Countly chart. +# Uses existingSecret mode and external backing services. + +image: + repository: gcr.io/countly-dev-313620/countly-unified + tag: "26.01" + pullPolicy: IfNotPresent + +argocd: + enabled: true + +backingServices: + mongodb: + mode: external + connectionString: "mongodb://app:PASSWORD@mongodb-0.mongodb.svc:27017,mongodb-1.mongodb.svc:27017/admin?replicaSet=rs0&ssl=false" + clickhouse: + mode: external + host: "clickhouse.clickhouse.svc" + port: "8123" + tls: "false" + kafka: + mode: external + brokers: '["kafka-bootstrap.kafka.svc:9092"]' + +secrets: + mode: existingSecret + keep: true + common: + existingSecret: countly-common-secrets + clickhouse: + existingSecret: countly-clickhouse-secrets + kafka: + existingSecret: countly-kafka-secrets + mongodb: + existingSecret: countly-mongodb-conn + +ingress: + enabled: true + className: nginx + hostname: analytics.example.com + tls: + mode: letsencrypt + clusterIssuer: letsencrypt-prod + +networkPolicy: + enabled: true + +api: + hpa: + enabled: true + minReplicas: 2 + maxReplicas: 8 + pdb: + enabled: true + minAvailable: 1 + +ingestor: + hpa: + enabled: true + minReplicas: 2 + maxReplicas: 12 + pdb: + enabled: true + minAvailable: 1 + +aggregator: + 
hpa: + enabled: true + minReplicas: 4 + maxReplicas: 12 + pdb: + enabled: true + minAvailable: 2 diff --git a/charts/countly/templates/_countly-component.tpl b/charts/countly/templates/_countly-component.tpl index a35d032..7468e76 100644 --- a/charts/countly/templates/_countly-component.tpl +++ b/charts/countly/templates/_countly-component.tpl @@ -22,6 +22,10 @@ metadata: labels: {{- include "countly.labels" $root | nindent 4 }} app.kubernetes.io/component: {{ $component }} + {{- if (($root.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "5" "root" $root) | nindent 4 }} + {{- end }} spec: {{- if not $values.hpa.enabled }} replicas: {{ $values.replicaCount }} diff --git a/charts/countly/templates/_helpers.tpl b/charts/countly/templates/_helpers.tpl index 9c10009..5902dfa 100644 --- a/charts/countly/templates/_helpers.tpl +++ b/charts/countly/templates/_helpers.tpl @@ -184,3 +184,12 @@ Called from NOTES.txt to surface errors during install. {{- end -}} {{- end -}} {{- end -}} + +{{/* +ArgoCD sync-wave annotation (only when argocd.enabled). +*/}} +{{- define "countly.syncWave" -}} +{{- if ((.root.Values.argocd).enabled) }} +argocd.argoproj.io/sync-wave: {{ .wave | quote }} +{{- end }} +{{- end -}} diff --git a/charts/countly/templates/configmap-aggregator.yaml b/charts/countly/templates/configmap-aggregator.yaml index 0f48cc9..f7e7b29 100644 --- a/charts/countly/templates/configmap-aggregator.yaml +++ b/charts/countly/templates/configmap-aggregator.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-aggregator labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) 
| nindent 4 }} + {{- end }} data: NODE_OPTIONS: {{ .Values.nodeOptions.aggregator | quote }} {{- range $key, $value := .Values.config.aggregator }} diff --git a/charts/countly/templates/configmap-api.yaml b/charts/countly/templates/configmap-api.yaml index f757e28..46e2ae0 100644 --- a/charts/countly/templates/configmap-api.yaml +++ b/charts/countly/templates/configmap-api.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-api labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) | nindent 4 }} + {{- end }} data: NODE_OPTIONS: {{ .Values.nodeOptions.api | quote }} {{- range $key, $value := .Values.config.api }} diff --git a/charts/countly/templates/configmap-clickhouse.yaml b/charts/countly/templates/configmap-clickhouse.yaml index aec37f3..d14e305 100644 --- a/charts/countly/templates/configmap-clickhouse.yaml +++ b/charts/countly/templates/configmap-clickhouse.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-clickhouse labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) | nindent 4 }} + {{- end }} data: {{- range $key, $value := .Values.config.clickhouse }} {{ $key }}: {{ $value | quote }} diff --git a/charts/countly/templates/configmap-common.yaml b/charts/countly/templates/configmap-common.yaml index 8b7818c..5cd5eb4 100644 --- a/charts/countly/templates/configmap-common.yaml +++ b/charts/countly/templates/configmap-common.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-common labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) 
| nindent 4 }} + {{- end }} data: {{- range $key, $value := .Values.config.common }} {{ $key }}: {{ $value | quote }} diff --git a/charts/countly/templates/configmap-frontend.yaml b/charts/countly/templates/configmap-frontend.yaml index 9374bd1..0b82e4e 100644 --- a/charts/countly/templates/configmap-frontend.yaml +++ b/charts/countly/templates/configmap-frontend.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-frontend labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) | nindent 4 }} + {{- end }} data: NODE_OPTIONS: {{ .Values.nodeOptions.frontend | quote }} {{- range $key, $value := .Values.config.frontend }} diff --git a/charts/countly/templates/configmap-ingestor.yaml b/charts/countly/templates/configmap-ingestor.yaml index 10179f5..312cb4a 100644 --- a/charts/countly/templates/configmap-ingestor.yaml +++ b/charts/countly/templates/configmap-ingestor.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-ingestor labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) | nindent 4 }} + {{- end }} data: NODE_OPTIONS: {{ .Values.nodeOptions.ingestor | quote }} {{- range $key, $value := .Values.config.ingestor }} diff --git a/charts/countly/templates/configmap-jobserver.yaml b/charts/countly/templates/configmap-jobserver.yaml index ee93bc3..c4af1f3 100644 --- a/charts/countly/templates/configmap-jobserver.yaml +++ b/charts/countly/templates/configmap-jobserver.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-jobserver labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) 
| nindent 4 }} + {{- end }} data: NODE_OPTIONS: {{ .Values.nodeOptions.jobserver | quote }} {{- range $key, $value := .Values.config.jobserver }} diff --git a/charts/countly/templates/configmap-kafka.yaml b/charts/countly/templates/configmap-kafka.yaml index 5f1c663..9fa572f 100644 --- a/charts/countly/templates/configmap-kafka.yaml +++ b/charts/countly/templates/configmap-kafka.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-kafka labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) | nindent 4 }} + {{- end }} data: {{- range $key, $value := .Values.config.kafka }} {{ $key }}: {{ $value | quote }} diff --git a/charts/countly/templates/configmap-otel.yaml b/charts/countly/templates/configmap-otel.yaml index cdb42b3..a437ac9 100644 --- a/charts/countly/templates/configmap-otel.yaml +++ b/charts/countly/templates/configmap-otel.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "countly.fullname" . }}-config-otel labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "2" "root" .) | nindent 4 }} + {{- end }} data: {{- range $key, $value := .Values.config.otel }} {{ $key }}: {{ $value | quote }} diff --git a/charts/countly/templates/external-secret-clickhouse.yaml b/charts/countly/templates/external-secret-clickhouse.yaml index 27adf2e..ae7673c 100644 --- a/charts/countly/templates/external-secret-clickhouse.yaml +++ b/charts/countly/templates/external-secret-clickhouse.yaml @@ -6,6 +6,10 @@ metadata: name: {{ include "countly.fullname" . }}-clickhouse labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "1" "root" .) 
| nindent 4 }} + {{- end }} spec: refreshInterval: {{ .Values.secrets.externalSecret.refreshInterval | default "1h" }} secretStoreRef: diff --git a/charts/countly/templates/external-secret-common.yaml b/charts/countly/templates/external-secret-common.yaml index 8b9d4ed..76fdad1 100644 --- a/charts/countly/templates/external-secret-common.yaml +++ b/charts/countly/templates/external-secret-common.yaml @@ -6,6 +6,10 @@ metadata: name: {{ include "countly.fullname" . }}-common labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "1" "root" .) | nindent 4 }} + {{- end }} spec: refreshInterval: {{ .Values.secrets.externalSecret.refreshInterval | default "1h" }} secretStoreRef: diff --git a/charts/countly/templates/external-secret-kafka.yaml b/charts/countly/templates/external-secret-kafka.yaml index 8bba188..cc9b590 100644 --- a/charts/countly/templates/external-secret-kafka.yaml +++ b/charts/countly/templates/external-secret-kafka.yaml @@ -6,6 +6,10 @@ metadata: name: {{ include "countly.fullname" . }}-kafka labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "1" "root" .) | nindent 4 }} + {{- end }} spec: refreshInterval: {{ .Values.secrets.externalSecret.refreshInterval | default "1h" }} secretStoreRef: diff --git a/charts/countly/templates/external-secret-mongodb.yaml b/charts/countly/templates/external-secret-mongodb.yaml index e1f5947..748c196 100644 --- a/charts/countly/templates/external-secret-mongodb.yaml +++ b/charts/countly/templates/external-secret-mongodb.yaml @@ -6,6 +6,10 @@ metadata: name: {{ include "countly.fullname" . }}-mongodb labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "1" "root" .) 
| nindent 4 }} + {{- end }} spec: refreshInterval: {{ .Values.secrets.externalSecret.refreshInterval | default "1h" }} secretStoreRef: diff --git a/charts/countly/templates/ingress.yaml b/charts/countly/templates/ingress.yaml index ac406c8..413d051 100644 --- a/charts/countly/templates/ingress.yaml +++ b/charts/countly/templates/ingress.yaml @@ -8,6 +8,7 @@ metadata: labels: {{- include "countly.labels" . | nindent 4 }} annotations: + {{- include "countly.syncWave" (dict "wave" "10" "root" .) | nindent 4 }} {{- with .Values.ingress.annotations }} {{- toYaml . | nindent 4 }} {{- end }} diff --git a/charts/countly/templates/namespace.yaml b/charts/countly/templates/namespace.yaml index 6a5e355..4f7bd30 100644 --- a/charts/countly/templates/namespace.yaml +++ b/charts/countly/templates/namespace.yaml @@ -5,4 +5,8 @@ metadata: name: {{ .Release.Namespace }} labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- end }} {{- end }} diff --git a/charts/countly/templates/networkpolicy.yaml b/charts/countly/templates/networkpolicy.yaml index 776a5e3..8380aed 100644 --- a/charts/countly/templates/networkpolicy.yaml +++ b/charts/countly/templates/networkpolicy.yaml @@ -5,6 +5,10 @@ metadata: name: {{ include "countly.fullname" . }}-default-deny labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "0" "root" .) 
| nindent 4 }} + {{- end }} spec: podSelector: {} policyTypes: diff --git a/charts/countly/templates/secret-clickhouse.yaml b/charts/countly/templates/secret-clickhouse.yaml index 1f6bdb4..65fa4c7 100644 --- a/charts/countly/templates/secret-clickhouse.yaml +++ b/charts/countly/templates/secret-clickhouse.yaml @@ -12,6 +12,7 @@ metadata: {{- if .Values.secrets.rotationId }} countly.io/rotation-id: {{ .Values.secrets.rotationId | quote }} {{- end }} + {{- include "countly.syncWave" (dict "wave" "1" "root" .) | nindent 4 }} type: Opaque data: COUNTLY_CONFIG__CLICKHOUSE_URL: {{ include "countly.clickhouse.url" . | b64enc }} diff --git a/charts/countly/templates/secret-common.yaml b/charts/countly/templates/secret-common.yaml index 2aad8ab..2308a71 100644 --- a/charts/countly/templates/secret-common.yaml +++ b/charts/countly/templates/secret-common.yaml @@ -12,6 +12,7 @@ metadata: {{- if .Values.secrets.rotationId }} countly.io/rotation-id: {{ .Values.secrets.rotationId | quote }} {{- end }} + {{- include "countly.syncWave" (dict "wave" "1" "root" .) | nindent 4 }} type: Opaque data: {{- $secretName := printf "%s-common" (include "countly.fullname" .) }} diff --git a/charts/countly/templates/secret-kafka.yaml b/charts/countly/templates/secret-kafka.yaml index 5d8cab8..18ed231 100644 --- a/charts/countly/templates/secret-kafka.yaml +++ b/charts/countly/templates/secret-kafka.yaml @@ -12,6 +12,7 @@ metadata: {{- if .Values.secrets.rotationId }} countly.io/rotation-id: {{ .Values.secrets.rotationId | quote }} {{- end }} + {{- include "countly.syncWave" (dict "wave" "1" "root" .) | nindent 4 }} type: Opaque data: COUNTLY_CONFIG__KAFKA_RDKAFKA_BROKERS: {{ include "countly.kafka.brokers" . 
| b64enc }} diff --git a/charts/countly/templates/secret-mongodb.yaml b/charts/countly/templates/secret-mongodb.yaml index 8cde015..6a49d23 100644 --- a/charts/countly/templates/secret-mongodb.yaml +++ b/charts/countly/templates/secret-mongodb.yaml @@ -12,6 +12,7 @@ metadata: {{- if .Values.secrets.rotationId }} countly.io/rotation-id: {{ .Values.secrets.rotationId | quote }} {{- end }} + {{- include "countly.syncWave" (dict "wave" "1" "root" .) | nindent 4 }} type: Opaque data: {{ .Values.secrets.mongodb.key }}: {{ include "countly.mongodb.connectionString" . | b64enc }} diff --git a/charts/countly/templates/serviceaccount.yaml b/charts/countly/templates/serviceaccount.yaml index 849898d..755d489 100644 --- a/charts/countly/templates/serviceaccount.yaml +++ b/charts/countly/templates/serviceaccount.yaml @@ -5,9 +5,12 @@ metadata: name: {{ include "countly.serviceAccountName" . }} labels: {{- include "countly.labels" . | nindent 4 }} - {{- with .Values.serviceAccount.annotations }} + {{- if or ((.Values.argocd).enabled) .Values.serviceAccount.annotations }} annotations: + {{- include "countly.syncWave" (dict "wave" "0" "root" .) | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} {{- toYaml . | nindent 4 }} + {{- end }} {{- end }} automountServiceAccountToken: false {{- end }} diff --git a/charts/countly/templates/tests/test-health.yaml b/charts/countly/templates/tests/test-health.yaml new file mode 100644 index 0000000..ae69b3b --- /dev/null +++ b/charts/countly/templates/tests/test-health.yaml @@ -0,0 +1,37 @@ +{{- if .Values.api.enabled }} +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "countly.fullname" . }}-test-api + namespace: {{ .Release.Namespace }} + labels: + {{- include "countly.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: test + image: busybox:1.35 + command: ['sh', '-c', 'wget -qO- --timeout=30 http://{{ include "countly.fullname" . }}-api:{{ .Values.api.port }}{{ .Values.api.healthCheck.path }}'] +--- +{{- end }} +{{- if .Values.frontend.enabled }} +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "countly.fullname" . }}-test-frontend + namespace: {{ .Release.Namespace }} + labels: + {{- include "countly.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: test + image: busybox:1.35 + command: ['sh', '-c', 'wget -qO- --timeout=30 http://{{ include "countly.fullname" . }}-frontend:{{ .Values.frontend.port }}{{ .Values.frontend.healthCheck.path }}'] +{{- end }} diff --git a/charts/countly/templates/tls-selfsigned.yaml b/charts/countly/templates/tls-selfsigned.yaml index 833f38f..61f58aa 100644 --- a/charts/countly/templates/tls-selfsigned.yaml +++ b/charts/countly/templates/tls-selfsigned.yaml @@ -10,6 +10,10 @@ apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: name: {{ $selfSignedIssuerName }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "10" "root" $) | nindent 4 }} + {{- end }} labels: {{- include "countly.labels" . | nindent 4 }} spec: @@ -25,6 +29,10 @@ metadata: namespace: cert-manager labels: {{- include "countly.labels" . | nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "11" "root" $) | nindent 4 }} + {{- end }} spec: isCA: true commonName: {{ $fullname }}-ca @@ -42,6 +50,10 @@ metadata: name: {{ $issuerName }} labels: {{- include "countly.labels" . 
| nindent 4 }} + {{- if ((.Values.argocd).enabled) }} + annotations: + {{- include "countly.syncWave" (dict "wave" "12" "root" $) | nindent 4 }} + {{- end }} spec: ca: secretName: {{ $caSecretName }} diff --git a/charts/countly/values.yaml b/charts/countly/values.yaml index 9dc197d..df6c196 100644 --- a/charts/countly/values.yaml +++ b/charts/countly/values.yaml @@ -1,35 +1,61 @@ +# -- Global settings shared across all Countly charts global: + # -- Override container image registry for all images imageRegistry: "" + # -- Global image pull secrets imagePullSecrets: [] + # -- Default StorageClass for PVCs (empty = cluster default) storageClass: "" + # -- Sizing profile name (informational, used by environment overlays) sizing: small + # -- Global scheduling defaults (merged with per-component settings) scheduling: nodeSelector: {} tolerations: [] +# -- Override the chart name used in resource names nameOverride: "" +# -- Override the full resource name fullnameOverride: "" +# -- Create the target namespace (disable when ArgoCD manages namespace creation) createNamespace: false +# -- ArgoCD integration (adds sync-wave annotations when enabled) +argocd: + enabled: false + +# -- Kubernetes service account for all Countly pods serviceAccount: + # -- Create the ServiceAccount create: true + # -- Override service account name (defaults to release fullname) name: "" + # -- Additional annotations (e.g. 
for IAM role binding) annotations: {} +# -- Container image for all Countly components (shared unified image) image: + # -- Image repository repository: gcr.io/countly-dev-313620/countly-unified + # -- Image digest (takes precedence over tag when set) digest: "sha256:f81b39d4488c596f76a5c385d088a8998b7c1b20933366ad994f5315597ec48b" + # -- Image tag (used when digest is empty; defaults to appVersion) tag: "26.01" + # -- Image pull policy pullPolicy: IfNotPresent -# Cross-namespace references +# -- Cross-namespace references for backing service DNS resolution clickhouseNamespace: clickhouse kafkaNamespace: kafka mongodbNamespace: mongodb # --- Component Definitions --- +# Each component shares the same image but runs a different startup command. +# All components support: enabled, replicaCount, command, port, healthCheck, +# resources, hpa, pdb, scheduling, extraEnv, and extraEnvFrom. +# -- REST API for data retrieval and management api: enabled: true replicaCount: 1 @@ -48,6 +74,7 @@ api: limits: cpu: "1" memory: "4Gi" + # -- Horizontal Pod Autoscaler hpa: enabled: true minReplicas: 1 @@ -58,8 +85,10 @@ api: memory: averageUtilization: 80 behavior: {} + # -- Pod Disruption Budget pdb: enabled: false + # -- Scheduling constraints (nodeSelector, tolerations, affinity, anti-affinity) scheduling: nodeSelector: {} tolerations: [] @@ -70,9 +99,12 @@ api: type: preferred topologyKey: kubernetes.io/hostname weight: 100 + # -- Additional environment variables extraEnv: [] + # -- Additional envFrom references (ConfigMapRef, SecretRef) extraEnvFrom: [] +# -- Dashboard UI and web server frontend: enabled: true replicaCount: 1 @@ -115,6 +147,7 @@ frontend: extraEnv: [] extraEnvFrom: [] +# -- High-throughput event ingestion endpoint ingestor: enabled: true replicaCount: 1 @@ -158,6 +191,7 @@ ingestor: extraEnv: [] extraEnvFrom: [] +# -- Background data aggregation (no HTTP port; consumes from Kafka/ClickHouse) aggregator: enabled: true replicaCount: 4 @@ -210,6 +244,7 @@ 
aggregator: extraEnv: [] extraEnvFrom: [] +# -- Scheduled jobs, reports, and push notifications jobserver: enabled: true replicaCount: 1 @@ -251,10 +286,15 @@ jobserver: extraEnvFrom: [] # --- Configuration --- +# Environment variables injected into component pods via ConfigMaps. +# Organized by scope: common (all components), per-component, and per-integration. +# -- Application configuration (injected as ConfigMap environment variables) config: + # -- Shared config injected into all components common: NODE_ENV: production + # -- Comma-separated list of enabled Countly plugins COUNTLY_PLUGINS: "mobile,web,desktop,plugins,density,locale,browser,sources,views,logger,systemlogs,populator,reports,crashes,push,star-rating,slipping-away-users,compare,server-stats,dbviewer,crash_symbolication,crash-analytics,alerts,onboarding,consolidate,remote-config,hooks,dashboards,sdk,data-manager,guides,heatmaps,retention_segments,formulas,funnels,cohorts,ab-testing,performance-monitoring,config-transfer,data-migration,two-factor-auth,blocking,concurrent_users,revenue,activity-map,flows,surveys,event-timeline,drill,multi,active_users,ip-blocker,kafka,clickhouse" COUNTLY_CONFIG__FILESTORAGE: gridfs COUNTLY_CONFIG__DRILL_EVENTS_DRIVER: clickhouse @@ -265,6 +305,7 @@ config: COUNTLY_CONFIG__DATABASE_FAILONCONNECTIONERROR: "true" COUNTLY_CONFIG__EVENTSINK_SINKS: '["kafka"]' COUNTLY_CONFIG__RELOADCONFIGAFTER: "10000" + # -- API-specific config api: COUNTLY_CONTAINER: api COUNTLY_CONFIG__API_PORT: "3001" @@ -272,23 +313,28 @@ config: COUNTLY_CONFIG__API_MAX_SOCKETS: "1024" COUNTLY_CONFIG__API_MAX_UPLOAD_FILE_SIZE: "209715200" COUNTLY_CONFIG__API_TIMEOUT: "120000" + # -- Frontend-specific config frontend: COUNTLY_CONTAINER: frontend COUNTLY_CONFIG__WEB_PORT: "6001" COUNTLY_CONFIG__WEB_HOST: "0.0.0.0" COUNTLY_CONFIG__WEB_SECURE_COOKIES: "false" COUNTLY_CONFIG__COOKIE_MAXAGE: "86400000" + # -- Ingestor-specific config ingestor: COUNTLY_CONTAINER: ingestor COUNTLY_CONFIG__INGESTOR_PORT: 
"3010" COUNTLY_CONFIG__INGESTOR_HOST: "0.0.0.0" + # -- Aggregator-specific config aggregator: COUNTLY_CONTAINER: aggregator UV_THREADPOOL_SIZE: "6" + # -- Job server-specific config jobserver: COUNTLY_CONTAINER: jobserver COUNTLY_CONFIG__JOBSERVER_PORT: "3020" COUNTLY_CONFIG__JOBSERVER_HOST: "0.0.0.0" + # -- ClickHouse client configuration (shared across all components) clickhouse: COUNTLY_CONFIG__CLICKHOUSE_QUERYOPTIONS_MAX_EXECUTION_TIME: "600" COUNTLY_CONFIG__CLICKHOUSE_REQUEST_TIMEOUT: "1200000" @@ -314,6 +360,7 @@ config: COUNTLY_CONFIG__CLICKHOUSE_IDENTITY_DAYSOLD: "30" COUNTLY_CONFIG__CLICKHOUSE_IDENTITY_LIFETIME_MIN: "60" COUNTLY_CONFIG__CLICKHOUSE_IDENTITY_LIFETIME_MAX: "120" + # -- Kafka producer/consumer configuration (shared across all components) kafka: COUNTLY_CONFIG__KAFKA_ENABLED: "true" COUNTLY_CONFIG__KAFKA_DRILLEVENTSTOPIC: drill-events @@ -337,6 +384,7 @@ config: COUNTLY_CONFIG__KAFKA_CONSUMER_ENABLEAUTOCOMMIT: "false" COUNTLY_CONFIG__KAFKA_CONSUMER_MAXPOLLINTERVALMS: "600000" COUNTLY_CONFIG__KAFKA_CONNECTCONSUMERGROUPID: "connect-ch" + # -- OpenTelemetry and Pyroscope configuration otel: OTEL_ENABLED: "false" OTEL_EXPORTER_OTLP_ENDPOINT: "http://countly-observability-alloy-otlp.observability.svc.cluster.local:4318" @@ -345,7 +393,7 @@ config: OTEL_TRACES_SAMPLER_ARG: "1.0" PYROSCOPE_ENABLED: "false" -# Node options per component (injected into configmap) +# -- Node.js --max-old-space-size and GC options per component (injected via NODE_OPTIONS) nodeOptions: api: "--max-old-space-size=3072 --max-semi-space-size=256" frontend: "--max-old-space-size=2048" @@ -357,20 +405,28 @@ nodeOptions: # Control whether each backing service is deployed in-cluster (bundled) or provided externally. # When mode=external, the corresponding chart is not deployed and connection details must be provided. 
+# -- Backing service connection configuration backingServices: + # -- MongoDB connection (bundled: auto-resolve from countly-mongodb chart; external: provide details) mongodb: - mode: bundled # bundled | external + # -- bundled | external + mode: bundled # External-mode connection (used only when mode=external): host: "" port: "27017" - connectionString: "" # If set, used as-is (bypasses host/port/user/pass) + # -- Full connection string (if set, bypasses host/port/user/pass construction) + connectionString: "" username: "app" + # -- MongoDB password (REQUIRED on first install for bundled mode) password: "" database: "admin" replicaSet: "" + # -- Use a pre-created Secret for the connection string existingSecret: "" + # -- ClickHouse connection (bundled: auto-resolve from countly-clickhouse chart; external: provide details) clickhouse: - mode: bundled # bundled | external + # -- bundled | external + mode: bundled host: "" port: "8123" tls: "false" @@ -378,8 +434,11 @@ backingServices: password: "" database: "countly_drill" existingSecret: "" + # -- Kafka connection (bundled: auto-resolve from countly-kafka chart; external: provide details) kafka: - mode: bundled # bundled | external + # -- bundled | external + mode: bundled + # -- Kafka broker list (JSON array or comma-separated) brokers: "" securityProtocol: "PLAINTEXT" saslMechanism: "" @@ -388,24 +447,38 @@ backingServices: existingSecret: "" # --- Secrets --- +# Credentials injected into component pods. 
Three management modes: +# - values: create Secrets from Helm values (development) +# - existingSecret: reference pre-created Kubernetes Secrets (manual production) +# - externalSecret: use External Secrets Operator for vault integration (automated production) +# -- Secrets management configuration secrets: - mode: values # values | existingSecret | externalSecret + # -- Secret management mode: values | existingSecret | externalSecret + mode: values + # -- Preserve secrets on helm uninstall/upgrade keep: true + # -- Rotation ID annotation (change to trigger pod restart on secret rotation) rotationId: "" + # -- Countly application secrets common: existingSecret: "" + # -- Encryption key for exported reports encryptionReportsKey: "" + # -- Session cookie signing secret webSessionSecret: "" + # -- Internal password hashing secret passwordSecret: "" + # -- ClickHouse credential secrets clickhouse: existingSecret: "" username: "" password: "" database: "" + # -- Kafka credential secrets kafka: existingSecret: "" securityProtocol: "" @@ -413,17 +486,23 @@ secrets: saslUsername: "" saslPassword: "" + # -- MongoDB credential secrets mongodb: existingSecret: "" + # -- Key in the operator-generated Secret to read the connection string from key: "connectionString.standard" - password: "" # REQUIRED on first install (must match users.app.password in countly-mongodb chart) + # -- MongoDB password (REQUIRED on first install; must match users.app.password in countly-mongodb chart) + password: "" # --- ExternalSecret configuration (used only when mode=externalSecret) --- externalSecret: + # -- How often ESO syncs the secret refreshInterval: "1h" + # -- Reference to the SecretStore or ClusterSecretStore secretStoreRef: name: "" kind: ClusterSecretStore + # -- Remote key references in the external secret store remoteRefs: common: encryptionReportsKey: "" @@ -445,17 +524,25 @@ secrets: # --- Network Policy --- +# -- Network policy restricting ingress to Countly pods networkPolicy: + 
# -- Enable the NetworkPolicy enabled: false + # -- Namespace label selector for ingress controller traffic ingressNamespaceSelector: kubernetes.io/metadata.name: ingress-nginx + # -- Additional custom ingress rules additionalIngress: [] # --- Ingress --- +# -- Ingress configuration for external access to Countly ingress: + # -- Deploy the Ingress resource enabled: true + # -- Ingress class name (e.g. nginx, traefik) className: nginx + # -- Ingress annotations (F5 NGINX Ingress Controller defaults shown) annotations: # F5 NGINX Ingress Controller (OSS) annotations nginx.org/client-max-body-size: "50m" @@ -487,7 +574,9 @@ ingress: proxy_next_upstream_tries 3; proxy_temp_file_write_size 1m; client_body_timeout 120s; + # -- Hostname for the ingress rule hostname: countly.example.com + # -- TLS configuration tls: # TLS mode: letsencrypt | existingSecret | selfSigned | http # - http: No TLS (default) @@ -495,8 +584,13 @@ ingress: # - existingSecret: Bring your own TLS secret # - selfSigned: cert-manager self-signed CA (for development) mode: http + # -- cert-manager ClusterIssuer name (for letsencrypt mode) clusterIssuer: letsencrypt-prod - secretName: "" # Auto-derived if empty: -tls + # -- TLS Secret name (auto-derived if empty: -tls) + secretName: "" + # -- Self-signed CA configuration (for selfSigned mode) selfSigned: - issuerName: "" # Auto-derived if empty: -ca-issuer - caSecretName: "" # Auto-derived if empty: -ca-keypair + # -- Issuer name (auto-derived if empty: -ca-issuer) + issuerName: "" + # -- CA keypair Secret name (auto-derived if empty: -ca-keypair) + caSecretName: "" diff --git a/docs/ARGOCD.md b/docs/ARGOCD.md new file mode 100644 index 0000000..a45e161 --- /dev/null +++ b/docs/ARGOCD.md @@ -0,0 +1,601 @@ +# ArgoCD Deployment + +Deploy the Countly analytics platform using ArgoCD's GitOps model. 
The `countly-argocd` chart implements an **app-of-apps** pattern: one Helm release creates an `AppProject` and up to 6 `Application` CRs, each pointing to a child Helm chart in its own namespace. + +## Quick Start + +### Prerequisites + +1. **ArgoCD** installed on a management cluster (v2.8+) +2. **Required operators** installed on the target cluster — see [PREREQUISITES.md](PREREQUISITES.md) +3. **Git repo** accessible to ArgoCD (HTTPS or SSH) +4. **Custom health checks** configured in ArgoCD — see [Custom Health Checks](#custom-health-checks-required) + +### Deploy + +1. **Prepare your environment:** + ```bash + cp -r environments/reference environments/my-env + ``` + Edit `environments/my-env/global.yaml` — set `ingress.hostname`, sizing, TLS, security profiles. + Fill in secrets files (`secrets-mongodb.yaml`, `secrets-clickhouse.yaml`, etc.). + +2. **Install the ArgoCD chart:** + ```bash + helm install countly charts/countly-argocd \ + --set environment=my-env \ + --set destination.server=https://kubernetes.default.svc \ + -n argocd + ``` + +3. **Verify all Applications sync:** + ```bash + kubectl get applications -n argocd -l app.kubernetes.io/instance=countly + ``` + +4. **Watch sync progress:** + ```bash + argocd app list -l app.kubernetes.io/instance=countly + ``` + +5. **Access Countly** once all apps show `Healthy` / `Synced` — visit the ingress hostname configured in `global.yaml`. 
+ +### Enable the Migration Service + +The batch migration service (MongoDB to ClickHouse) is disabled by default: + +```bash +helm upgrade countly charts/countly-argocd \ + --set migration.enabled=true \ + -n argocd --reuse-values +``` + +--- + +## Architecture + +```mermaid +flowchart TB + subgraph argocd["argocd namespace"] + project["AppProject\ncountly"] + app_mongo["Application\nmongodb\n(wave 0)"] + app_ch["Application\nclickhouse\n(wave 0)"] + app_kafka["Application\nkafka\n(wave 5)"] + app_countly["Application\ncountly\n(wave 10)"] + app_mig["Application\nmigration\n(wave 10, optional)"] + app_obs["Application\nobservability\n(wave 15)"] + end + + subgraph targets["Target Cluster"] + ns_mongo["mongodb namespace"] + ns_ch["clickhouse namespace"] + ns_kafka["kafka namespace"] + ns_countly["countly namespace"] + ns_mig["countly-migration namespace"] + ns_obs["observability namespace"] + end + + project --- app_mongo & app_ch & app_kafka & app_countly & app_mig & app_obs + app_mongo -->|"charts/countly-mongodb"| ns_mongo + app_ch -->|"charts/countly-clickhouse"| ns_ch + app_kafka -->|"charts/countly-kafka"| ns_kafka + app_countly -->|"charts/countly"| ns_countly + app_mig -->|"charts/countly-migration"| ns_mig + app_obs -->|"charts/countly-observability"| ns_obs +``` + +Each Application: +- Points to a chart path in the Git repo (`repoURL` + `targetRevision`) +- Layers value files: `global.yaml` -> `profiles` -> `environment` -> `secrets` +- Injects `argocd.enabled=true` via Helm parameters (activates sync-wave annotations in child charts) + +--- + +## Sync Waves + +ArgoCD processes resources in sync-wave order. Lower waves complete before higher waves start. The Countly deployment uses **two levels** of sync-wave ordering. 
+ +### Application-Level Waves (between charts) + +These control the order in which the 6 child charts deploy: + +| Wave | Applications | Purpose | +|------|-------------|---------| +| **0** | mongodb, clickhouse | Data stores deploy first | +| **5** | kafka | Message broker — depends on databases for Kafka Connect sink | +| **10** | countly, migration | Application layer — depends on all backing services | +| **15** | observability | Monitoring — deploys last, needs targets to scrape | + +Wave 5 (Kafka) **will not start** until all Wave 0 Applications (MongoDB, ClickHouse) report `Healthy`. This requires [custom health checks](#custom-health-checks-required). + +### Resource-Level Waves (within each chart) + +When `argocd.enabled=true` is set (done automatically by the ArgoCD chart), each child chart annotates its resources with `argocd.argoproj.io/sync-wave`. All charts use an identical helper function: + +```gotemplate +{{- define ".syncWave" -}} +{{- if .root.Values.argocd.enabled }} +argocd.argoproj.io/sync-wave: {{ .wave | quote }} +{{- end }} +{{- end -}} +``` + +This ensures that within each Application, resources are created in dependency order (namespaces before secrets, secrets before deployments, etc.). 
+ +### Complete Resource-Level Wave Map + +#### countly + +| Wave | Resources | +|------|-----------| +| 0 | Namespace, ServiceAccount | +| 1 | Secrets (common, mongodb, kafka, clickhouse), ExternalSecrets | +| 2 | ConfigMaps (common, api, aggregator, frontend, ingestor, jobserver, clickhouse, kafka, otel) | +| 5 | Deployments (API, Frontend, Ingestor, Aggregator, JobServer) | +| 10 | Ingress, TLS self-signed ClusterIssuer | +| 11 | TLS CA Certificate | +| 12 | TLS CA Issuer | + +#### countly-mongodb + +| Wave | Resources | +|------|-----------| +| 0 | Namespace, NetworkPolicy, Secrets (user + keyfile) | +| 5 | MongoDBCommunity CR, PodDisruptionBudget | +| 10 | MongoDB Exporter Deployment, ServiceMonitor | + +#### countly-clickhouse + +| Wave | Resources | +|------|-----------| +| 0 | Namespace, NetworkPolicy, Secret (default password) | +| 3 | PodDisruptionBudget (Keeper) | +| 5 | ClickHouseCluster CR, KeeperCluster CR, PodDisruptionBudget (Server) | +| 10 | ServiceMonitors, Metrics Services | + +#### countly-kafka + +| Wave | Resources | +|------|-----------| +| 0 | Namespace, ConfigMaps (connect-env, metrics), NetworkPolicies, Secret (ClickHouse connect) | +| 5 | Kafka CR, KafkaNodePool CRs | +| 10 | KafkaConnect CR, HPA | +| 15 | KafkaConnector CRs | + +#### countly-migration + +| Wave | Resources | +|------|-----------| +| 0 | ServiceAccount, ConfigMap, NetworkPolicy, ExternalSecret | +| 1 | Secret | +| 10 | Deployment, Service, Ingress, ServiceMonitor | + +Namespace is **not** rendered by the chart when using ArgoCD — `CreateNamespace=true` in the Application handles it. 
+ +#### countly-observability + +| Wave | Resources | +|------|-----------| +| 0 | ServiceAccount, NetworkPolicy | +| 5 | Ingress | + +--- + +## Sync Policy + +All Applications share a common sync policy defined in the chart's `_helpers.tpl`: + +| Option | Default | Purpose | +|--------|---------|---------| +| `syncPolicy.automated` | `true` | Enable automatic sync from Git | +| `syncPolicy.prune` | `true` | Delete resources removed from Git | +| `syncPolicy.selfHeal` | `true` | Revert manual changes back to Git state | +| `CreateNamespace=true` | Always | ArgoCD creates target namespaces — charts don't need to | +| `ServerSideApply=true` | Always | Required for large CRDs (ClickHouse, Kafka, MongoDB operators) | +| `RespectIgnoreDifferences=true` | Always | Honors per-resource ignoreDifferences | +| `retry.limit` | `5` | Maximum sync retry attempts | +| `retry.backoff.duration` | `5s` | Initial retry delay | +| `retry.backoff.factor` | `2` | Exponential backoff multiplier | +| `retry.backoff.maxDuration` | `3m` | Maximum retry delay | + +### Disable Automated Sync + +For manual-sync-only workflows (e.g., staging environments with approval gates): + +```bash +helm install countly charts/countly-argocd \ + --set syncPolicy.automated=false \ + -n argocd +``` + +--- + +## Custom Health Checks (Required) + +ArgoCD does not know how to assess health of operator-managed CRDs by default. **Without custom health checks, sync waves cannot block on actual readiness** — Wave 5 (Kafka) would start immediately instead of waiting for Wave 0 databases to become healthy. 
+ +Add the following to the `argocd-cm` ConfigMap in the `argocd` namespace: + +### Strimzi Kafka + +Applies to: `Kafka`, `KafkaConnect`, `KafkaNodePool`, `KafkaConnector` + +```yaml +resource.customizations.health.kafka.strimzi.io_Kafka: | + hs = {} + if obj.status ~= nil and obj.status.conditions ~= nil then + for _, c in ipairs(obj.status.conditions) do + if c.type == "Ready" and c.status == "True" then + hs.status = "Healthy"; hs.message = c.message or "Ready"; return hs + end + if c.type == "NotReady" then + hs.status = "Progressing"; hs.message = c.message or "Not ready"; return hs + end + end + end + hs.status = "Progressing"; hs.message = "Waiting for status"; return hs +``` + +Use the same Lua script for `KafkaConnect`, `KafkaNodePool`, and `KafkaConnector` — replace `_Kafka` with `_KafkaConnect`, `_KafkaNodePool`, or `_KafkaConnector` in the key name. + +### ClickHouse + +```yaml +resource.customizations.health.clickhouse.com_ClickHouseCluster: | + hs = {} + if obj.status ~= nil and obj.status.status ~= nil then + if obj.status.status == "Completed" then + hs.status = "Healthy"; hs.message = "Completed"; return hs + end + end + hs.status = "Progressing"; hs.message = "Provisioning"; return hs +``` + +### MongoDB + +```yaml +resource.customizations.health.mongodbcommunity.mongodb.com_MongoDBCommunity: | + hs = {} + if obj.status ~= nil and obj.status.phase ~= nil then + if obj.status.phase == "Running" then + hs.status = "Healthy"; hs.message = "Running"; return hs + end + end + hs.status = "Progressing"; hs.message = "Provisioning"; return hs +``` + +### How to Apply + +**Option 1:** Edit the ConfigMap directly: +```bash +kubectl edit configmap argocd-cm -n argocd +``` + +**Option 2:** If ArgoCD is deployed via Helm, add the health checks to the ArgoCD Helm values: +```yaml +# argocd-values.yaml +server: + config: + resource.customizations.health.kafka.strimzi.io_Kafka: | + ... 
+``` + +**Option 3:** Use Kustomize to patch the ConfigMap in your ArgoCD deployment manifests. + +--- + +## ignoreDifferences + +Operator-managed CRDs have `/status` fields that operators constantly update. Without `ignoreDifferences`, ArgoCD would show these Applications as perpetually "OutOfSync" even when nothing changed in Git. + +The `countly-argocd` chart configures `ignoreDifferences` per Application: + +| Application | API Group | Kind | Ignored Path | +|-------------|-----------|------|-------------| +| mongodb | `mongodbcommunity.mongodb.com` | `MongoDBCommunity` | `/status` | +| clickhouse | `clickhouse.com` | `ClickHouseCluster` | `/status` | +| clickhouse | `clickhouse.com` | `KeeperCluster` | `/status` | +| kafka | `kafka.strimzi.io` | `Kafka` | `/status` | +| kafka | `kafka.strimzi.io` | `KafkaConnect` | `/status` | +| kafka | `kafka.strimzi.io` | `KafkaConnector` | `/status` | +| kafka | `kafka.strimzi.io` | `KafkaNodePool` | `/status` | +| countly | `networking.k8s.io` | `Ingress` | `/status` | + +These are configured automatically — no manual action required. The `RespectIgnoreDifferences=true` sync option ensures ArgoCD honors them. 
+
+---
+
+## AppProject
+
+Each Helm release creates an isolated `AppProject`:
+
+```yaml
+spec:
+  description: "Countly analytics platform (<release-name>)"
+  sourceRepos:
+    - <repoURL>                      # Restricted to configured repo
+  destinations:
+    - namespace: "*"
+      server: <destination.server>   # Restricted to configured cluster
+  clusterResourceWhitelist:
+    - group: storage.k8s.io          # StorageClass
+      kind: StorageClass
+    - group: rbac.authorization.k8s.io
+      kind: ClusterRole              # Operator RBAC
+    - group: rbac.authorization.k8s.io
+      kind: ClusterRoleBinding
+    - group: cert-manager.io
+      kind: ClusterIssuer            # TLS issuers
+  namespaceResourceWhitelist:
+    - group: "*"
+      kind: "*"
+  orphanedResources:
+    warn: true                       # Warn but don't prune orphans
+```
+
+Key design decisions:
+- **Source isolation**: Each project only allows the configured `repoURL`
+- **Cluster isolation**: Each project only allows the configured destination cluster
+- **Cluster resources**: Whitelisted for StorageClass, RBAC, and ClusterIssuer — the minimum needed by Countly charts
+- **Orphan detection**: Warns on resources in target namespaces not tracked by any Application
+
+---
+
+## Multi-Tenant Deployment
+
+The chart is designed for multi-tenant environments where each customer gets an isolated Countly deployment.
+ +### One Release Per Customer + +```bash +# Customer A — production cluster +helm install customer-a charts/countly-argocd \ + --set environment=customer-a \ + --set project=countly-customer-a \ + --set destination.server=https://cluster-a.example.com \ + --set global.sizing=production \ + --set global.security=hardened \ + -n argocd + +# Customer B — small cluster +helm install customer-b charts/countly-argocd \ + --set environment=customer-b \ + --set project=countly-customer-b \ + --set destination.server=https://cluster-b.example.com \ + --set global.sizing=small \ + --set global.security=open \ + -n argocd +``` + +Each release creates: +- 1 isolated `AppProject` (named after the release or `project` override) +- Up to 6 `Application` CRs with unique names (`-mongodb`, `-clickhouse`, etc.) +- Target namespaces created automatically by `CreateNamespace=true` + +See `charts/countly-argocd/examples/multi-cluster.yaml` for a complete values file. + +### ApplicationSet Alternative + +For 50+ customers where maintaining individual Helm releases is unwieldy, use an `ApplicationSet` with a list generator: + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: ApplicationSet +metadata: + name: countly-mongodb + namespace: argocd +spec: + generators: + - list: + elements: + - customer: customer-a + server: https://cluster-a.example.com + sizing: production + security: hardened + - customer: customer-b + server: https://cluster-b.example.com + sizing: small + security: open + template: + metadata: + name: "{{customer}}-mongodb" + annotations: + argocd.argoproj.io/sync-wave: "0" + spec: + project: "{{customer}}" + source: + repoURL: https://github.com/Countly/helm.git + targetRevision: main + path: charts/countly-mongodb + helm: + valueFiles: + - "../../environments/{{customer}}/global.yaml" + - "../../profiles/sizing/{{sizing}}/mongodb.yaml" + - "../../profiles/security/{{security}}/mongodb.yaml" + - "../../environments/{{customer}}/mongodb.yaml" + parameters: + - name: 
argocd.enabled
+              value: "true"
+      destination:
+        server: "{{server}}"
+        namespace: mongodb
+```
+
+Create one ApplicationSet per component (MongoDB, ClickHouse, Kafka, Countly, Observability, Migration). See `charts/countly-argocd/examples/applicationset.yaml` for the full example.
+
+---
+
+## Migration Service
+
+The `countly-migration` chart deploys a MongoDB-to-ClickHouse batch migration service. It is fully ArgoCD-compatible:
+
+- **Wave 10**: Deploys in parallel with Countly, after databases and Kafka are healthy
+- **Disabled by default**: Enable with `--set migration.enabled=true`
+- **Bundled Redis**: Includes a Bitnami Redis subchart for hot-state caching. No separate Redis Application needed — Redis pods deploy in the same namespace (`countly-migration`)
+- **Singleton**: `values.schema.json` enforces `replicas: 1` and `strategy: Recreate` to prevent concurrent migration runs
+- **External progress link**: When `externalLink.enabled=true`, the Deployment gets a `link.argocd.argoproj.io/progress` annotation. This renders as a clickable link in the ArgoCD UI pointing to the migration progress endpoint
+
+```yaml
+# Enable migration with progress link
+migration:
+  enabled: true
+
+# In the migration values file:
+argocd:
+  enabled: true
+externalLink:
+  enabled: true
+  url: "https://migration.example.internal/runs/current"
+```
+
+See [charts/countly-migration/README.md](../charts/countly-migration/README.md) for full configuration.
+
+---
+
+## Value File Layering
+
+Each Application layers Helm value files in a specific order. Later files override earlier ones:
+
+1. `environments/<environment>/global.yaml` — environment-wide defaults
+2. `profiles/sizing/<sizing>/<chart>.yaml` — CPU, memory, replicas, HPA, PDBs
+3. 
`profiles/<profile-type>/<profile>/<chart>.yaml` — additional profiles per chart:
+
+   | Chart | Extra Profiles |
+   |-------|---------------|
+   | countly | `tls/`, `observability/`, `security/` |
+   | countly-kafka | `kafka-connect/`, `observability/`, `security/` |
+   | countly-clickhouse | `security/` |
+   | countly-mongodb | `security/` |
+   | countly-observability | `observability/`, `security/` |
+   | countly-migration | *(no profiles — uses environment values only)* |
+
+4. `environments/<environment>/<chart>.yaml` — environment-specific overrides
+5. `environments/<environment>/secrets-<chart>.yaml` — credentials (gitignored)
+
+Additionally, the ArgoCD chart injects `argocd.enabled=true` via Helm `parameters` (not value files). This activates sync-wave annotations in each child chart.
+
+---
+
+## Configuration Reference
+
+Full `countly-argocd` values:
+
+```yaml
+# Git repo containing the Helm charts
+repoURL: "https://github.com/Countly/helm.git"
+targetRevision: main
+
+# Environment name (maps to environments/<environment>/ directory)
+environment: example-production
+
+# Target cluster
+destination:
+  server: "https://kubernetes.default.svc"
+
+# ArgoCD project name (defaults to release name if empty)
+# Each customer MUST have a unique project to avoid collisions
+project: ""
+
+# Profile selections (passed to child charts via valueFiles)
+global:
+  sizing: production        # local | small | production
+  security: hardened        # open | hardened
+  tls: letsencrypt          # none | letsencrypt | provided | selfSigned
+  observability: full       # disabled | full | external-grafana | external
+  kafkaConnect: balanced    # throughput | balanced | low-latency
+
+# Component toggles
+mongodb:
+  enabled: true
+  namespace: mongodb
+clickhouse:
+  enabled: true
+  namespace: clickhouse
+kafka:
+  enabled: true
+  namespace: kafka
+countly:
+  enabled: true
+  namespace: countly
+observability:
+  enabled: true
+  namespace: observability
+migration:
+  enabled: false            # Disabled by default
+  namespace: countly-migration
+
+# Sync policy for all child Applications
+syncPolicy:
+ 
automated: true + selfHeal: true + prune: true + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m +``` + +--- + +## Deployment Methods + +| Method | Best For | Orchestration | Prerequisites | +|--------|----------|---------------|---------------| +| **Helmfile** | CI/CD pipelines, single-cluster | `helmfile apply` handles ordering via `needs:` | Helmfile CLI | +| **ArgoCD** (this chart) | GitOps, multi-cluster, multi-tenant | App-of-apps with sync waves | ArgoCD + operators + health checks | +| **Manual `helm install`** | Single-chart testing, development | User runs commands in dependency order | Helm CLI | + +### When to Use ArgoCD + +- You want GitOps: changes to the Git repo automatically sync to the cluster +- You manage multiple customers or clusters from a single ArgoCD instance +- You need drift detection (`selfHeal`) and automated pruning +- You want a UI for monitoring deployment status across all components + +### When to Use Helmfile + +- You deploy from a CI/CD pipeline (e.g., GitHub Actions, GitLab CI) +- You deploy to a single cluster +- You don't need continuous reconciliation +- You want simpler tooling without ArgoCD infrastructure + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| Application stuck "Progressing" forever | Custom health checks not configured | Add Lua health checks to `argocd-cm` — see [Custom Health Checks](#custom-health-checks-required) | +| Application shows "OutOfSync" constantly | Operator `/status` fields changing | Verify `ignoreDifferences` is applied (it's automatic from the chart) | +| "the server could not find the requested resource" | CRDs not installed | Install operators first — see [PREREQUISITES.md](PREREQUISITES.md) | +| Namespace already exists error | Both chart and ArgoCD creating namespace | Ensure `namespace.create: false` in chart values (automatic when `argocd.enabled=true`) | +| Large CRD apply fails | Client-side apply can't handle large 
objects | `ServerSideApply=true` is set by default — verify it's in syncOptions |
+| Wave N starts before Wave N-1 is healthy | Missing health checks | ArgoCD cannot determine health without custom Lua checks — see above |
+| "Secret not found" during sync | Missing secrets file | Ensure `environments/<environment>/secrets-<chart>.yaml` exists with required values |
+| "Permission denied" on cluster resource | AppProject too restrictive | Check `clusterResourceWhitelist` includes the resource group/kind |
+| Migration not deploying | Migration disabled | Set `migration.enabled: true` in values |
+| Redis not starting in migration namespace | Redis subchart disabled | Set `redis.enabled: true` in migration values (enabled by default) |
+
+### Useful Commands
+
+```bash
+# List all Countly Applications
+kubectl get applications -n argocd -l app.kubernetes.io/instance=<release>
+
+# Sync all Applications
+argocd app sync -l app.kubernetes.io/instance=<release>
+
+# Check sync status
+argocd app list -l app.kubernetes.io/instance=<release>
+
+# View Application details
+argocd app get <release>-mongodb
+
+# Force refresh from Git
+argocd app get <release>-mongodb --refresh
+
+# Teardown (cascading — finalizers delete all child resources)
+helm uninstall <release> -n argocd
+```
diff --git a/docs/PREREQUISITES.md b/docs/PREREQUISITES.md
index 3cd8376..37dc7bc 100644
--- a/docs/PREREQUISITES.md
+++ b/docs/PREREQUISITES.md
@@ -81,3 +81,7 @@ kubectl get pods -n ingress-nginx
 The `countly-observability` chart does **not** require any operators — it deploys standard Kubernetes workloads (Deployments, StatefulSets, DaemonSets). No additional prerequisites are needed.
 
 See [charts/countly-observability/README.md](../charts/countly-observability/README.md) for configuration.
+
+## 6. ArgoCD (optional)
+
+If deploying via ArgoCD instead of Helmfile, install ArgoCD (v2.8+) on a management cluster and configure custom health checks for operator CRDs. See [ARGOCD.md](ARGOCD.md) for full setup instructions.
diff --git a/docs/migration-guide.md b/docs/migration-guide.md new file mode 100644 index 0000000..9f526b3 --- /dev/null +++ b/docs/migration-guide.md @@ -0,0 +1,603 @@ +# Countly Migration Guide + +Migrate Countly drill events from MongoDB to ClickHouse. This guide covers architecture, deployment, operations, and troubleshooting. + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Deployment](#deployment) +- [Multi-Pod Mode](#multi-pod-mode) +- [Configuration Reference](#configuration-reference) +- [Operations](#operations) +- [API Reference](#api-reference) +- [Troubleshooting](#troubleshooting) + +--- + +## Overview + +The migration service reads `drill_events*` collections from MongoDB, transforms documents into ClickHouse rows, and inserts them into the `drill_events` table. It handles: + +- **Multi-collection discovery** — automatically finds all collections matching the prefix +- **Crash recovery** — resumes from the last committed batch after restart +- **Idempotent inserts** — deduplication tokens prevent duplicate rows on retry +- **Backpressure** — pauses when ClickHouse is under compaction pressure +- **Multi-pod coordination** — distribute work across multiple pods with Redis-based locking + +### Data Flow + +``` +MongoDB Migration Service ClickHouse +(drill_events*) (1+ pods) (drill_events) + + collections ──discover──▶ CollectionOrchestrator + │ + ◀──read page── BatchRunner ──insert batch──▶ + │ + ManifestStore ──state──▶ MongoDB (mig_runs, mig_batches) + RedisHotState ──cache──▶ Redis (bitmaps, stats, commands) +``` + +### Batch Processing Loop + +Each batch follows this sequence: + +1. **Check commands** — pause, resume, stop-after-batch +2. **Sample backpressure** — query ClickHouse parts count, pause if high +3. **Read page** — cursor-paginated read from MongoDB (`cd`, `_id` compound index) +4. **Transform** — normalize timestamps, derive event names, validate fields +5. 
**Persist manifest** — write batch metadata to MongoDB with SHA-256 digest +6. **Insert to ClickHouse** — with dedup token and exponential backoff retry +7. **Checkpoint** — atomically mark batch done and advance cursor +8. **Update Redis** — stats, bitmap, timeline (best-effort, non-blocking) +9. **Conditional GC** — trigger if heap/RSS thresholds exceeded + +### State Model + +| Store | Purpose | Durability | +|-------|---------|------------| +| MongoDB (`mig_runs`, `mig_batches`) | Authoritative run/batch state, cursors, digests | Durable (write concern: majority) | +| Redis | Hot state, completion bitmaps, commands, timeline | Rebuildable from manifest | +| ClickHouse | Target data (append-only with dedup tokens) | Durable | + +**Core principle:** MongoDB manifest is authoritative. Redis is rebuildable. ClickHouse is append-only with dedup tokens. + +--- + +## Architecture + +### Components + +| Component | Responsibility | +|-----------|---------------| +| **CollectionOrchestrator** | Discovers collections, processes them sequentially (single-pod) or coordinates via locks (multi-pod) | +| **BatchRunner** | Core batch loop — read, transform, insert, checkpoint | +| **MongoReader** | Cursor-based pagination on `(cd, _id)` compound index | +| **ClickHouseWriter** | Batch insertion with retry and dedup tokens | +| **ClickHousePressure** | Monitors ClickHouse parts/merges for backpressure | +| **ManifestStore** | MongoDB-backed authoritative state (runs, batches, events) | +| **RedisHotState** | Fast rebuildable cache (bitmaps, stats, commands) | +| **GcController** | Manual V8 garbage collection based on heap/RSS thresholds | +| **HTTP Server** | Health checks, stats, control endpoints, run management | + +### Run Modes + +| Mode | Behavior | +|------|----------| +| `resume` (default) | Resume active/paused/stopped run, or create new if none exists | +| `new-run` | Mark any active run as completed, start fresh | +| `clone-run` | Clone active run's upper bound, 
start new run with same boundary | + +### Crash Recovery + +The service recovers from crashes at any point: + +- **Before insert**: Re-reads source data, verifies SHA-256 digest, retries insert +- **After insert, before checkpoint**: Re-inserts with same dedup token (ClickHouse ignores duplicate) +- **After checkpoint**: Resumes from next batch normally + +--- + +## Prerequisites + +- **MongoDB** with `drill_events*` collections in `countly_drill` database +- **ClickHouse** with `drill_events` table in `countly_drill` database +- **Redis** for state tracking (bundled by default in the Helm chart) +- **Kubernetes** cluster with the `countly-mongodb` and `countly-clickhouse` charts deployed + +The ClickHouse `drill_events` table must exist before starting the migration. The migration service does **not** create the target table. + +--- + +## Deployment + +### Minimal (alongside sibling charts) + +The chart defaults to **bundled mode** — it auto-discovers MongoDB and ClickHouse from sibling charts via DNS. 
Only passwords are required: + +```bash +helm install countly-migration ./charts/countly-migration \ + -n countly-migration --create-namespace \ + --set backingServices.mongodb.password="YOUR_MONGODB_APP_PASSWORD" \ + --set backingServices.clickhouse.password="YOUR_CLICKHOUSE_PASSWORD" +``` + +### File-based (recommended) + +Create environment files for repeatable deploys: + +**`environments/my-env/migration.yaml`:** +```yaml +# Override defaults as needed (empty file uses all defaults) +{} +``` + +**`environments/my-env/secrets-migration.yaml`:** +```yaml +backingServices: + mongodb: + password: "your-mongodb-password" + clickhouse: + password: "your-clickhouse-password" +``` + +Deploy: +```bash +helm install countly-migration ./charts/countly-migration \ + -n countly-migration --create-namespace \ + --wait --timeout 5m \ + -f environments/my-env/global.yaml \ + -f environments/my-env/migration.yaml \ + -f environments/my-env/secrets-migration.yaml +``` + +### External MongoDB/ClickHouse + +If MongoDB and ClickHouse are not deployed via sibling charts: + +```bash +helm install countly-migration ./charts/countly-migration \ + -n countly-migration --create-namespace \ + --set backingServices.mongodb.mode=external \ + --set backingServices.mongodb.uri="mongodb://app:PASS@host:27017/admin?replicaSet=rs0&ssl=false" \ + --set backingServices.clickhouse.mode=external \ + --set backingServices.clickhouse.url="http://clickhouse-host:8123" \ + --set backingServices.clickhouse.password="PASS" +``` + +### Verify deployment + +```bash +# 1. Check pods +kubectl get pods -n countly-migration + +# 2. Check health (liveness) +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/healthz').then(r=>r.text()).then(console.log)" + +# 3. Check readiness (all backing services) +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/readyz').then(r=>r.text()).then(console.log)" + +# 4. 
Check migration progress +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/stats').then(r=>r.json()).then(d=>console.log(JSON.stringify(d,null,2)))" + +# 5. View logs +kubectl logs -n countly-migration -l app.kubernetes.io/name=countly-migration -f +``` + +--- + +## Multi-Pod Mode + +Scale the migration across multiple pods for faster throughput. Each pod picks up collections (or time ranges within large collections) via Redis-based locking. + +### How it works + +1. **Collection-level locking**: Each pod acquires a Redis lock before processing a collection. Other pods skip locked collections and pick up the next available one. +2. **Range splitting**: Collections larger than `rangeParallelThreshold` documents are split into time ranges. Multiple pods process different ranges of the same collection in parallel. +3. **Heartbeat & dead pod detection**: Pods send heartbeats every `podHeartbeatMs`. If a pod misses heartbeats for `podDeadAfterSec`, its locks are released for other pods to claim. +4. **Lock renewal**: Active locks are renewed every `lockRenewMs` to prevent expiration during long batches. 
+ +### Enable multi-pod mode + +```yaml +deployment: + replicas: 3 + strategy: + type: RollingUpdate + +pdb: + enabled: true + minAvailable: 1 +``` + +Or via helm: +```bash +helm upgrade countly-migration ./charts/countly-migration \ + -n countly-migration --reuse-values \ + --set deployment.replicas=3 \ + --set deployment.strategy.type=RollingUpdate \ + --set pdb.enabled=true +``` + +### Worker configuration + +| Value | Env Var | Default | Description | +|-------|---------|---------|-------------| +| `worker.enabled` | `MULTI_POD_ENABLED` | `true` | Enable coordination (auto-activates when replicas > 1) | +| `worker.lockTtlSec` | `LOCK_TTL_SECONDS` | `300` | Collection lock TTL (seconds) | +| `worker.lockRenewMs` | `LOCK_RENEW_MS` | `60000` | Lock renewal interval (ms) | +| `worker.podHeartbeatMs` | `POD_HEARTBEAT_MS` | `30000` | Heartbeat interval (ms) | +| `worker.podDeadAfterSec` | `POD_DEAD_AFTER_SEC` | `180` | Dead pod threshold (seconds) | +| `worker.rangeParallelThreshold` | `RANGE_PARALLEL_THRESHOLD` | `500000` | Doc count to trigger range splitting | +| `worker.rangeCount` | `RANGE_COUNT` | `100` | Number of time ranges per collection | +| `worker.rangeLeaseTtlSec` | `RANGE_LEASE_TTL_SEC` | `300` | Range lease TTL (seconds) | +| `worker.progressUpdateMs` | `PROGRESS_UPDATE_MS` | `5000` | Progress report interval (ms) | + +### Multi-pod operations + +```bash +# Global pause (all pods) +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/global/pause',{method:'POST'}).then(r=>r.text()).then(console.log)" + +# Global resume +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/global/resume',{method:'POST'}).then(r=>r.text()).then(console.log)" + +# List collection locks +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/locks').then(r=>r.text()).then(console.log)" + +# List all 
pods and their status +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/pods').then(r=>r.text()).then(console.log)" + +# Scale up/down +kubectl scale deploy/countly-migration -n countly-migration --replicas=5 +``` + +### When to use multi-pod + +- **Many collections**: Multiple pods each take a different collection +- **Large collections** (>500K docs): Range splitting distributes work within a single collection +- **Time-sensitive migrations**: Reduce total wall-clock time by parallelizing + +### When single-pod is enough + +- Few collections with moderate size +- ClickHouse is the bottleneck (backpressure), not MongoDB reads +- Simpler operations and debugging + +--- + +## Configuration Reference + +### Service + +| Variable | Default | Description | +|----------|---------|-------------| +| `SERVICE_NAME` | `countly-migration` | Service identifier | +| `SERVICE_PORT` | `8080` | HTTP server port | +| `SERVICE_HOST` | `0.0.0.0` | Bind address | +| `GRACEFUL_SHUTDOWN_TIMEOUT_MS` | `60000` | Shutdown grace period (ms) | +| `RERUN_MODE` | `resume` | `resume`, `new-run`, or `clone-run` | +| `LOG_LEVEL` | `info` | `fatal`, `error`, `warn`, `info`, `debug`, `trace` | + +### MongoDB Source + +| Variable | Default | Description | +|----------|---------|-------------| +| `MONGO_URI` | *(required)* | Full connection string | +| `MONGO_DB` | `countly_drill` | Source database | +| `MONGO_COLLECTION_PREFIX` | `drill_events` | Prefix to discover collections | +| `MONGO_READ_PREFERENCE` | `primary` | Read preference | +| `MONGO_READ_CONCERN` | `majority` | Read concern level | +| `MONGO_RETRY_READS` | `true` | Retry transient read failures | +| `MONGO_APP_NAME` | `countly-migration` | Driver app name | +| `MONGO_BATCH_ROWS_TARGET` | `10000` | Documents per batch | +| `MONGO_CURSOR_BATCH_SIZE` | `2000` | MongoDB cursor fetch size | +| `MONGO_MAX_TIME_MS` | `120000` | Cursor timeout (ms) | + +### ClickHouse Target + +| 
Variable | Default | Description | +|----------|---------|-------------| +| `CLICKHOUSE_URL` | *(required)* | HTTP endpoint | +| `CLICKHOUSE_DB` | `countly_drill` | Target database | +| `CLICKHOUSE_TABLE` | `drill_events` | Target table | +| `CLICKHOUSE_USERNAME` | `default` | Username | +| `CLICKHOUSE_PASSWORD` | *(empty)* | Password | +| `CLICKHOUSE_QUERY_TIMEOUT_MS` | `120000` | Query timeout (ms) | +| `CLICKHOUSE_MAX_RETRIES` | `8` | Max insert retry attempts | +| `CLICKHOUSE_RETRY_BASE_DELAY_MS` | `1000` | Backoff base delay (ms) | +| `CLICKHOUSE_RETRY_MAX_DELAY_MS` | `30000` | Backoff max delay (ms) | +| `CLICKHOUSE_USE_DEDUP_TOKEN` | `true` | Insert deduplication tokens | + +### Backpressure + +| Variable | Default | Description | +|----------|---------|-------------| +| `BACKPRESSURE_ENABLED` | `true` | Monitor ClickHouse parts | +| `BACKPRESSURE_PARTS_TO_THROW_INSERT` | `300` | Parts threshold to pause | +| `BACKPRESSURE_MAX_PARTS_IN_TOTAL` | `500` | Max total parts | +| `BACKPRESSURE_PARTITION_PCT_HIGH` | `0.50` | Partition high watermark (pause) | +| `BACKPRESSURE_PARTITION_PCT_LOW` | `0.35` | Partition low watermark (resume) | +| `BACKPRESSURE_TOTAL_PCT_HIGH` | `0.50` | Total high watermark (pause) | +| `BACKPRESSURE_TOTAL_PCT_LOW` | `0.40` | Total low watermark (resume) | +| `BACKPRESSURE_POLL_INTERVAL_MS` | `15000` | Polling interval (ms) | +| `BACKPRESSURE_MAX_PAUSE_EPISODE_MS` | `180000` | Max pause duration before force resume (ms) | + +### Garbage Collection + +| Variable | Default | Description | +|----------|---------|-------------| +| `GC_ENABLED` | `true` | Enable manual V8 GC | +| `GC_RSS_SOFT_LIMIT_MB` | `1536` | RSS threshold to trigger GC | +| `GC_RSS_HARD_LIMIT_MB` | `2048` | RSS threshold to log warning | +| `GC_HEAP_USED_RATIO` | `0.70` | Heap usage ratio trigger | +| `GC_EVERY_N_BATCHES` | `10` | GC every N batches | + +### State Storage + +| Variable | Default | Description | +|----------|---------|-------------| +| `MANIFEST_DB` | 
`countly_drill` | MongoDB database for run manifests | +| `REDIS_URL` | *(auto)* | Redis URL (auto-wired from bundled subchart) | +| `REDIS_KEY_PREFIX` | `mig` | Redis key namespace | + +### Transform + +| Variable | Default | Description | +|----------|---------|-------------| +| `TRANSFORM_VERSION` | `v1` | Transform version tag stored in manifest | + +--- + +## Operations + +### Port-forward for browser access + +```bash +kubectl port-forward -n countly-migration svc/countly-migration 8080:8080 +# Open: http://localhost:8080/stats +``` + +### Pause and resume + +```bash +# Pause after current batch +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/pause',{method:'POST'}).then(r=>r.text()).then(console.log)" + +# Resume +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/resume',{method:'POST'}).then(r=>r.text()).then(console.log)" +``` + +### Graceful stop + +```bash +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/stop-after-batch',{method:'POST'}).then(r=>r.text()).then(console.log)" +``` + +### Check progress + +```bash +# Overall stats +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/stats').then(r=>r.json()).then(d=>console.log(JSON.stringify(d,null,2)))" + +# Current run +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/runs/current').then(r=>r.text()).then(console.log)" + +# All runs +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/runs?limit=10').then(r=>r.text()).then(console.log)" +``` + +### Check failures + +```bash +# Get failure analysis for a run +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e 
"fetch('http://localhost:8080/runs/RUN_ID/failures').then(r=>r.text()).then(console.log)" +``` + +### Cleanup Redis cache + +After a run completes, free Redis memory: + +```bash +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/runs/RUN_ID/cache',{method:'DELETE'}).then(r=>r.text()).then(console.log)" +``` + +### Trigger garbage collection + +```bash +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/control/gc',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({mode:'now'})}).then(r=>r.text()).then(console.log)" +``` + +### Verify data in ClickHouse + +```bash +kubectl exec -n clickhouse -- \ + clickhouse-client --password \ + --query "SELECT count() FROM countly_drill.drill_events" +``` + +### View logs + +```bash +# All pods +kubectl logs -n countly-migration -l app.kubernetes.io/name=countly-migration -f + +# Specific pod +kubectl logs -n countly-migration countly-migration- -f +``` + +--- + +## API Reference + +### Health + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/healthz` | Liveness probe — always 200 if server is up | +| GET | `/readyz` | Readiness probe — checks MongoDB, ClickHouse, Redis, ManifestStore, BatchRunner | + +### Stats + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/stats` | Comprehensive JSON: throughput, skip reasons, integrity, memory, backpressure, orchestrator progress | + +### Control + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/control/pause` | Pause after current batch completes | +| POST | `/control/resume` | Resume from pause | +| POST | `/control/stop-after-batch` | Graceful stop — finish batch, persist state, exit | +| POST | `/control/gc` | Trigger GC. 
Body: `{"mode": "now"|"force"|"after-batch"}` | +| POST | `/control/drain` | Graceful drain (called by preStop hook) | + +### Multi-Pod Control + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/control/global/pause` | Pause all pods | +| POST | `/control/global/resume` | Resume all pods | +| POST | `/control/global/stop` | Stop all pods | +| GET | `/control/locks` | List collection locks | +| GET | `/control/pods` | List all pods and their status | + +### Run Management + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/runs` | List runs. Query: `?status=active\|completed\|failed&limit=20&offset=0` | +| GET | `/runs/current` | Current active run | +| GET | `/runs/:id` | Single run details | +| GET | `/runs/:id/batches` | Batches for a run. Query: `?status=done\|failed&limit=50` | +| GET | `/runs/:id/failures` | Failure analysis — errors, digest mismatches, retries | +| GET | `/runs/:id/timeline` | Performance timeline snapshots | +| GET | `/runs/:id/coverage` | Document range coverage percentage | +| DELETE | `/runs/:id/cache` | Cleanup Redis cache for a completed run | + +--- + +## Troubleshooting + +### Pod crashes with "No collections found" + +``` +Error: No collections found matching prefix "drill_events" in database "countly_drill" +``` + +**Cause**: The source MongoDB database has no collections matching the prefix. + +**Fix**: Ensure `countly_drill` database exists with `drill_events*` collections. Check the MongoDB connection and database name: +```bash +kubectl exec -n countly-migration deploy/countly-migration -- env | grep MONGO +``` + +### ImagePullBackOff + +**Cause**: The container image doesn't exist or registry credentials are missing. + +**Fix**: Verify the image exists: +```bash +docker pull countly/countly-migration:latest +``` + +If using a private registry, set `image.pullSecrets` in values. 
+ +### Pod not ready (readiness probe failing) + +**Cause**: One or more backing services are unreachable. + +**Fix**: Check which service is failing: +```bash +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/readyz').then(r=>r.text()).then(console.log)" +``` + +Verify MongoDB, ClickHouse, and Redis connectivity from the pod: +```bash +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "console.log(process.env.MONGO_URI, process.env.CLICKHOUSE_URL, process.env.REDIS_URL)" +``` + +### Backpressure stall (migration paused for a long time) + +**Cause**: ClickHouse has too many active parts, likely from a compaction backlog. + +**Check**: +```bash +kubectl exec -n countly-migration deploy/countly-migration -- \ + node -e "fetch('http://localhost:8080/stats').then(r=>r.json()).then(d=>console.log(d.clickhouse))" +``` + +**Fix options**: +- Wait for ClickHouse merges to complete +- Increase `BACKPRESSURE_PARTS_TO_THROW_INSERT` threshold +- Reduce batch size (`MONGO_BATCH_ROWS_TARGET`) +- If truly stuck, the service auto-resumes after `BACKPRESSURE_MAX_PAUSE_EPISODE_MS` (default 3 minutes) + +### High memory / OOM kills + +**Cause**: Batch processing accumulates memory faster than GC reclaims it. + +**Fix**: +- Reduce `MONGO_BATCH_ROWS_TARGET` (smaller batches = less memory per cycle) +- Lower `GC_RSS_SOFT_LIMIT_MB` and `GC_EVERY_N_BATCHES` for more aggressive GC +- Increase container memory limits in `resources.limits.memory` +- Trigger manual GC via `POST /control/gc {"mode":"force"}` + +### Digest mismatch warnings + +``` +Digest mismatch for batch N — source data may have changed between crash and recovery +``` + +**Cause**: Source MongoDB data was modified between the original insert attempt and the crash recovery re-read. + +**Impact**: Low — ClickHouse dedup tokens prevent duplicates. The warning is informational. 
+ +**Fix**: No action needed unless mismatches are frequent, which would indicate concurrent writes to the source collections during migration. + +### Multi-pod: pods stuck waiting for locks + +**Cause**: A pod crashed without releasing its collection lock, and `podDeadAfterSec` hasn't elapsed yet. + +**Fix**: Wait for the dead pod threshold (default 180s), then locks are automatically released. To speed up: +```yaml +worker: + podDeadAfterSec: 60 # Reduce dead pod threshold +``` + +### Wrong MongoDB/ClickHouse endpoint (bundled mode) + +If the sibling charts use non-standard release names: + +```yaml +backingServices: + mongodb: + releaseName: "my-custom-prefix" # Default: "countly" + clickhouse: + releaseName: "my-custom-prefix" +``` + +This controls the DNS hostname construction: +- MongoDB: `{releaseName}-mongodb-svc.{namespace}.svc.cluster.local` +- ClickHouse: `{releaseName}-clickhouse-clickhouse-headless.{namespace}.svc` diff --git a/environments/local/observability.yaml b/environments/local/observability.yaml index af0ed79..ee0482b 100644 --- a/environments/local/observability.yaml +++ b/environments/local/observability.yaml @@ -3,10 +3,9 @@ # Deployment mode: full | hybrid | external | disabled mode: full -# Grafana admin credentials -grafana: - auth: - adminPassword: "countly-local-2026" +# Grafana admin password is auto-generated and stored in a Secret. 
+# To retrieve it: kubectl get secret -n observability countly-observability-grafana -o jsonpath='{.data.admin-password}' | base64 -d
+# To use your own secret: set grafana.admin.existingSecret and grafana.admin.passwordKey
 
 # Grafana ingress with self-signed TLS
 ingress:
diff --git a/helmfile.yaml.gotmpl b/helmfile.yaml.gotmpl
index a28654a..3759daf 100644
--- a/helmfile.yaml.gotmpl
+++ b/helmfile.yaml.gotmpl
@@ -12,6 +12,9 @@ environments:
   example-production:
     values:
       - environments/example-production/global.yaml
+  test-local-full:
+    values:
+      - environments/test-local-full/global.yaml
 ---
 
 repositories: []
@@ -92,3 +95,17 @@ releases:
       - environments/{{ .Environment.Name }}/secrets-observability.yaml
     needs:
       - countly/countly
+
+  # Optional: MongoDB to ClickHouse batch migration service (with bundled Redis)
+  # Set migration.enabled: true in your environment's global.yaml to deploy
+  - name: countly-migration
+    installed: {{ .Values | get "migration.enabled" false }}
+    chart: ./charts/countly-migration
+    namespace: countly-migration
+    values:
+      - environments/{{ .Environment.Name }}/global.yaml
+      - environments/{{ .Environment.Name }}/migration.yaml
+      - environments/{{ .Environment.Name }}/secrets-migration.yaml
+    needs:
+      - mongodb/countly-mongodb
+      - clickhouse/countly-clickhouse