From fa85a98cdaeee18437c450dbc8d26d4aff1436cb Mon Sep 17 00:00:00 2001 From: TJ Moore Date: Mon, 9 Dec 2024 13:07:11 -0500 Subject: [PATCH] Configure Patroni Logs to be stored in a file This commit allows for the configuration of the Postgres instance Patroni logs to go to a file on the 'pgdata' volume instead of to stdout. This file is located at '/pgdata/patroni/log/patroni.log'. Both the log size limit and log level are configurable; only the size limit setting is required. If not set, the default behavior of sending all logs to stdout is maintained. Changes to this configuration require a reload to take effect. - https://patroni.readthedocs.io/en/latest/yaml_configuration.html#log Issue: PGO-1701 --- ...ator.crunchydata.com_postgresclusters.yaml | 29 +++++++ .../controller/postgrescluster/cluster.go | 29 ++++++- .../postgrescluster/cluster_test.go | 74 ++++++++++++++++++ internal/naming/names.go | 4 + internal/patroni/config.go | 25 +++++- internal/patroni/config_test.go | 78 ++++++++++++++++++- internal/patroni/reconcile.go | 3 +- internal/patroni/reconcile_test.go | 6 +- internal/postgres/config.go | 9 ++- internal/postgres/reconcile_test.go | 10 ++- .../v1beta1/patroni_types.go | 22 ++++++ .../v1beta1/zz_generated.deepcopy.go | 30 +++++++ 12 files changed, 306 insertions(+), 13 deletions(-) diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index f06b0d49dd..6e055a5911 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -11630,6 +11630,35 @@ spec: format: int32 minimum: 3 type: integer + logging: + description: Patroni log configuration settings. + properties: + level: + default: INFO + description: |- + The Patroni log level. + https://docs.python.org/3.6/library/logging.html#levels + enum: + - CRITICAL + - ERROR + - WARNING + - INFO + - DEBUG + - NOTSET + type: string + storageLimit: + anyOf: + - type: integer + - type: string + description: |- + Limits the total amount of space taken by Patroni Log files. + Minimum value is 25MB. + https://pkg.go.dev/k8s.io/apimachinery/pkg/api/resource#Quantity + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - storageLimit + type: object port: default: 8008 description: |- diff --git a/internal/controller/postgrescluster/cluster.go b/internal/controller/postgrescluster/cluster.go index a8dbff0e78..e11731bdd1 100644 --- a/internal/controller/postgrescluster/cluster.go +++ b/internal/controller/postgrescluster/cluster.go @@ -44,7 +44,7 @@ func (r *Reconciler) reconcileClusterConfigMap( if err == nil { err = patroni.ClusterConfigMap(ctx, cluster, pgHBAs, pgParameters, - clusterConfigMap) + clusterConfigMap, r.patroniLogSize(cluster)) } if err == nil { err = errors.WithStack(r.apply(ctx, clusterConfigMap)) @@ -53,6 +53,33 @@ func (r *Reconciler) reconcileClusterConfigMap( return clusterConfigMap, err } +// patroniLogSize attempts to parse the defined log file storage limit, if configured. +// If a value is set, this enables volume based log storage and triggers the +// relevant Patroni configuration. If the value given is less than 25M, the log +// file size storage limit defaults to 25M and an event is triggered. +func (r *Reconciler) patroniLogSize(cluster *v1beta1.PostgresCluster) int64 { + + if cluster.Spec.Patroni != nil { + if cluster.Spec.Patroni.Logging != nil { + if cluster.Spec.Patroni.Logging.StorageLimit != nil { + + sizeInBytes := cluster.Spec.Patroni.Logging.StorageLimit.Value() + + if sizeInBytes < 25000000 { + // TODO(validation): Eventually we should be able to remove this in favor of CEL validation. + // - https://kubernetes.io/docs/reference/using-api/cel/ + r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "PatroniLogStorageLimitTooSmall", + "Configured Patroni log storage limit is too small. File size will default to 25M.") + + sizeInBytes = 25000000 + } + return sizeInBytes + } + } + } + return 0 +} + // +kubebuilder:rbac:groups="",resources="services",verbs={create,patch} // reconcileClusterPodService writes the Service that can provide stable DNS diff --git a/internal/controller/postgrescluster/cluster_test.go b/internal/controller/postgrescluster/cluster_test.go index 3ef98c58cf..c6d21751be 100644 --- a/internal/controller/postgrescluster/cluster_test.go +++ b/internal/controller/postgrescluster/cluster_test.go @@ -13,6 +13,7 @@ import ( batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/client-go/tools/record" @@ -23,6 +24,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -783,3 +785,75 @@ postgres-operator.crunchydata.com/role: replica `)) }) } + +func TestPatroniLogSize(t *testing.T) { + + oneHundredMeg, err := resource.ParseQuantity("100M") + assert.NilError(t, err) + + tooSmall, err := resource.ParseQuantity("1k") + assert.NilError(t, err) + + cluster := v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sometest", + Namespace: "test-namespace", + }, + Spec: v1beta1.PostgresClusterSpec{}} + + t.Run("Default", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + + size := reconciler.patroniLogSize(&cluster) + + assert.Equal(t, size, int64(0)) + assert.Equal(t, len(recorder.Events), 0) + }) + + t.Run("NoSize", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + Logging: &v1beta1.PatroniLogConfig{}} + + size := reconciler.patroniLogSize(&cluster) + + assert.Equal(t, size, int64(0)) + assert.Equal(t, len(recorder.Events), 0) + }) + + t.Run("ValidSize", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + Logging: &v1beta1.PatroniLogConfig{ + StorageLimit: &oneHundredMeg, + }} + + size := reconciler.patroniLogSize(&cluster) + + assert.Equal(t, size, int64(100000000)) + assert.Equal(t, len(recorder.Events), 0) + }) + + t.Run("BadSize", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + Logging: &v1beta1.PatroniLogConfig{ + StorageLimit: &tooSmall, + }} + + size := reconciler.patroniLogSize(&cluster) + + assert.Equal(t, size, int64(25000000)) + assert.Equal(t, len(recorder.Events), 1) + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "PatroniLogStorageLimitTooSmall") + assert.Equal(t, recorder.Events[0].Note, "Configured Patroni log storage limit is too small. File size will default to 25M.") + }) +} diff --git a/internal/naming/names.go b/internal/naming/names.go index 369591de91..f02951b292 100644 --- a/internal/naming/names.go +++ b/internal/naming/names.go @@ -131,6 +131,10 @@ const ( ) const ( + // PatroniPGDataLogPath is the Patroni default log path configuration used by the + // PostgreSQL instance. + PatroniPGDataLogPath = "/pgdata/patroni/log" + // PGBackRestRepoContainerName is the name assigned to the container used to run pgBackRest PGBackRestRepoContainerName = "pgbackrest" diff --git a/internal/patroni/config.go b/internal/patroni/config.go index b4d7e54f68..65b1f8d239 100644 --- a/internal/patroni/config.go +++ b/internal/patroni/config.go @@ -43,7 +43,7 @@ func quoteShellWord(s string) string { // clusterYAML returns Patroni settings that apply to the entire cluster. func clusterYAML( cluster *v1beta1.PostgresCluster, - pgHBAs postgres.HBAs, pgParameters postgres.Parameters, + pgHBAs postgres.HBAs, pgParameters postgres.Parameters, patroniLogStorageLimit int64, ) (string, error) { root := map[string]any{ // The cluster identifier. This value cannot change during the cluster's @@ -152,6 +152,29 @@ func clusterYAML( }, } + // if a Patroni log file size is configured, configure volume file storage + if patroniLogStorageLimit != 0 { + + // Configure the Patroni log settings + // - https://patroni.readthedocs.io/en/latest/yaml_configuration.html#log + root["log"] = map[string]any{ + + "dir": naming.PatroniPGDataLogPath, + "type": "json", + + // defaults to "INFO" + "level": cluster.Spec.Patroni.Logging.Level, + + // There will only be two log files. Cannot set to 1 or the logs won't rotate. + // - https://github.com/python/cpython/blob/3.11/Lib/logging/handlers.py#L134 + "file_num": 1, + + // Since there are two log files, ensure the total space used is under + // the configured limit. + "file_size": patroniLogStorageLimit / 2, + } + } + if !ClusterBootstrapped(cluster) { // Patroni has not yet bootstrapped. Populate the "bootstrap.dcs" field to // facilitate it. When Patroni is already bootstrapped, this field is ignored. diff --git a/internal/patroni/config_test.go b/internal/patroni/config_test.go index 788d687a43..38ade680b7 100644 --- a/internal/patroni/config_test.go +++ b/internal/patroni/config_test.go @@ -13,6 +13,7 @@ import ( "gotest.tools/v3/assert" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/yaml" @@ -32,7 +33,7 @@ func TestClusterYAML(t *testing.T) { cluster.Namespace = "some-namespace" cluster.Name = "cluster-name" - data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}) + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}, 0) assert.NilError(t, err) assert.Equal(t, data, strings.TrimSpace(` # Generated by postgres-operator. DO NOT EDIT. @@ -90,7 +91,7 @@ watchdog: cluster.Name = "cluster-name" cluster.Spec.PostgresVersion = 14 - data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}) + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}, 0) assert.NilError(t, err) assert.Equal(t, data, strings.TrimSpace(` # Generated by postgres-operator. DO NOT EDIT. @@ -136,6 +137,79 @@ restapi: keyfile: null verify_client: optional scope: cluster-name-ha +watchdog: + mode: "off" + `)+"\n") + }) + + t.Run("PatroniLogSizeConfigured", func(t *testing.T) { + cluster := new(v1beta1.PostgresCluster) + cluster.Default() + cluster.Namespace = "some-namespace" + cluster.Name = "cluster-name" + cluster.Spec.PostgresVersion = 14 + + fileSize, err := resource.ParseQuantity("1k") + assert.NilError(t, err) + + logLevel := "DEBUG" + cluster.Spec.Patroni.Logging = &v1beta1.PatroniLogConfig{ + StorageLimit: &fileSize, + Level: &logLevel, + } + + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}, 1000) + assert.NilError(t, err) + assert.Equal(t, data, strings.TrimSpace(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +bootstrap: + dcs: + loop_wait: 10 + postgresql: + parameters: {} + pg_hba: [] + use_pg_rewind: true + use_slots: false + ttl: 30 +ctl: + cacert: /etc/patroni/~postgres-operator/patroni.ca-roots + certfile: /etc/patroni/~postgres-operator/patroni.crt+key + insecure: false + keyfile: null +kubernetes: + labels: + postgres-operator.crunchydata.com/cluster: cluster-name + namespace: some-namespace + role_label: postgres-operator.crunchydata.com/role + scope_label: postgres-operator.crunchydata.com/patroni + use_endpoints: true +log: + dir: /pgdata/patroni/log + file_num: 1 + file_size: 500 + level: DEBUG + type: json +postgresql: + authentication: + replication: + sslcert: /tmp/replication/tls.crt + sslkey: /tmp/replication/tls.key + sslmode: verify-ca + sslrootcert: /tmp/replication/ca.crt + username: _crunchyrepl + rewind: + sslcert: /tmp/replication/tls.crt + sslkey: /tmp/replication/tls.key + sslmode: verify-ca + sslrootcert: /tmp/replication/ca.crt + username: _crunchyrepl +restapi: + cafile: /etc/patroni/~postgres-operator/patroni.ca-roots + certfile: /etc/patroni/~postgres-operator/patroni.crt+key + keyfile: null + verify_client: optional +scope: cluster-name-ha watchdog: mode: "off" `)+"\n") diff --git a/internal/patroni/reconcile.go b/internal/patroni/reconcile.go index 4fbb08b67d..29f0a00008 100644 --- a/internal/patroni/reconcile.go +++ b/internal/patroni/reconcile.go @@ -32,13 +32,14 @@ func ClusterConfigMap(ctx context.Context, inHBAs postgres.HBAs, inParameters postgres.Parameters, outClusterConfigMap *corev1.ConfigMap, + patroniLogStorageLimit int64, ) error { var err error initialize.Map(&outClusterConfigMap.Data) outClusterConfigMap.Data[configMapFileKey], err = clusterYAML(inCluster, inHBAs, - inParameters) + inParameters, patroniLogStorageLimit) return err } diff --git a/internal/patroni/reconcile_test.go b/internal/patroni/reconcile_test.go index 5d2a2c0ad5..5b78acacec 100644 --- a/internal/patroni/reconcile_test.go +++ b/internal/patroni/reconcile_test.go @@ -29,15 +29,15 @@ func TestClusterConfigMap(t *testing.T) { cluster.Default() config := new(corev1.ConfigMap) - assert.NilError(t, ClusterConfigMap(ctx, cluster, pgHBAs, pgParameters, config)) + assert.NilError(t, ClusterConfigMap(ctx, cluster, pgHBAs, pgParameters, config, 0)) // The output of clusterYAML should go into config. - data, _ := clusterYAML(cluster, pgHBAs, pgParameters) + data, _ := clusterYAML(cluster, pgHBAs, pgParameters, 0) assert.DeepEqual(t, config.Data["patroni.yaml"], data) // No change when called again. before := config.DeepCopy() - assert.NilError(t, ClusterConfigMap(ctx, cluster, pgHBAs, pgParameters, config)) + assert.NilError(t, ClusterConfigMap(ctx, cluster, pgHBAs, pgParameters, config, 0)) assert.DeepEqual(t, config, before) } diff --git a/internal/postgres/config.go b/internal/postgres/config.go index ce1acde3fb..db46ea3ba7 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -291,9 +291,9 @@ chmod +x /tmp/pg_rewind_tde.sh ` } - args := []string{version, walDir, naming.PGBackRestPGDataLogPath} + args := []string{version, walDir, naming.PGBackRestPGDataLogPath, naming.PatroniPGDataLogPath} script := strings.Join([]string{ - `declare -r expected_major_version="$1" pgwal_directory="$2" pgbrLog_directory="$3"`, + `declare -r expected_major_version="$1" pgwal_directory="$2" pgbrLog_directory="$3" patroniLog_directory="$4"`, // Function to print the permissions of a file or directory and its parents. bashPermissions, @@ -369,6 +369,11 @@ chmod +x /tmp/pg_rewind_tde.sh `install --directory --mode=0775 "${pgbrLog_directory}" ||`, `halt "$(permissions "${pgbrLog_directory}" ||:)"`, + // Create the Patroni log directory. + `results 'Patroni log directory' "${patroniLog_directory}"`, + `install --directory --mode=0775 "${patroniLog_directory}" ||`, + `halt "$(permissions "${patroniLog_directory}" ||:)"`, + // Copy replication client certificate files // from the /pgconf/tls/replication directory to the /tmp/replication directory in order // to set proper file permissions. This is required because the group permission settings diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index 138b5c7b3e..f35fb09150 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -230,7 +230,7 @@ initContainers: - -ceu - -- - |- - declare -r expected_major_version="$1" pgwal_directory="$2" pgbrLog_directory="$3" + declare -r expected_major_version="$1" pgwal_directory="$2" pgbrLog_directory="$3" patroniLog_directory="$4" permissions() { while [[ -n "$1" ]]; do set "${1%/*}" "$@"; done; shift; stat -Lc '%A %4u %4g %n' "$@"; } halt() { local rc=$?; >&2 echo "$@"; exit "${rc/#0/1}"; } results() { printf '::postgres-operator: %s::%s\n' "$@"; } @@ -270,6 +270,9 @@ initContainers: results 'pgBackRest log directory' "${pgbrLog_directory}" install --directory --mode=0775 "${pgbrLog_directory}" || halt "$(permissions "${pgbrLog_directory}" ||:)" + results 'Patroni log directory' "${patroniLog_directory}" + install --directory --mode=0775 "${patroniLog_directory}" || + halt "$(permissions "${patroniLog_directory}" ||:)" install -D --mode=0600 -t "/tmp/replication" "/pgconf/tls/replication"/{tls.crt,tls.key,ca.crt} @@ -286,6 +289,7 @@ initContainers: - "11" - /pgdata/pg11_wal - /pgdata/pgbackrest/log + - /pgdata/patroni/log env: - name: PGDATA value: /pgdata/pg11 @@ -473,7 +477,7 @@ volumes: // Startup moves WAL files to data volume. assert.DeepEqual(t, pod.InitContainers[0].Command[4:], - []string{"startup", "11", "/pgdata/pg11_wal", "/pgdata/pgbackrest/log"}) + []string{"startup", "11", "/pgdata/pg11_wal", "/pgdata/pgbackrest/log", "/pgdata/patroni/log"}) }) t.Run("WithAdditionalConfigFiles", func(t *testing.T) { @@ -703,7 +707,7 @@ volumes: // Startup moves WAL files to WAL volume. assert.DeepEqual(t, pod.InitContainers[0].Command[4:], - []string{"startup", "11", "/pgwal/pg11_wal", "/pgdata/pgbackrest/log"}) + []string{"startup", "11", "/pgwal/pg11_wal", "/pgdata/pgbackrest/log", "/pgdata/patroni/log"}) }) } diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/patroni_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/patroni_types.go index 2f01399372..47f060408b 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/patroni_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/patroni_types.go @@ -4,6 +4,8 @@ package v1beta1 +import "k8s.io/apimachinery/pkg/api/resource" + type PatroniSpec struct { // Patroni dynamic configuration settings. Changes to this value will be // automatically reloaded without validation. Changes to certain PostgreSQL @@ -23,6 +25,10 @@ type PatroniSpec struct { // +kubebuilder:validation:Minimum=3 LeaderLeaseDurationSeconds *int32 `json:"leaderLeaseDurationSeconds,omitempty"` + // Patroni log configuration settings. + // +optional + Logging *PatroniLogConfig `json:"logging,omitempty"` + // The port on which Patroni should listen. // Changing this value causes PostgreSQL to restart. // +optional @@ -48,6 +54,22 @@ type PatroniSpec struct { // - https://patroni.readthedocs.io/en/latest/kubernetes.html } +type PatroniLogConfig struct { + + // Limits the total amount of space taken by Patroni Log files. + // Minimum value is 25MB. + // https://pkg.go.dev/k8s.io/apimachinery/pkg/api/resource#Quantity + // +required + StorageLimit *resource.Quantity `json:"storageLimit"` + + // The Patroni log level. + // https://docs.python.org/3.6/library/logging.html#levels + // +kubebuilder:validation:Enum={CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET} + // +kubebuilder:default:=INFO + // +optional + Level *string `json:"level,omitempty"` +} + type PatroniSwitchover struct { // Whether or not the operator should allow switchovers in a PostgresCluster diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index 5d097f01d3..e8d8826c22 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -1447,6 +1447,31 @@ func (in *PGUpgradeStatus) DeepCopy() *PGUpgradeStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PatroniLogConfig) DeepCopyInto(out *PatroniLogConfig) { + *out = *in + if in.StorageLimit != nil { + in, out := &in.StorageLimit, &out.StorageLimit + x := (*in).DeepCopy() + *out = &x + } + if in.Level != nil { + in, out := &in.Level, &out.Level + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PatroniLogConfig. +func (in *PatroniLogConfig) DeepCopy() *PatroniLogConfig { + if in == nil { + return nil + } + out := new(PatroniLogConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PatroniSpec) DeepCopyInto(out *PatroniSpec) { *out = *in @@ -1456,6 +1481,11 @@ func (in *PatroniSpec) DeepCopyInto(out *PatroniSpec) { *out = new(int32) **out = **in } + if in.Logging != nil { + in, out := &in.Logging, &out.Logging + *out = new(PatroniLogConfig) + (*in).DeepCopyInto(*out) + } if in.Port != nil { in, out := &in.Port, &out.Port *out = new(int32)