From 5979ab9b17ff9611333a509ac63f27b8e80f5fd4 Mon Sep 17 00:00:00 2001 From: cnp-autobot Date: Thu, 11 Nov 2021 18:05:24 +0000 Subject: [PATCH] [create-pull-request] automated change --- .../cloud_native_postgresql/api_reference.mdx | 278 +++++++++-- .../cloud_native_postgresql/architecture.mdx | 5 + .../backup_recovery.mdx | 58 ++- .../cloud_native_postgresql/bootstrap.mdx | 76 ++- .../cloud_native_postgresql/cnp-plugin.mdx | 57 ++- .../connection_pooling.mdx | 464 ++++++++++++++++++ .../cloud_native_postgresql/credits.mdx | 9 +- .../images/pgbouncer-architecture-rw.png | 3 + .../cloud_native_postgresql/index.mdx | 21 +- .../installation_upgrade.mdx | 111 +++-- .../cloud_native_postgresql/logging.mdx | 12 +- .../cloud_native_postgresql/monitoring.mdx | 41 +- .../operator_capability_levels.mdx | 15 + .../cloud_native_postgresql/operator_conf.mdx | 21 +- .../postgresql_conf.mdx | 30 +- .../cloud_native_postgresql/release_notes.mdx | 63 +++ .../rolling_update.mdx | 8 +- .../samples/cluster-backup-retention-30d.yaml | 32 ++ .../samples/cluster-example-full.yaml | 3 +- .../samples/cluster-example-initdb.yaml | 9 +- .../samples/cluster-example-monitoring.yaml | 12 +- .../samples/cnp-basic-monitoring.yaml | 10 +- .../samples/pooler-basic-auth.yaml | 15 + .../samples/pooler-tls.yaml | 12 + .../cloud_native_postgresql/scheduling.mdx | 2 +- .../cloud_native_postgresql/security.mdx | 11 +- .../ssl_connections.mdx | 2 +- .../troubleshooting.mdx | 282 +++++++++++ scripts/fileProcessor/package-lock.json | 2 +- 29 files changed, 1491 insertions(+), 173 deletions(-) create mode 100644 advocacy_docs/kubernetes/cloud_native_postgresql/connection_pooling.mdx create mode 100644 advocacy_docs/kubernetes/cloud_native_postgresql/images/pgbouncer-architecture-rw.png create mode 100644 advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-backup-retention-30d.yaml create mode 100644 advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-basic-auth.yaml create mode 100644 advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-tls.yaml create mode 100644 advocacy_docs/kubernetes/cloud_native_postgresql/troubleshooting.mdx diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/api_reference.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/api_reference.mdx index a14b608179c..3b9c11c7e7b 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/api_reference.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/api_reference.mdx @@ -26,6 +26,7 @@ Below you will find a description of the defined resources: - [Backup](#Backup) - [BackupConfiguration](#BackupConfiguration) - [BackupList](#BackupList) +- [BackupSource](#BackupSource) - [BackupSpec](#BackupSpec) - [BackupStatus](#BackupStatus) - [BarmanObjectStoreConfiguration](#BarmanObjectStoreConfiguration) @@ -44,9 +45,21 @@ Below you will find a description of the defined resources: - [DataBackupConfiguration](#DataBackupConfiguration) - [EPASConfiguration](#EPASConfiguration) - [ExternalCluster](#ExternalCluster) +- [InstanceID](#InstanceID) - [LocalObjectReference](#LocalObjectReference) - [MonitoringConfiguration](#MonitoringConfiguration) - [NodeMaintenanceWindow](#NodeMaintenanceWindow) +- [PgBouncerIntegrationStatus](#PgBouncerIntegrationStatus) +- [PgBouncerSecrets](#PgBouncerSecrets) +- [PgBouncerSpec](#PgBouncerSpec) +- [PodMeta](#PodMeta) +- [PodTemplateSpec](#PodTemplateSpec) +- [Pooler](#Pooler) +- [PoolerIntegrations](#PoolerIntegrations) +- [PoolerList](#PoolerList) +- [PoolerSecrets](#PoolerSecrets) +- 
[PoolerSpec](#PoolerSpec) +- [PoolerStatus](#PoolerStatus) - [PostgresConfiguration](#PostgresConfiguration) - [RecoveryTarget](#RecoveryTarget) - [ReplicaClusterConfiguration](#ReplicaClusterConfiguration) @@ -57,6 +70,7 @@ Below you will find a description of the defined resources: - [ScheduledBackupSpec](#ScheduledBackupSpec) - [ScheduledBackupStatus](#ScheduledBackupStatus) - [SecretKeySelector](#SecretKeySelector) +- [SecretVersion](#SecretVersion) - [SecretsResourceVersion](#SecretsResourceVersion) - [StorageConfiguration](#StorageConfiguration) - [WalBackupConfiguration](#WalBackupConfiguration) @@ -110,9 +124,10 @@ Backup is the Schema for the backups API BackupConfiguration defines how the backup of the cluster are taken. Currently the only supported backup method is barmanObjectStore. For details and examples refer to the Backup and Recovery section of the documentation -| Name | Description | Type | -| ------------------- | ------------------------------------------------- | ------------------------------------------------------------------- | -| `barmanObjectStore` | The configuration for the barman-cloud tool suite | [\*BarmanObjectStoreConfiguration](#BarmanObjectStoreConfiguration) | +| Name | Description | Type | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | +| `barmanObjectStore` | The configuration for the barman-cloud tool suite | [\*BarmanObjectStoreConfiguration](#BarmanObjectStoreConfiguration) | +| `retentionPolicy ` | RetentionPolicy is the retention policy to be used for backups and WALs (i.e. '60d'). The retention policy is expressed in the form of `XXu` where `XX` is a positive integer and `u` is in `[dwm]` - days, weeks, months. | string | @@ -125,6 +140,16 @@ BackupList contains a list of Backup | `metadata` | Standard list metadata. More info: | [metav1.ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.21/#listmeta-v1-meta) | | `items ` | List of backups - *mandatory* | [\[\]Backup](#Backup) | + + +## BackupSource + +BackupSource contains the backup we need to restore from, plus some information that could be needed to correctly restore it. + +| Name | Description | Type | +| ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- | +| `endpointCA` | EndpointCA store the CA bundle of the barman endpoint. Useful when using self-signed certificates to avoid errors with certificate issuer and barman-cloud-wal-archive | [\*SecretKeySelector](#SecretKeySelector) | + ## BackupSpec @@ -160,6 +185,7 @@ BackupStatus defines the observed state of Backup | `error ` | The detected error | string | | `commandOutput ` | Unused. Retained for compatibility with old versions. 
| string | | `commandError ` | The backup command output in case of error | string | +| `instanceID ` | Information to identify the instance where the backup has been taken from | [\*InstanceID](#InstanceID) | @@ -196,14 +222,20 @@ BootstrapConfiguration contains information about how to create the PostgreSQL c BootstrapInitDB is the configuration of the bootstrap process when initdb is used Refer to the Bootstrap page of the documentation for more information. -| Name | Description | Type | -| ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | -| `database ` | Name of the database used by the application. Default: `app`. - *mandatory* | string | -| `owner ` | Name of the owner of the database in the instance to be used by applications. Defaults to the value of the `database` key. - *mandatory* | string | -| `secret ` | Name of the secret containing the initial credentials for the owner of the user database. If empty a new secret will be created from scratch | [\*LocalObjectReference](#LocalObjectReference) | -| `redwood ` | If we need to enable/disable Redwood compatibility. Requires EPAS and for EPAS defaults to true | \*bool | -| `options ` | The list of options that must be passed to initdb when creating the cluster | \[]string | -| `postInitSQL` | List of SQL queries to be executed as a superuser immediately after the cluster has been created - to be used with extreme care (by default empty) | \[]string | +| Name | Description | Type | +| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | +| `database ` | Name of the database used by the application. Default: `app`. - *mandatory* | string | +| `owner ` | Name of the owner of the database in the instance to be used by applications. Defaults to the value of the `database` key. - *mandatory* | string | +| `secret ` | Name of the secret containing the initial credentials for the owner of the user database. If empty a new secret will be created from scratch | [\*LocalObjectReference](#LocalObjectReference) | +| `redwood ` | If we need to enable/disable Redwood compatibility. Requires EPAS and for EPAS defaults to true | \*bool | +| `options ` | The list of options that must be passed to initdb when creating the cluster. Deprecated: This could lead to inconsistent configurations, please use the explicit provided parameters instead. If defined, explicit values will be ignored. 
| \[]string | +| `dataChecksums ` | Whether the `-k` option should be passed to initdb, enabling checksums on data pages (default: `false`) | \*bool | +| `encoding ` | The value to be passed as option `--encoding` for initdb (default:`UTF8`) | string | +| `localeCollate ` | The value to be passed as option `--lc-collate` for initdb (default:`C`) | string | +| `localeCType ` | The value to be passed as option `--lc-ctype` for initdb (default:`C`) | string | +| `walSegmentSize ` | The value in megabytes (1 to 1024) to be passed to the `--wal-segsize` option for initdb (default: empty, resulting in PostgreSQL default: 16MB) | int | +| `postInitSQL ` | List of SQL queries to be executed as a superuser immediately after the cluster has been created - to be used with extreme care (by default empty) | \[]string | +| `postInitTemplateSQL` | List of SQL queries to be executed as a superuser in the `template1` after the cluster has been created - to be used with extreme care (by default empty) | \[]string | @@ -221,11 +253,11 @@ BootstrapPgBaseBackup contains the configuration required to take a physical bac BootstrapRecovery contains the configuration required to restore the backup with the specified name and, after having changed the password with the one chosen for the superuser, will use it to bootstrap a full cluster cloning all the instances from the restored primary. Refer to the Bootstrap page of the documentation for more information. -| Name | Description | Type | -| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | -| `backup ` | The backup we need to restore | [\*LocalObjectReference](#LocalObjectReference) | -| `source ` | The external cluster whose backup we will restore. This is also used as the name of the folder under which the backup is stored, so it must be set to the name of the source cluster | string | -| `recoveryTarget` | By default, the recovery process applies all the available WAL files in the archive (full recovery). However, you can also end the recovery as soon as a consistent state is reached or recover to a point-in-time (PITR) by specifying a `RecoveryTarget` object, as expected by PostgreSQL (i.e., timestamp, transaction Id, LSN, ...). More info: | [\*RecoveryTarget](#RecoveryTarget) | +| Name | Description | Type | +| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | +| `backup ` | The backup we need to restore | [\*BackupSource](#BackupSource) | +| `source ` | The external cluster whose backup we will restore. 
This is also used as the name of the folder under which the backup is stored, so it must be set to the name of the source cluster | string | +| `recoveryTarget` | By default, the recovery process applies all the available WAL files in the archive (full recovery). However, you can also end the recovery as soon as a consistent state is reached or recover to a point-in-time (PITR) by specifying a `RecoveryTarget` object, as expected by PostgreSQL (i.e., timestamp, transaction Id, LSN, ...). More info: | [\*RecoveryTarget](#RecoveryTarget) | @@ -317,31 +349,34 @@ ClusterSpec defines the desired state of Cluster ClusterStatus defines the observed state of Cluster -| Name | Description | Type | -| --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------- | -| `instances ` | Total number of instances in the cluster | int32 | -| `readyInstances ` | Total number of ready instances in the cluster | int32 | -| `instancesStatus ` | Instances status | map[utils.PodStatus][]string | -| `latestGeneratedNode ` | ID of the latest generated node (used to avoid node name clashing) | int32 | -| `currentPrimary ` | Current primary instance | string | -| `targetPrimary ` | Target primary instance, this is different from the previous one during a switchover or a failover | string | -| `pvcCount ` | How many PVCs have been created by this cluster | int32 | -| `jobCount ` | How many Jobs have been created by this cluster | int32 | -| `danglingPVC ` | List of all the PVCs created by this cluster and still available which are not attached to a Pod | \[]string | -| `initializingPVC ` | List of all the PVCs that are being initialized by this cluster | \[]string | -| `healthyPVC ` | List of all the PVCs not dangling nor initializing | \[]string | -| `licenseStatus ` | Status of the license | licensekey.Status | -| `writeService ` | Current write pod | string | -| `readService ` | Current list of read pods | string | -| `phase ` | Current phase of the cluster | string | -| `phaseReason ` | Reason for the current phase | string | -| `secretsResourceVersion ` | The list of resource versions of the secrets managed by the operator. Every change here is done in the interest of the instance manager, which will refresh the secret data | [SecretsResourceVersion](#SecretsResourceVersion) | -| `configMapResourceVersion ` | The list of resource versions of the configmaps, managed by the operator. Every change here is done in the interest of the instance manager, which will refresh the configmap data | [ConfigMapResourceVersion](#ConfigMapResourceVersion) | -| `certificates ` | The configuration for the CA and related certificates, initialized with defaults. 
| [CertificatesStatus](#CertificatesStatus) | -| `firstRecoverabilityPoint ` | The first recoverability point, stored as a date in RFC3339 format | string | -| `cloudNativePostgresqlCommitHash` | The commit hash number of which this operator running | string | -| `currentPrimaryTimestamp ` | The timestamp when the last actual promotion to primary has occurred | string | -| `targetPrimaryTimestamp ` | The timestamp when the last request for a new primary has occurred | string | +| Name | Description | Type | +| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------- | +| `instances ` | Total number of instances in the cluster | int32 | +| `readyInstances ` | Total number of ready instances in the cluster | int32 | +| `instancesStatus ` | Instances status | map[utils.PodStatus][]string | +| `latestGeneratedNode ` | ID of the latest generated node (used to avoid node name clashing) | int32 | +| `currentPrimary ` | Current primary instance | string | +| `targetPrimary ` | Target primary instance, this is different from the previous one during a switchover or a failover | string | +| `pvcCount ` | How many PVCs have been created by this cluster | int32 | +| `jobCount ` | How many Jobs have been created by this cluster | int32 | +| `danglingPVC ` | List of all the PVCs created by this cluster and still available which are not attached to a Pod | \[]string | +| `initializingPVC ` | List of all the PVCs that are being initialized by this cluster | \[]string | +| `healthyPVC ` | List of all the PVCs not dangling nor initializing | \[]string | +| `licenseStatus ` | Status of the license | licensekey.Status | +| `writeService ` | Current write pod | string | +| `readService ` | Current list of read pods | string | +| `phase ` | Current phase of the cluster | string | +| `phaseReason ` | Reason for the current phase | string | +| `secretsResourceVersion ` | The list of resource versions of the secrets managed by the operator. Every change here is done in the interest of the instance manager, which will refresh the secret data | [SecretsResourceVersion](#SecretsResourceVersion) | +| `configMapResourceVersion ` | The list of resource versions of the configmaps, managed by the operator. Every change here is done in the interest of the instance manager, which will refresh the configmap data | [ConfigMapResourceVersion](#ConfigMapResourceVersion) | +| `certificates ` | The configuration for the CA and related certificates, initialized with defaults. 
| [CertificatesStatus](#CertificatesStatus) | +| `firstRecoverabilityPoint ` | The first recoverability point, stored as a date in RFC3339 format | string | +| `cloudNativePostgresqlCommitHash ` | The commit hash number of which this operator running | string | +| `currentPrimaryTimestamp ` | The timestamp when the last actual promotion to primary has occurred | string | +| `targetPrimaryTimestamp ` | The timestamp when the last request for a new primary has occurred | string | +| `poolerIntegrations ` | The integration needed by poolers referencing the cluster | [\*PoolerIntegrations](#PoolerIntegrations) | +| `cloudNativePostgresqlOperatorHash` | The hash of the binary of the operator | string | +| `onlineUpdateEnabled ` | OnlineUpdateEnabled shows if the online upgrade is enabled inside the cluster | bool | @@ -402,6 +437,17 @@ ExternalCluster represents the connection parameters to an external cluster whic | `password ` | The reference to the password to be used to connect to the server | [\*corev1.SecretKeySelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.21/#secretkeyselector-v1-core) | | `barmanObjectStore ` | The configuration for the barman-cloud tool suite | [\*BarmanObjectStoreConfiguration](#BarmanObjectStoreConfiguration) | + + +## InstanceID + +InstanceID contains the information to identify an instance + +| Name | Description | Type | +| ------------- | ---------------- | ------ | +| `podName ` | The pod name | string | +| `ContainerID` | The container ID | string | + ## LocalObjectReference @@ -418,10 +464,11 @@ LocalObjectReference contains enough information to let you locate a local objec MonitoringConfiguration is the type containing all the monitoring configuration for a certain cluster -| Name | Description | Type | -| ------------------------ | ----------------------------------------------------- | ------------------------------------------------- | -| `customQueriesConfigMap` | The list of config maps containing the custom queries | [\[\]ConfigMapKeySelector](#ConfigMapKeySelector) | -| `customQueriesSecret ` | The list of secrets containing the custom queries | [\[\]SecretKeySelector](#SecretKeySelector) | +| Name | Description | Type | +| ------------------------ | --------------------------------------------------------------- | ------------------------------------------------- | +| `disableDefaultQueries ` | Whether the default queries should be injected. Default: false. | \*bool | +| `customQueriesConfigMap` | The list of config maps containing the custom queries | [\[\]ConfigMapKeySelector](#ConfigMapKeySelector) | +| `customQueriesSecret ` | The list of secrets containing the custom queries | [\[\]SecretKeySelector](#SecretKeySelector) | @@ -436,6 +483,136 @@ This option is only useful when the chosen storage prevents the Pods from being | `inProgress` | Is there a node maintenance activity in progress? 
- *mandatory* | bool | | `reusePVC ` | Reuse the existing PVC (wait for the node to come up again) or not (recreate it elsewhere) - *mandatory* | \*bool | + + +## PgBouncerIntegrationStatus + +PgBouncerIntegrationStatus encapsulates the needed integration for the pgbouncer poolers referencing the cluster + +Name | Description | Type +------- \| \| -------- +`secrets` | | \[]string + + + +## PgBouncerSecrets + +PgBouncerSecrets contains the versions of the secrets used by pgbouncer + +| Name | Description | Type | +| ----------- | ----------------------------- | ------------------------------- | +| `authQuery` | The auth query secret version | [SecretVersion](#SecretVersion) | + + + +## PgBouncerSpec + +PgBouncerSpec defines how to configure PgBouncer + +| Name | Description | Type | +| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | +| `poolMode ` | The pool mode - *mandatory* | PgBouncerPoolMode | +| `authQuerySecret` | The credentials of the user that need to be used for the authentication query. In case it is specified, also an AuthQuery (e.g. "SELECT usename, passwd FROM pg_shadow WHERE usename=$1") has to be specified and no automatic CNP Cluster integration will be triggered. | [\*LocalObjectReference](#LocalObjectReference) | +| `authQuery ` | The query that will be used to download the hash of the password of a certain user. Default: "SELECT usename, passwd FROM user_search($1)". In case it is specified, also an AuthQuerySecret has to be specified and no automatic CNP Cluster integration will be triggered. | string | +| `parameters ` | Additional parameters to be passed to PgBouncer - please check the CNP documentation for a list of options you can configure | map[string]string | +| `paused ` | When set to `true`, PgBouncer will disconnect from the PostgreSQL server, first waiting for all queries to complete, and pause all new client connections until this value is set to `false` (default). Internally, the operator calls PgBouncer's `PAUSE` and `RESUME` commands. | \*bool | + + + +## PodMeta + +PodMeta is a structure similar to the metav1.ObjectMeta, but still parseable by controller-gen to create a suitable CRD for the user. The comment of PodTemplateSpec has an explanation of why we are not using the core data types. + +| Name | Description | Type | +| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------- | +| `labels ` | Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. More info: | map[string]string | +| `annotations` | Annotations is an unstructured key value map stored with a resource that may be set by external tools to store and retrieve arbitrary metadata. They are not queryable and should be preserved when modifying objects. More info: | map[string]string | + + + +## PodTemplateSpec + +PodTemplateSpec is a structure allowing the user to set a template for Pod generation. 
+ +Unfortunately we can't use the corev1.PodTemplateSpec type because the generated CRD won't have the field for the metadata section. + +References: + +| Name | Description | Type | +| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------- | +| `metadata` | Standard object's metadata. More info: | [PodMeta](#PodMeta) | +| `spec ` | Specification of the desired behavior of the pod. More info: | corev1.PodSpec | + + + +## Pooler + +Pooler is the Schema for the poolers API + +Name | Description | Type +-------- \| \| ------------------------------------------------------------------------------------------------------------ +`metadata` \| \| [metav1.ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.21/#objectmeta-v1-meta) +`spec ` \| \| [PoolerSpec](#PoolerSpec) +`status ` \| \| [PoolerStatus](#PoolerStatus) + + + +## PoolerIntegrations + +PoolerIntegrations encapsulates the needed integration for the poolers referencing the cluster + +Name | Description | Type +-------------------- \| \| --------------------------------------------------------- +`pgBouncerIntegration` \| \| [PgBouncerIntegrationStatus](#PgBouncerIntegrationStatus) + + + +## PoolerList + +PoolerList contains a list of Pooler + +Name | Description | Type +-------- \| \| -------------------------------------------------------------------------------------------------------- +`metadata` \| \| [metav1.ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.21/#listmeta-v1-meta) +`items ` \| - *mandatory* \| [\[\]Pooler](#Pooler) + + + +## PoolerSecrets + +PoolerSecrets contains the versions of all the secrets used + +| Name | Description | Type | +| ------------------ | -------------------------------------------- | --------------------------------------- | +| `serverTLS ` | The server TLS secret version | [SecretVersion](#SecretVersion) | +| `serverCA ` | The server CA secret version | [SecretVersion](#SecretVersion) | +| `clientCA ` | The client CA secret version | [SecretVersion](#SecretVersion) | +| `pgBouncerSecrets` | The version of the secrets used by PgBouncer | [\*PgBouncerSecrets](#PgBouncerSecrets) | + + + +## PoolerSpec + +PoolerSpec defines the desired state of Pooler + +| Name | Description | Type | +| ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------- | +| `cluster ` | This is the cluster reference on which the Pooler will work. Pooler name should never match with any cluster name within the same namespace. - *mandatory* | [LocalObjectReference](#LocalObjectReference) | +| `type ` | Which instances we must forward traffic to? 
- *mandatory* | PoolerType | +| `instances` | The number of replicas we want - *mandatory* | int32 | +| `template ` | The template of the Pod to be created | [\*PodTemplateSpec](#PodTemplateSpec) | +| `pgbouncer` | The PgBouncer configuration - *mandatory* | [\*PgBouncerSpec](#PgBouncerSpec) | + + + +## PoolerStatus + +PoolerStatus defines the observed state of Pooler + +| Name | Description | Type | +| --------- | ----------------------------------------- | --------------------------------- | +| `secrets` | The resource version of the config object | [\*PoolerSecrets](#PoolerSecrets) | + ## PostgresConfiguration @@ -557,6 +734,17 @@ SecretKeySelector contains enough information to let you locate the key of a Sec | ----- | ------------------------------- | ------ | | `key` | The key to select - *mandatory* | string | + + +## SecretVersion + +SecretVersion contains a secret name and its ResourceVersion + +| Name | Description | Type | +| --------- | --------------------------------- | ------ | +| `name ` | The name of the secret | string | +| `version` | The ResourceVersion of the secret | string | + ## SecretsResourceVersion diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/architecture.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/architecture.mdx index 66cd3ff3950..6dfa6f59127 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/architecture.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/architecture.mdx @@ -35,6 +35,11 @@ Kubernetes cluster, with the following specifications: information about how Cloud Native PostgreSQL relies on PostgreSQL replication, including synchronous settings. +!!! Seealso "Connection Pooling" + Please refer to the ["Connection Pooling" section](connection_pooling.md) for + information about how to take advantage of PgBouncer as a connection pooler, + and create an access layer between your applications and the PostgreSQL clusters. + ## Read-write workloads Applications can decide to connect to the PostgreSQL instance elected as diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/backup_recovery.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/backup_recovery.mdx index d9a4c214874..02f3d64dff2 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/backup_recovery.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/backup_recovery.mdx @@ -7,8 +7,9 @@ product: 'Cloud Native Operator' The operator can orchestrate a continuous backup infrastructure that is based on the [Barman](https://pgbarman.org) tool. Instead of using the classical architecture with a Barman server, which -backup many PostgreSQL instances, the operator will use the -`barman-cloud-wal-archive` and `barman-cloud-backup` tools. +backs up many PostgreSQL instances, the operator relies on the +`barman-cloud-wal-archive`, `barman-cloud-backup`, `barman-cloud-backup-list`, +and `barman-cloud-backup-delete` tools. As a result, base backups will be *tarballs*. Both base backups and WAL files can be compressed and encrypted. @@ -17,17 +18,16 @@ You can use the image `quay.io/enterprisedb/postgresql` for this scope, as it is composed of a community PostgreSQL image and the latest `barman-cli-cloud` package. +!!! Important + Always ensure that you are running the latest version of the operands + in your system to take advantage of the improvements introduced in + Barman cloud (as well as improve the security aspects of your cluster). 
+ A backup is performed from a primary or a designated primary instance in a `Cluster` (please refer to [replica clusters](replication.md#replication-from-an-external-postgresql-cluster) for more information about designated primary instances). -!!! Warning - Cloud Native PostgreSQL does not currently manage the deletion of backup files - from the backup object store. The retention policy feature will be merged from - Barman to Barman Cloud in the future. For the time being, it is your responsibility - to configure retention policies directly on the object store. - ## Cloud provider support You can archive the backup files in any service that is supported @@ -464,6 +464,7 @@ will use it unless you override it in the cluster configuration. ## Recovery +Cluster restores are not performed "in-place" on an existing cluster. You can use the data uploaded to the object storage to bootstrap a new cluster from a backup. The operator will orchestrate the recovery process using the `barman-cloud-restore` tool. @@ -540,4 +541,43 @@ manager running in the Pods. You can optionally specify a `recoveryTarget` to perform a point in time recovery. If left unspecified, the recovery will continue up to the latest available WAL on the default target timeline (`current` for PostgreSQL up to -11, `latest` for version 12 and above). \ No newline at end of file +11, `latest` for version 12 and above). + +## Retention policies + +Cloud Native PostgreSQL can manage the automated deletion of backup files from the backup object store, using **retention policies** based on recovery window. + +Internally, the retention policy feature uses `barman-cloud-backup-delete` +with `--retention-policy “RECOVERY WINDOW OF {{ retention policy value }} {{ retention policy unit }}”`. + +For example, you can define your backups with a retention policy of 30 days as +follows: + +```yaml +apiVersion: postgresql.k8s.enterprisedb.io/v1 +kind: Cluster +[...] +spec: + backup: + barmanObjectStore: + destinationPath: "" + s3Credentials: + accessKeyId: + name: aws-creds + key: ACCESS_KEY_ID + secretAccessKey: + name: aws-creds + key: ACCESS_SECRET_KEY + retentionPolicy: "30d" +``` + +!!! Note "There's more ..." + The **recovery window retention policy** is focused on the concept of + *Point of Recoverability* (`PoR`), a moving point in time determined by + `current time - recovery window`. The *first valid backup* is the first + available backup before `PoR` (in reverse chronological order). + Cloud Native PostgreSQL must ensure that we can recover the cluster at + any point in time between `PoR` and the latest successfully archived WAL + file, starting from the first valid backup. Base backups that are older + than the first valid backup will be marked as *obsolete* and permanently + removed after the next backup is completed. \ No newline at end of file diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/bootstrap.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/bootstrap.mdx index 7efd36f4774..434f5d734d6 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/bootstrap.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/bootstrap.mdx @@ -183,10 +183,44 @@ relies on the superuser to reconcile the cluster with the desired status. to the cluster. The actual PostgreSQL data directory is created via an invocation of the -`initdb` PostgreSQL command. 
If you need to add custom options to that -command (i.e., to change the locale used for the template databases or to -add data checksums), you can add them to the `options` section like in -the following example: +`initdb` PostgreSQL command. If you need to add custom options to that command +(i.e., to change the `locale` used for the template databases or to add data +checksums), you can use the following parameters: + +dataChecksums +: When `dataChecksums` is set to `true`, CNP invokes the `-k` option in + `initdb` to enable checksums on data pages and help detect corruption by the + I/O system - that would otherwise be silent (default: `false`). + +encoding +: When `encoding` set to a value, CNP passes it to the `--encoding` option in `initdb`, + which selects the encoding of the template database (default: `UTF8`). + +localeCollate +: When `localeCollate` is set to a value, CNP passes it to the `--lc-collate` + option in `initdb`. This option controls the collation order (`LC_COLLATE` + subcategory), as defined in ["Locale Support"](https://www.postgresql.org/docs/current/locale.html) + from the PostgreSQL documentation (default: `C`). + +localeCType +: When `localeCType` is set to a value, CNP passes it to the `--lc-ctype` option in + `initdb`. This option controls the collation order (`LC_CTYPE` subcategory), as + defined in ["Locale Support"](https://www.postgresql.org/docs/current/locale.html) + from the PostgreSQL documentation (default: `C`). + +walSegmentSize +: When `walSegmentSize` is set to a value, CNP passes it to the `--wal-segsize` + option in `initdb` (default: not set - defined by PostgreSQL as 16 megabytes). + +!!! Note + The only two locale options that Cloud Native PostgreSQL implements during + the `initdb` bootstrap refer to the `LC_COLLATE` and `LC_TYPE` subcategories. + The remaining locale subcategories can be configured directly in the PostgreSQL + configuration, using the `lc_messages`, `lc_monetary`, `lc_numeric`, and + `lc_time` parameters. + +The following example enables data checksums and sets the default encoding to +`LATIN1`: ```yaml apiVersion: postgresql.k8s.enterprisedb.io/v1 @@ -200,14 +234,19 @@ spec: initdb: database: app owner: app - options: - - "-k" - - "--locale=en_US" + dataChecksums: true + encoding: 'LATIN1' storage: size: 1Gi ``` -The user can also specify a custom list of queries that will be executed +Cloud Native PostgreSQL supports another way to customize the behaviour of the +`initdb` invocation, using the `options` subsection. However, given that there +are options that can break the behaviour of the operator (such as `--auth` or +`-d`), this technique is deprecated and will be removed from future versions of +the API. + +You can also specify a custom list of queries that will be executed once, just after the database is created and configured. These queries will be executed as the *superuser* (`postgres`), connected to the `postgres` database: @@ -224,9 +263,9 @@ spec: initdb: database: app owner: app - options: - - "-k" - - "--locale=en_US" + dataChecksums: true + localeCollate: 'en_US' + localeCType: 'en_US' postInitSQL: - CREATE ROLE angus - CREATE ROLE malcolm @@ -235,8 +274,9 @@ spec: ``` !!! Warning - Please use the `postInitSQL` option with extreme care as queries - are run as a superuser and can disrupt the entire cluster. + Please use the `postInitSQL` and `postInitTemplateSQL` options with extreme care, + as queries are run as a superuser and can disrupt the entire cluster. 
+ An error in any of those queries interrupts the bootstrap phase, leaving the cluster incomplete. ### Compatibility Features @@ -618,7 +658,7 @@ file on the source PostgreSQL instance: host replication streaming_replica all md5 ``` -The following manifest creates a new PostgreSQL 14.0 cluster, +The following manifest creates a new PostgreSQL 14.1 cluster, called `target-db`, using the `pg_basebackup` bootstrap method to clone an external PostgreSQL cluster defined as `source-db` (in the `externalClusters` array). As you can see, the `source-db` @@ -633,7 +673,7 @@ metadata: name: target-db spec: instances: 3 - imageName: quay.io/enterprisedb/postgresql:14.0 + imageName: quay.io/enterprisedb/postgresql:14.1 bootstrap: pg_basebackup: @@ -653,7 +693,7 @@ spec: ``` All the requirements must be met for the clone operation to work, including -the same PostgreSQL version (in our case 14.0). +the same PostgreSQL version (in our case 14.1). #### TLS certificate authentication @@ -668,7 +708,7 @@ in the same Kubernetes cluster. This example can be easily adapted to cover an instance that resides outside the Kubernetes cluster. -The manifest defines a new PostgreSQL 14.0 cluster called `cluster-clone-tls`, +The manifest defines a new PostgreSQL 14.1 cluster called `cluster-clone-tls`, which is bootstrapped using the `pg_basebackup` method from the `cluster-example` external cluster. The host is identified by the read/write service in the same cluster, while the `streaming_replica` user is authenticated @@ -683,7 +723,7 @@ metadata: name: cluster-clone-tls spec: instances: 3 - imageName: quay.io/enterprisedb/postgresql:14.0 + imageName: quay.io/enterprisedb/postgresql:14.1 bootstrap: pg_basebackup: diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/cnp-plugin.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/cnp-plugin.mdx index 6512d63a5d9..0aaedd8971c 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/cnp-plugin.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/cnp-plugin.mdx @@ -41,13 +41,20 @@ PostgreSQL Image: quay.io/enterprisedb/postgresql:13 Primary instance: cluster-example-1 Instances: 3 Ready instances: 3 +Current Timeline: 2 +Current WAL file: 00000002000000000000000A + +Continuous Backup status +First Point of Recoverability: 2021-11-09T13:36:43Z +Working WAL archiving: OK +Last Archived WAL: 00000002000000000000000A @ 2021-11-09T13:47:28.354645Z Instances status -Pod name Current LSN Received LSN Replay LSN System ID Primary Replicating Replay paused Pending restart --------- ----------- ------------ ---------- --------- ------- ----------- ------------- --------------- -cluster-example-1 0/6000060 6927251808674721812 ✓ ✗ ✗ ✗ -cluster-example-2 0/6000060 0/6000060 6927251808674721812 ✗ ✓ ✗ ✗ -cluster-example-3 0/6000060 0/6000060 6927251808674721812 ✗ ✓ ✗ ✗ +Manager Version Pod name Current LSN Received LSN Replay LSN System ID Primary Replicating Replay paused Pending restart Status +--------------- -------- ----------- ------------ ---------- --------- ------- ----------- ------------- --------------- ------ +1.10.0 cluster-example-1 0/5000060 7027078108164751389 ✓ ✗ ✗ ✗ OK +1.10.0 cluster-example-2 0/5000060 0/5000060 7027078108164751389 ✗ ✓ ✗ ✗ OK +1.10.0 cluster-example-3 0/5000060 0/5000060 7027078108164751389 ✗ ✓ ✗ ✗ OK ``` @@ -65,47 +72,65 @@ PostgreSQL Image: quay.io/enterprisedb/postgresql:13 Primary instance: cluster-example-1 Instances: 3 Ready instances: 3 +Current Timeline: 2 +Current WAL file: 00000002000000000000000A PostgreSQL 
Configuration -archive_command = '/controller/manager wal-archive %p' +archive_command = '/controller/manager wal-archive --log-destination /controller/log/postgres.json %p' archive_mode = 'on' archive_timeout = '5min' +cluster_name = 'cluster-example' full_page_writes = 'on' hot_standby = 'true' listen_addresses = '*' -logging_collector = 'off' +log_destination = 'csvlog' +log_directory = '/controller/log' +log_filename = 'postgres' +log_rotation_age = '0' +log_rotation_size = '0' +log_truncate_on_rotation = 'false' +logging_collector = 'on' max_parallel_workers = '32' max_replication_slots = '32' max_worker_processes = '32' port = '5432' +shared_preload_libraries = '' ssl = 'on' ssl_ca_file = '/controller/certificates/client-ca.crt' ssl_cert_file = '/controller/certificates/server.crt' ssl_key_file = '/controller/certificates/server.key' -unix_socket_directories = '/var/run/postgresql' +unix_socket_directories = '/controller/run' wal_keep_size = '512MB' wal_level = 'logical' wal_log_hints = 'on' - +cnp.config_sha256 = '407239112913e96626722395d549abc78b2cf9b767471e1c8eac6f33132e789c' PostgreSQL HBA Rules + # Grant local access -local all all peer +local all all peer map=local # Require client certificate authentication for the streaming_replica user hostssl postgres streaming_replica all cert hostssl replication streaming_replica all cert +hostssl all cnp_pooler_pgbouncer all cert + + -# Otherwise use md5 authentication +# Otherwise use the default authentication method host all all all md5 +Continuous Backup status +First Point of Recoverability: 2021-11-09T13:36:43Z +Working WAL archiving: OK +Last Archived WAL: 00000002000000000000000A @ 2021-11-09T13:47:28.354645Z Instances status -Pod name Current LSN Received LSN Replay LSN System ID Primary Replicating Replay paused Pending restart --------- ----------- ------------ ---------- --------- ------- ----------- ------------- --------------- -cluster-example-1 0/6000060 6927251808674721812 ✓ ✗ ✗ ✗ -cluster-example-2 0/6000060 0/6000060 6927251808674721812 ✗ ✓ ✗ ✗ -cluster-example-3 0/6000060 0/6000060 6927251808674721812 ✗ ✓ ✗ ✗ +Manager Version Pod name Current LSN Received LSN Replay LSN System ID Primary Replicating Replay paused Pending restart Status +--------------- -------- ----------- ------------ ---------- --------- ------- ----------- ------------- --------------- ------ +1.10.0 cluster-example-1 0/5000060 7027078108164751389 ✓ ✗ ✗ ✗ OK +1.10.0 cluster-example-2 0/5000060 0/5000060 7027078108164751389 ✗ ✓ ✗ ✗ OK +1.10.0 cluster-example-3 0/5000060 0/5000060 7027078108164751389 ✗ ✓ ✗ ✗ OK ``` The command also supports output in `yaml` and `json` format. diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/connection_pooling.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/connection_pooling.mdx new file mode 100644 index 00000000000..4483782bfe8 --- /dev/null +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/connection_pooling.mdx @@ -0,0 +1,464 @@ +--- +title: 'Connection Pooling' +originalFilePath: 'src/connection_pooling.md' +product: 'Cloud Native Operator' +--- + +Cloud Native PostgreSQL provides native support for connection pooling with +[PgBouncer](https://www.pgbouncer.org/), one of the most popular open source +connection poolers for PostgreSQL, through the `Pooler` CRD. 
+ +In a nutshell, a `Pooler` in Cloud Native PostgreSQL is a deployment of +PgBouncer pods that sits between your applications and a PostgreSQL service +(for example the `rw` service), creating a separate, scalable, configurable, +and highly available **database access layer**. + +## Architecture + +The following diagram highlights how the introduction of a database access +layer based on PgBouncer changes the architecture of Cloud Native PostgreSQL, +like an additional blade in a Swiss Army knife. Instead of directly connecting +to the PostgreSQL primary service, applications can now connect to the +equivalent service for PgBouncer, enabling reuse of existing connections for +faster performance and better resource management on the PostgreSQL side. + +![Applications writing to the single primary via PgBouncer](./images/pgbouncer-architecture-rw.png) + +## Quickstart + +The easiest way to explain how Cloud Native PostgreSQL implements a PgBouncer +pooler is through an example: + +```yaml +apiVersion: postgresql.k8s.enterprisedb.io/v1 +kind: Pooler +metadata: + name: pooler-example-rw +spec: + cluster: + name: cluster-example + + instances: 3 + type: rw + pgbouncer: + poolMode: session + parameters: + max_client_connections: "1000" + default_pool_size: "10" +``` + +!!! Important + Pooler name should never match with any Cluster name within the same namespace. + +This creates a new `Pooler` resource called `pooler-example-rw` (the name is +arbitrary) that is strictly associated with the Postgres `Cluster` resource called +`cluster-example` and pointing to the primary, identified by the read/write +service (`rw`, therefore `cluster-example-rw`). + +The `Pooler` must live in the same namespace of the Postgres cluster. +It consists of a Kubernetes deployment of 3 pods running the +[latest stable image of PgBouncer](https://quay.io/repository/enterprisedb/pgbouncer), +configured with the [`session` pooling mode](https://www.pgbouncer.org/config.html#pool-mode) +and accepting up to 1000 connections each - with a default pool size of 10 +user/database pairs towards PostgreSQL. + +!!! Important + The `Pooler` only sets the `*` fallback database in PgBouncer, meaning + that all parameters in the connection strings passed from the client are + relayed to the PostgreSQL server (please refer to ["Section \[databases\]" + in PgBouncer's documentation](https://www.pgbouncer.org/config.html#section-databases)). + +Additionally, Cloud Native PostgreSQL automatically creates a secret with the +same name of the pooler containing the configuration files used with PgBouncer. + +!!! Seealso "API reference" + For details, please refer to [`PgBouncerSpec` section](api_reference.md#PgBouncerSpec) + in the API reference. + +## Pooler resource lifecycle + +`Pooler` resources are not `Cluster`-managed resources. You are supposed to +create poolers manually when they are needed. Additionally, you can deploy +multiple poolers per PostgreSQL Cluster. + +What is important to note is that the lifecycles of the `Cluster` and the +`Pooler` resources are currently independent: the deletion of the `Cluster` +doesn't imply the automatic deletion of the `Pooler`, and viceversa. + +!!! Important + Now that you know how a `Pooler` works, you have full freedom in terms of + possible architectures: you can have clusters without poolers, clusters with + a single pooler, or clusters with several poolers (i.e. one per application). 
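+
+For example, alongside the read/write pooler shown in the quickstart, you could
+add a second pooler in front of the read-only service of the same cluster. The
+following manifest is only a minimal sketch: it assumes the `cluster-example`
+cluster used in the previous examples, and the `pooler-example-ro` name is
+arbitrary.
+
+```yaml
+apiVersion: postgresql.k8s.enterprisedb.io/v1
+kind: Pooler
+metadata:
+  name: pooler-example-ro
+spec:
+  cluster:
+    name: cluster-example
+
+  # Three PgBouncer pods in front of the read-only (ro) service
+  instances: 3
+  type: ro
+  pgbouncer:
+    poolMode: session
+    parameters:
+      default_pool_size: "10"
+```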
+ +## Security + +Any PgBouncer pooler is transparently integrated with Cloud Native PostgreSQL +support for in-transit encryption via **TLS connections**, both on the client +(application) and server (PostgreSQL) side of the pool. + +Specifically, PgBouncer automatically reuses the certificates of the PostgreSQL +server. Moreover, it uses TLS client certificate authentication to connect +to the PostgreSQL server to run the `auth_query` for clients' password +authentication (see the ["Authentication" section](#authentication) below). + +Containers run as the `pgbouncer` system user, and access to the `pgbouncer` +database is only allowed via local connections, through `peer` authentication. + +## Authentication + +**Password based authentication** is the only supported method for clients of +PgBouncer in Cloud Native PostgreSQL. + +Internally, our implementation relies on PgBouncer's `auth_user` and `auth_query` options. Specifically, the operator: + +- creates a standard user called `cnp_pooler_pgbouncer` in the PostgreSQL server +- creates the lookup function in the `postgres` database and grants execution + privileges to the `cnp_pooler_pgbouncer` user (PoLA) +- issues a TLS certificate for this user +- sets `cnp_pooler_pgbouncer` as the `auth_user` +- configures PgBouncer to use the TLS certificate to authenticate + `cnp_pooler_pgbouncer` against the PostgreSQL server +- removes all the above when it detects that a cluster does not have + any pooler associated to it + +## PodTemplates + +You can take advantage of pod templates specification in the `template` +section of a `Pooler` resource. For details, please refer to [`PoolerSpec` +section](api_reference.md#PoolerSpec) in the API reference. + +Through templates you can configure pods as you like, including fine +control over affinity and anti-affinity rules for pods and nodes. +By default, containers use images from `quay.io/enterprisedb/pgbouncer`. + +## High Availability (HA) + +Thanks to Kubernetes' deployments, you can configure your pooler to run +on a single instance or over multiple pods. The exposed service will +make sure that your clients are randomly distributed over the available +pods running PgBouncer - which will then automatically manage and reuse +connections towards the underlying server (if using the `rw` service) +or servers (if using the `ro` service with multiple replicas). + +!!! Warning + Please be aware of network hops in case your infrastructure spans + multiple availability zones with high latency across them. Consider + for example the case of your application running in zone 2, + connecting to PgBouncer running in zone 3, pointing to the PostgreSQL + primary in zone 1. + +## PgBouncer configuration options + +The operator manages most of the [configuration options for PgBouncer](https://www.pgbouncer.org/config.html), allowing you to modify only a subset of them. + +!!! Warning + You are responsible to correctly set the value of each option, as the operator + does not validate them. + +Below you can find a list of the PgBouncer options you are allowed to +customize. Each of them contains a link to the PgBouncer documentation for that +specific parameter. 
Unless differently stated here, the default values are the +ones directly set by PgBouncer: + +- [`application_name_add_host`](https://www.pgbouncer.org/config.html#application_name_add_host) +- [`autodb_idle_timeout`](https://www.pgbouncer.org/config.html#autodb_idle_timeout) +- [`client_idle_timeout`](https://www.pgbouncer.org/config.html#client_idle_timeout) +- [`client_login_timeout`](https://www.pgbouncer.org/config.html#client_login_timeout) +- [`default_pool_size`](https://www.pgbouncer.org/config.html#default_pool_size) +- [`disable_pqexec`](https://www.pgbouncer.org/config.html#disable_pqexec) +- [`idle_transaction_timeout`](https://www.pgbouncer.org/config.html#idle_transaction_timeout) +- [`ignore_startup_parameters`](https://www.pgbouncer.org/config.html#ignore_startup_parameters): + to be appended to `extra_float_digits,options` - required by CNP +- [`log_connections`](https://www.pgbouncer.org/config.html#log_connections) +- [`log_disconnections`](https://www.pgbouncer.org/config.html#log_disconnections) +- [`log_pooler_errors`](https://www.pgbouncer.org/config.html#log_pooler_errors) +- [`log_stats`](https://www.pgbouncer.org/config.html#log_stats): by default + disabled (`0`), given that statistics are already collected by the Prometheus + export as described in the ["Monitoring"](#monitoring) section below +- [`max_client_conn`](https://www.pgbouncer.org/config.html#max_client_conn) +- [`max_db_connections`](https://www.pgbouncer.org/config.html#max_db_connections) +- [`max_user_connections`](https://www.pgbouncer.org/config.html#max_user_connections) +- [`min_pool_size`](https://www.pgbouncer.org/config.html#min_pool_size) +- [`query_timeout`](https://www.pgbouncer.org/config.html#query_timeout) +- [`query_wait_timeout`](https://www.pgbouncer.org/config.html#query_wait_timeout) +- [`reserve_pool_size`](https://www.pgbouncer.org/config.html#reserve_pool_size) +- [`reserve_pool_timeout`](https://www.pgbouncer.org/config.html#reserve_pool_timeout) +- [`server_check_delay`](https://www.pgbouncer.org/config.html#server_check_delay) +- [`server_check_query`](https://www.pgbouncer.org/config.html#server_check_query) +- [`server_connect_timeout`](https://www.pgbouncer.org/config.html#server_connect_timeout) +- [`server_fast_close`](https://www.pgbouncer.org/config.html#server_fast_close) +- [`server_idle_timeout`](https://www.pgbouncer.org/config.html#server_idle_timeout) +- [`server_lifetime`](https://www.pgbouncer.org/config.html#server_lifetime) +- [`server_login_retry`](https://www.pgbouncer.org/config.html#server_login_retry) +- [`server_reset_query`](https://www.pgbouncer.org/config.html#server_reset_query) +- [`server_reset_query_always`](https://www.pgbouncer.org/config.html#server_reset_query_always) +- [`server_round_robin`](https://www.pgbouncer.org/config.html#server_round_robin) +- [`stats_period`](https://www.pgbouncer.org/config.html#stats_period) +- [`verbose`](https://www.pgbouncer.org/config.html#verbose) + +Customizations of the PgBouncer configuration are written +declaratively in the `.spec.pgbouncer.parameters` map. + +The operator reacts to the changes in the Pooler specification, +and every PgBouncer instance reloads the updated configuration +without disrupting the service. + +!!! Warning + Every PgBouncer pod will have the same configuration, aligned + with the parameters in the specification. A mistake in these + parameters could disrupt the operability of the **whole Pooler**. + The operator **does not** validate the value of any option. 
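+
+As a reference, the following manifest is a sketch of how a few of the options
+listed above could be set through the `.spec.pgbouncer.parameters` map. It
+reuses the quickstart pooler, and the parameter values are purely illustrative,
+not recommendations:
+
+```yaml
+apiVersion: postgresql.k8s.enterprisedb.io/v1
+kind: Pooler
+metadata:
+  name: pooler-example-rw
+spec:
+  cluster:
+    name: cluster-example
+  instances: 3
+  type: rw
+  pgbouncer:
+    poolMode: session
+    parameters:
+      # Server connections allowed per user/database pair
+      default_pool_size: "10"
+      # Maximum number of client connections allowed
+      max_client_conn: "1000"
+      # Maximum time (seconds) a query may wait for a server connection
+      query_wait_timeout: "60"
+      # Drop server connections that have been idle longer than this (seconds)
+      server_idle_timeout: "300"
+```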
+ +## Monitoring + +The PgBouncer implementation of the `Pooler` comes with a default +Prometheus exporter that automatically makes available several +metrics having the `cnp_pgbouncer_` prefix, by running: + +- `SHOW LISTS` (prefix: `cnp_pgbouncer_lists`) +- `SHOW POOLS` (prefix: `cnp_pgbouncer_pools`) +- `SHOW STATS` (prefix: `cnp_pgbouncer_stats`) + +Similarly to the Cloud Native PostgreSQL instance, the exporter runs on port +`9187` of each pod running PgBouncer, and also provides metrics related to the +Go runtime (with prefix `go_*`). You can debug the exporter on a pod running +PgBouncer through the following command: + +```console +kubectl exec -ti -- curl 127.0.0.1:9187/metrics +``` + +An example of the output for `cnp_pgbouncer` metrics: + +```text +# HELP cnp_pgbouncer_collection_duration_seconds Collection time duration in seconds +# TYPE cnp_pgbouncer_collection_duration_seconds gauge +cnp_pgbouncer_collection_duration_seconds{collector="Collect.up"} 0.002443168 + +# HELP cnp_pgbouncer_collections_total Total number of times PostgreSQL was accessed for metrics. +# TYPE cnp_pgbouncer_collections_total counter +cnp_pgbouncer_collections_total 1 + +# HELP cnp_pgbouncer_last_collection_error 1 if the last collection ended with error, 0 otherwise. +# TYPE cnp_pgbouncer_last_collection_error gauge +cnp_pgbouncer_last_collection_error 0 + +# HELP cnp_pgbouncer_lists_databases Count of databases. +# TYPE cnp_pgbouncer_lists_databases gauge +cnp_pgbouncer_lists_databases 1 + +# HELP cnp_pgbouncer_lists_dns_names Count of DNS names in the cache. +# TYPE cnp_pgbouncer_lists_dns_names gauge +cnp_pgbouncer_lists_dns_names 0 + +# HELP cnp_pgbouncer_lists_dns_pending Not used. +# TYPE cnp_pgbouncer_lists_dns_pending gauge +cnp_pgbouncer_lists_dns_pending 0 + +# HELP cnp_pgbouncer_lists_dns_queries Count of in-flight DNS queries. +# TYPE cnp_pgbouncer_lists_dns_queries gauge +cnp_pgbouncer_lists_dns_queries 0 + +# HELP cnp_pgbouncer_lists_dns_zones Count of DNS zones in the cache. +# TYPE cnp_pgbouncer_lists_dns_zones gauge +cnp_pgbouncer_lists_dns_zones 0 + +# HELP cnp_pgbouncer_lists_free_clients Count of free clients. +# TYPE cnp_pgbouncer_lists_free_clients gauge +cnp_pgbouncer_lists_free_clients 49 + +# HELP cnp_pgbouncer_lists_free_servers Count of free servers. +# TYPE cnp_pgbouncer_lists_free_servers gauge +cnp_pgbouncer_lists_free_servers 0 + +# HELP cnp_pgbouncer_lists_login_clients Count of clients in login state. +# TYPE cnp_pgbouncer_lists_login_clients gauge +cnp_pgbouncer_lists_login_clients 0 + +# HELP cnp_pgbouncer_lists_pools Count of pools. +# TYPE cnp_pgbouncer_lists_pools gauge +cnp_pgbouncer_lists_pools 1 + +# HELP cnp_pgbouncer_lists_used_clients Count of used clients. +# TYPE cnp_pgbouncer_lists_used_clients gauge +cnp_pgbouncer_lists_used_clients 1 + +# HELP cnp_pgbouncer_lists_used_servers Count of used servers. +# TYPE cnp_pgbouncer_lists_used_servers gauge +cnp_pgbouncer_lists_used_servers 0 + +# HELP cnp_pgbouncer_lists_users Count of users. +# TYPE cnp_pgbouncer_lists_users gauge +cnp_pgbouncer_lists_users 2 + +# HELP cnp_pgbouncer_pools_cl_active Client connections that are linked to server connection and can process queries. +# TYPE cnp_pgbouncer_pools_cl_active gauge +cnp_pgbouncer_pools_cl_active{database="pgbouncer",user="pgbouncer"} 1 + +# HELP cnp_pgbouncer_pools_cl_cancel_req Client connections that have not forwarded query cancellations to the server yet. 
+# TYPE cnp_pgbouncer_pools_cl_cancel_req gauge +cnp_pgbouncer_pools_cl_cancel_req{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_cl_waiting Client connections that have sent queries but have not yet got a server connection. +# TYPE cnp_pgbouncer_pools_cl_waiting gauge +cnp_pgbouncer_pools_cl_waiting{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_maxwait How long the first (oldest) client in the queue has waited, in seconds. If this starts increasing, then the current pool of servers does not handle requests quickly enough. The reason may be either an overloaded server or just too small of a pool_size setting. +# TYPE cnp_pgbouncer_pools_maxwait gauge +cnp_pgbouncer_pools_maxwait{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_maxwait_us Microsecond part of the maximum waiting time. +# TYPE cnp_pgbouncer_pools_maxwait_us gauge +cnp_pgbouncer_pools_maxwait_us{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_pool_mode The pooling mode in use. 1 for session, 2 for transaction, 3 for statement, -1 if unknown +# TYPE cnp_pgbouncer_pools_pool_mode gauge +cnp_pgbouncer_pools_pool_mode{database="pgbouncer",user="pgbouncer"} 3 + +# HELP cnp_pgbouncer_pools_sv_active Server connections that are linked to a client. +# TYPE cnp_pgbouncer_pools_sv_active gauge +cnp_pgbouncer_pools_sv_active{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_sv_idle Server connections that are unused and immediately usable for client queries. +# TYPE cnp_pgbouncer_pools_sv_idle gauge +cnp_pgbouncer_pools_sv_idle{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_sv_login Server connections currently in the process of logging in. +# TYPE cnp_pgbouncer_pools_sv_login gauge +cnp_pgbouncer_pools_sv_login{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_sv_tested Server connections that are currently running either server_reset_query or server_check_query. +# TYPE cnp_pgbouncer_pools_sv_tested gauge +cnp_pgbouncer_pools_sv_tested{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_pools_sv_used Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used again. +# TYPE cnp_pgbouncer_pools_sv_used gauge +cnp_pgbouncer_pools_sv_used{database="pgbouncer",user="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_avg_query_count Average queries per second in last stat period. +# TYPE cnp_pgbouncer_stats_avg_query_count gauge +cnp_pgbouncer_stats_avg_query_count{database="pgbouncer"} 1 + +# HELP cnp_pgbouncer_stats_avg_query_time Average query duration, in microseconds. +# TYPE cnp_pgbouncer_stats_avg_query_time gauge +cnp_pgbouncer_stats_avg_query_time{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_avg_recv Average received (from clients) bytes per second. +# TYPE cnp_pgbouncer_stats_avg_recv gauge +cnp_pgbouncer_stats_avg_recv{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_avg_sent Average sent (to clients) bytes per second. +# TYPE cnp_pgbouncer_stats_avg_sent gauge +cnp_pgbouncer_stats_avg_sent{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_avg_wait_time Time spent by clients waiting for a server, in microseconds (average per second). +# TYPE cnp_pgbouncer_stats_avg_wait_time gauge +cnp_pgbouncer_stats_avg_wait_time{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_avg_xact_count Average transactions per second in last stat period. 
+# TYPE cnp_pgbouncer_stats_avg_xact_count gauge +cnp_pgbouncer_stats_avg_xact_count{database="pgbouncer"} 1 + +# HELP cnp_pgbouncer_stats_avg_xact_time Average transaction duration, in microseconds. +# TYPE cnp_pgbouncer_stats_avg_xact_time gauge +cnp_pgbouncer_stats_avg_xact_time{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_total_query_count Total number of SQL queries pooled by pgbouncer. +# TYPE cnp_pgbouncer_stats_total_query_count gauge +cnp_pgbouncer_stats_total_query_count{database="pgbouncer"} 3 + +# HELP cnp_pgbouncer_stats_total_query_time Total number of microseconds spent by pgbouncer when actively connected to PostgreSQL, executing queries. +# TYPE cnp_pgbouncer_stats_total_query_time gauge +cnp_pgbouncer_stats_total_query_time{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_total_received Total volume in bytes of network traffic received by pgbouncer. +# TYPE cnp_pgbouncer_stats_total_received gauge +cnp_pgbouncer_stats_total_received{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_total_sent Total volume in bytes of network traffic sent by pgbouncer. +# TYPE cnp_pgbouncer_stats_total_sent gauge +cnp_pgbouncer_stats_total_sent{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_total_wait_time Time spent by clients waiting for a server, in microseconds. +# TYPE cnp_pgbouncer_stats_total_wait_time gauge +cnp_pgbouncer_stats_total_wait_time{database="pgbouncer"} 0 + +# HELP cnp_pgbouncer_stats_total_xact_count Total number of SQL transactions pooled by pgbouncer. +# TYPE cnp_pgbouncer_stats_total_xact_count gauge +cnp_pgbouncer_stats_total_xact_count{database="pgbouncer"} 3 + +# HELP cnp_pgbouncer_stats_total_xact_time Total number of microseconds spent by pgbouncer when connected to PostgreSQL in a transaction, either idle in transaction or executing queries. +# TYPE cnp_pgbouncer_stats_total_xact_time gauge +cnp_pgbouncer_stats_total_xact_time{database="pgbouncer"} 0 +``` + +## Logging + +Logs are directly sent to standard output, in JSON format, like in the +following example: + +```json +{ + "level": "info", + "ts": SECONDS.MICROSECONDS, + "msg": "record", + "pipe": "stderr", + "record": { + "timestamp": "YYYY-MM-DD HH:MM:SS.MS UTC", + "pid": "", + "level": "LOG", + "msg": "kernel file descriptor limit: 1048576 (hard: 1048576); max_client_conn: 100, max expected fd use: 112" + } +} +``` + +## Pausing connections + +The `Pooler` specification allows you to take advantage of PgBouncer's `PAUSE` +and `RESUME` commands, using only declarative configuration - via the `paused` +option, by default set to `false`. When set to `true`, the operator internally +invokes the `PAUSE` command in PgBouncer, which: + +1. closes all active connections towards the PostgreSQL server, after waiting for the queries to complete +2. pauses any new connection coming from the client + +When the `paused` option is set back to `false`, the operator will invoke the +`RESUME` command in PgBouncer, re-opening the taps towards the PostgreSQL +service defined in the `Pooler`. + +!!! Seealso "PAUSE" + For further information, please refer to the + [`PAUSE` section in the PgBouncer documentation](https://www.pgbouncer.org/usage.html#pause-db). + +!!! Important + In future versions, the switchover operation will be fully integrated + with the PgBouncer pooler, and take advantage of the `PAUSE`/`RESUME` + features to reduce the perceived downtime by client applications. 
+ At the moment, you can achieve the same results by setting the `paused` + attribute to `true`, then issuing the switchover command through the + [`cnp` plugin](cnp-plugin.md#promote), and finally restoring the `paused` + attribute to `false`. + +## Limitations + +### Single PostgreSQL cluster + +The current implementation of the pooler is designed to work as part of a +specific Cloud Native PostgreSQL cluster (a service, to be precise). It is not +possible at the moment to create a pooler that spans over multiple clusters. + +### Controlled configurability + +Cloud Native PostgreSQL transparently manages several configuration options +that are used for the PgBouncer layer to communicate with PostgreSQL. Such +options are not configurable from outside and include TLS certificates, +authentication settings, `databases` section, and `users` section. Also, +considering the specific use case for the single PostgreSQL cluster, the +adopted criteria is to explicitly list the options that can be configured by +users. + +!!! Note + We have reasons to believe that the adopted solution addresses the majority of + use cases, while leaving room for the future implementation of a separate + operator for PgBouncer to complete the gamma with more advanced and customized + scenarios. \ No newline at end of file diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/credits.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/credits.mdx index 23649dbe3b5..824479dfcd5 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/credits.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/credits.mdx @@ -9,15 +9,22 @@ developed, and tested by the EnterpriseDB Cloud Native team: - Gabriele Bartolini - Jonathan Battiato +- Jihyuk Bok - Francesco Canovai - Leonardo Cecchi - Valerio Del Sarto - Niccolò Fei - Jonathan Gonzalez +- Hai He +- Federico Innocenti - Danish Khan -- Anand Nednur +- John Long - Marco Nenciarini +- Adrian Noland - Gabriele Quaresima +- Armando Ruocco - Philippe Scorsolini +- Marcos Silva +- Jaime Silvela - Jitendra Wadle - Adam Wright \ No newline at end of file diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/images/pgbouncer-architecture-rw.png b/advocacy_docs/kubernetes/cloud_native_postgresql/images/pgbouncer-architecture-rw.png new file mode 100644 index 00000000000..efceb9ce266 --- /dev/null +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/images/pgbouncer-architecture-rw.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27b9d1ec02c4a0f527ed3dc04c535542d2279cf327382746c742cf28b06ef735 +size 169722 diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/index.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/index.mdx index 435e245de8b..b47641b9451 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/index.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/index.mdx @@ -33,9 +33,11 @@ navigation: - logging - certificates - ssl_connections + - connection_pooling - kubernetes_upgrade - expose_pg_services - cnp-plugin + - troubleshooting - e2e - license_keys - container_images @@ -62,6 +64,7 @@ PostgreSQL database using a service which is solely managed by the operator, without having to worry about changes of the primary role following a failover or a switchover. Applications that reside outside the Kubernetes cluster, need to configure an Ingress object to expose the service via TCP. +Web applications can take advantage of the native connection pooler based on PgBouncer. 
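+
+For illustration only, assuming a cluster named `cluster-example`, the default
+`app` database and owner created by the `initdb` bootstrap, and a `Pooler`
+named `pooler-example-rw` (all assumptions, not defaults you can rely on), an
+application running inside the Kubernetes cluster could connect either
+directly or through the pooler:
+
+```shell
+# Direct connection through the read-write service managed by the operator
+psql "host=cluster-example-rw user=app dbname=app"
+
+# Connection through the PgBouncer pooler service (hypothetical Pooler name)
+psql "host=pooler-example-rw user=app dbname=app"
+```
+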
Cloud Native PostgreSQL works with PostgreSQL and [EDB Postgres Advanced](https://www.enterprisedb.com/products/edb-postgres-advanced-server-secure-ha-oracle-compatible) and is available under the [EnterpriseDB Limited Use License](https://www.enterprisedb.com/limited-use-license). @@ -103,14 +106,21 @@ format for the following platforms: `linux/amd64`, `linux/arm64`, !!! Warning Cloud Native PostgreSQL requires that all nodes in a Kubernetes cluster have the same CPU architecture, thus a hybrid CPU architecture Kubernetes cluster is not - supported. + supported. Additionally, EDB supports `linux/ppc64le` and `linux/s390x` architectures + on OpenShift only. ## Supported Postgres versions The following versions of Postgres are currently supported: -- PostgreSQL 13, 12, 11 and 10 (`linux/amd64`) -- EDB Postgres Advanced 13, 12, 11 and 10 (`linux/amd64`, `linux/ppc64le`, `linux/s390x`) +- PostgreSQL 14 (default), 13, 12, 11, and 10 +- EDB Postgres Advanced 13, 12, 11, and 10 + +All of the above versions, except PostgreSQL/EPAS 10, are available on the +following platforms: `linux/amd64`, `linux/ppc64le`, `linux/s390x`. PostgreSQL +and EPAS 10 is available on `linux/amd64` only. +EDB supports operand images for `linux/ppc64le` and `linux/s390x` architectures +on OpenShift only. ## Main features @@ -126,14 +136,17 @@ The following versions of Postgres are currently supported: - Definition of the *read-only* service, to connect your applications to any of the instances for reading workloads - Support for Local Persistent Volumes with PVC templates - Reuse of Persistent Volumes storage in Pods -- Rolling updates for PostgreSQL minor versions and operator upgrades +- Rolling updates for PostgreSQL minor versions +- In-place or rolling updates for operator upgrades - TLS connections and client certificate authentication - Support for custom TLS certificates (including integration with cert-manager) - Continuous backup to an S3 compatible object store +- Backup retention policies (based on recovery window) - Full recovery and Point-In-Time recovery from an S3 compatible object store backup - Replica clusters for PostgreSQL deployments across multiple Kubernetes clusters, enabling private, public, hybrid, and multi-cloud architectures - Support for Synchronous Replicas +- Connection pooling with PgBouncer - Support for node affinity via `nodeSelector` - Native customizable exporter of user defined metrics for Prometheus through the `metrics` port (9187) - Standard output logging of PostgreSQL error messages in JSON format diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/installation_upgrade.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/installation_upgrade.mdx index 924b89b7915..dc52d5d8a50 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/installation_upgrade.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/installation_upgrade.mdx @@ -11,12 +11,12 @@ product: 'Cloud Native Operator' The operator can be installed like any other resource in Kubernetes, through a YAML manifest applied via `kubectl`. 
-You can install the [latest operator manifest](https://get.enterprisedb.io/cnp/postgresql-operator-1.9.2.yaml) +You can install the [latest operator manifest](https://get.enterprisedb.io/cnp/postgresql-operator-1.10.0.yaml) as follows: ```sh kubectl apply -f \ - https://get.enterprisedb.io/cnp/postgresql-operator-1.9.2.yaml + https://get.enterprisedb.io/cnp/postgresql-operator-1.10.0.yaml ``` Once you have run the `kubectl` command, Cloud Native PostgreSQL will be installed in your Kubernetes cluster. @@ -118,7 +118,7 @@ selected installation method. !!! Important Please carefully read the [release notes](release_notes.md) before performing an upgrade as some versions might require - extraordinary measures. + extra steps. Upgrading Cloud Native PostgreSQL operator is a two-step process: @@ -131,10 +131,12 @@ installations, or using the native package manager of the used distribution (please follow the instructions in the above sections). The second step is automatically executed after having updated the controller, -triggering a rolling update of every deployed PostgreSQL instance to use the -new instance manager. If the `primaryUpdateStrategy` is set to `supervised`, -users need to complete the rolling update by manually promoting a new instance -through the `cnp` plugin for `kubectl`. +by default triggering a rolling update of every deployed PostgreSQL instance to +use the new instance manager. The rolling update procedure culminates with a +switchover, which is controlled by the `primaryUpdateStrategy` option, by +default set to `unsupervised`. When set to `supervised`, users need to complete +the rolling update by manually promoting a new instance through the `cnp` +plugin for `kubectl`. !!! Seealso "Rolling updates" This process is discussed in-depth on the [Rolling Updates](rolling_update.md) page. @@ -144,15 +146,66 @@ through the `cnp` plugin for `kubectl`. an upgrade of the operator will trigger a switchover on your PostgreSQL cluster, causing a (normally negligible) downtime. +Since version 1.10.0, the rolling update behavior can be replaced with in-place +updates of the instance manager. The latter don't require a restart of the +PostgreSQL instance and, as a result, a switchover in the cluster. +This behavior, which is disabled by default, is described below. + +### In-place updates of the instance manager + +By default, Cloud Native PostgreSQL issues a rolling update of the cluster +every time the operator is updated. The new instance manager shipped with the +operator is added to each PostgreSQL pod via an init container. + +However, this behavior can be changed via configuration to enable in-place +updates of the instance manager, which is the PID 1 process that keeps the +container alive. + +Internally, any instance manager from version 1.10 of Cloud Native PostgreSQL +supports injection of a new executable that will replace the existing one, +once the integrity verification phase is completed, as well as graceful +termination of all the internal processes. When the new instance manager +restarts using the new binary, it adopts the already running *postmaster*. + +As a result, the PostgreSQL process is unaffected by the update, refraining +from the need to perform a switchover. The other side of the coin, is that +the Pod is changed after the start, breaking the pure concept of immutability. 
+ +You can enable this feature by setting the `ENABLE_INSTANCE_MANAGER_INPLACE_UPDATES` +environment variable to `'true'` in the +[operator configuration](operator_conf.md#available-options). + +The in-place upgrade process will not change the init container image inside the +Pods. Therefore, the Pod definition will not reflect the current version of the +operator. + +!!! Important + This feature requires that all pods (operators and operands) run on the + same platform/architecture (for example, all `linux/amd64`). + ### Compatibility among versions -We strive to maintain compatibility between different operator versions, but in -some cases, this might not be possible. -Every version of the operator is compatible with the previous one, unless -[release notes](release_notes.md) state the opposite. -The release notes page indeed contains a detailed list of the changes introduced -in every released version of the Cloud Native PostgreSQL Operator, and it must -be read before upgrading to a newer version of the software. +Cloud Native PostgreSQL follows semantic versioning. Every release of the +operator within the same API version is compatible with the previous one. +The current API version is v1, corresponding to versions 1.x.y of the operator. + +In addition to new features, new versions of the operator contain bug fixes and +stability enhancements. Because of this, **we strongly encourage users to upgrade +to the latest version of the operator**, as each version is released in order to +maintain the most secure and stable Postgres environment. + +Cloud Native PostgreSQL currently releases new versions of the operator at +least monthly. If you are unable to apply updates as each version becomes +available, we recommend upgrading through each version in sequential order to +come current periodically and not skipping versions. + +!!! Important + In 2022, EDB plans an LTS release for Cloud Native PostgreSQL in + environments where frequent online updates are not possible. + +The [release notes](release_notes.md) page contains a detailed list of the +changes introduced in every released version of Cloud Native PostgreSQL, +and it must be read before upgrading to a newer version of the software. Most versions are directly upgradable and in that case, applying the newer manifest for plain Kubernetes installations or using the native package @@ -160,32 +213,4 @@ manager of the chosen distribution is enough. When versions are not directly upgradable, the old version needs to be removed before installing the new one. This won't affect user data but -only the operator itself. Please consult the release notes for -detailed information on how to upgrade to any released version. - -#### Upgrading to version 1.4.0 - -If you have installed the operator on Kubernetes using the distributed YAML manifest -you must delete the operator controller deployment before installing the -1.4.0 manifest with the following command: - -```bash -kubectl delete deployments \ - -n postgresql-operator-system \ - postgresql-operator-controller-manager -``` - -!!! Important - Removing the operator controller deployment will not delete or remove any - of your deployed PostgreSQL clusters. - -!!! Warning - Remember to install the new version of the operator after having performed - the above command. Otherwise, your PostgreSQL clusters will keep running - without an operator and, as such, without any self-healing and high-availability - capabilities. - -!!! 
Note - In case you deployed the operator in a different namespace than the default - (`postgresql-operator-system`), you need to use the correct namespace for - the `-n` option in the above command. \ No newline at end of file +only the operator itself. \ No newline at end of file diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/logging.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/logging.mdx index 75f7a12fa6b..2cbfee740a9 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/logging.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/logging.mdx @@ -15,11 +15,12 @@ Each log entry has the following fields: - `msg`: the actual message or the keyword `record` in case the message is parsed in JSON format - `record`: the actual record (with structure that varies depending on the `logger` type) +- `logging_pod`: the name of the pod where the log was created ## Operator log A log level can be specified in the cluster spec with the option `logLevel` and -can be set to any of `error`, `info`(default), `debug` or `trace`. +can be set to any of `error`, `warning`, `info`(default), `debug` or `trace`. At the moment, the log level can only be set when an instance starts and can not be changed at runtime. If the value is changed in the cluster spec after the cluster @@ -61,7 +62,8 @@ to `postgres` and the structure described in the following example: "location": "", "application_name": "", "backend_type": "startup" - } + }, + "logging_pod": "cluster-example-1", } ``` @@ -156,7 +158,8 @@ See the example below: "statement": "SELECT pg_current_wal_lsn()", "parameter": "" } - } + }, + "logging_pod": "cluster-example-1", } ``` @@ -245,7 +248,8 @@ See the example below: "command_tag": "GRANT", "audit_tag": "", "type": "grant" - } + }, + "logging_pod": "cluster-example-1", } ``` diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/monitoring.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/monitoring.mdx index 7eddfda6c58..a60e701c790 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/monitoring.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/monitoring.mdx @@ -125,7 +125,15 @@ cnp_collector_sync_replicas{value="observed"} 0 # HELP cnp_collector_up 1 if PostgreSQL is up, 0 otherwise. # TYPE cnp_collector_up gauge -cnp_collector_up 1 +cnp_collector_up{cluster="cluster-example"} 1 + +# HELP cnp_collector_postgres_version Postgres version +# TYPE cnp_collector_postgres_version gauge +cnp_collector_postgres_version{cluster="cluster-example",full="13.4.0"} 13.4 + +# HELP cnp_collector_first_recoverability_point The first point of recoverability for the cluster as a unix timestamp +# TYPE cnp_collector_first_recoverability_point gauge +cnp_collector_first_recoverability_point 1.63238406e+09 # HELP cnp_collector_lo_pages Estimated number of pages in the pg_largeobject table # TYPE cnp_collector_lo_pages gauge @@ -251,6 +259,12 @@ go_memstats_sys_bytes 7.6891144e+07 go_threads 18 ``` +!!! Note + `cnp_collector_postgres_version` is a GaugeVec metric containing the + `Major.Minor` version of Postgres (either PostgreSQL or EPAS). The full + semantic version `Major.Minor.Patch` can be found inside one of its label + field named `full`. 
+ ### User defined metrics This feature is currently in *beta* state and the format is inspired by the @@ -287,6 +301,11 @@ Take care that the referred resources have to be created **in the same namespace add a label with key `k8s.enterprisedb.io/reload` to it, otherwise you will have to reload the instances using the `kubectl cnp reload` subcommand. +!!! Important + When a user defined metric overwrites an already existing metric the instance manager prints a json warning log, + containing the message:`Query with the same name already found. Overwriting the existing one.` + and a key `queryName` containing the overwritten query name. + #### Example of a user defined metric Here you can see an example of a `ConfigMap` containing a single custom query, @@ -308,7 +327,10 @@ data: ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag, - pg_is_in_recovery() AS in_recovery" + pg_is_in_recovery() AS in_recovery, + EXISTS (TABLE pg_stat_wal_receiver) AS is_wal_receiver_up, + (SELECT count(*) FROM pg_stat_replication) AS streaming_replicas" + metrics: - lag: usage: "GAUGE" @@ -316,6 +338,12 @@ data: - in_recovery: usage: "GAUGE" description: "Whether the instance is in recovery" + - is_wal_receiver_up: + usage: "GAUGE" + description: "Whether the instance wal_receiver is up" + - streaming_replicas: + usage: "GAUGE" + description: "Number of streaming replicas connected to the instance" ``` A list of basic monitoring queries can be found in the [`cnp-basic-monitoring.yaml` file](../samples/cnp-basic-monitoring.yaml). @@ -421,7 +449,7 @@ Here is a short description of all the available fields: - `primary`: whether to run the query only on the primary instance - `master`: same as `primary` (for compatibility with the Prometheus PostgreSQL exporter's syntax - deprecated) - `runonserver`: a semantic version range to limit the versions of PostgreSQL the query should run on - (e.g. `">=10.0.0"` or `">=12.0.0 <=14.0.0"`) + (e.g. `">=10.0.0"` or `">=12.0.0 <=14.1.0"`) - `target_databases`: a list of databases to run the `query` against, or a [shell-like pattern](#example-of-a-user-defined-metric-running-on-multiple-databases) to enable auto discovery. Overwrites the default database if provided. @@ -468,7 +496,12 @@ cnp_pg_replication_in_recovery 0 # HELP cnp_pg_replication_lag Replication lag behind primary in seconds # TYPE cnp_pg_replication_lag gauge cnp_pg_replication_lag 0 - +# HELP cnp_pg_replication_streaming_replicas Number of streaming replicas connected to the instance +# TYPE cnp_pg_replication_streaming_replicas gauge +cnp_pg_replication_streaming_replicas 2 +# HELP cnp_pg_replication_is_wal_receiver_up Whether the instance wal_receiver is up +# TYPE cnp_pg_replication_is_wal_receiver_up gauge +cnp_pg_replication_is_wal_receiver_up 0 ``` ### Differences with the Prometheus Postgres exporter diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/operator_capability_levels.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/operator_capability_levels.mdx index 4b4bfefe003..ef6b0e0b101 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/operator_capability_levels.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/operator_capability_levels.mdx @@ -246,6 +246,10 @@ You can upgrade the operator seamlessly as a new deployment. A change in the operator does not require a change in the operand - thanks to the instance manager's injection. The operator can manage older versions of the operand. 
+Cloud Native PostgreSQL also supports [in-place updates of the instance manager](installation_upgrade.md#in-place-updates-of-the-instance-manager) +following an upgrade of the operator: in-place updates do not require a rolling +update - and subsequent switchover - of the cluster. + ### Upgrade of the managed workload The operand can be upgraded using a declarative configuration approach as @@ -419,6 +423,17 @@ The operator allows administrators to control and manage resource usage by the cluster's pods, through the `resources` section of the manifest. In particular `requests` and `limits` values can be set for both CPU and RAM. +### Connection pooling with PgBouncer + +Cloud Native PostgreSQL provides native support for connection pooling with +[PgBouncer](connection_pooling.md), one of the most popular open source +connection poolers for PostgreSQL. From an architectural point of view, the +native implementation of a PgBouncer connection pooler introduces a new layer +to access the database which optimizes the query flow towards the instances +and makes the usage of the underlying PostgreSQL resources more efficient. +Instead of connecting directly to a PostgreSQL service, applications can now +connect to the PgBouncer service and start reusing any existing connection. + ## Level 4 - Deep Insights Capability level 4 is about **observability**: in particular, monitoring, diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/operator_conf.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/operator_conf.mdx index 197a7842e52..ff0404ae9ba 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/operator_conf.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/operator_conf.mdx @@ -39,14 +39,13 @@ is located in the same namespace of the operator deployment and with The operator looks for the following environment variables to be defined in the `ConfigMap`/`Secret`: -| Name | Description | -| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | -| `EDB_LICENSE_KEY` | default license key (to be used only if the cluster does not define one, and preferably in the `Secret`) | -| `INHERITED_ANNOTATIONS` | list of annotation names that, when defined in a `Cluster` metadata, will be inherited by all the generated resources, including pods | -| `INHERITED_LABELS` | list of label names that, when defined in a `Cluster` metadata, will be inherited by all the generated resources, including pods | -| `PULL_SECRET_NAME` | name of an additional pull secret to be defined in the operator's namespace and to be used to download images | - -By default, the above variables are not set. 
+| Name | Description | +| ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `EDB_LICENSE_KEY` | default license key (to be used only if the cluster does not define one, and preferably in the `Secret`) | +| `INHERITED_ANNOTATIONS` | list of annotation names that, when defined in a `Cluster` metadata, will be inherited by all the generated resources, including pods | +| `INHERITED_LABELS` | list of label names that, when defined in a `Cluster` metadata, will be inherited by all the generated resources, including pods | +| `PULL_SECRET_NAME` | name of an additional pull secret to be defined in the operator's namespace and to be used to download images | +| `ENABLE_INSTANCE_MANAGER_INPLACE_UPDATES` | when set to `true`, enables in-place updates of the instance manager after an update of the operator, avoiding rolling updates of the cluster (default `false`) | Values in `INHERITED_ANNOTATIONS` and `INHERITED_LABELS` support path-like wildcards. For example, the value `example.com/*` will match both the value `example.com/one` and `example.com/two`. @@ -54,9 +53,10 @@ both the value `example.com/one` and `example.com/two`. ## Defining an operator config map The example below customizes the behavior of the operator, by defining a -default license key (namely a company key) and the label/annotation names to be +default license key (namely a company key), the label/annotation names to be inherited by the resources created by any `Cluster` object that is deployed -at a later time. +at a later time, and by enabling +[in-place updates for the instance manager](installation_upgrade.md#in-place-updates-of-the-instance-manager). ```yaml apiVersion: v1 @@ -67,6 +67,7 @@ metadata: data: INHERITED_ANNOTATIONS: categories INHERITED_LABELS: environment, workload, app + ENABLE_INSTANCE_MANAGER_INPLACE_UPDATES: 'true' ``` ## Defining an operator secret diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/postgresql_conf.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/postgresql_conf.mdx index b3bf0d68e67..d4e98bf3b01 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/postgresql_conf.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/postgresql_conf.mdx @@ -42,8 +42,22 @@ listen_addresses = '*' include custom.conf ``` -The `custom.conf` file will contain the user-defined settings. Refer to the -PostgreSQL documentation for [more information on the available parameters](https://www.postgresql.org/docs/current/runtime-config.html). +The `custom.conf` file will contain the user-defined settings in the +`postgresql` section, as in the following example: + +```yaml + # ... + postgresql: + parameters: + shared_buffers: "1GB" + # ... +``` + +!!! Seealso "PostgreSQL GUCs: Grand Unified Configuration" + Refer to the PostgreSQL documentation for + [more information on the available parameters](https://www.postgresql.org/docs/current/runtime-config.html), + also known as GUC (Grand Unified Configuration). + The content of `custom.conf` is automatically generated and maintained by the operator by applying the following sections in this order: @@ -263,9 +277,17 @@ hostssl replication streaming_replica all cert Default rules: ```text -host all all all md5 +host all all all ``` +From PostgreSQL 14 the default value of the `password_encryption` +database parameter is set to `scram-sha-256`. 
Because of that, +the default authentication method is `scram-sha-256` from this +PostgreSQL version. + +PostgreSQL 13 and older will use `md5` as the default authentication +method. + The resulting `pg_hba.conf` will look like this: ```text @@ -276,7 +298,7 @@ hostssl replication streaming_replica all cert -host all all all md5 +host all all all scram-sha-256 # (or md5 for PostgreSQL version <= 13) ``` Refer to the PostgreSQL documentation for [more information on `pg_hba.conf`](https://www.postgresql.org/docs/current/auth-pg-hba-conf.html). diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/release_notes.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/release_notes.mdx index 8945504486f..935ccb077c9 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/release_notes.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/release_notes.mdx @@ -6,6 +6,69 @@ product: 'Cloud Native Operator' History of user-visible changes for Cloud Native PostgreSQL. +## Version 1.10.0 + +**Release date:** 11 November 2021 + +Features: + +- **Connection Pooling with PgBouncer**: introduce the `Pooler` resource and + controller to automatically manage a PgBouncer deployment to be used as a + connection pooler for a local PostgreSQL `Cluster`. The feature includes TLS + client/server connections, password authentication, High Availability, pod + templates support, configuration of key PgBouncer parameters, `PAUSE`/`RESUME`, + logging in JSON format, Prometheus exporter for stats, pools, and lists +- **Backup Retention Policies**: support definition of recovery window retention + policies for backups (e.g. ‘30d’ to ensure a recovery window of 30 days) +- **In-Place updates of the operator**: introduce an in-place online update of the + instance manager, which removes the need to perform a rolling update of the + entire cluster following an update of the operator. By default this option is + disabled (please refer to the + [documentation for more detailed information](installation_upgrade.md#in-place-updates-of-the-instance-manager)) +- Limit the list of options that can be customized in the `initdb` bootstrap + method to `dataChecksums`, `encoding`, `localeCollate`, `localeCType`, + `walSegmentSize`. This makes the `options` array obsolete and planned to be + removed in the v2 API +- Introduce the `postInitTemplateSQL` option as part of the `initdb` bootstrap + method to specify a list of SQL queries to be executed on the `template1` + database as a superuser immediately after the cluster has been created. 
This + feature allows you to include default objects in all application databases + created in the cluster +- New default metrics added to the instance Prometheus exporter: Postgres + version, cluster name, and first point of recoverability according to the + backup catalog +- Retry taking a backup after a failure +- Build awareness about Barman Cloud capabilities in order to prevent the + operator from invoking recently introduced features (such as retention + policies, or Azure Blob Container storage) that are not present in operand + images that are not frequently updated +- Integrate the output of the `status` command of the `cnp` plugin with information + about the backup +- Introduce a new annotation that reports the status of a PVC (being + initialized or ready) +- Set the cluster name in the `k8s.enterprisedb.io/cluster` label for every + object generated in a `Cluster`, including `Backup` objects +- Drop support for deprecated API version + `postgresql.k8s.enterprisedb.io/v1alpha1` on the `Cluster`, `Backup`, and + `ScheduledBackup` kinds +- Set default operand image to PostgreSQL 14.1 + +Security: + +- Set allowPrivilegeEscalation to `false` for the operator containers + securityContext + +Fixes: + +- Disable primary PodDisruptionBudget during maintenance in single-instance + clusters +- Use the correct certificate certification authority (CA) during recovery + operations +- Prevent Postgres connection leaking when checking WAL archiving status before + taking a backup +- Let WAL archive/restore sleep for 100ms following transient errors that would + flood logs otherwise + ## Version 1.9.2 **Release date:** 15 October 2021 diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/rolling_update.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/rolling_update.mdx index 5a5ec050b5b..bc3763f7f6a 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/rolling_update.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/rolling_update.mdx @@ -14,11 +14,11 @@ Rolling upgrades are started when: - the user changes the `imageName` attribute of the cluster specification; -- after the operator is updated, to ensure the Pods run the latest instance - manager; +- a change in the PostgreSQL configuration requires a restart to be + applied; -- when a change in the PostgreSQL configuration requires a restart to be - applied. +- after the operator is updated, to ensure the Pods run the latest instance + manager (unless [in-place updates are enabled](installation_upgrade.md#in-place-updates-of-the-instance-manager)). The operator starts upgrading all the replicas, one Pod at a time, starting from the one with the highest serial. 
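+
+As a minimal sketch (the cluster name and image tag below are assumptions, not
+part of the original page), changing the `imageName` attribute, and therefore
+triggering a rolling upgrade, could be done with a merge patch:
+
+```shell
+# Point the cluster to a newer minor version image: the operator detects the
+# change to .spec.imageName and upgrades the replicas first, leaving the
+# primary last (according to the configured primaryUpdateStrategy).
+kubectl patch cluster cluster-example --type merge \
+  -p '{"spec": {"imageName": "quay.io/enterprisedb/postgresql:14.1"}}'
+```
+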
diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-backup-retention-30d.yaml b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-backup-retention-30d.yaml new file mode 100644 index 00000000000..ff87944bf3b --- /dev/null +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-backup-retention-30d.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: postgresql.k8s.enterprisedb.io/v1 +kind: Cluster +metadata: + name: pg-backup-retention-30d +spec: + instances: 3 + + # Example of rolling update strategy: + # - unsupervised: automated update of the primary once all + # replicas have been upgraded (default) + # - supervised: requires manual supervision to perform + # the switchover of the primary + primaryUpdateStrategy: unsupervised + + # Persistent storage configuration + storage: + storageClass: standard + size: 1Gi + + # Backup properties + backup: + barmanObjectStore: + destinationPath: s3://BUCKET_NAME/path/to/folder + s3Credentials: + accessKeyId: + name: aws-creds + key: ACCESS_KEY_ID + secretAccessKey: + name: aws-creds + key: ACCESS_SECRET_KEY + retentionPolicy: "30d" diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-full.yaml b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-full.yaml index ab3870d2930..0c7da2e1440 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-full.yaml +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-full.yaml @@ -35,7 +35,7 @@ metadata: name: cluster-example-full spec: description: "Example of cluster" - imageName: quay.io/enterprisedb/postgresql:14.0 + imageName: quay.io/enterprisedb/postgresql:14.1 # imagePullSecret is only required if the images are located in a private registry # imagePullSecrets: # - name: private_registry_access @@ -91,6 +91,7 @@ spec: encryption: AES256 immediateCheckpoint: false jobs: 2 + retentionPolicy: "30d" resources: requests: diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-initdb.yaml b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-initdb.yaml index 22a6e196b5b..a18cdbdcdd6 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-initdb.yaml +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-initdb.yaml @@ -12,8 +12,11 @@ spec: postInitSQL: - create table numbers (i integer) - insert into numbers (select generate_series(1,10000)) - options: - - "-k" - - "--locale=en_ZA.utf8" + postInitTemplateSQL: + - create extension intarray + dataChecksums: true + encoding: 'UTF8' + localeCollate: 'en_AU.UTF-8' + localeCType: 'en_AU.UTF-8' storage: size: 1Gi diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-monitoring.yaml b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-monitoring.yaml index 0844cfcfbcb..a08ce6ef9df 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-monitoring.yaml +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cluster-example-monitoring.yaml @@ -31,7 +31,10 @@ data: ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag, - pg_is_in_recovery() AS in_recovery" + pg_is_in_recovery() AS in_recovery, + EXISTS (TABLE pg_stat_wal_receiver) AS is_wal_receiver_up, + (SELECT count(*) FROM pg_stat_replication) AS streaming_replicas" + metrics: - lag: usage: "GAUGE" @@ -39,7 +42,12 @@ data: - 
in_recovery: usage: "GAUGE" description: "Whether the instance is in recovery" - + - is_wal_receiver_up: + usage: "GAUGE" + description: "Whether the instance wal_receiver is up" + - streaming_replicas: + usage: "GAUGE" + description: "Number of streaming replicas connected to the instance" pg_postmaster: query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()" diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cnp-basic-monitoring.yaml b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cnp-basic-monitoring.yaml index 7ac25848a4f..57363a178df 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cnp-basic-monitoring.yaml +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/cnp-basic-monitoring.yaml @@ -111,7 +111,9 @@ data: ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_catalog.pg_last_xact_replay_timestamp()))) END AS lag, - pg_catalog.pg_is_in_recovery() AS in_recovery" + pg_catalog.pg_is_in_recovery() AS in_recovery, + EXISTS (TABLE pg_stat_wal_receiver) AS is_wal_receiver_up, + (SELECT count(*) FROM pg_stat_replication) AS streaming_replicas" metrics: - lag: usage: "GAUGE" @@ -119,6 +121,12 @@ data: - in_recovery: usage: "GAUGE" description: "Whether the instance is in recovery" + - is_wal_receiver_up: + usage: "GAUGE" + description: "Whether the instance wal_receiver is up" + - streaming_replicas: + usage: "GAUGE" + description: "Number of streaming replicas connected to the instance" pg_replication_slots: query: "SELECT slot_name, database, active, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), restart_lsn) FROM pg_catalog.pg_replication_slots" diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-basic-auth.yaml b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-basic-auth.yaml new file mode 100644 index 00000000000..435da93d8b4 --- /dev/null +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-basic-auth.yaml @@ -0,0 +1,15 @@ +apiVersion: postgresql.k8s.enterprisedb.io/v1 +kind: Pooler +metadata: + name: pooler-example-rw +spec: + cluster: + name: cluster-example + + instances: 1 + type: rw + pgbouncer: + poolMode: session + authQuerySecret: + name: cluster-example-superuser + authQuery: SELECT usename, passwd FROM pg_shadow WHERE usename=$1 diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-tls.yaml b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-tls.yaml new file mode 100644 index 00000000000..b878c00e977 --- /dev/null +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/samples/pooler-tls.yaml @@ -0,0 +1,12 @@ +apiVersion: postgresql.k8s.enterprisedb.io/v1 +kind: Pooler +metadata: + name: pooler-example-rw +spec: + cluster: + name: cluster-example + + instances: 1 + type: rw + pgbouncer: + poolMode: session diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/scheduling.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/scheduling.mdx index 4d33d1a01b3..7d658b920a7 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/scheduling.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/scheduling.mdx @@ -62,7 +62,7 @@ metadata: name: cluster-example spec: instances: 3 - imageName: quay.io/enterprisedb/postgresql:14.0 + imageName: quay.io/enterprisedb/postgresql:14.1 affinity: enablePodAntiAffinity: true #default value diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/security.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/security.mdx 
index f0fac197f55..3b7a604ef77 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/security.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/security.mdx @@ -140,6 +140,15 @@ levels, as listed in the table below: The current implementation of Cloud Native PostgreSQL automatically creates passwords and `.pgpass` files for the `postgres` superuser and the database owner. +As far as encryption of password is concerned, Cloud Native PostgreSQL follows +the default behavior of PostgreSQL: starting from PostgreSQL 14, +`password_encryption` is by default set to `scram-sha-256`, while on earlier +versions it is set to `md5`. + +!!! Important + Please refer to the ["Password authentication"](https://www.postgresql.org/docs/current/auth-password.html) + section in the PostgreSQL documentation for details. + You can disable management of the `postgres` user password via secrets by setting `enableSuperuserAccess` to `false`. @@ -157,7 +166,7 @@ By default, every replica is automatically configured to connect in **physical async streaming replication** with the current primary instance, with a special user called `streaming_replica`. The connection between nodes is **encrypted** and authentication is via **TLS client certificates** (please refer to the -["Client TLS/SSL Connections"]\(ssl_connections.md#Client TLS/SSL Connections) page +["Client TLS/SSL Connections"]\(ssl_connections.md#"Client TLS/SSL Connections") page for details). Currently, the operator allows administrators to add `pg_hba.conf` lines directly in the manifest diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/ssl_connections.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/ssl_connections.mdx index b8b2c25f4fe..582752b97c4 100644 --- a/advocacy_docs/kubernetes/cloud_native_postgresql/ssl_connections.mdx +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/ssl_connections.mdx @@ -167,7 +167,7 @@ Output : version -------------------------------------------------------------------------------------- ------------------ -PostgreSQL 14.0 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.3.1 20191121 (Red Hat +PostgreSQL 14.1 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5), 64-bit (1 row) ``` \ No newline at end of file diff --git a/advocacy_docs/kubernetes/cloud_native_postgresql/troubleshooting.mdx b/advocacy_docs/kubernetes/cloud_native_postgresql/troubleshooting.mdx new file mode 100644 index 00000000000..670aabfa261 --- /dev/null +++ b/advocacy_docs/kubernetes/cloud_native_postgresql/troubleshooting.mdx @@ -0,0 +1,282 @@ +--- +title: 'Troubleshooting' +originalFilePath: 'src/troubleshooting.md' +product: 'Cloud Native Operator' +--- + +In this page, you can find some basic information on how to troubleshoot Cloud +Native PostgreSQL in your Kubernetes cluster deployment. + +!!! Hint + As a Kubernetes administrator, you should have the + [`kubectl` Cheat Sheet](https://kubernetes.io/docs/reference/kubectl/cheatsheet/) page + bookmarked! + +## Before you start + +### Kubernetes environment + +What can make a difference in a troubleshooting activity is to provide +clear information about the underlying Kubernetes system. + +Make sure you know: + +- the Kubernetes distribution and version you are using +- the specifications of the nodes where PostgreSQL is running +- as much as you can about the actual [storage](storage.md), including storage + class and benchmarks you have done before going into production. 
+- which relevant Kubernetes applications you are using in your cluster (i.e. + Prometheus, Grafana, Istio, Certmanager, ...) + +### Useful utilities + +On top of the mandatory `kubectl` utility, for troubleshooting, we recommend the +following plugins/utilities to be available in your system: + +- [`cnp` plugin](cnp-plugin.md) for `kubectl` +- [`jq`](https://stedolan.github.io/jq/), a lightweight and flexible command-line JSON processor + +### Logs + +Every resource created and controlled by Cloud Native PostgreSQL logs to +standard output, as expected by Kubernetes, and directly in [JSON +format](logging.md). As a result, you should rely on the `kubectl logs` +command to retrieve logs from a given resource. + +For more information, type: + +```shell +kubectl logs --help +``` + +!!! Hint + JSON logs are great for machine reading, but hard to read for human beings. + Our recommendation is to use the `jq` command to improve usability. For + example, you can *pipe* the `kubectl logs` command with `| jq -C`. + +!!! Note + In the sections below, we will show some examples on how to retrieve logs + about different resources when it comes to troubleshooting Cloud Native + PostgreSQL. + +## Operator information + +By default, the Cloud Native PostgreSQL operator is installed in the +`postgresql-operator-system` namespace in Kubernetes as a `Deployment` +(see the ["Details about the deployment" section](installation_upgrade.md#details-about-the-deployment) +for details). + +You can get a list of the operator pods by running: + +```shell +kubectl get pods -n postgresql-operator-system +``` + +!!! Note + Under normal circumstances, you should have one pod where the operator is + running, identified by a name starting with `postgresql-operator-controller-manager-`. + In case you have set up your operator for high availability, you should have more entries. + Those pods are managed by a deployment named `postgresql-operator-controller-manager`. + +Collect the relevant information about the operator that is running in pod +`` with: + +```shell +kubectl describe pod -n postgresql-operator-system +``` + +Then get the logs from the same pod by running: + +```shell +kubectl get logs -n postgresql-operator-system +``` + +## Cluster information + +You can check the status of the `` cluster in the `NAMESPACE` +namespace with: + +```shell +kubectl get cluster -n +``` + +Output: + +```console +NAME AGE INSTANCES READY STATUS PRIMARY + 10d4h3m 3 3 Cluster in healthy state -1 +``` + +The above example reports a healthy PostgreSQL cluster of 3 instances, all in +*ready* state, and with `-1` being the primary. + +In case of unhealthy conditions, you can discover more by getting the manifest +of the `Cluster` resource: + +```shell +kubectl get cluster -o yaml -n +``` + +Another important command to gather is the `status` one, as provided by the +`cnp` plugin: + +```shell +kubectl cnp status -n +``` + +!!! Tip + You can print more information by adding the `--verbose` option. 
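+
+As a convenience, and purely as a sketch reusing the `jq` utility recommended
+above (the cluster and namespace names are placeholders), you can narrow the
+output down to the status stanza maintained by the operator:
+
+```shell
+# Print only the status reported by the operator for a given Cluster
+kubectl get cluster cluster-example -n default -o json | jq '.status'
+```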
+ +## Pod information + +You can retrieve the list of instances that belong to a given PostgreSQL +cluster with: + +```shell +# using labels available from CNP 1.10.0 +kubectl get pod -l k8s.enterprisedb.io/cluster= -L role -n +# using legacy labels +kubectl get pod -l postgresql= -L role -n +``` + +Output: + +```console +NAME READY STATUS RESTARTS AGE ROLE +-1 1/1 Running 0 10d4h5m primary +-2 1/1 Running 0 10d4h4m replica +-3 1/1 Running 0 10d4h4m replica +``` + +You can check if/how a pod is failing by running: + +```shell +kubectl get pod -n -o yaml - +``` + +You can get all the logs for a given PostgreSQL instance with: + +```shell +kubectl logs -n - +``` + +If you want to limit the search to the PostgreSQL process only, you can run: + +```shell +kubectl logs -n - \ + | jq 'select(.logger=="postgres") | .record.message' +``` + +The following example also adds the timestamp in a user-friendly format: + +```shell +kubectl logs -n - \ + | jq -r 'select(.logger=="postgres") | [(.ts|strflocaltime("%Y-%m-%dT%H:%M:%S %Z")), .record.message] | @csv' +``` + +## Backup information + +You can list the backups that have been created for a named cluster with: + +```shell +kubectl get backup -l k8s.enterprisedb.io/cluster= +``` + +!!! Important + Backup labelling has been introduced in version 1.10.0 of Cloud Native + PostgreSQL. So only those resources that have been created with that version or + a higher one will contain such a label. + +## Storage information + +Sometimes it might be useful to gather more information about the underlying +storage class used in the cluster. You can execute the following operation on +any of the pods that are part of the PostgreSQL cluster: + +```shell +STORAGECLASS=$(kubectl get pvc -o jsonpath='{.spec.storageClassName}') +kubectl get storageclasses $STORAGECLASS -o yaml +``` + +## Node information + +Kubernetes nodes is where ultimately PostgreSQL pods will be running. It's +strategically important to know as much as we can about them. + +You can get the list of nodes in your Kubernetes cluster with: + +```shell +# look at the worker nodes and their status +kubectl get nodes -o wide +``` + +Additionally, you can gather the list of nodes where the pods of a given +cluster are running with: + +```shell +kubectl get pod -l k8s.enterprisedb.io/clusterName= \ + -L role -n -o wide +``` + +The latter is important to understand where your pods are distributed - very +useful if you are using [affinity/anti-affinity rules and/or tolerations](scheduling.md). + +## Some common issues + +### Storage is full + +If one or more pods in the cluster are in `CrashloopBackoff` and logs +suggest this could be due to a full disk, you probably have to increase the +size of the instance's `PersistentVolumeClaim`. Please look at the +["Volume expansion" section](storage.md#volume-expansion) in the documentation. 
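+
+As an example, and only as a sketch (the cluster name and target size are
+placeholders, and the storage class must support volume expansion), the
+requested size can be increased directly in the `Cluster` definition:
+
+```shell
+# Request a larger volume for every instance of the cluster; this only has an
+# effect if the underlying storage class allows volume expansion.
+kubectl patch cluster cluster-example --type merge \
+  -p '{"spec": {"storage": {"size": "2Gi"}}}'
+```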
+ +### Pods are stuck in `Pending` state + +In case a Cluster's instance is stuck in the `Pending` phase, you should check +the pod's `Events` section to get an idea of the reasons behind this: + +```shell +kubectl describe pod -n +``` + +Some of the possible causes for this are: + +- No nodes are matching the `nodeSelector` +- Tolerations are not correctly configured to match the nodes' taints +- No nodes are available at all: this could also be related to + `cluster-autoscaler` hitting some limits, or having some temporary issues + +In this case, it could also be useful to check events in the namespace: + +```shell +kubectl get events -n +# list events in chronological order +kubectl get events -n --sort-by=.metadata.creationTimestamp +``` + +### Replicas out of sync when no backup is configured + +Sometimes replicas might be switched off for a bit of time due to maintenance +reasons (think of when a Kubernetes nodes is drained). In case your cluster +does not have backup configured, when replicas come back up, they might +require a WAL file that is not present anymore on the primary (having been +already recycled according to the WAL management policies as mentioned in +["The `postgresql` section"](postgresql_conf.md#the-postgresql-section)), and +fall out of synchronization. + +Similarly, when `pg_rewind` might require a WAL file that is not present +anymore in the former primary, reporting `pg_rewind: error: could not open file`. + +In these cases, pods cannot become ready anymore and you are required to delete +the PVC and let the operator rebuild the replica. + +If you rely on dynamically provisioned Persistent Volumes, and you are confident +in deleting the PV itself, you can do so with: + +```shell +PODNAME= +VOLNAME=$(kubectl get pv -o json | \ + jq -r '.items[]|select(.spec.claimRef.name=='\"$PODNAME\"')|.metadata.name') + +kubectl delete pod/$PODNAME pvc/$PODNAME pv/$VOLNAME +``` \ No newline at end of file diff --git a/scripts/fileProcessor/package-lock.json b/scripts/fileProcessor/package-lock.json index 900b83755ce..e13ba9f75d5 100644 --- a/scripts/fileProcessor/package-lock.json +++ b/scripts/fileProcessor/package-lock.json @@ -2353,7 +2353,7 @@ }, "remark-admonitions": { "version": "git+ssh://git@github.com/josh-heyer/remark-admonitions.git#f1d595d63815b891dc3a348afd56894942c74ae3", - "from": "remark-admonitions@https://github.com/josh-heyer/remark-admonitions", + "from": "remark-admonitions@github:josh-heyer/remark-admonitions", "requires": { "rehype-parse": "^6.0.2 || ^7.0.1", "unified": "^8.4.2 || ^9.2.0",