From b46f6343df768eeed8b61db052245b3deeb01a8e Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 4 Nov 2024 10:19:28 -0600 Subject: [PATCH 1/3] Explicitly enable "hot_standby" during restore This parameter has been enabled by default since Postgres 10 and is unlikely to change, but we want the behavior, so we should set it. --- internal/pgbackrest/config.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index f50b2690ee..9a889d2cd8 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -174,7 +174,7 @@ func MakePGBackrestLogDir(template *corev1.PodTemplateSpec, func RestoreCommand(pgdata, hugePagesSetting, fetchKeyCommand string, tablespaceVolumes []*corev1.PersistentVolumeClaim, args ...string) []string { // After pgBackRest restores files, PostgreSQL starts in recovery to finish - // replaying WAL files. "hot_standby" is "on" (by default) so we can detect + // replaying WAL files. "hot_standby" is "on" so we can detect // when recovery has finished. In that mode, some parameters cannot be // smaller than they were when PostgreSQL was backed up. Configure them to // match the values reported by "pg_controldata". Those parameters are also @@ -233,6 +233,7 @@ cat > /tmp/postgres.restore.conf < Date: Fri, 8 Nov 2024 16:17:47 -0600 Subject: [PATCH 2/3] Consider pg_ctl successful when progress is made There is a race when using pg_ctl start --wait: - pg_ctl starts Postgres - Postgres begins recovery, detects a parameter requires restart, and exits - pg_ctl reports that Postgres did not start Now we look at the LSN reported by pg_controldata to determine if Postgres made any progress during a "failed" start. Issue: PGO-1945 --- internal/pgbackrest/config.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index 9a889d2cd8..6be5622e65 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -248,7 +248,10 @@ read -r max_wals <<< "${control##*max_wal_senders setting:}" echo >> /tmp/postgres.restore.conf "max_wal_senders = '${max_wals}'" fi -pg_ctl start --silent --timeout=31536000 --wait --options='--config-file=/tmp/postgres.restore.conf' +read -r stopped <<< "${control##*recovery ending location:}" +pg_ctl start --silent --timeout=31536000 --wait --options='--config-file=/tmp/postgres.restore.conf' || failed=$? +[[ "${started-}" == "${stopped}" && -n "${failed-}" ]] && exit "${failed}" +started="${stopped}" && [[ -n "${failed-}" ]] && failed= && continue fi recovery=$(psql -Atc "SELECT CASE From 589f0e227d41c78929d67d150d2bd87ee8e16d30 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 4 Nov 2024 10:06:43 -0600 Subject: [PATCH 3/3] Tidy the pgBackRest restore command using slices Having these lines broken into string slices allows for Go comments that explain them without presenting those comments in YAML at runtime. This also: - Uses the postgres.ParameterSet type to accumulate Postgres settings. A new String method renders those values safely for use in postgresql.conf. - Disables localization using LC_ALL=C in calls to pg_controldata before we parse its output. - Removes commands to change permissions on tablespace directories; pgBackRest handles this for us now. - Passes command line parameters to Postgres using "-c" rather than "--" long flags. Both work on Linux, but the former works on all systems. - Explains why we need a large timeout for "pg_ctl --wait" and configures it once using the PGCTLTIMEOUT environment variable. --- internal/pgbackrest/config.go | 198 +++++++++++++++------------ internal/postgres/parameters.go | 20 +++ internal/postgres/parameters_test.go | 4 + 3 files changed, 134 insertions(+), 88 deletions(-) diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index 6be5622e65..7443eaf440 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -9,6 +9,7 @@ import ( "fmt" "strconv" "strings" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -171,100 +172,121 @@ func MakePGBackrestLogDir(template *corev1.PodTemplateSpec, // - Renames the data directory as needed to bootstrap the cluster using the restored database. // This ensures compatibility with the "existing" bootstrap method that is included in the // Patroni config when bootstrapping a cluster using an existing data directory. -func RestoreCommand(pgdata, hugePagesSetting, fetchKeyCommand string, tablespaceVolumes []*corev1.PersistentVolumeClaim, args ...string) []string { - - // After pgBackRest restores files, PostgreSQL starts in recovery to finish - // replaying WAL files. "hot_standby" is "on" so we can detect - // when recovery has finished. In that mode, some parameters cannot be - // smaller than they were when PostgreSQL was backed up. Configure them to - // match the values reported by "pg_controldata". Those parameters are also - // written to WAL files and may change during recovery. When they increase, - // PostgreSQL exits and we reconfigure and restart it. - // For PG14, when some parameters from WAL require a restart, the behavior is - // to pause unless a restart is requested. For this edge case, we run a CASE - // query to check - // (a) if the instance is in recovery; - // (b) if so, if the WAL replay is paused; - // (c) if so, to unpause WAL replay, allowing our expected behavior to resume. - // A note on the PostgreSQL code: we cast `pg_catalog.pg_wal_replay_resume()` as text - // because that method returns a void (which is a non-NULL but empty result). When - // that void is cast as a string, it is an '' - // - https://www.postgresql.org/docs/current/hot-standby.html - // - https://www.postgresql.org/docs/current/app-pgcontroldata.html +func RestoreCommand(pgdata, hugePagesSetting, fetchKeyCommand string, _ []*corev1.PersistentVolumeClaim, args ...string) []string { + ps := postgres.NewParameterSet() + ps.Add("data_directory", pgdata) + ps.Add("huge_pages", hugePagesSetting) - // The postmaster.pid file is removed, if it exists, before attempting a restore. - // This allows the restore to be tried more than once without the causing an - // error due to the presence of the file in subsequent attempts. + // Keep history and WAL files until the cluster starts with its normal + // archiving enabled. + ps.Add("archive_command", "false -- store WAL files locally for now") + ps.Add("archive_mode", "on") - // The 'pg_ctl' timeout is set to a very large value (1 year) to ensure there - // are no timeouts when starting or stopping Postgres. - - tablespaceCmd := "" - for _, tablespaceVolume := range tablespaceVolumes { - tablespaceCmd = tablespaceCmd + fmt.Sprintf( - "\ninstall --directory --mode=0700 '/tablespaces/%s/data'", - tablespaceVolume.Labels[naming.LabelData]) - } + // Enable "hot_standby" so we can connect to Postgres and observe its + // progress during recovery. + ps.Add("hot_standby", "on") - // If the fetch key command is not empty, save the GUC variable and value - // to a new string. - var ekc string if fetchKeyCommand != "" { - ekc = ` -encryption_key_command = '` + fetchKeyCommand + `'` + ps.Add("encryption_key_command", fetchKeyCommand) } - restoreScript := `declare -r pgdata="$1" opts="$2" -install --directory --mode=0700 "${pgdata}"` + tablespaceCmd + ` -rm -f "${pgdata}/postmaster.pid" -bash -xc "pgbackrest restore ${opts}" -rm -f "${pgdata}/patroni.dynamic.json" -export PGDATA="${pgdata}" PGHOST='/tmp' - -until [[ "${recovery=}" == 'f' ]]; do -if [[ -z "${recovery}" ]]; then -control=$(pg_controldata) -read -r max_conn <<< "${control##*max_connections setting:}" -read -r max_lock <<< "${control##*max_locks_per_xact setting:}" -read -r max_ptxn <<< "${control##*max_prepared_xacts setting:}" -read -r max_work <<< "${control##*max_worker_processes setting:}" -echo > /tmp/pg_hba.restore.conf 'local all "postgres" peer' -cat > /tmp/postgres.restore.conf <> /tmp/postgres.restore.conf "max_wal_senders = '${max_wals}'" -fi - -read -r stopped <<< "${control##*recovery ending location:}" -pg_ctl start --silent --timeout=31536000 --wait --options='--config-file=/tmp/postgres.restore.conf' || failed=$? -[[ "${started-}" == "${stopped}" && -n "${failed-}" ]] && exit "${failed}" -started="${stopped}" && [[ -n "${failed-}" ]] && failed= && continue -fi - -recovery=$(psql -Atc "SELECT CASE - WHEN NOT pg_catalog.pg_is_in_recovery() THEN false - WHEN NOT pg_catalog.pg_is_wal_replay_paused() THEN true - ELSE pg_catalog.pg_wal_replay_resume()::text = '' -END recovery" && sleep 1) ||: -done - -pg_ctl stop --silent --wait --timeout=31536000 -mv "${pgdata}" "${pgdata}_bootstrap"` - - return append([]string{"bash", "-ceu", "--", restoreScript, "-", pgdata}, args...) + configure := strings.Join([]string{ + // With "hot_standby" on, some parameters cannot be smaller than they were + // when Postgres was backed up. Configure these to match values reported by + // "pg_controldata" before starting Postgres. These parameters are also + // written to WAL files and may change during recovery. When they increase, + // Postgres exits and we reconfigure it here. + // - https://www.postgresql.org/docs/current/app-pgcontroldata.html + `control=$(LC_ALL=C pg_controldata)`, + `read -r max_conn <<< "${control##*max_connections setting:}"`, + `read -r max_lock <<< "${control##*max_locks_per_xact setting:}"`, + `read -r max_ptxn <<< "${control##*max_prepared_xacts setting:}"`, + `read -r max_work <<< "${control##*max_worker_processes setting:}"`, + + // During recovery, only allow connections over the the domain socket. + `echo > /tmp/pg_hba.restore.conf 'local all "postgres" peer'`, + + // Combine parameters from Go with those detected in Bash. + `cat > /tmp/postgres.restore.conf <<'EOF'`, ps.String(), `EOF`, + `cat >> /tmp/postgres.restore.conf <> /tmp/postgres.restore.conf "max_wal_senders = '${max_wals}'"`, + `fi`, + + // TODO(sockets): PostgreSQL v14 is able to connect over abstract sockets in the network namespace. + `PGHOST=$([[ "${version}" -ge 14 ]] && echo '/tmp' || echo '/tmp')`, + `echo >> /tmp/postgres.restore.conf "unix_socket_directories = '${PGHOST}'"`, + }, "\n") + + script := strings.Join([]string{ + `declare -r PGDATA="$1" opts="$2"; export PGDATA PGHOST`, + + // Remove any "postmaster.pid" file leftover from a prior failure. + `rm -f "${PGDATA}/postmaster.pid"`, + + // Run the restore and print its arguments. + `bash -xc "pgbackrest restore ${opts}"`, + + // Ignore any Patroni settings present in the backup. + `rm -f "${PGDATA}/patroni.dynamic.json"`, + + // By default, pg_ctl waits 60 seconds for Postgres to stop or start. + // We want to be certain when Postgres is running or not, so we use + // a very large timeout (365 days) to effectively wait forever. With + // this, the result of "pg_ctl --wait" indicates the state of Postgres. + // - https://www.postgresql.org/docs/current/app-pg-ctl.html + fmt.Sprintf(`export PGCTLTIMEOUT=%d`, 365*24*time.Hour/time.Second), + + // Configure and start Postgres until we can see that it has finished + // replaying WAL. + // + // PostgreSQL v13 and earlier exit when they need reconfiguration with + // "hot_standby" on. This can cause pg_ctl to fail, so we compare the + // LSN from before and after calling it. If the LSN changed, Postgres + // ran and was able to replay WAL before exiting. In that case, configure + // Postgres and start it again to see if it can make more progress. + // + // If Postgres exits after pg_ctl succeeds, psql returns nothing which + // resets the "recovering" variable. Configure Postgres and start it again. + `until [[ "${recovering=}" == 'f' ]]; do`, + ` if [[ -z "${recovering}" ]]; then`, configure, + ` read -r stopped <<< "${control##*recovery ending location:}"`, + ` pg_ctl start --silent --wait --options='-c config_file=/tmp/postgres.restore.conf' || failed=$?`, + ` [[ "${started-}" == "${stopped}" && -n "${failed-}" ]] && exit "${failed}"`, + ` started="${stopped}" && [[ -n "${failed-}" ]] && failed= && continue`, + ` fi`, + // Ask Postgres if it is still recovering. PostgreSQL v14 pauses when it + // needs reconfiguration with "hot_standby" on, and resuming replay causes + // it to exit like prior versions. + // - https://www.postgresql.org/docs/current/hot-standby.html + // + // NOTE: "pg_wal_replay_resume()" returns void which cannot be compared to + // null. Instead, cast it to text and compare that for a boolean result. + ` recovering=$(psql -Atc "SELECT CASE`, + ` WHEN NOT pg_catalog.pg_is_in_recovery() THEN false`, + ` WHEN NOT pg_catalog.pg_is_wal_replay_paused() THEN true`, + ` ELSE pg_catalog.pg_wal_replay_resume()::text = ''`, + ` END" && sleep 1) ||:`, + `done`, + + // Replay is done. Stop Postgres gracefully and move the data directory + // into position for our Patroni bootstrap method. + `pg_ctl stop --silent --wait`, + `mv "${PGDATA}" "${PGDATA}_bootstrap"`, + }, "\n") + + return append([]string{"bash", "-ceu", "--", script, "-", pgdata}, args...) } // DedicatedSnapshotVolumeRestoreCommand returns the command for performing a pgBackRest delta restore diff --git a/internal/postgres/parameters.go b/internal/postgres/parameters.go index 3ec837c27d..bbb80b0ac1 100644 --- a/internal/postgres/parameters.go +++ b/internal/postgres/parameters.go @@ -5,6 +5,8 @@ package postgres import ( + "fmt" + "slices" "strings" ) @@ -124,3 +126,21 @@ func (ps *ParameterSet) Value(name string) string { value, _ := ps.Get(name) return value } + +func (ps *ParameterSet) String() string { + keys := make([]string, 0, len(ps.values)) + for k := range ps.values { + keys = append(keys, k) + } + + slices.Sort(keys) + + var b strings.Builder + for _, k := range keys { + _, _ = fmt.Fprintf(&b, "%s = '%s'\n", k, escapeParameterQuotes(ps.values[k])) + } + return b.String() +} + +// escapeParameterQuotes is used by [ParameterSet.String]. +var escapeParameterQuotes = strings.NewReplacer(`'`, `''`).Replace diff --git a/internal/postgres/parameters_test.go b/internal/postgres/parameters_test.go index c6228d7958..0720d8b42a 100644 --- a/internal/postgres/parameters_test.go +++ b/internal/postgres/parameters_test.go @@ -56,6 +56,10 @@ func TestParameterSet(t *testing.T) { ps2.Add("x", "n") assert.Assert(t, ps2.Value("x") != ps.Value("x")) + + assert.DeepEqual(t, ps.String(), ``+ + `abc = 'j''l'`+"\n"+ + `x = 'z'`+"\n") } func TestParameterSetAppendToList(t *testing.T) {