From 1e077af0e1c0743be35fcc62590905c6deedd4cf Mon Sep 17 00:00:00 2001 From: "Razinkin.Denis" Date: Mon, 25 May 2020 20:03:11 +0300 Subject: [PATCH 1/7] Fix deadlock while waiting for apply worker to stop. --- pglogical_functions.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pglogical_functions.c b/pglogical_functions.c index a078039..06a9fed 100644 --- a/pglogical_functions.c +++ b/pglogical_functions.c @@ -52,6 +52,7 @@ #include "storage/ipc.h" #include "storage/latch.h" +#include "storage/lmgr.h" #include "storage/proc.h" #include "tcop/tcopprot.h" @@ -600,6 +601,23 @@ pglogical_drop_subscription(PG_FUNCTION_ARGS) LWLockRelease(PGLogicalCtx->lock); break; } + + if (apply->proc->waitLock) + { + const LOCKTAG apply_lock_tag = apply->proc->waitLock->tag; + if (((LockTagType) apply_lock_tag.locktag_type == LOCKTAG_TRANSACTION) && + (apply_lock_tag.locktag_field1 == (uint32)GetCurrentTransactionId())) + { + StringInfoData buf; + initStringInfo(&buf); + DescribeLockTag(&buf, &apply_lock_tag); + elog( WARNING, "Apply worker [%d] is locked by %s of [%d], try to kill it", apply->proc->pid, buf.data, MyProc->pid ); + + /* cancel transaction */ + kill(apply->proc->pid, SIGINT); + } + } + LWLockRelease(PGLogicalCtx->lock); CHECK_FOR_INTERRUPTS(); From 1b0a0ac2ab8df55db84e991c053ce75892205ce7 Mon Sep 17 00:00:00 2001 From: "Razinkin.Denis" Date: Mon, 25 May 2020 20:13:42 +0300 Subject: [PATCH 2/7] Codestyle: fix indentation --- pglogical_functions.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pglogical_functions.c b/pglogical_functions.c index 06a9fed..dc79eed 100644 --- a/pglogical_functions.c +++ b/pglogical_functions.c @@ -608,9 +608,9 @@ pglogical_drop_subscription(PG_FUNCTION_ARGS) if (((LockTagType) apply_lock_tag.locktag_type == LOCKTAG_TRANSACTION) && (apply_lock_tag.locktag_field1 == (uint32)GetCurrentTransactionId())) { - StringInfoData buf; - initStringInfo(&buf); - DescribeLockTag(&buf, &apply_lock_tag); + StringInfoData buf; + initStringInfo(&buf); + DescribeLockTag(&buf, &apply_lock_tag); elog( WARNING, "Apply worker [%d] is locked by %s of [%d], try to kill it", apply->proc->pid, buf.data, MyProc->pid ); /* cancel transaction */ From 3efe43c8e5dc214e9b2685ef37e06fd513f08f29 Mon Sep 17 00:00:00 2001 From: Petr Jelinek Date: Tue, 2 Apr 2019 16:17:09 +0200 Subject: [PATCH 3/7] Couple of small changes to make Coverity happier --- pglogical_create_subscriber.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pglogical_create_subscriber.c b/pglogical_create_subscriber.c index 8825816..e74000f 100644 --- a/pglogical_create_subscriber.c +++ b/pglogical_create_subscriber.c @@ -277,6 +277,7 @@ main(int argc, char **argv) break; case 8: apply_delay = atoi(optarg); + break; case 9: databases = pg_strdup(optarg); break; @@ -1216,8 +1217,7 @@ get_connstr_dbname(char *connstr) } } - if (conn_opts) - PQconninfoFree(conn_opts); + PQconninfoFree(conn_opts); return ret; } From e57b86ba28ae2e83d34d54dd0a6fafdb78844b0b Mon Sep 17 00:00:00 2001 From: Petr Jelinek Date: Thu, 7 May 2020 18:15:14 +0200 Subject: [PATCH 4/7] Quick hack to compile with recent export of AcquireDeletionLock And ReleaseDeletionLock, which we implement statically. Later we'll want to move them to compat, or better, rethink the pglogical_dependency module completely.
--- pglogical_dependency.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pglogical_dependency.c b/pglogical_dependency.c index f21a470..bb84d44 100644 --- a/pglogical_dependency.c +++ b/pglogical_dependency.c @@ -232,8 +232,8 @@ static void reportDependentObjects(const ObjectAddresses *targetObjects, DropBehavior behavior, int msglevel, const ObjectAddress *origObject); -static void AcquireDeletionLock(const ObjectAddress *object, int flags); -static void ReleaseDeletionLock(const ObjectAddress *object); +static void PGLAcquireDeletionLock(const ObjectAddress *object, int flags); +static void PGLReleaseDeletionLock(const ObjectAddress *object); static bool find_expr_references_walker(Node *node, find_expr_references_context *context); static void eliminate_duplicate_dependencies(ObjectAddresses *addrs); @@ -497,7 +497,7 @@ findDependentObjects(const ObjectAddress *object, { systable_endscan(scan); /* need to release caller's lock; see notes below */ - ReleaseDeletionLock(object); + PGLReleaseDeletionLock(object); return; } @@ -561,8 +561,8 @@ findDependentObjects(const ObjectAddress *object, * caller's lock to avoid deadlock against a concurrent * deletion of the owning object.) */ - ReleaseDeletionLock(object); - AcquireDeletionLock(&otherObject, 0); + PGLReleaseDeletionLock(object); + PGLAcquireDeletionLock(&otherObject, 0); /* * The owning object might have been deleted while we waited @@ -573,7 +573,7 @@ findDependentObjects(const ObjectAddress *object, if (!systable_recheck_tuple(scan, tup)) { systable_endscan(scan); - ReleaseDeletionLock(&otherObject); + PGLReleaseDeletionLock(&otherObject); return; } @@ -658,7 +658,7 @@ findDependentObjects(const ObjectAddress *object, /* * Must lock the dependent object before recursing to it. */ - AcquireDeletionLock(&otherObject, 0); + PGLAcquireDeletionLock(&otherObject, 0); /* * The dependent object might have been deleted while we waited to @@ -670,7 +670,7 @@ findDependentObjects(const ObjectAddress *object, if (!systable_recheck_tuple(scan, tup)) { /* release the now-useless lock */ - ReleaseDeletionLock(&otherObject); + PGLReleaseDeletionLock(&otherObject); /* and continue scanning for dependencies */ continue; } @@ -929,14 +929,14 @@ reportDependentObjects(const ObjectAddresses *targetObjects, } /* - * AcquireDeletionLock - acquire a suitable lock for deleting an object + * PGLAcquireDeletionLock - acquire a suitable lock for deleting an object * * We use LockRelation for relations, LockDatabaseObject for everything * else. Note that dependency.c is not concerned with deleting any kind of * shared-across-databases object, so we have no need for LockSharedObject. 
*/ static void -AcquireDeletionLock(const ObjectAddress *object, int flags) +PGLAcquireDeletionLock(const ObjectAddress *object, int flags) { if (object->classId == RelationRelationId) { @@ -960,10 +960,10 @@ AcquireDeletionLock(const ObjectAddress *object, int flags) } /* - * ReleaseDeletionLock - release an object deletion lock + * PGLReleaseDeletionLock - release an object deletion lock */ static void -ReleaseDeletionLock(const ObjectAddress *object) +PGLReleaseDeletionLock(const ObjectAddress *object) { if (object->classId == RelationRelationId) UnlockRelationOid(object->objectId, AccessExclusiveLock); From 81e4c80bf4e256e22a8b1534871679f13acbe9d5 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 25 May 2020 15:40:50 +0200 Subject: [PATCH 5/7] Bump version to 2.3.2 --- Makefile | 4 +- expected/init.out | 2 +- pglogical--2.3.1--2.3.2.sql | 0 pglogical--2.3.2.sql | 249 ++++++++++++++++++++++++++++++++++++ pglogical.h | 4 +- 5 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 pglogical--2.3.1--2.3.2.sql create mode 100644 pglogical--2.3.2.sql diff --git a/Makefile b/Makefile index 3908896..f90f08d 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,9 @@ DATA = pglogical--1.0.0.sql pglogical--1.0.0--1.0.1.sql \ pglogical--2.2.2--2.3.1.sql \ pglogical--2.3.0.sql \ pglogical--2.3.0--2.3.1.sql \ - pglogical--2.3.1.sql + pglogical--2.3.1.sql \ + pglogical--2.3.1--2.3.2.sql \ + pglogical--2.3.2.sql OBJS = pglogical_apply.o pglogical_conflict.o pglogical_manager.o \ pglogical.o pglogical_node.o pglogical_relcache.o \ diff --git a/expected/init.out b/expected/init.out index 871fde9..ef89251 100644 --- a/expected/init.out +++ b/expected/init.out @@ -58,7 +58,7 @@ ALTER EXTENSION pglogical UPDATE; List of installed extensions Name | Version | Schema | Description -----------+---------+-----------+-------------------------------- - pglogical | 2.3.1 | pglogical | PostgreSQL Logical Replication + pglogical | 2.3.2 | pglogical | PostgreSQL Logical Replication (1 row) SELECT * FROM pglogical.create_node(node_name := 'test_provider', dsn := (SELECT provider_dsn FROM pglogical_regress_variables()) || ' user=super'); diff --git a/pglogical--2.3.1--2.3.2.sql b/pglogical--2.3.1--2.3.2.sql new file mode 100644 index 0000000..e69de29 diff --git a/pglogical--2.3.2.sql b/pglogical--2.3.2.sql new file mode 100644 index 0000000..692107b --- /dev/null +++ b/pglogical--2.3.2.sql @@ -0,0 +1,249 @@ +\echo Use "CREATE EXTENSION pglogical" to load this file. 
\quit + +CREATE TABLE pglogical.node ( + node_id oid NOT NULL PRIMARY KEY, + node_name name NOT NULL UNIQUE +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.node_interface ( + if_id oid NOT NULL PRIMARY KEY, + if_name name NOT NULL, -- default same as node name + if_nodeid oid REFERENCES node(node_id), + if_dsn text NOT NULL, + UNIQUE (if_nodeid, if_name) +); + +CREATE TABLE pglogical.local_node ( + node_id oid PRIMARY KEY REFERENCES node(node_id), + node_local_interface oid NOT NULL REFERENCES node_interface(if_id) +); + +CREATE TABLE pglogical.subscription ( + sub_id oid NOT NULL PRIMARY KEY, + sub_name name NOT NULL UNIQUE, + sub_origin oid NOT NULL REFERENCES node(node_id), + sub_target oid NOT NULL REFERENCES node(node_id), + sub_origin_if oid NOT NULL REFERENCES node_interface(if_id), + sub_target_if oid NOT NULL REFERENCES node_interface(if_id), + sub_enabled boolean NOT NULL DEFAULT true, + sub_slot_name name NOT NULL, + sub_replication_sets text[], + sub_forward_origins text[], + sub_apply_delay interval NOT NULL DEFAULT '0', + sub_force_text_transfer boolean NOT NULL DEFAULT 'f' +); + +CREATE TABLE pglogical.local_sync_status ( + sync_kind "char" NOT NULL CHECK (sync_kind IN ('i', 's', 'd', 'f')), + sync_subid oid NOT NULL REFERENCES pglogical.subscription(sub_id), + sync_nspname name, + sync_relname name, + sync_status "char" NOT NULL, + sync_statuslsn pg_lsn NOT NULL, + UNIQUE (sync_subid, sync_nspname, sync_relname) +); + + +CREATE FUNCTION pglogical.create_node(node_name name, dsn text) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_create_node'; +CREATE FUNCTION pglogical.drop_node(node_name name, ifexists boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_drop_node'; + +CREATE FUNCTION pglogical.alter_node_add_interface(node_name name, interface_name name, dsn text) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_node_add_interface'; +CREATE FUNCTION pglogical.alter_node_drop_interface(node_name name, interface_name name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_node_drop_interface'; + +CREATE FUNCTION pglogical.create_subscription(subscription_name name, provider_dsn text, + replication_sets text[] = '{default,default_insert_only,ddl_sql}', synchronize_structure boolean = false, + synchronize_data boolean = true, forward_origins text[] = '{all}', apply_delay interval DEFAULT '0', + force_text_transfer boolean = false) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_create_subscription'; +CREATE FUNCTION pglogical.drop_subscription(subscription_name name, ifexists boolean DEFAULT false) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_drop_subscription'; + +CREATE FUNCTION pglogical.alter_subscription_interface(subscription_name name, interface_name name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_interface'; + +CREATE FUNCTION pglogical.alter_subscription_disable(subscription_name name, immediate boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_disable'; +CREATE FUNCTION pglogical.alter_subscription_enable(subscription_name name, immediate boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_enable'; + +CREATE FUNCTION 
pglogical.alter_subscription_add_replication_set(subscription_name name, replication_set name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_add_replication_set'; +CREATE FUNCTION pglogical.alter_subscription_remove_replication_set(subscription_name name, replication_set name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_remove_replication_set'; + +CREATE FUNCTION pglogical.show_subscription_status(subscription_name name DEFAULT NULL, + OUT subscription_name text, OUT status text, OUT provider_node text, + OUT provider_dsn text, OUT slot_name text, OUT replication_sets text[], + OUT forward_origins text[]) +RETURNS SETOF record STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_show_subscription_status'; + +CREATE TABLE pglogical.replication_set ( + set_id oid NOT NULL PRIMARY KEY, + set_nodeid oid NOT NULL, + set_name name NOT NULL, + replicate_insert boolean NOT NULL DEFAULT true, + replicate_update boolean NOT NULL DEFAULT true, + replicate_delete boolean NOT NULL DEFAULT true, + replicate_truncate boolean NOT NULL DEFAULT true, + UNIQUE (set_nodeid, set_name) +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.replication_set_table ( + set_id oid NOT NULL, + set_reloid regclass NOT NULL, + set_att_list text[], + set_row_filter pg_node_tree, + PRIMARY KEY(set_id, set_reloid) +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.replication_set_seq ( + set_id oid NOT NULL, + set_seqoid regclass NOT NULL, + PRIMARY KEY(set_id, set_seqoid) +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.sequence_state ( + seqoid oid NOT NULL PRIMARY KEY, + cache_size integer NOT NULL, + last_value bigint NOT NULL +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.depend ( + classid oid NOT NULL, + objid oid NOT NULL, + objsubid integer NOT NULL, + + refclassid oid NOT NULL, + refobjid oid NOT NULL, + refobjsubid integer NOT NULL, + + deptype "char" NOT NULL +) WITH (user_catalog_table=true); + +CREATE VIEW pglogical.TABLES AS + WITH set_relations AS ( + SELECT s.set_name, r.set_reloid + FROM pglogical.replication_set_table r, + pglogical.replication_set s, + pglogical.local_node n + WHERE s.set_nodeid = n.node_id + AND s.set_id = r.set_id + ), + user_tables AS ( + SELECT r.oid, n.nspname, r.relname, r.relreplident + FROM pg_catalog.pg_class r, + pg_catalog.pg_namespace n + WHERE r.relkind = 'r' + AND r.relpersistence = 'p' + AND n.oid = r.relnamespace + AND n.nspname !~ '^pg_' + AND n.nspname != 'information_schema' + AND n.nspname != 'pglogical' + ) + SELECT r.oid AS relid, n.nspname, r.relname, s.set_name + FROM pg_catalog.pg_namespace n, + pg_catalog.pg_class r, + set_relations s + WHERE r.relkind = 'r' + AND n.oid = r.relnamespace + AND r.oid = s.set_reloid + UNION + SELECT t.oid AS relid, t.nspname, t.relname, NULL + FROM user_tables t + WHERE t.oid NOT IN (SELECT set_reloid FROM set_relations); + +CREATE FUNCTION pglogical.create_replication_set(set_name name, + replicate_insert boolean = true, replicate_update boolean = true, + replicate_delete boolean = true, replicate_truncate boolean = true) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_create_replication_set'; +CREATE FUNCTION pglogical.alter_replication_set(set_name name, + replicate_insert boolean DEFAULT NULL, replicate_update boolean DEFAULT NULL, + replicate_delete boolean DEFAULT NULL, replicate_truncate boolean DEFAULT NULL) +RETURNS oid CALLED ON NULL INPUT VOLATILE 
LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_replication_set'; +CREATE FUNCTION pglogical.drop_replication_set(set_name name, ifexists boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_drop_replication_set'; + +CREATE FUNCTION pglogical.replication_set_add_table(set_name name, relation regclass, synchronize_data boolean DEFAULT false, + columns text[] DEFAULT NULL, row_filter text DEFAULT NULL) +RETURNS boolean CALLED ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_table'; +CREATE FUNCTION pglogical.replication_set_add_all_tables(set_name name, schema_names text[], synchronize_data boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_all_tables'; +CREATE FUNCTION pglogical.replication_set_remove_table(set_name name, relation regclass) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_remove_table'; + +CREATE FUNCTION pglogical.replication_set_add_sequence(set_name name, relation regclass, synchronize_data boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_sequence'; +CREATE FUNCTION pglogical.replication_set_add_all_sequences(set_name name, schema_names text[], synchronize_data boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_all_sequences'; +CREATE FUNCTION pglogical.replication_set_remove_sequence(set_name name, relation regclass) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_remove_sequence'; + +CREATE FUNCTION pglogical.alter_subscription_synchronize(subscription_name name, truncate boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_synchronize'; + +CREATE FUNCTION pglogical.alter_subscription_resynchronize_table(subscription_name name, relation regclass, + truncate boolean DEFAULT true) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_resynchronize_table'; + +CREATE FUNCTION pglogical.synchronize_sequence(relation regclass) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_synchronize_sequence'; + +CREATE FUNCTION pglogical.table_data_filtered(reltyp anyelement, relation regclass, repsets text[]) +RETURNS SETOF anyelement CALLED ON NULL INPUT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_table_data_filtered'; + +CREATE FUNCTION pglogical.show_repset_table_info(relation regclass, repsets text[], OUT relid oid, OUT nspname text, + OUT relname text, OUT att_list text[], OUT has_row_filter boolean) +RETURNS record STRICT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_show_repset_table_info'; + +CREATE FUNCTION pglogical.show_subscription_table(subscription_name name, relation regclass, OUT nspname text, OUT relname text, OUT status text) +RETURNS record STRICT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_show_subscription_table'; + +CREATE TABLE pglogical.queue ( + queued_at timestamp with time zone NOT NULL, + role name NOT NULL, + replication_sets text[], + message_type "char" NOT NULL, + message json NOT NULL +); + +CREATE FUNCTION pglogical.replicate_ddl_command(command text, replication_sets text[] DEFAULT '{ddl_sql}') +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replicate_ddl_command'; + +CREATE OR REPLACE 
FUNCTION pglogical.queue_truncate() +RETURNS trigger LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_queue_truncate'; + +CREATE FUNCTION pglogical.pglogical_node_info(OUT node_id oid, OUT node_name text, OUT sysid text, OUT dbname text, OUT replication_sets text) +RETURNS record +STABLE STRICT LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical.pglogical_gen_slot_name(name, name, name) +RETURNS name +IMMUTABLE STRICT LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_version() RETURNS text +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_version_num() RETURNS integer +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_max_proto_version() RETURNS integer +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_min_proto_version() RETURNS integer +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION +pglogical.wait_slot_confirm_lsn(slotname name, target pg_lsn) +RETURNS void LANGUAGE c AS 'pglogical','pglogical_wait_slot_confirm_lsn'; +CREATE FUNCTION pglogical.wait_for_subscription_sync_complete(subscription_name name) +RETURNS void RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_wait_for_subscription_sync_complete'; + +CREATE FUNCTION pglogical.wait_for_table_sync_complete(subscription_name name, relation regclass) +RETURNS void RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_wait_for_table_sync_complete'; + +CREATE FUNCTION pglogical.xact_commit_timestamp_origin("xid" xid, OUT "timestamp" timestamptz, OUT "roident" oid) +RETURNS record RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_xact_commit_timestamp_origin'; diff --git a/pglogical.h b/pglogical.h index 5f8a9c8..d9f277a 100644 --- a/pglogical.h +++ b/pglogical.h @@ -26,8 +26,8 @@ #include "pglogical_compat.h" -#define PGLOGICAL_VERSION "2.3.1" -#define PGLOGICAL_VERSION_NUM 20301 +#define PGLOGICAL_VERSION "2.3.2" +#define PGLOGICAL_VERSION_NUM 20302 #define PGLOGICAL_MIN_PROTO_VERSION_NUM 1 #define PGLOGICAL_MAX_PROTO_VERSION_NUM 1 From 77bad006fa20d6f660689fbb4f66fa6583facf3f Mon Sep 17 00:00:00 2001 From: "Razinkin.Denis" Date: Fri, 4 Jun 2021 19:27:03 +0300 Subject: [PATCH 6/7] fix endless queue replication --- pglogical--2.3.4.sql | 1 + pglogical_apply.c | 35 +- pglogical_output_plugin.c | 1468 ++++++++++++++++++------------------- pglogical_queue.c | 35 +- pglogical_queue.h | 4 + 5 files changed, 793 insertions(+), 750 deletions(-) diff --git a/pglogical--2.3.4.sql b/pglogical--2.3.4.sql index 692107b..50fa578 100644 --- a/pglogical--2.3.4.sql +++ b/pglogical--2.3.4.sql @@ -203,6 +203,7 @@ CREATE FUNCTION pglogical.show_subscription_table(subscription_name name, relati RETURNS record STRICT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_show_subscription_table'; CREATE TABLE pglogical.queue ( + node_id oid REFERENCES node(node_id), queued_at timestamp with time zone NOT NULL, role name NOT NULL, replication_sets text[], diff --git a/pglogical_apply.c b/pglogical_apply.c index 7e480fb..bb895b1 100644 --- a/pglogical_apply.c +++ b/pglogical_apply.c @@ -158,7 +158,7 @@ static TransactionId remote_xid; static void multi_insert_finish(void); -static void handle_queued_message(HeapTuple msgtup, bool tx_just_started); +static void handle_queued_message(QueuedMessage *queued_message, bool tx_just_started); static void handle_startup_param(const char *key, const char *value); static bool parse_bool_param(const char *key, const char *value); static void process_syncing_tables(XLogRecPtr 
end_lsn); @@ -576,9 +576,6 @@ handle_insert(StringInfo s) } } - /* Normal insert. */ - apply_api.do_insert(rel, &newtup); - /* if INSERT was into our queue, process the message. */ if (RelationGetRelid(rel->rel) == QueueRelid) { @@ -593,12 +590,29 @@ handle_insert(StringInfo s) ht = heap_form_tuple(RelationGetDescr(rel->rel), newtup.values, newtup.nulls); + QueuedMessage *queued_message = queued_message_from_tuple(ht); + PGLogicalLocalNode *local_node = get_local_node(false, false); + + // ignore queue messages forwarded from local node and not original messages + if ( queued_message->node_id == local_node->node->id || + queued_message->node_id != queued_message->orig_node_id ) + { + pglogical_relation_close(rel, NoLock); + return; + } + + // change node_id to current + queued_message_tuple_set_local_node_id( &newtup.values, local_node->node->id ); + + /* Normal insert. */ + apply_api.do_insert(rel, &newtup); + LockRelationIdForSession(&lockid, RowExclusiveLock); pglogical_relation_close(rel, NoLock); apply_api.on_commit(); - handle_queued_message(ht, started_tx); + handle_queued_message(queued_message, started_tx); heap_freetuple(ht); @@ -615,7 +629,11 @@ handle_insert(StringInfo s) // CommitTransactionCommand(); } else - pglogical_relation_close(rel, NoLock); + { + /* Normal insert. */ + apply_api.do_insert(rel, &newtup); + pglogical_relation_close( rel, NoLock ); + } } static void @@ -1057,16 +1075,13 @@ handle_sql(QueuedMessage *queued_message, bool tx_just_started) * Handles messages comming from the queue. */ static void -handle_queued_message(HeapTuple msgtup, bool tx_just_started) +handle_queued_message(QueuedMessage *queued_message, bool tx_just_started) { - QueuedMessage *queued_message; const char *old_action_name; old_action_name = errcallback_arg.action_name; errcallback_arg.is_ddl_or_drop = true; - queued_message = queued_message_from_tuple(msgtup); - switch (queued_message->message_type) { case QUEUE_COMMAND_TYPE_SQL: diff --git a/pglogical_output_plugin.c b/pglogical_output_plugin.c index 11fa818..8514388 100644 --- a/pglogical_output_plugin.c +++ b/pglogical_output_plugin.c @@ -46,15 +46,15 @@ extern void _PG_output_plugin_init(OutputPluginCallbacks *cb); static void pg_decode_startup(LogicalDecodingContext * ctx, - OutputPluginOptions *opt, bool is_init); + OutputPluginOptions *opt, bool is_init); static void pg_decode_shutdown(LogicalDecodingContext * ctx); static void pg_decode_begin_txn(LogicalDecodingContext *ctx, - ReorderBufferTXN *txn); + ReorderBufferTXN *txn); static void pg_decode_commit_txn(LogicalDecodingContext *ctx, - ReorderBufferTXN *txn, XLogRecPtr commit_lsn); + ReorderBufferTXN *txn, XLogRecPtr commit_lsn); static void pg_decode_change(LogicalDecodingContext *ctx, - ReorderBufferTXN *txn, Relation rel, - ReorderBufferChange *change); + ReorderBufferTXN *txn, Relation rel, + ReorderBufferChange *change); #ifdef HAVE_REPLICATION_ORIGINS static bool pg_decode_origin_filter(LogicalDecodingContext *ctx, @@ -62,17 +62,17 @@ static bool pg_decode_origin_filter(LogicalDecodingContext *ctx, #endif static void send_startup_message(LogicalDecodingContext *ctx, - PGLogicalOutputData *data, bool last_message); + PGLogicalOutputData *data, bool last_message); static bool startup_message_sent = false; typedef struct PGLRelMetaCacheEntry { - Oid relid; - /* Does the client have this relation cached? */ - bool is_cached; - /* Entry is valid and not due to be purged */ - bool is_valid; + Oid relid; + /* Does the client have this relation cached? 
*/ + bool is_cached; + /* Entry is valid and not due to be purged */ + bool is_valid; } PGLRelMetaCacheEntry; #define RELMETACACHE_INITIAL_SIZE 128 @@ -82,7 +82,7 @@ static int InvalidRelMetaCacheCnt = 0; static void relmetacache_init(MemoryContext decoding_context); static PGLRelMetaCacheEntry *relmetacache_get_relation(PGLogicalOutputData *data, - Relation rel); + Relation rel); static void relmetacache_flush(void); static void relmetacache_prune(void); @@ -92,292 +92,292 @@ static void pglReorderBufferCleanSerializedTXNs(const char *slotname); void _PG_output_plugin_init(OutputPluginCallbacks *cb) { - AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit); + AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit); - cb->startup_cb = pg_decode_startup; - cb->begin_cb = pg_decode_begin_txn; - cb->change_cb = pg_decode_change; - cb->commit_cb = pg_decode_commit_txn; + cb->startup_cb = pg_decode_startup; + cb->begin_cb = pg_decode_begin_txn; + cb->change_cb = pg_decode_change; + cb->commit_cb = pg_decode_commit_txn; #ifdef HAVE_REPLICATION_ORIGINS - cb->filter_by_origin_cb = pg_decode_origin_filter; + cb->filter_by_origin_cb = pg_decode_origin_filter; #endif - cb->shutdown_cb = pg_decode_shutdown; + cb->shutdown_cb = pg_decode_shutdown; } static bool check_binary_compatibility(PGLogicalOutputData *data) { - if (data->client_binary_basetypes_major_version != PG_VERSION_NUM / 100) - return false; - - if (data->client_binary_bigendian_set - && data->client_binary_bigendian != server_bigendian()) - { - elog(DEBUG1, "Binary mode rejected: Server and client endian mismatch"); - return false; - } - - if (data->client_binary_sizeofdatum != 0 - && data->client_binary_sizeofdatum != sizeof(Datum)) - { - elog(DEBUG1, "Binary mode rejected: Server and client sizeof(Datum) mismatch"); - return false; - } - - if (data->client_binary_sizeofint != 0 - && data->client_binary_sizeofint != sizeof(int)) - { - elog(DEBUG1, "Binary mode rejected: Server and client sizeof(int) mismatch"); - return false; - } - - if (data->client_binary_sizeoflong != 0 - && data->client_binary_sizeoflong != sizeof(long)) - { - elog(DEBUG1, "Binary mode rejected: Server and client sizeof(long) mismatch"); - return false; - } - - if (data->client_binary_float4byval_set - && data->client_binary_float4byval != server_float4_byval()) - { - elog(DEBUG1, "Binary mode rejected: Server and client float4byval mismatch"); - return false; - } - - if (data->client_binary_float8byval_set - && data->client_binary_float8byval != server_float8_byval()) - { - elog(DEBUG1, "Binary mode rejected: Server and client float8byval mismatch"); - return false; - } - - if (data->client_binary_intdatetimes_set - && data->client_binary_intdatetimes != server_integer_datetimes()) - { - elog(DEBUG1, "Binary mode rejected: Server and client integer datetimes mismatch"); - return false; - } - - return true; + if (data->client_binary_basetypes_major_version != PG_VERSION_NUM / 100) + return false; + + if (data->client_binary_bigendian_set + && data->client_binary_bigendian != server_bigendian()) + { + elog(DEBUG1, "Binary mode rejected: Server and client endian mismatch"); + return false; + } + + if (data->client_binary_sizeofdatum != 0 + && data->client_binary_sizeofdatum != sizeof(Datum)) + { + elog(DEBUG1, "Binary mode rejected: Server and client sizeof(Datum) mismatch"); + return false; + } + + if (data->client_binary_sizeofint != 0 + && data->client_binary_sizeofint != sizeof(int)) + { + elog(DEBUG1, "Binary mode rejected: 
Server and client sizeof(int) mismatch"); + return false; + } + + if (data->client_binary_sizeoflong != 0 + && data->client_binary_sizeoflong != sizeof(long)) + { + elog(DEBUG1, "Binary mode rejected: Server and client sizeof(long) mismatch"); + return false; + } + + if (data->client_binary_float4byval_set + && data->client_binary_float4byval != server_float4_byval()) + { + elog(DEBUG1, "Binary mode rejected: Server and client float4byval mismatch"); + return false; + } + + if (data->client_binary_float8byval_set + && data->client_binary_float8byval != server_float8_byval()) + { + elog(DEBUG1, "Binary mode rejected: Server and client float8byval mismatch"); + return false; + } + + if (data->client_binary_intdatetimes_set + && data->client_binary_intdatetimes != server_integer_datetimes()) + { + elog(DEBUG1, "Binary mode rejected: Server and client integer datetimes mismatch"); + return false; + } + + return true; } /* initialize this plugin */ static void pg_decode_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt, - bool is_init) + bool is_init) { - PGLogicalOutputData *data = palloc0(sizeof(PGLogicalOutputData)); - - /* Short lived memory context for individual messages */ - data->context = AllocSetContextCreate(ctx->context, - "pglogical output msg context", - ALLOCSET_DEFAULT_SIZES); - data->allow_internal_basetypes = false; - data->allow_binary_basetypes = false; - - - ctx->output_plugin_private = data; - - /* - * This is replication start and not slot initialization. - * - * Parse and validate options passed by the client. - */ - if (!is_init) - { - int params_format; - bool started_tx = false; - PGLogicalLocalNode *node; - MemoryContext oldctx; - - /* - * There's a potential corruption bug in PostgreSQL 10.1, 9.6.6, 9.5.10 - * and 9.4.15 that can cause reorder buffers to accumulate duplicated - * transactions. See - * https://www.postgresql.org/message-id/CAMsr+YHdX=XECbZshDZ2CZNWGTyw-taYBnzqVfx4JzM4ExP5xg@mail.gmail.com - * - * We can defend against this by doing our own cleanup of any serialized - * txns in the reorder buffer on startup. - */ - pglReorderBufferCleanSerializedTXNs(NameStr(MyReplicationSlot->data.name)); - - if (!IsTransactionState()) - { - StartTransactionCommand(); - started_tx = true; - } - node = get_local_node(false, false); - data->local_node_id = node->node->id; - - /* - * Ideally we'd send the startup message immediately. That way - * it'd arrive before any error we emit if we see incompatible - * options sent by the client here. That way the client could - * possibly adjust its options and reconnect. It'd also make - * sure the client gets the startup message in a timely way if - * the server is idle, since otherwise it could be a while - * before the next callback. - * - * The decoding plugin API doesn't let us write to the stream - * from here, though, so we have to delay the startup message - * until the first change processed on the stream, in a begin - * callback. - * - * If we ERROR there, the startup message is buffered but not - * sent since the callback didn't finish. So we'd have to send - * the startup message, finish the callback and check in the - * next callback if we need to ERROR. - * - * That's a bit much hoop jumping, so for now ERRORs are - * immediate. A way to emit a message from the startup callback - * is really needed to change that. 
- */ - startup_message_sent = false; - - /* Now parse the rest of the params and ERROR if we see any we don't recognise */ - oldctx = MemoryContextSwitchTo(ctx->context); - params_format = process_parameters(ctx->output_plugin_options, data); - MemoryContextSwitchTo(oldctx); - - if (params_format != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client sent startup parameters in format %d but we only support format 1", - params_format))); - - if (data->client_min_proto_version > PGLOGICAL_PROTO_VERSION_NUM) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client sent min_proto_version=%d but we only support protocol %d or lower", - data->client_min_proto_version, PGLOGICAL_PROTO_VERSION_NUM))); - - if (data->client_max_proto_version < PGLOGICAL_PROTO_MIN_VERSION_NUM) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client sent max_proto_version=%d but we only support protocol %d or higher", - data->client_max_proto_version, PGLOGICAL_PROTO_MIN_VERSION_NUM))); - - /* - * Set correct protocol format. - * - * This is the output plugin protocol format, this is different - * from the individual fields binary vs textual format. - */ - if (data->client_protocol_format != NULL - && strcmp(data->client_protocol_format, "json") == 0) - { - oldctx = MemoryContextSwitchTo(ctx->context); - data->api = pglogical_init_api(PGLogicalProtoJson); - opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT; - MemoryContextSwitchTo(oldctx); - } - else if ((data->client_protocol_format != NULL - && strcmp(data->client_protocol_format, "native") == 0) - || data->client_protocol_format == NULL) - { - oldctx = MemoryContextSwitchTo(ctx->context); - data->api = pglogical_init_api(PGLogicalProtoNative); - opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT; - - if (data->client_no_txinfo) - { - elog(WARNING, "no_txinfo option ignored for protocols other than json"); - data->client_no_txinfo = false; - } - MemoryContextSwitchTo(oldctx); - } - else - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client requested protocol %s but only \"json\" or \"native\" are supported", - data->client_protocol_format))); - } - - /* check for encoding match if specific encoding demanded by client */ - if (data->client_expected_encoding != NULL - && strlen(data->client_expected_encoding) != 0) - { - int wanted_encoding = pg_char_to_encoding(data->client_expected_encoding); - - if (wanted_encoding == -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognised encoding name %s passed to expected_encoding", - data->client_expected_encoding))); - - if (opt->output_type == OUTPUT_PLUGIN_TEXTUAL_OUTPUT) - { - /* - * datum encoding must match assigned client_encoding in text - * proto, since everything is subject to client_encoding - * conversion. - */ - if (wanted_encoding != pg_get_client_encoding()) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("expected_encoding must be unset or match client_encoding in text protocols"))); - } - else - { - /* - * currently in the binary protocol we can only emit encoded - * datums in the server encoding. There's no support for encoding - * conversion. 
- */ - if (wanted_encoding != GetDatabaseEncoding()) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("encoding conversion for binary datum not supported yet"), - errdetail("expected_encoding %s must be unset or match server_encoding %s", - data->client_expected_encoding, GetDatabaseEncodingName()))); - } - - data->field_datum_encoding = wanted_encoding; - } - - /* - * It's obviously not possible to send binary representation of data - * unless we use the binary output. - */ - if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && - data->client_want_internal_basetypes) - { - data->allow_internal_basetypes = - check_binary_compatibility(data); - } - - if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && - data->client_want_binary_basetypes && - data->client_binary_basetypes_major_version == PG_VERSION_NUM / 100) - { - data->allow_binary_basetypes = true; - } - - /* - * 9.4 lacks origins info so don't forward it. - * - * There's currently no knob for clients to use to suppress - * this info and it's sent if it's supported and available. - */ - if (PG_VERSION_NUM/100 == 904) - data->forward_changeset_origins = false; - else - data->forward_changeset_origins = true; - - if (started_tx) - CommitTransactionCommand(); - - relmetacache_init(ctx->context); - } - - /* So we can identify the process type in Valgrind logs */ - VALGRIND_PRINTF("PGLOGICAL: pglogical worker output_plugin\n"); - /* For incremental leak checking */ - VALGRIND_DISABLE_ERROR_REPORTING; - VALGRIND_DO_LEAK_CHECK; - VALGRIND_ENABLE_ERROR_REPORTING; + PGLogicalOutputData *data = palloc0(sizeof(PGLogicalOutputData)); + + /* Short lived memory context for individual messages */ + data->context = AllocSetContextCreate(ctx->context, + "pglogical output msg context", + ALLOCSET_DEFAULT_SIZES); + data->allow_internal_basetypes = false; + data->allow_binary_basetypes = false; + + + ctx->output_plugin_private = data; + + /* + * This is replication start and not slot initialization. + * + * Parse and validate options passed by the client. + */ + if (!is_init) + { + int params_format; + bool started_tx = false; + PGLogicalLocalNode *node; + MemoryContext oldctx; + + /* + * There's a potential corruption bug in PostgreSQL 10.1, 9.6.6, 9.5.10 + * and 9.4.15 that can cause reorder buffers to accumulate duplicated + * transactions. See + * https://www.postgresql.org/message-id/CAMsr+YHdX=XECbZshDZ2CZNWGTyw-taYBnzqVfx4JzM4ExP5xg@mail.gmail.com + * + * We can defend against this by doing our own cleanup of any serialized + * txns in the reorder buffer on startup. + */ + pglReorderBufferCleanSerializedTXNs(NameStr(MyReplicationSlot->data.name)); + + if (!IsTransactionState()) + { + StartTransactionCommand(); + started_tx = true; + } + node = get_local_node(false, false); + data->local_node_id = node->node->id; + + /* + * Ideally we'd send the startup message immediately. That way + * it'd arrive before any error we emit if we see incompatible + * options sent by the client here. That way the client could + * possibly adjust its options and reconnect. It'd also make + * sure the client gets the startup message in a timely way if + * the server is idle, since otherwise it could be a while + * before the next callback. + * + * The decoding plugin API doesn't let us write to the stream + * from here, though, so we have to delay the startup message + * until the first change processed on the stream, in a begin + * callback. 
+ * + * If we ERROR there, the startup message is buffered but not + * sent since the callback didn't finish. So we'd have to send + * the startup message, finish the callback and check in the + * next callback if we need to ERROR. + * + * That's a bit much hoop jumping, so for now ERRORs are + * immediate. A way to emit a message from the startup callback + * is really needed to change that. + */ + startup_message_sent = false; + + /* Now parse the rest of the params and ERROR if we see any we don't recognise */ + oldctx = MemoryContextSwitchTo(ctx->context); + params_format = process_parameters(ctx->output_plugin_options, data); + MemoryContextSwitchTo(oldctx); + + if (params_format != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client sent startup parameters in format %d but we only support format 1", + params_format))); + + if (data->client_min_proto_version > PGLOGICAL_PROTO_VERSION_NUM) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client sent min_proto_version=%d but we only support protocol %d or lower", + data->client_min_proto_version, PGLOGICAL_PROTO_VERSION_NUM))); + + if (data->client_max_proto_version < PGLOGICAL_PROTO_MIN_VERSION_NUM) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client sent max_proto_version=%d but we only support protocol %d or higher", + data->client_max_proto_version, PGLOGICAL_PROTO_MIN_VERSION_NUM))); + + /* + * Set correct protocol format. + * + * This is the output plugin protocol format, this is different + * from the individual fields binary vs textual format. + */ + if (data->client_protocol_format != NULL + && strcmp(data->client_protocol_format, "json") == 0) + { + oldctx = MemoryContextSwitchTo(ctx->context); + data->api = pglogical_init_api(PGLogicalProtoJson); + opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT; + MemoryContextSwitchTo(oldctx); + } + else if ((data->client_protocol_format != NULL + && strcmp(data->client_protocol_format, "native") == 0) + || data->client_protocol_format == NULL) + { + oldctx = MemoryContextSwitchTo(ctx->context); + data->api = pglogical_init_api(PGLogicalProtoNative); + opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT; + + if (data->client_no_txinfo) + { + elog(WARNING, "no_txinfo option ignored for protocols other than json"); + data->client_no_txinfo = false; + } + MemoryContextSwitchTo(oldctx); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client requested protocol %s but only \"json\" or \"native\" are supported", + data->client_protocol_format))); + } + + /* check for encoding match if specific encoding demanded by client */ + if (data->client_expected_encoding != NULL + && strlen(data->client_expected_encoding) != 0) + { + int wanted_encoding = pg_char_to_encoding(data->client_expected_encoding); + + if (wanted_encoding == -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognised encoding name %s passed to expected_encoding", + data->client_expected_encoding))); + + if (opt->output_type == OUTPUT_PLUGIN_TEXTUAL_OUTPUT) + { + /* + * datum encoding must match assigned client_encoding in text + * proto, since everything is subject to client_encoding + * conversion. 
+ */ + if (wanted_encoding != pg_get_client_encoding()) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("expected_encoding must be unset or match client_encoding in text protocols"))); + } + else + { + /* + * currently in the binary protocol we can only emit encoded + * datums in the server encoding. There's no support for encoding + * conversion. + */ + if (wanted_encoding != GetDatabaseEncoding()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("encoding conversion for binary datum not supported yet"), + errdetail("expected_encoding %s must be unset or match server_encoding %s", + data->client_expected_encoding, GetDatabaseEncodingName()))); + } + + data->field_datum_encoding = wanted_encoding; + } + + /* + * It's obviously not possible to send binary representation of data + * unless we use the binary output. + */ + if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && + data->client_want_internal_basetypes) + { + data->allow_internal_basetypes = + check_binary_compatibility(data); + } + + if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && + data->client_want_binary_basetypes && + data->client_binary_basetypes_major_version == PG_VERSION_NUM / 100) + { + data->allow_binary_basetypes = true; + } + + /* + * 9.4 lacks origins info so don't forward it. + * + * There's currently no knob for clients to use to suppress + * this info and it's sent if it's supported and available. + */ + if (PG_VERSION_NUM/100 == 904) + data->forward_changeset_origins = false; + else + data->forward_changeset_origins = true; + + if (started_tx) + CommitTransactionCommand(); + + relmetacache_init(ctx->context); + } + + /* So we can identify the process type in Valgrind logs */ + VALGRIND_PRINTF("PGLOGICAL: pglogical worker output_plugin\n"); + /* For incremental leak checking */ + VALGRIND_DISABLE_ERROR_REPORTING; + VALGRIND_DO_LEAK_CHECK; + VALGRIND_ENABLE_ERROR_REPORTING; } /* @@ -386,27 +386,27 @@ pg_decode_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt, static void pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) { - PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; - bool send_replication_origin = data->forward_changeset_origins; - MemoryContext old_ctx; + PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; + bool send_replication_origin = data->forward_changeset_origins; + MemoryContext old_ctx; - old_ctx = MemoryContextSwitchTo(data->context); + old_ctx = MemoryContextSwitchTo(data->context); - VALGRIND_DO_ADDED_LEAK_CHECK; + VALGRIND_DO_ADDED_LEAK_CHECK; - if (!startup_message_sent) - send_startup_message(ctx, data, false /* can't be last message */); + if (!startup_message_sent) + send_startup_message(ctx, data, false /* can't be last message */); #ifdef HAVE_REPLICATION_ORIGINS - /* If the record didn't originate locally, send origin info */ + /* If the record didn't originate locally, send origin info */ send_replication_origin &= txn->origin_id != InvalidRepOriginId; #endif - OutputPluginPrepareWrite(ctx, !send_replication_origin); - data->api->write_begin(ctx->out, data, txn); + OutputPluginPrepareWrite(ctx, !send_replication_origin); + data->api->write_begin(ctx->out, data, txn); #ifdef HAVE_REPLICATION_ORIGINS - if (send_replication_origin) + if (send_replication_origin) { char *origin; @@ -429,10 +429,10 @@ pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) } #endif - OutputPluginWrite(ctx, true); + OutputPluginWrite(ctx, true); - 
Assert(CurrentMemoryContext == data->context); - MemoryContextSwitchTo(old_ctx); + Assert(CurrentMemoryContext == data->context); + MemoryContextSwitchTo(old_ctx); } /* @@ -440,295 +440,295 @@ pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) */ static void pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, - XLogRecPtr commit_lsn) + XLogRecPtr commit_lsn) { - PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; - MemoryContext old_ctx; + PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; + MemoryContext old_ctx; - old_ctx = MemoryContextSwitchTo(data->context); + old_ctx = MemoryContextSwitchTo(data->context); - OutputPluginPrepareWrite(ctx, true); - data->api->write_commit(ctx->out, data, txn, commit_lsn); - OutputPluginWrite(ctx, true); + OutputPluginPrepareWrite(ctx, true); + data->api->write_commit(ctx->out, data, txn, commit_lsn); + OutputPluginWrite(ctx, true); - /* - * Now is a good time to get rid of invalidated relation - * metadata entries since nothing will be referencing them - * at the moment. - */ - relmetacache_prune(); + /* + * Now is a good time to get rid of invalidated relation + * metadata entries since nothing will be referencing them + * at the moment. + */ + relmetacache_prune(); - Assert(CurrentMemoryContext == data->context); - MemoryContextSwitchTo(old_ctx); - MemoryContextReset(data->context); + Assert(CurrentMemoryContext == data->context); + MemoryContextSwitchTo(old_ctx); + MemoryContextReset(data->context); - VALGRIND_DO_ADDED_LEAK_CHECK; + VALGRIND_DO_ADDED_LEAK_CHECK; } static bool pglogical_change_filter(PGLogicalOutputData *data, Relation relation, - ReorderBufferChange *change, Bitmapset **att_list) + ReorderBufferChange *change, Bitmapset **att_list) { - PGLogicalTableRepInfo *tblinfo; - ListCell *lc; - - if (data->replicate_only_table) - { - /* - * Special case - we are catching up just one table. - * TODO: performance - */ - return strcmp(RelationGetRelationName(relation), - data->replicate_only_table->relname) == 0 && - RelationGetNamespace(relation) == - get_namespace_oid(data->replicate_only_table->schemaname, true); - } - else if (RelationGetRelid(relation) == get_queue_table_oid()) - { - /* Special case - queue table */ - if (change->action == REORDER_BUFFER_CHANGE_INSERT) - { - HeapTuple tup = &change->data.tp.newtuple->tuple; - QueuedMessage *q; - ListCell *qlc; - - LockRelation(relation, AccessShareLock); - q = queued_message_from_tuple(tup); - UnlockRelation(relation, AccessShareLock); - - /* - * No replication set means global message, those are always - * replicated. - */ - if (q->replication_sets == NULL) - return true; - - foreach (qlc, q->replication_sets) - { - char *queue_set = (char *) lfirst(qlc); - ListCell *plc; - - foreach (plc, data->replication_sets) - { - PGLogicalRepSet *rs = lfirst(plc); - - /* TODO: this is somewhat ugly. */ - if (strcmp(queue_set, rs->name) == 0 && - (q->message_type != QUEUE_COMMAND_TYPE_TRUNCATE || - rs->replicate_truncate)) - return true; - } - } - } - - return false; - } - else if (RelationGetRelid(relation) == get_replication_set_rel_oid()) - { - /* - * Special case - replication set table. - * - * We can use this to update our cached replication set info, without - * having to deal with cache invalidation callbacks. 
- */ - HeapTuple tup; - PGLogicalRepSet *replicated_set; - ListCell *plc; - - if (change->action == REORDER_BUFFER_CHANGE_UPDATE) - tup = &change->data.tp.newtuple->tuple; - else if (change->action == REORDER_BUFFER_CHANGE_DELETE) - tup = &change->data.tp.oldtuple->tuple; - else - return false; - - replicated_set = replication_set_from_tuple(tup); - foreach (plc, data->replication_sets) - { - PGLogicalRepSet *rs = lfirst(plc); - - /* Check if the changed repset is used by us. */ - if (rs->id == replicated_set->id) - { - /* - * In case this was delete, somebody deleted one of our - * rep sets, bail here and let reconnect logic handle any - * potential issues. - */ - if (change->action == REORDER_BUFFER_CHANGE_DELETE) - elog(ERROR, "replication set \"%s\" used by this connection was deleted, existing", - rs->name); - - /* This was update of our repset, update the cache. */ - rs->replicate_insert = replicated_set->replicate_insert; - rs->replicate_update = replicated_set->replicate_update; - rs->replicate_delete = replicated_set->replicate_delete; - rs->replicate_truncate = replicated_set->replicate_truncate; - - return false; - } - } - - return false; - } - - /* Normal case - use replication set membership. */ - tblinfo = get_table_replication_info(data->local_node_id, relation, - data->replication_sets); - - /* First try filter out by change type. */ - switch (change->action) - { - case REORDER_BUFFER_CHANGE_INSERT: - if (!tblinfo->replicate_insert) - return false; - break; - case REORDER_BUFFER_CHANGE_UPDATE: - if (!tblinfo->replicate_update) - return false; - break; - case REORDER_BUFFER_CHANGE_DELETE: - if (!tblinfo->replicate_delete) - return false; - break; - default: - elog(ERROR, "Unhandled reorder buffer change type %d", - change->action); - return false; /* shut compiler up */ - } - - /* - * Proccess row filters. - * XXX: we could probably cache some of the executor stuff. - */ - if (list_length(tblinfo->row_filter) > 0) - { - EState *estate; - ExprContext *econtext; - TupleDesc tupdesc = RelationGetDescr(relation); - HeapTuple oldtup = change->data.tp.oldtuple ? - &change->data.tp.oldtuple->tuple : NULL; - HeapTuple newtup = change->data.tp.newtuple ? - &change->data.tp.newtuple->tuple : NULL; - - /* Skip empty changes. */ - if (!newtup && !oldtup) - { - elog(DEBUG1, "pglogical output got empty change"); - return false; - } - - estate = create_estate_for_relation(relation, false); - econtext = prepare_per_tuple_econtext(estate, tupdesc); - - ExecStoreHeapTuple(newtup ? newtup : oldtup, econtext->ecxt_scantuple, false); - - /* Next try the row_filters if there are any. */ - foreach (lc, tblinfo->row_filter) - { - Node *row_filter = (Node *) lfirst(lc); - ExprState *exprstate = pglogical_prepare_row_filter(row_filter); - Datum res; - bool isnull; - - res = ExecEvalExpr(exprstate, econtext, &isnull, NULL); - - /* NULL is same as false for our use. */ - if (isnull) - return false; - - if (!DatumGetBool(res)) - return false; - } - - ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); - FreeExecutorState(estate); - } - - /* Make sure caller is aware of any attribute filter. */ - *att_list = tblinfo->att_list; - - return true; + PGLogicalTableRepInfo *tblinfo; + ListCell *lc; + + if (data->replicate_only_table) + { + /* + * Special case - we are catching up just one table. 
+ * TODO: performance + */ + return strcmp(RelationGetRelationName(relation), + data->replicate_only_table->relname) == 0 && + RelationGetNamespace(relation) == + get_namespace_oid(data->replicate_only_table->schemaname, true); + } + else if (RelationGetRelid(relation) == get_queue_table_oid()) + { + /* Special case - queue table */ + if (change->action == REORDER_BUFFER_CHANGE_INSERT) + { + HeapTuple tup = &change->data.tp.newtuple->tuple; + QueuedMessage *q; + ListCell *qlc; + + LockRelation(relation, AccessShareLock); + q = queued_message_from_tuple(tup); + UnlockRelation(relation, AccessShareLock); + + /* + * No replication set means global message, those are always + * replicated. + */ + if (q->replication_sets == NULL) + return true; + + foreach (qlc, q->replication_sets) + { + char *queue_set = (char *) lfirst(qlc); + ListCell *plc; + + foreach (plc, data->replication_sets) + { + PGLogicalRepSet *rs = lfirst(plc); + + /* TODO: this is somewhat ugly. */ + if (strcmp(queue_set, rs->name) == 0 && + (q->message_type != QUEUE_COMMAND_TYPE_TRUNCATE || + rs->replicate_truncate)) + return true; + } + } + } + + return false; + } + else if (RelationGetRelid(relation) == get_replication_set_rel_oid()) + { + /* + * Special case - replication set table. + * + * We can use this to update our cached replication set info, without + * having to deal with cache invalidation callbacks. + */ + HeapTuple tup; + PGLogicalRepSet *replicated_set; + ListCell *plc; + + if (change->action == REORDER_BUFFER_CHANGE_UPDATE) + tup = &change->data.tp.newtuple->tuple; + else if (change->action == REORDER_BUFFER_CHANGE_DELETE) + tup = &change->data.tp.oldtuple->tuple; + else + return false; + + replicated_set = replication_set_from_tuple(tup); + foreach (plc, data->replication_sets) + { + PGLogicalRepSet *rs = lfirst(plc); + + /* Check if the changed repset is used by us. */ + if (rs->id == replicated_set->id) + { + /* + * In case this was delete, somebody deleted one of our + * rep sets, bail here and let reconnect logic handle any + * potential issues. + */ + if (change->action == REORDER_BUFFER_CHANGE_DELETE) + elog(ERROR, "replication set \"%s\" used by this connection was deleted, existing", + rs->name); + + /* This was update of our repset, update the cache. */ + rs->replicate_insert = replicated_set->replicate_insert; + rs->replicate_update = replicated_set->replicate_update; + rs->replicate_delete = replicated_set->replicate_delete; + rs->replicate_truncate = replicated_set->replicate_truncate; + + return false; + } + } + + return false; + } + + /* Normal case - use replication set membership. */ + tblinfo = get_table_replication_info(data->local_node_id, relation, + data->replication_sets); + + /* First try filter out by change type. */ + switch (change->action) + { + case REORDER_BUFFER_CHANGE_INSERT: + if (!tblinfo->replicate_insert) + return false; + break; + case REORDER_BUFFER_CHANGE_UPDATE: + if (!tblinfo->replicate_update) + return false; + break; + case REORDER_BUFFER_CHANGE_DELETE: + if (!tblinfo->replicate_delete) + return false; + break; + default: + elog(ERROR, "Unhandled reorder buffer change type %d", + change->action); + return false; /* shut compiler up */ + } + + /* + * Proccess row filters. + * XXX: we could probably cache some of the executor stuff. + */ + if (list_length(tblinfo->row_filter) > 0) + { + EState *estate; + ExprContext *econtext; + TupleDesc tupdesc = RelationGetDescr(relation); + HeapTuple oldtup = change->data.tp.oldtuple ? 
+ &change->data.tp.oldtuple->tuple : NULL; + HeapTuple newtup = change->data.tp.newtuple ? + &change->data.tp.newtuple->tuple : NULL; + + /* Skip empty changes. */ + if (!newtup && !oldtup) + { + elog(DEBUG1, "pglogical output got empty change"); + return false; + } + + estate = create_estate_for_relation(relation, false); + econtext = prepare_per_tuple_econtext(estate, tupdesc); + + ExecStoreHeapTuple(newtup ? newtup : oldtup, econtext->ecxt_scantuple, false); + + /* Next try the row_filters if there are any. */ + foreach (lc, tblinfo->row_filter) + { + Node *row_filter = (Node *) lfirst(lc); + ExprState *exprstate = pglogical_prepare_row_filter(row_filter); + Datum res; + bool isnull; + + res = ExecEvalExpr(exprstate, econtext, &isnull, NULL); + + /* NULL is same as false for our use. */ + if (isnull) + return false; + + if (!DatumGetBool(res)) + return false; + } + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(estate); + } + + /* Make sure caller is aware of any attribute filter. */ + *att_list = tblinfo->att_list; + + return true; } static void pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, - Relation relation, ReorderBufferChange *change) + Relation relation, ReorderBufferChange *change) { - PGLogicalOutputData *data = ctx->output_plugin_private; - MemoryContext old; - Bitmapset *att_list = NULL; - - /* Avoid leaking memory by using and resetting our own context */ - old = MemoryContextSwitchTo(data->context); - - /* First check the table filter */ - if (!pglogical_change_filter(data, relation, change, &att_list)) - return; - - /* - * If the protocol wants to write relation information and the client - * isn't known to have metadata cached for this relation already, - * send relation metadata. - * - * TODO: track hit/miss stats - */ - if (data->api->write_rel != NULL) - { - PGLRelMetaCacheEntry *cached_relmeta; - cached_relmeta = relmetacache_get_relation(data, relation); - - if (!cached_relmeta->is_cached) - { - OutputPluginPrepareWrite(ctx, false); - data->api->write_rel(ctx->out, data, relation, att_list); - OutputPluginWrite(ctx, false); - cached_relmeta->is_cached = true; - } - } - - /* Send the data */ - switch (change->action) - { - case REORDER_BUFFER_CHANGE_INSERT: - OutputPluginPrepareWrite(ctx, true); - data->api->write_insert(ctx->out, data, relation, - &change->data.tp.newtuple->tuple, - att_list); - OutputPluginWrite(ctx, true); - break; - case REORDER_BUFFER_CHANGE_UPDATE: - { - HeapTuple oldtuple = change->data.tp.oldtuple ? 
- &change->data.tp.oldtuple->tuple : NULL; - - OutputPluginPrepareWrite(ctx, true); - data->api->write_update(ctx->out, data, relation, oldtuple, - &change->data.tp.newtuple->tuple, - att_list); - OutputPluginWrite(ctx, true); - break; - } - case REORDER_BUFFER_CHANGE_DELETE: - if (change->data.tp.oldtuple) - { - OutputPluginPrepareWrite(ctx, true); - data->api->write_delete(ctx->out, data, relation, - &change->data.tp.oldtuple->tuple, - att_list); - OutputPluginWrite(ctx, true); - } - else - elog(DEBUG1, "didn't send DELETE change because of missing oldtuple"); - break; - default: - Assert(false); - } - - /* Cleanup */ - Assert(CurrentMemoryContext == data->context); - MemoryContextSwitchTo(old); - MemoryContextReset(data->context); + PGLogicalOutputData *data = ctx->output_plugin_private; + MemoryContext old; + Bitmapset *att_list = NULL; + + /* Avoid leaking memory by using and resetting our own context */ + old = MemoryContextSwitchTo(data->context); + + /* First check the table filter */ + if (!pglogical_change_filter(data, relation, change, &att_list)) + return; + + /* + * If the protocol wants to write relation information and the client + * isn't known to have metadata cached for this relation already, + * send relation metadata. + * + * TODO: track hit/miss stats + */ + if (data->api->write_rel != NULL) + { + PGLRelMetaCacheEntry *cached_relmeta; + cached_relmeta = relmetacache_get_relation(data, relation); + + if (!cached_relmeta->is_cached) + { + OutputPluginPrepareWrite(ctx, false); + data->api->write_rel(ctx->out, data, relation, att_list); + OutputPluginWrite(ctx, false); + cached_relmeta->is_cached = true; + } + } + + /* Send the data */ + switch (change->action) + { + case REORDER_BUFFER_CHANGE_INSERT: + OutputPluginPrepareWrite(ctx, true); + data->api->write_insert(ctx->out, data, relation, + &change->data.tp.newtuple->tuple, + att_list); + OutputPluginWrite(ctx, true); + break; + case REORDER_BUFFER_CHANGE_UPDATE: + { + HeapTuple oldtuple = change->data.tp.oldtuple ? + &change->data.tp.oldtuple->tuple : NULL; + + OutputPluginPrepareWrite(ctx, true); + data->api->write_update(ctx->out, data, relation, oldtuple, + &change->data.tp.newtuple->tuple, + att_list); + OutputPluginWrite(ctx, true); + break; + } + case REORDER_BUFFER_CHANGE_DELETE: + if (change->data.tp.oldtuple) + { + OutputPluginPrepareWrite(ctx, true); + data->api->write_delete(ctx->out, data, relation, + &change->data.tp.oldtuple->tuple, + att_list); + OutputPluginWrite(ctx, true); + } + else + elog(DEBUG1, "didn't send DELETE change because of missing oldtuple"); + break; + default: + Assert(false); + } + + /* Cleanup */ + Assert(CurrentMemoryContext == data->context); + MemoryContextSwitchTo(old); + MemoryContextReset(data->context); } #ifdef HAVE_REPLICATION_ORIGINS @@ -760,28 +760,28 @@ pg_decode_origin_filter(LogicalDecodingContext *ctx, static void send_startup_message(LogicalDecodingContext *ctx, - PGLogicalOutputData *data, bool last_message) + PGLogicalOutputData *data, bool last_message) { - List *msg; + List *msg; - Assert(!startup_message_sent); + Assert(!startup_message_sent); - msg = prepare_startup_message(data); + msg = prepare_startup_message(data); - /* - * We could free the extra_startup_params DefElem list here, but it's - * pretty harmless to just ignore it, since it's in the decoding memory - * context anyway, and we don't know if it's safe to free the defnames or - * not. 
- */ + /* + * We could free the extra_startup_params DefElem list here, but it's + * pretty harmless to just ignore it, since it's in the decoding memory + * context anyway, and we don't know if it's safe to free the defnames or + * not. + */ - OutputPluginPrepareWrite(ctx, last_message); - data->api->write_startup_message(ctx->out, msg); - OutputPluginWrite(ctx, last_message); + OutputPluginPrepareWrite(ctx, last_message); + data->api->write_startup_message(ctx->out, msg); + OutputPluginWrite(ctx, last_message); - list_free_deep(msg); + list_free_deep(msg); - startup_message_sent = true; + startup_message_sent = true; } @@ -791,14 +791,14 @@ send_startup_message(LogicalDecodingContext *ctx, static void pg_decode_shutdown(LogicalDecodingContext * ctx) { - relmetacache_flush(); + relmetacache_flush(); - VALGRIND_PRINTF("PGLOGICAL: output plugin shutdown\n"); + VALGRIND_PRINTF("PGLOGICAL: output plugin shutdown\n"); - /* - * no need to delete data->context as it's child of ctx->context which - * will expire on return. - */ + /* + * no need to delete data->context as it's child of ctx->context which + * will expire on return. + */ } @@ -808,30 +808,30 @@ pg_decode_shutdown(LogicalDecodingContext * ctx) */ static void relmetacache_invalidation_cb(Datum arg, Oid relid) - { - struct PGLRelMetaCacheEntry *hentry; - Assert (RelMetaCache != NULL); - - /* - * Nobody keeps pointers to entries in this hash table around outside - * logical decoding callback calls - but invalidation events can come in - * *during* a callback if we access the relcache in the callback. Because - * of that we must mark the cache entry as invalid but not remove it from - * the hash while it could still be referenced, then prune it at a later - * safe point. - * - * Getting invalidations for relations that aren't in the table is - * entirely normal, since there's no way to unregister for an - * invalidation event. So we don't care if it's found or not. - */ - hentry = (struct PGLRelMetaCacheEntry *) - hash_search(RelMetaCache, &relid, HASH_FIND, NULL); - - if (hentry != NULL) - { - hentry->is_valid = false; - InvalidRelMetaCacheCnt++; - } +{ + struct PGLRelMetaCacheEntry *hentry; + Assert (RelMetaCache != NULL); + + /* + * Nobody keeps pointers to entries in this hash table around outside + * logical decoding callback calls - but invalidation events can come in + * *during* a callback if we access the relcache in the callback. Because + * of that we must mark the cache entry as invalid but not remove it from + * the hash while it could still be referenced, then prune it at a later + * safe point. + * + * Getting invalidations for relations that aren't in the table is + * entirely normal, since there's no way to unregister for an + * invalidation event. So we don't care if it's found or not. 
+ */ + hentry = (struct PGLRelMetaCacheEntry *) + hash_search(RelMetaCache, &relid, HASH_FIND, NULL); + + if (hentry != NULL) + { + hentry->is_valid = false; + InvalidRelMetaCacheCnt++; + } } /* @@ -844,44 +844,44 @@ relmetacache_invalidation_cb(Datum arg, Oid relid) static void relmetacache_init(MemoryContext decoding_context) { - HASHCTL ctl; - int hash_flags; + HASHCTL ctl; + int hash_flags; - InvalidRelMetaCacheCnt = 0; + InvalidRelMetaCacheCnt = 0; - if (RelMetaCache == NULL) - { - MemoryContext old_ctxt; + if (RelMetaCache == NULL) + { + MemoryContext old_ctxt; - RelMetaCacheContext = AllocSetContextCreate(TopMemoryContext, - "pglogical output relmetacache", - ALLOCSET_DEFAULT_SIZES); + RelMetaCacheContext = AllocSetContextCreate(TopMemoryContext, + "pglogical output relmetacache", + ALLOCSET_DEFAULT_SIZES); - /* Make a new hash table for the cache */ - hash_flags = HASH_ELEM | HASH_CONTEXT; + /* Make a new hash table for the cache */ + hash_flags = HASH_ELEM | HASH_CONTEXT; - MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(struct PGLRelMetaCacheEntry); - ctl.hcxt = RelMetaCacheContext; + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(struct PGLRelMetaCacheEntry); + ctl.hcxt = RelMetaCacheContext; #if PG_VERSION_NUM >= 90500 - hash_flags |= HASH_BLOBS; + hash_flags |= HASH_BLOBS; #else - ctl.hash = tag_hash; - hash_flags |= HASH_FUNCTION; + ctl.hash = tag_hash; + hash_flags |= HASH_FUNCTION; #endif - old_ctxt = MemoryContextSwitchTo(RelMetaCacheContext); - RelMetaCache = hash_create("pglogical relation metadata cache", - RELMETACACHE_INITIAL_SIZE, - &ctl, hash_flags); - (void) MemoryContextSwitchTo(old_ctxt); + old_ctxt = MemoryContextSwitchTo(RelMetaCacheContext); + RelMetaCache = hash_create("pglogical relation metadata cache", + RELMETACACHE_INITIAL_SIZE, + &ctl, hash_flags); + (void) MemoryContextSwitchTo(old_ctxt); - Assert(RelMetaCache != NULL); + Assert(RelMetaCache != NULL); - CacheRegisterRelcacheCallback(relmetacache_invalidation_cb, (Datum)0); - } + CacheRegisterRelcacheCallback(relmetacache_invalidation_cb, (Datum)0); + } } @@ -896,31 +896,31 @@ relmetacache_init(MemoryContext decoding_context) */ static PGLRelMetaCacheEntry * relmetacache_get_relation(struct PGLogicalOutputData *data, - Relation rel) + Relation rel) { - struct PGLRelMetaCacheEntry *hentry; - bool found; - MemoryContext old_mctx; - - /* Find cached function info, creating if not found */ - old_mctx = MemoryContextSwitchTo(RelMetaCacheContext); - hentry = (struct PGLRelMetaCacheEntry*) hash_search(RelMetaCache, - (void *)(&RelationGetRelid(rel)), - HASH_ENTER, &found); - (void) MemoryContextSwitchTo(old_mctx); - - /* If not found or not valid, it can't be cached. */ - if (!found || !hentry->is_valid) - { - Assert(hentry->relid = RelationGetRelid(rel)); - hentry->is_cached = false; - /* Only used for lazy purging of invalidations */ - hentry->is_valid = true; - } - - Assert(hentry != NULL); - - return hentry; + struct PGLRelMetaCacheEntry *hentry; + bool found; + MemoryContext old_mctx; + + /* Find cached function info, creating if not found */ + old_mctx = MemoryContextSwitchTo(RelMetaCacheContext); + hentry = (struct PGLRelMetaCacheEntry*) hash_search(RelMetaCache, + (void *)(&RelationGetRelid(rel)), + HASH_ENTER, &found); + (void) MemoryContextSwitchTo(old_mctx); + + /* If not found or not valid, it can't be cached. 
*/ + if (!found || !hentry->is_valid) + { + Assert(hentry->relid = RelationGetRelid(rel)); + hentry->is_cached = false; + /* Only used for lazy purging of invalidations */ + hentry->is_valid = true; + } + + Assert(hentry != NULL); + + return hentry; } @@ -934,21 +934,21 @@ relmetacache_get_relation(struct PGLogicalOutputData *data, static void relmetacache_flush(void) { - HASH_SEQ_STATUS status; - struct PGLRelMetaCacheEntry *hentry; - - if (RelMetaCache != NULL) - { - hash_seq_init(&status, RelMetaCache); - - while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL) - { - if (hash_search(RelMetaCache, - (void *) &hentry->relid, - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "hash table corrupted"); - } - } + HASH_SEQ_STATUS status; + struct PGLRelMetaCacheEntry *hentry; + + if (RelMetaCache != NULL) + { + hash_seq_init(&status, RelMetaCache); + + while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL) + { + if (hash_search(RelMetaCache, + (void *) &hentry->relid, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } + } } /* @@ -960,30 +960,30 @@ relmetacache_flush(void) static void relmetacache_prune(void) { - HASH_SEQ_STATUS status; - struct PGLRelMetaCacheEntry *hentry; - - /* - * Since the pruning can be expensive, do it only if ig we invalidated - * at least half of initial cache size. - */ - if (InvalidRelMetaCacheCnt < RELMETACACHE_INITIAL_SIZE/2) - return; - - hash_seq_init(&status, RelMetaCache); - - while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL) - { - if (!hentry->is_valid) - { - if (hash_search(RelMetaCache, - (void *) &hentry->relid, - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "hash table corrupted"); - } - } - - InvalidRelMetaCacheCnt = 0; + HASH_SEQ_STATUS status; + struct PGLRelMetaCacheEntry *hentry; + + /* + * Since the pruning can be expensive, do it only if ig we invalidated + * at least half of initial cache size. 
+ */ + if (InvalidRelMetaCacheCnt < RELMETACACHE_INITIAL_SIZE/2) + return; + + hash_seq_init(&status, RelMetaCache); + + while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL) + { + if (!hentry->is_valid) + { + if (hash_search(RelMetaCache, + (void *) &hentry->relid, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } + } + + InvalidRelMetaCacheCnt = 0; } /* @@ -993,33 +993,33 @@ relmetacache_prune(void) static void pglReorderBufferCleanSerializedTXNs(const char *slotname) { - DIR *spill_dir; - struct dirent *spill_de; - struct stat statbuf; - char path[MAXPGPATH * 2 + 12]; - - sprintf(path, "pg_replslot/%s", slotname); - - /* we're only handling directories here, skip if it's not ours */ - if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) - return; - - spill_dir = AllocateDir(path); - while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL) - { - /* only look at names that can be ours */ - if (strncmp(spill_de->d_name, "xid", 3) == 0) - { - snprintf(path, sizeof(path), - "pg_replslot/%s/%s", slotname, - spill_de->d_name); - - if (unlink(path) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not remove file \"%s\" during removal of pg_replslot/%s/*.xid: %m", - path, slotname))); - } - } - FreeDir(spill_dir); + DIR *spill_dir; + struct dirent *spill_de; + struct stat statbuf; + char path[MAXPGPATH * 2 + 12]; + + sprintf(path, "pg_replslot/%s", slotname); + + /* we're only handling directories here, skip if it's not ours */ + if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) + return; + + spill_dir = AllocateDir(path); + while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL) + { + /* only look at names that can be ours */ + if (strncmp(spill_de->d_name, "xid", 3) == 0) + { + snprintf(path, sizeof(path), + "pg_replslot/%s/%s", slotname, + spill_de->d_name); + + if (unlink(path) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\" during removal of pg_replslot/%s/*.xid: %m", + path, slotname))); + } + } + FreeDir(spill_dir); } diff --git a/pglogical_queue.c b/pglogical_queue.c index d1d97a3..5485806 100644 --- a/pglogical_queue.c +++ b/pglogical_queue.c @@ -50,15 +50,19 @@ #define CATALOG_QUEUE "queue" -#define Natts_queue 5 -#define Anum_queue_queued_at 1 -#define Anum_queue_role 2 -#define Anum_queue_replication_sets 3 -#define Anum_queue_message_type 4 -#define Anum_queue_message 5 +#define Natts_queue 7 +#define Anum_queue_node_id 1 +#define Anum_queue_orig_node_id 2 +#define Anum_queue_queued_at 3 +#define Anum_queue_role 4 +#define Anum_queue_replication_sets 5 +#define Anum_queue_message_type 6 +#define Anum_queue_message 7 typedef struct QueueTuple { + Oid node_id; + Oid orig_node_id; TimestampTz queued_at; NameData replication_set; NameData role; @@ -82,6 +86,7 @@ queue_message(List *replication_sets, Oid roleoid, char message_type, const char *role; TimestampTz ts = GetCurrentTimestamp(); + PGLogicalLocalNode *local_node = get_local_node(false, false); role = GetUserNameFromId(roleoid #if PG_VERSION_NUM >= 90500 , false @@ -95,6 +100,8 @@ queue_message(List *replication_sets, Oid roleoid, char message_type, /* Form a tuple. 
*/ memset(nulls, false, sizeof(nulls)); + values[Anum_queue_node_id - 1] = ObjectIdGetDatum(local_node->node->id); + values[Anum_queue_orig_node_id - 1] = ObjectIdGetDatum(local_node->node->id); values[Anum_queue_queued_at - 1] = TimestampTzGetDatum(ts); values[Anum_queue_role - 1] = DirectFunctionCall1(namein, CStringGetDatum(role)); @@ -140,6 +147,14 @@ queued_message_from_tuple(HeapTuple queue_tup) res = (QueuedMessage *) palloc(sizeof(QueuedMessage)); + d = fastgetattr(queue_tup, Anum_queue_node_id, tupDesc, &isnull); + Assert(!isnull); + res->node_id = DatumGetObjectId(d); + + d = fastgetattr(queue_tup, Anum_queue_orig_node_id, tupDesc, &isnull); + Assert(!isnull); + res->orig_node_id = DatumGetObjectId(d); + d = fastgetattr(queue_tup, Anum_queue_queued_at, tupDesc, &isnull); Assert(!isnull); res->queued_at = DatumGetTimestampTz(d); @@ -170,6 +185,14 @@ queued_message_from_tuple(HeapTuple queue_tup) return res; } +/* + * Set local node for tuple coming from other node + */ +void queued_message_tuple_set_local_node_id(Datum *values, Oid node_id) +{ + values[Anum_queue_node_id - 1] = ObjectIdGetDatum(node_id); +} + /* * Get (cached) oid of the queue table. */ diff --git a/pglogical_queue.h b/pglogical_queue.h index 9bf13df..9e6bfba 100644 --- a/pglogical_queue.h +++ b/pglogical_queue.h @@ -22,6 +22,8 @@ typedef struct QueuedMessage { + Oid node_id; + Oid orig_node_id; TimestampTz queued_at; List *replication_sets; char *role; @@ -34,6 +36,8 @@ extern void queue_message(List *replication_sets, Oid roleoid, extern QueuedMessage *queued_message_from_tuple(HeapTuple queue_tup); +void queued_message_tuple_set_local_node_id(Datum *values, Oid node_id); + extern Oid get_queue_table_oid(void); extern void create_truncate_trigger(Relation rel); From d4e6466d2d747bab7368213c875a78406ec05bfe Mon Sep 17 00:00:00 2001 From: "Razinkin.Denis" Date: Tue, 8 Jun 2021 18:33:58 +0300 Subject: [PATCH 7/7] Add sub_data_replace and sub_after_sync_queries fields for subscription. 
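
The subscription catalog gains sub_data_replace (default true) and
sub_after_sync_queries, replication_set_add_table gains a sync_clear_filter
argument, and create_subscription is re-created with the matching
data_replace and after_sync_queries parameters.

A minimal usage sketch (illustrative only: the object names and the filter
predicate are made up, and the intended semantics, keeping existing local
rows instead of replacing them during the initial sync and running extra
statements once a table finishes syncing, are an assumption based on the
new column and parameter names):

    SELECT pglogical.create_subscription(
        subscription_name  := 'sub_orders',                  -- hypothetical
        provider_dsn       := 'host=provider dbname=shop',   -- hypothetical
        synchronize_data   := true,
        data_replace       := false,                          -- new parameter (assumed semantics)
        after_sync_queries := ARRAY['ANALYZE public.orders']  -- new parameter (assumed semantics)
    );

    SELECT pglogical.replication_set_add_table(
        set_name          := 'default',
        relation          := 'public.orders',
        synchronize_data  := true,
        sync_clear_filter := 'id > 1000'                      -- new parameter (placeholder predicate)
    );
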
--- Makefile | 4 +- pglogical--2.3.4--2.3.4.1.sql | 34 + pglogical--2.3.4-1.sql | 255 ++++++ pglogical--2.3.4.sql | 2 +- pglogical.h | 4 +- pglogical_functions.c | 513 ++++++------ pglogical_node.c | 54 +- pglogical_node.h | 2 + pglogical_output_plugin.c | 1469 +++++++++++++++++---------------- pglogical_relcache.h | 1 + pglogical_repset.c | 22 +- pglogical_repset.h | 4 +- pglogical_rpc.c | 13 +- pglogical_sync.c | 162 +++- 14 files changed, 1524 insertions(+), 1015 deletions(-) create mode 100644 pglogical--2.3.4--2.3.4.1.sql create mode 100644 pglogical--2.3.4-1.sql diff --git a/Makefile b/Makefile index 093d0cb..6af5a6b 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,9 @@ DATA = pglogical--1.0.0.sql pglogical--1.0.0--1.0.1.sql \ pglogical--2.3.2--2.3.3.sql \ pglogical--2.3.3.sql \ pglogical--2.3.3--2.3.4.sql \ - pglogical--2.3.4.sql + pglogical--2.3.4.sql \ + pglogical--2.3.4-1.sql \ + pglogical--2.3.4--2.3.4.1.sql OBJS = pglogical_apply.o pglogical_conflict.o pglogical_manager.o \ pglogical.o pglogical_node.o pglogical_relcache.o \ diff --git a/pglogical--2.3.4--2.3.4.1.sql b/pglogical--2.3.4--2.3.4.1.sql new file mode 100644 index 0000000..227e1c2 --- /dev/null +++ b/pglogical--2.3.4--2.3.4.1.sql @@ -0,0 +1,34 @@ +ALTER TABLE pglogical.subscription ADD COLUMN sub_data_replace boolean NOT NULL DEFAULT true; +ALTER TABLE pglogical.subscription ADD COLUMN sub_after_sync_queries text[]; + +DROP FUNCTION pglogical.create_subscription(subscription_name name, provider_dsn text, + replication_sets text[], synchronize_structure boolean, + synchronize_data boolean, forward_origins text[], apply_delay interval, + force_text_transfer boolean); +CREATE FUNCTION pglogical.create_subscription(subscription_name name, provider_dsn text, + replication_sets text[] = '{default,default_insert_only,ddl_sql}', synchronize_structure boolean = false, + synchronize_data boolean = true, data_replace boolean = true, after_sync_queries text[] = '{}', + forward_origins text[] = '{all}', apply_delay interval DEFAULT '0', + force_text_transfer boolean = false) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_create_subscription'; + +ALTER TABLE pglogical.replication_set_table ADD COLUMN set_sync_clear_filter text; + +DROP FUNCTION pglogical.replication_set_add_table(set_name name, relation regclass, synchronize_data boolean, + columns text[], row_filter text); +CREATE FUNCTION pglogical.replication_set_add_table(set_name name, relation regclass, synchronize_data boolean DEFAULT false, + columns text[] DEFAULT NULL, row_filter text DEFAULT NULL, sync_clear_filter text DEFAULT NULL) +RETURNS boolean CALLED ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_table'; + +TRUNCATE pglogical.queue; +ALTER TABLE pglogical.queue ADD COLUMN node_id oid REFERENCES node(node_id); +ALTER TABLE pglogical.queue ADD COLUMN original_node_id oid REFERENCES node(node_id); + + +DROP FUNCTION pglogical.wait_for_subscription_sync_complete(subscription_name name); +CREATE FUNCTION pglogical.wait_for_subscription_sync_complete(subscription_name name) + RETURNS boolean RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_wait_for_subscription_sync_complete'; + +DROP FUNCTION pglogical.wait_for_table_sync_complete(subscription_name name, relation regclass); +CREATE FUNCTION pglogical.wait_for_table_sync_complete(subscription_name name, relation regclass) + RETURNS boolean RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 
'pglogical_wait_for_table_sync_complete'; diff --git a/pglogical--2.3.4-1.sql b/pglogical--2.3.4-1.sql new file mode 100644 index 0000000..141507c --- /dev/null +++ b/pglogical--2.3.4-1.sql @@ -0,0 +1,255 @@ +\echo Use "CREATE EXTENSION pglogical" to load this file. \quit + +CREATE TABLE pglogical.node ( + node_id oid NOT NULL PRIMARY KEY, + node_name name NOT NULL UNIQUE +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.node_interface ( + if_id oid NOT NULL PRIMARY KEY, + if_name name NOT NULL, -- default same as node name + if_nodeid oid REFERENCES node(node_id), + if_dsn text NOT NULL, + UNIQUE (if_nodeid, if_name) +); + +CREATE TABLE pglogical.local_node ( + node_id oid PRIMARY KEY REFERENCES node(node_id), + node_local_interface oid NOT NULL REFERENCES node_interface(if_id) +); + +CREATE TABLE pglogical.subscription ( + sub_id oid NOT NULL PRIMARY KEY, + sub_name name NOT NULL UNIQUE, + sub_origin oid NOT NULL REFERENCES node(node_id), + sub_target oid NOT NULL REFERENCES node(node_id), + sub_origin_if oid NOT NULL REFERENCES node_interface(if_id), + sub_target_if oid NOT NULL REFERENCES node_interface(if_id), + sub_enabled boolean NOT NULL DEFAULT true, + sub_data_replace boolean NOT NULL DEFAULT true, + sub_slot_name name NOT NULL, + sub_replication_sets text[], + sub_forward_origins text[], + sub_apply_delay interval NOT NULL DEFAULT '0', + sub_force_text_transfer boolean NOT NULL DEFAULT 'f', + sub_after_sync_queries text[] +); + +CREATE TABLE pglogical.local_sync_status ( + sync_kind "char" NOT NULL CHECK (sync_kind IN ('i', 's', 'd', 'f')), + sync_subid oid NOT NULL REFERENCES pglogical.subscription(sub_id), + sync_nspname name, + sync_relname name, + sync_status "char" NOT NULL, + sync_statuslsn pg_lsn NOT NULL, + UNIQUE (sync_subid, sync_nspname, sync_relname) +); + + +CREATE FUNCTION pglogical.create_node(node_name name, dsn text) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_create_node'; +CREATE FUNCTION pglogical.drop_node(node_name name, ifexists boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_drop_node'; + +CREATE FUNCTION pglogical.alter_node_add_interface(node_name name, interface_name name, dsn text) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_node_add_interface'; +CREATE FUNCTION pglogical.alter_node_drop_interface(node_name name, interface_name name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_node_drop_interface'; + +CREATE FUNCTION pglogical.create_subscription(subscription_name name, provider_dsn text, + replication_sets text[] = '{default,default_insert_only,ddl_sql}', synchronize_structure boolean = false, + synchronize_data boolean = true, data_replace boolean = true, after_sync_queries text[] = '{}', + forward_origins text[] = '{all}', apply_delay interval DEFAULT '0', + force_text_transfer boolean = false) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_create_subscription'; +CREATE FUNCTION pglogical.drop_subscription(subscription_name name, ifexists boolean DEFAULT false) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_drop_subscription'; + +CREATE FUNCTION pglogical.alter_subscription_interface(subscription_name name, interface_name name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_interface'; + +CREATE FUNCTION pglogical.alter_subscription_disable(subscription_name name, immediate boolean 
DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_disable'; +CREATE FUNCTION pglogical.alter_subscription_enable(subscription_name name, immediate boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_enable'; + +CREATE FUNCTION pglogical.alter_subscription_add_replication_set(subscription_name name, replication_set name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_add_replication_set'; +CREATE FUNCTION pglogical.alter_subscription_remove_replication_set(subscription_name name, replication_set name) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_remove_replication_set'; + +CREATE FUNCTION pglogical.show_subscription_status(subscription_name name DEFAULT NULL, + OUT subscription_name text, OUT status text, OUT provider_node text, + OUT provider_dsn text, OUT slot_name text, OUT replication_sets text[], + OUT forward_origins text[]) +RETURNS SETOF record STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_show_subscription_status'; + +CREATE TABLE pglogical.replication_set ( + set_id oid NOT NULL PRIMARY KEY, + set_nodeid oid NOT NULL, + set_name name NOT NULL, + replicate_insert boolean NOT NULL DEFAULT true, + replicate_update boolean NOT NULL DEFAULT true, + replicate_delete boolean NOT NULL DEFAULT true, + replicate_truncate boolean NOT NULL DEFAULT true, + UNIQUE (set_nodeid, set_name) +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.replication_set_table ( + set_id oid NOT NULL, + set_reloid regclass NOT NULL, + set_att_list text[], + set_row_filter pg_node_tree, + set_sync_clear_filter text, + PRIMARY KEY(set_id, set_reloid) +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.replication_set_seq ( + set_id oid NOT NULL, + set_seqoid regclass NOT NULL, + PRIMARY KEY(set_id, set_seqoid) +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.sequence_state ( + seqoid oid NOT NULL PRIMARY KEY, + cache_size integer NOT NULL, + last_value bigint NOT NULL +) WITH (user_catalog_table=true); + +CREATE TABLE pglogical.depend ( + classid oid NOT NULL, + objid oid NOT NULL, + objsubid integer NOT NULL, + + refclassid oid NOT NULL, + refobjid oid NOT NULL, + refobjsubid integer NOT NULL, + + deptype "char" NOT NULL +) WITH (user_catalog_table=true); + +CREATE VIEW pglogical.TABLES AS +WITH set_relations AS ( + SELECT s.set_name, r.set_reloid + FROM pglogical.replication_set_table r, + pglogical.replication_set s, + pglogical.local_node n + WHERE s.set_nodeid = n.node_id + AND s.set_id = r.set_id +), + user_tables AS ( + SELECT r.oid, n.nspname, r.relname, r.relreplident + FROM pg_catalog.pg_class r, + pg_catalog.pg_namespace n + WHERE r.relkind = 'r' + AND r.relpersistence = 'p' + AND n.oid = r.relnamespace + AND n.nspname !~ '^pg_' + AND n.nspname != 'information_schema' + AND n.nspname != 'pglogical' + ) +SELECT r.oid AS relid, n.nspname, r.relname, s.set_name +FROM pg_catalog.pg_namespace n, + pg_catalog.pg_class r, + set_relations s +WHERE r.relkind = 'r' + AND n.oid = r.relnamespace + AND r.oid = s.set_reloid +UNION +SELECT t.oid AS relid, t.nspname, t.relname, NULL +FROM user_tables t +WHERE t.oid NOT IN (SELECT set_reloid FROM set_relations); + +CREATE FUNCTION pglogical.create_replication_set(set_name name, + replicate_insert boolean = true, replicate_update boolean = true, + replicate_delete boolean = true, replicate_truncate boolean = 
true) +RETURNS oid STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_create_replication_set'; +CREATE FUNCTION pglogical.alter_replication_set(set_name name, + replicate_insert boolean DEFAULT NULL, replicate_update boolean DEFAULT NULL, + replicate_delete boolean DEFAULT NULL, replicate_truncate boolean DEFAULT NULL) +RETURNS oid CALLED ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_replication_set'; +CREATE FUNCTION pglogical.drop_replication_set(set_name name, ifexists boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_drop_replication_set'; + +CREATE FUNCTION pglogical.replication_set_add_table(set_name name, relation regclass, synchronize_data boolean DEFAULT false, + columns text[] DEFAULT NULL, row_filter text DEFAULT NULL, sync_clear_filter text DEFAULT NULL) +RETURNS boolean CALLED ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_table'; +CREATE FUNCTION pglogical.replication_set_add_all_tables(set_name name, schema_names text[], synchronize_data boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_all_tables'; +CREATE FUNCTION pglogical.replication_set_remove_table(set_name name, relation regclass) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_remove_table'; + +CREATE FUNCTION pglogical.replication_set_add_sequence(set_name name, relation regclass, synchronize_data boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_sequence'; +CREATE FUNCTION pglogical.replication_set_add_all_sequences(set_name name, schema_names text[], synchronize_data boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_add_all_sequences'; +CREATE FUNCTION pglogical.replication_set_remove_sequence(set_name name, relation regclass) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replication_set_remove_sequence'; + +CREATE FUNCTION pglogical.alter_subscription_synchronize(subscription_name name, truncate boolean DEFAULT false) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_synchronize'; + +CREATE FUNCTION pglogical.alter_subscription_resynchronize_table(subscription_name name, relation regclass, + truncate boolean DEFAULT true) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_alter_subscription_resynchronize_table'; + +CREATE FUNCTION pglogical.synchronize_sequence(relation regclass) +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_synchronize_sequence'; + +CREATE FUNCTION pglogical.table_data_filtered(reltyp anyelement, relation regclass, repsets text[]) +RETURNS SETOF anyelement CALLED ON NULL INPUT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_table_data_filtered'; + +CREATE FUNCTION pglogical.show_repset_table_info(relation regclass, repsets text[], OUT relid oid, OUT nspname text, + OUT relname text, OUT att_list text[], OUT has_row_filter boolean, OUT sync_clear_filter text) +RETURNS record STRICT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_show_repset_table_info'; + +CREATE FUNCTION pglogical.show_subscription_table(subscription_name name, relation regclass, OUT nspname text, OUT relname text, OUT status text) +RETURNS record STRICT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 
'pglogical_show_subscription_table'; + +CREATE TABLE pglogical.queue ( + node_id oid REFERENCES node(node_id), + original_node_id oid REFERENCES node(node_id), + queued_at timestamp with time zone NOT NULL, + role name NOT NULL, + replication_sets text[], + message_type "char" NOT NULL, + message json NOT NULL +); + +CREATE FUNCTION pglogical.replicate_ddl_command(command text, replication_sets text[] DEFAULT '{ddl_sql}') +RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_replicate_ddl_command'; + +CREATE OR REPLACE FUNCTION pglogical.queue_truncate() +RETURNS trigger LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_queue_truncate'; + +CREATE FUNCTION pglogical.pglogical_node_info(OUT node_id oid, OUT node_name text, OUT sysid text, OUT dbname text, OUT replication_sets text) +RETURNS record +STABLE STRICT LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical.pglogical_gen_slot_name(name, name, name) +RETURNS name +IMMUTABLE STRICT LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_version() RETURNS text +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_version_num() RETURNS integer +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_max_proto_version() RETURNS integer +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION pglogical_min_proto_version() RETURNS integer +LANGUAGE c AS 'MODULE_PATHNAME'; + +CREATE FUNCTION +pglogical.wait_slot_confirm_lsn(slotname name, target pg_lsn) +RETURNS void LANGUAGE c AS 'pglogical','pglogical_wait_slot_confirm_lsn'; +CREATE FUNCTION pglogical.wait_for_subscription_sync_complete(subscription_name name) +RETURNS boolean RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_wait_for_subscription_sync_complete'; + +CREATE FUNCTION pglogical.wait_for_table_sync_complete(subscription_name name, relation regclass) +RETURNS boolean RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_wait_for_table_sync_complete'; + +CREATE FUNCTION pglogical.xact_commit_timestamp_origin("xid" xid, OUT "timestamp" timestamptz, OUT "roident" oid) +RETURNS record RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_xact_commit_timestamp_origin'; diff --git a/pglogical--2.3.4.sql b/pglogical--2.3.4.sql index 50fa578..f0b60c3 100644 --- a/pglogical--2.3.4.sql +++ b/pglogical--2.3.4.sql @@ -203,7 +203,6 @@ CREATE FUNCTION pglogical.show_subscription_table(subscription_name name, relati RETURNS record STRICT STABLE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_show_subscription_table'; CREATE TABLE pglogical.queue ( - node_id oid REFERENCES node(node_id), queued_at timestamp with time zone NOT NULL, role name NOT NULL, replication_sets text[], @@ -248,3 +247,4 @@ RETURNS void RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME' CREATE FUNCTION pglogical.xact_commit_timestamp_origin("xid" xid, OUT "timestamp" timestamptz, OUT "roident" oid) RETURNS record RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_xact_commit_timestamp_origin'; + diff --git a/pglogical.h b/pglogical.h index 75180c1..a5a53cc 100644 --- a/pglogical.h +++ b/pglogical.h @@ -26,8 +26,8 @@ #include "pglogical_compat.h" -#define PGLOGICAL_VERSION "2.3.4" -#define PGLOGICAL_VERSION_NUM 20304 +#define PGLOGICAL_VERSION "2.3.4-1" +#define PGLOGICAL_VERSION_NUM 2030401 #define PGLOGICAL_MIN_PROTO_VERSION_NUM 1 #define PGLOGICAL_MAX_PROTO_VERSION_NUM 1 diff --git a/pglogical_functions.c b/pglogical_functions.c index 68fbd1c..fda0741 
100644 --- a/pglogical_functions.c +++ b/pglogical_functions.c @@ -148,8 +148,8 @@ PG_FUNCTION_INFO_V1(pglogical_xact_commit_timestamp_origin); PG_FUNCTION_INFO_V1(pglogical_show_repset_table_info_by_target); static void gen_slot_name(Name slot_name, char *dbname, - const char *provider_name, - const char *subscriber_name); + const char *provider_name, + const char *subscriber_name); bool in_pglogical_replicate_ddl_command = false; @@ -161,9 +161,9 @@ check_local_node(bool for_update) node = get_local_node(for_update, true); if (!node) ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("current database is not configured as pglogical node"), - errhint("create pglogical node first"))); + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("current database is not configured as pglogical node"), + errhint("create pglogical node first"))); return node; } @@ -247,9 +247,9 @@ pglogical_drop_node(PG_FUNCTION_ARGS) tsubs = get_node_subscriptions(node->id, false); if (list_length(osubs) != 0 || list_length(tsubs) != 0) ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot drop node \"%s\" because it still has subscriptions associated with it", node_name), - errhint("drop the subscriptions first"))); + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot drop node \"%s\" because it still has subscriptions associated with it", node_name), + errhint("drop the subscriptions first"))); /* If the node is local node, drop the record as well. */ local_node = get_local_node(true, true); @@ -267,19 +267,19 @@ pglogical_drop_node(PG_FUNCTION_ARGS) PG_TRY(); { res = SPI_execute("SELECT pg_catalog.pg_drop_replication_slot(slot_name)" - " FROM pg_catalog.pg_replication_slots" - " WHERE (plugin = 'pglogical_output' OR plugin = 'pglogical')" - " AND database = current_database()" - " AND slot_name ~ 'pgl_.*'", - false, 0); + " FROM pg_catalog.pg_replication_slots" + " WHERE (plugin = 'pglogical_output' OR plugin = 'pglogical')" + " AND database = current_database()" + " AND slot_name ~ 'pgl_.*'", + false, 0); } PG_CATCH(); { ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot drop node \"%s\" because one or more replication slots for the node are still active", - node_name), - errhint("drop the subscriptions connected to the node first"))); + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot drop node \"%s\" because one or more replication slots for the node are still active", + node_name), + errhint("drop the subscriptions connected to the node first"))); } PG_END_TRY(); @@ -316,20 +316,20 @@ pglogical_alter_node_add_interface(PG_FUNCTION_ARGS) char *if_dsn = text_to_cstring(PG_GETARG_TEXT_PP(2)); PGLogicalNode *node; PGlogicalInterface *oldif, - newif; + newif; node = get_node_by_name(node_name, false); if (node == NULL) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("node \"%s\" not found", node_name))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("node \"%s\" not found", node_name))); oldif = get_node_interface_by_name(node->id, if_name, true); if (oldif != NULL) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("node \"%s\" already has interface named \"%s\"", - node_name, if_name))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("node \"%s\" already has interface named \"%s\"", + node_name, if_name))); newif.id = InvalidOid; newif.name = if_name; @@ -356,15 +356,15 @@ 
pglogical_alter_node_drop_interface(PG_FUNCTION_ARGS) node = get_node_by_name(node_name, false); if (node == NULL) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("node \"%s\" not found", node_name))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("node \"%s\" not found", node_name))); oldif = get_node_interface_by_name(node->id, if_name, true); if (oldif == NULL) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("interface \"%s\" for node node \"%s\" not found", - if_name, node_name))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("interface \"%s\" for node node \"%s\" not found", + if_name, node_name))); other_subs = get_node_subscriptions(node->id, true); foreach (lc, other_subs) @@ -372,11 +372,11 @@ pglogical_alter_node_drop_interface(PG_FUNCTION_ARGS) PGLogicalSubscription *sub = (PGLogicalSubscription *) lfirst(lc); if (oldif->id == sub->origin_if->id) ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot drop interface \"%s\" for node \"%s\" because subscription \"%s\" is using it", - oldif->name, node->name, sub->name), - errhint("change the subscription interface first"))); - } + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot drop interface \"%s\" for node \"%s\" because subscription \"%s\" is using it", + oldif->name, node->name, sub->name), + errhint("change the subscription interface first"))); + } drop_node_interface(oldif->id); @@ -395,9 +395,11 @@ pglogical_create_subscription(PG_FUNCTION_ARGS) ArrayType *rep_set_names = PG_GETARG_ARRAYTYPE_P(2); bool sync_structure = PG_GETARG_BOOL(3); bool sync_data = PG_GETARG_BOOL(4); - ArrayType *forward_origin_names = PG_GETARG_ARRAYTYPE_P(5); - Interval *apply_delay = PG_GETARG_INTERVAL_P(6); - bool force_text_transfer = PG_GETARG_BOOL(7); + bool replace_data = PG_GETARG_BOOL(5); + ArrayType *after_sync_queries = PG_GETARG_ARRAYTYPE_P(6); + ArrayType *forward_origin_names = PG_GETARG_ARRAYTYPE_P(7); + Interval *apply_delay = PG_GETARG_INTERVAL_P(8); + bool force_text_transfer = PG_GETARG_BOOL(9); PGconn *conn; PGLogicalSubscription sub; PGLogicalSyncStatus sync; @@ -453,9 +455,9 @@ pglogical_create_subscription(PG_FUNCTION_ARGS) existingif = get_node_interface_by_name(origin.id, origin.name, false); if (strcmp(existingif->dsn, provider_dsn) != 0) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("dsn \"%s\" points to existing node \"%s\" with different dsn \"%s\"", - provider_dsn, origin.name, existingif->dsn))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dsn \"%s\" points to existing node \"%s\" with different dsn \"%s\"", + provider_dsn, origin.name, existingif->dsn))); memcpy(&originif, existingif, sizeof(PGlogicalInterface)); } @@ -484,11 +486,11 @@ pglogical_create_subscription(PG_FUNCTION_ARGS) if (strcmp(newset, existingset) == 0) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("existing subscription \"%s\" to node " - "\"%s\" already subscribes to replication " - "set \"%s\"", esub->name, origin.name, - newset))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("existing subscription \"%s\" to node " + "\"%s\" already subscribes to replication " + "set \"%s\"", esub->name, origin.name, + newset))); } } } @@ -508,8 +510,10 @@ pglogical_create_subscription(PG_FUNCTION_ARGS) sub.replication_sets = replication_sets; sub.forward_origins = textarray_to_list(forward_origin_names); sub.enabled = true; + sub.data_replace = replace_data; + sub.after_sync_queries = 
textarray_to_list(after_sync_queries); gen_slot_name(&slot_name, get_database_name(MyDatabaseId), - origin.name, sub_name); + origin.name, sub_name); sub.slot_name = pstrdup(NameStr(slot_name)); sub.apply_delay = apply_delay; sub.force_text_transfer = force_text_transfer; @@ -590,6 +594,7 @@ pglogical_drop_subscription(PG_FUNCTION_ARGS) LWLockRelease(PGLogicalCtx->lock); /* Wait for the apply to die. */ + int total_wait = 0; for (;;) { int rc; @@ -606,15 +611,15 @@ pglogical_drop_subscription(PG_FUNCTION_ARGS) { const LOCKTAG apply_lock_tag = apply->proc->waitLock->tag; if (((LockTagType) apply_lock_tag.locktag_type == LOCKTAG_TRANSACTION) && - (apply_lock_tag.locktag_field1 == (uint32)GetCurrentTransactionId())) + (apply_lock_tag.locktag_field1 == (uint32)GetCurrentTransactionId())) { StringInfoData buf; initStringInfo(&buf); DescribeLockTag(&buf, &apply_lock_tag); - elog( WARNING, "Apply worker [%d] is locked by %s of [%d], try to kill it", apply->proc->pid, buf.data, MyProc->pid ); + elog( WARNING, "Apply worker [%d] is locked by %s of this proc [%d], stop waiting", apply->proc->pid, buf.data, MyProc->pid ); - /* cancel transaction */ - kill(apply->proc->pid, SIGINT); + LWLockRelease(PGLogicalCtx->lock); + break; } } @@ -623,12 +628,19 @@ pglogical_drop_subscription(PG_FUNCTION_ARGS) CHECK_FOR_INTERRUPTS(); rc = WaitLatch(&MyProc->procLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 1000L); + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 1000L); if (rc & WL_POSTMASTER_DEATH) proc_exit(1); ResetLatch(&MyProc->procLatch); + + total_wait+=1000; + if (total_wait >= 60000) + { + elog( WARNING, "Apply worker [%d] probably is locked by this proc [%d], stop waiting", apply->proc->pid, MyProc->pid ); + break; + } } /* @@ -640,13 +652,14 @@ pglogical_drop_subscription(PG_FUNCTION_ARGS) PG_TRY(); { PGconn *origin_conn = pglogical_connect(sub->origin_if->dsn, - sub->name, "cleanup"); + sub->name, "cleanup"); pglogical_drop_remote_slot(origin_conn, sub->slot_name); PQfinish(origin_conn); } PG_CATCH(); - elog(WARNING, "could not drop slot \"%s\" on provider, you will probably have to drop it manually", - sub->slot_name); + FlushErrorState(); + elog(WARNING, "could not drop slot \"%s\" on provider, you will probably have to drop it manually", + sub->slot_name); PG_END_TRY(); /* Drop the origin tracking locally. 
*/ @@ -681,9 +694,9 @@ pglogical_alter_subscription_disable(PG_FUNCTION_ARGS) if ((IsTransactionBlock() || IsSubTransaction())) ereport(ERROR, - (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), - errmsg("alter_subscription_disable with immediate = true " - "cannot be run inside a transaction block"))); + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("alter_subscription_disable with immediate = true " + "cannot be run inside a transaction block"))); LWLockAcquire(PGLogicalCtx->lock, LW_EXCLUSIVE); apply = pglogical_apply_find(MyDatabaseId, sub->id); @@ -718,9 +731,9 @@ pglogical_alter_subscription_enable(PG_FUNCTION_ARGS) if (immediate && (IsTransactionBlock() || IsSubTransaction())) { ereport(ERROR, - (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), - errmsg("alter_subscription_enable with immediate = true " - "cannot be run inside a transaction block"))); + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("alter_subscription_enable with immediate = true " + "cannot be run inside a transaction block"))); } PG_RETURN_BOOL(true); @@ -811,7 +824,7 @@ pglogical_alter_subscription_remove_replication_set(PG_FUNCTION_ARGS) lc); #else sub->replication_sets = list_delete_cell(sub->replication_sets, - lc, prev); + lc, prev); #endif alter_subscription(sub); @@ -872,7 +885,7 @@ pglogical_alter_subscription_synchronize(PG_FUNCTION_ARGS) #endif if (namestrcmp(&tablesync->nspname, remoterel->nspname) == 0 && - namestrcmp(&tablesync->relname, remoterel->relname) == 0) + namestrcmp(&tablesync->relname, remoterel->relname) == 0) { oldsync = tablesync; #if PG_VERSION_NUM >= 130000 @@ -916,8 +929,8 @@ pglogical_alter_subscription_synchronize(PG_FUNCTION_ARGS) PGLogicalSyncStatus *tablesync = (PGLogicalSyncStatus *) lfirst(lc); drop_table_sync_status_for_sub(tablesync->subid, - NameStr(tablesync->nspname), - NameStr(tablesync->relname)); + NameStr(tablesync->nspname), + NameStr(tablesync->relname)); } /* Tell apply to re-read sync statuses. 
*/ @@ -939,7 +952,7 @@ pglogical_alter_subscription_resynchronize_table(PG_FUNCTION_ARGS) PGLogicalSyncStatus *oldsync; Relation rel; char *nspname, - *relname; + *relname; rel = table_open(reloid, AccessShareLock); @@ -951,13 +964,13 @@ pglogical_alter_subscription_resynchronize_table(PG_FUNCTION_ARGS) if (oldsync) { if (oldsync->status != SYNC_STATUS_READY && - oldsync->status != SYNC_STATUS_SYNCDONE && - oldsync->status != SYNC_STATUS_NONE) + oldsync->status != SYNC_STATUS_SYNCDONE && + oldsync->status != SYNC_STATUS_NONE) elog(ERROR, "table %s.%s is already being synchronized", - nspname, relname); + nspname, relname); set_table_sync_status(sub->id, nspname, relname, SYNC_STATUS_INIT, - InvalidXLogRecPtr); + InvalidXLogRecPtr); } else { @@ -1048,12 +1061,12 @@ pglogical_show_subscription_table(PG_FUNCTION_ARGS) /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("set-valued function called in context that cannot accept a set"))); + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("materialize mode required, but it is not " \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ "allowed in this context"))); /* Switch into long-lived context to construct returned data structures */ @@ -1111,12 +1124,12 @@ pglogical_show_subscription_status(PG_FUNCTION_ARGS) /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("set-valued function called in context that cannot accept a set"))); + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("materialize mode required, but it is not " \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ "allowed in this context"))); node = check_local_node(false); @@ -1185,12 +1198,12 @@ pglogical_show_subscription_status(PG_FUNCTION_ARGS) values[4] = CStringGetTextDatum(sub->slot_name); if (sub->replication_sets) values[5] = - PointerGetDatum(strlist_to_textarray(sub->replication_sets)); + PointerGetDatum(strlist_to_textarray(sub->replication_sets)); else nulls[5] = true; if (sub->forward_origins) values[6] = - PointerGetDatum(strlist_to_textarray(sub->forward_origins)); + PointerGetDatum(strlist_to_textarray(sub->forward_origins)); else nulls[6] = true; @@ -1239,13 +1252,13 @@ pglogical_alter_replication_set(PG_FUNCTION_ARGS) if (PG_ARGISNULL(0)) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("set_name cannot be NULL"))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("set_name cannot be NULL"))); node = check_local_node(true); repset = get_replication_set_by_name(node->node->id, - NameStr(*PG_GETARG_NAME(0)), false); + NameStr(*PG_GETARG_NAME(0)), false); if (!PG_ARGISNULL(1)) repset->replicate_insert = PG_GETARG_BOOL(1); @@ -1327,7 +1340,7 @@ parse_row_filter(Relation rel, char *row_filter_str) */ initStringInfo(&buf); appendStringInfo(&buf, "SELECT %s FROM %s", row_filter_str, - quote_qualified_identifier(nspname, relname)); + 
quote_qualified_identifier(nspname, relname)); /* Parse it, providing proper error context. */ myerrcontext.callback = add_table_parser_error_callback; @@ -1348,29 +1361,29 @@ parse_row_filter(Relation rel, char *row_filter_str) stmt = (SelectStmt *) linitial(raw_parsetree_list); #endif if (stmt == NULL || - !IsA(stmt, SelectStmt) || - stmt->distinctClause != NIL || - stmt->intoClause != NULL || - stmt->whereClause != NULL || - stmt->groupClause != NIL || - stmt->havingClause != NULL || - stmt->windowClause != NIL || - stmt->valuesLists != NIL || - stmt->sortClause != NIL || - stmt->limitOffset != NULL || - stmt->limitCount != NULL || - stmt->lockingClause != NIL || - stmt->withClause != NULL || - stmt->op != SETOP_NONE) + !IsA(stmt, SelectStmt) || + stmt->distinctClause != NIL || + stmt->intoClause != NULL || + stmt->whereClause != NULL || + stmt->groupClause != NIL || + stmt->havingClause != NULL || + stmt->windowClause != NIL || + stmt->valuesLists != NIL || + stmt->sortClause != NIL || + stmt->limitOffset != NULL || + stmt->limitCount != NULL || + stmt->lockingClause != NIL || + stmt->withClause != NULL || + stmt->op != SETOP_NONE) goto fail; if (list_length(stmt->targetList) != 1) goto fail; restarget = (ResTarget *) linitial(stmt->targetList); if (restarget == NULL || - !IsA(restarget, ResTarget) || - restarget->name != NULL || - restarget->indirection != NIL || - restarget->val == NULL) + !IsA(restarget, ResTarget) || + restarget->name != NULL || + restarget->indirection != NIL || + restarget->val == NULL) goto fail; row_filter = restarget->val; @@ -1390,13 +1403,13 @@ parse_row_filter(Relation rel, char *row_filter_str) addNSItemToQuery(pstate, nsitem, true, true, true); #else rte = addRangeTableEntryForRelation(pstate, - rel, + rel, #if PG_VERSION_NUM >= 120000 - AccessShareLock, + AccessShareLock, #endif - NULL, - false, - true); + NULL, + false, + true); addRTEtoQuery(pstate, rte, true, true, true); #endif /* @@ -1412,17 +1425,17 @@ parse_row_filter(Relation rel, char *row_filter_str) assign_expr_collations(pstate, row_filter); if (list_length(pstate->p_rtable) != 1) ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("only table \"%s\" can be referenced in row_filter", - relname))); + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("only table \"%s\" can be referenced in row_filter", + relname))); pfree(buf.data); return row_filter; -fail: + fail: ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("invalid row_filter expression \"%s\"", row_filter_str))); + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid row_filter expression \"%s\"", row_filter_str))); return NULL; /* keep compiler quiet */ } @@ -1437,6 +1450,7 @@ pglogical_replication_set_add_table(PG_FUNCTION_ARGS) bool synchronize; Node *row_filter = NULL; List *att_list = NIL; + char *initial_sync_clear_filter = NULL; PGLogicalRepSet *repset; Relation rel; TupleDesc tupDesc; @@ -1448,16 +1462,16 @@ pglogical_replication_set_add_table(PG_FUNCTION_ARGS) /* Proccess for required parameters. 
*/ if (PG_ARGISNULL(0)) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("set_name cannot be NULL"))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("set_name cannot be NULL"))); if (PG_ARGISNULL(1)) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("relation cannot be NULL"))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("relation cannot be NULL"))); if (PG_ARGISNULL(2)) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("synchronize_data cannot be NULL"))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("synchronize_data cannot be NULL"))); repset_name = PG_GETARG_NAME(0); reloid = PG_GETARG_OID(1); @@ -1468,7 +1482,7 @@ pglogical_replication_set_add_table(PG_FUNCTION_ARGS) /* Find the replication set. */ repset = get_replication_set_by_name(node->node->id, - NameStr(*repset_name), false); + NameStr(*repset_name), false); /* * Make sure the relation exists (lock mode has to be the same one as @@ -1498,10 +1512,10 @@ pglogical_replication_set_add_table(PG_FUNCTION_ARGS) if (attnum < 0) ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("table %s does not have column %s", - quote_qualified_identifier(nspname, relname), - attname))); + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("table %s does not have column %s", + quote_qualified_identifier(nspname, relname), + attname))); idattrs = bms_del_member(idattrs, attnum - FirstLowInvalidHeapAttributeNumber); @@ -1509,18 +1523,24 @@ pglogical_replication_set_add_table(PG_FUNCTION_ARGS) if (!bms_is_empty(idattrs)) ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("REPLICA IDENTITY columns must be replicated"))); + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("REPLICA IDENTITY columns must be replicated"))); } /* Proccess row_filter if any. */ if (!PG_ARGISNULL(4)) { row_filter = parse_row_filter(rel, - text_to_cstring(PG_GETARG_TEXT_PP(4))); + text_to_cstring(PG_GETARG_TEXT_PP(4))); } - replication_set_add_table(repset->id, reloid, att_list, row_filter); + /* Proccess initial sync clear filter. */ + if (!PG_ARGISNULL(5)) + { + initial_sync_clear_filter = text_to_cstring(PG_GETARG_TEXT_PP(5)); + } + + replication_set_add_table(repset->id, reloid, att_list, row_filter, initial_sync_clear_filter); if (synchronize) { @@ -1533,7 +1553,7 @@ pglogical_replication_set_add_table(PG_FUNCTION_ARGS) appendStringInfo(&json, "}"); /* Queue the synchronize request for replication. */ queue_message(list_make1(repset->name), GetUserId(), - QUEUE_COMMAND_TYPE_TABLESYNC, json.data); + QUEUE_COMMAND_TYPE_TABLESYNC, json.data); } /* Cleanup. */ @@ -1562,7 +1582,7 @@ pglogical_replication_set_add_sequence(PG_FUNCTION_ARGS) /* Find the replication set. */ repset = get_replication_set_by_name(node->node->id, - NameStr(*repset_name), false); + NameStr(*repset_name), false); /* * Make sure the relation exists (lock mode has to be the same one as @@ -1583,13 +1603,13 @@ pglogical_replication_set_add_sequence(PG_FUNCTION_ARGS) escape_json(&json, nspname); appendStringInfo(&json, ",\"sequence_name\": "); escape_json(&json, relname); - appendStringInfo(&json, ",\"last_value\": \""INT64_FORMAT"\"", - sequence_get_last_value(reloid)); + appendStringInfo(&json, ",\"last_value\": \""INT64_FORMAT"\"", + sequence_get_last_value(reloid)); appendStringInfo(&json, "}"); /* Add sequence to the queue. */ queue_message(list_make1(repset->name), GetUserId(), - QUEUE_COMMAND_TYPE_SEQUENCE, json.data); + QUEUE_COMMAND_TYPE_SEQUENCE, json.data); } /* Cleanup. 
*/ @@ -1603,8 +1623,8 @@ pglogical_replication_set_add_sequence(PG_FUNCTION_ARGS) */ static Datum pglogical_replication_set_add_all_relations(Name repset_name, - ArrayType *nsp_names, - bool synchronize, char relkind) + ArrayType *nsp_names, + bool synchronize, char relkind) { PGLogicalRepSet *repset; Relation rel; @@ -1616,11 +1636,11 @@ pglogical_replication_set_add_all_relations(Name repset_name, /* Find the replication set. */ repset = get_replication_set_by_name(node->node->id, - NameStr(*repset_name), false); + NameStr(*repset_name), false); existing_relations = replication_set_get_tables(repset->id); existing_relations = list_concat_unique_oid(existing_relations, - replication_set_get_seqs(repset->id)); + replication_set_get_seqs(repset->id)); rel = table_open(RelationRelationId, RowExclusiveLock); @@ -1633,12 +1653,12 @@ pglogical_replication_set_add_all_relations(Name repset_name, HeapTuple tuple; ScanKeyInit(&skey[0], - Anum_pg_class_relnamespace, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(nspoid)); + Anum_pg_class_relnamespace, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(nspoid)); sysscan = systable_beginscan(rel, ClassNameNspIndexId, true, - NULL, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(sysscan))) { @@ -1654,14 +1674,14 @@ pglogical_replication_set_add_all_relations(Name repset_name, * (catalog, toast). */ if (reltup->relkind != relkind || - reltup->relpersistence != RELPERSISTENCE_PERMANENT || - IsSystemClass(reloid, reltup)) + reltup->relpersistence != RELPERSISTENCE_PERMANENT || + IsSystemClass(reloid, reltup)) continue; if (!list_member_oid(existing_relations, reloid)) { if (relkind == RELKIND_RELATION) - replication_set_add_table(repset->id, reloid, NIL, NULL); + replication_set_add_table(repset->id, reloid, NIL, NULL, NULL); else replication_set_add_seq(repset->id, reloid); @@ -1688,7 +1708,7 @@ pglogical_replication_set_add_all_relations(Name repset_name, appendStringInfo(&json, ",\"sequence_name\": "); escape_json(&json, relname); appendStringInfo(&json, ",\"last_value\": \""INT64_FORMAT"\"", - sequence_get_last_value(reloid)); + sequence_get_last_value(reloid)); cmdtype = QUEUE_COMMAND_TYPE_SEQUENCE; break; default: @@ -1698,7 +1718,7 @@ pglogical_replication_set_add_all_relations(Name repset_name, /* Queue the truncate for replication. */ queue_message(list_make1(repset->name), GetUserId(), cmdtype, - json.data); + json.data); } } } @@ -1722,8 +1742,8 @@ pglogical_replication_set_add_all_tables(PG_FUNCTION_ARGS) bool synchronize = PG_GETARG_BOOL(2); return pglogical_replication_set_add_all_relations(repset_name, nsp_names, - synchronize, - RELKIND_RELATION); + synchronize, + RELKIND_RELATION); } /* @@ -1737,8 +1757,8 @@ pglogical_replication_set_add_all_sequences(PG_FUNCTION_ARGS) bool synchronize = PG_GETARG_BOOL(2); return pglogical_replication_set_add_all_relations(repset_name, nsp_names, - synchronize, - RELKIND_SEQUENCE); + synchronize, + RELKIND_SEQUENCE); } /* @@ -1758,7 +1778,7 @@ pglogical_replication_set_remove_table(PG_FUNCTION_ARGS) /* Find the replication set. */ repset = get_replication_set_by_name(node->node->id, - NameStr(*PG_GETARG_NAME(0)), false); + NameStr(*PG_GETARG_NAME(0)), false); replication_set_remove_table(repset->id, reloid, false); @@ -1779,7 +1799,7 @@ pglogical_replication_set_remove_sequence(PG_FUNCTION_ARGS) /* Find the replication set. 
*/ repset = get_replication_set_by_name(node->node->id, - NameStr(*PG_GETARG_NAME(0)), false); + NameStr(*PG_GETARG_NAME(0)), false); replication_set_remove_seq(repset->id, seqoid, false); @@ -1827,12 +1847,12 @@ pglogical_replicate_ddl_command(PG_FUNCTION_ARGS) /* Force everything in the query to be fully qualified. */ (void) set_config_option("search_path", "", - PGC_USERSET, PGC_S_SESSION, - GUC_ACTION_SAVE, true, 0 + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0 #if PG_VERSION_NUM >= 90500 - , false + , false #endif - ); + ); /* Convert the query to json string. */ initStringInfo(&cmd); @@ -1840,7 +1860,7 @@ pglogical_replicate_ddl_command(PG_FUNCTION_ARGS) /* Queue the query for replication. */ queue_message(replication_sets, GetUserId(), - QUEUE_COMMAND_TYPE_SQL, cmd.data); + QUEUE_COMMAND_TYPE_SQL, cmd.data); /* * Execute the query locally. @@ -1850,11 +1870,11 @@ pglogical_replicate_ddl_command(PG_FUNCTION_ARGS) PG_TRY(); { pglogical_execute_sql_command(query, GetUserNameFromId(GetUserId() - #if PG_VERSION_NUM >= 90500 - , false - #endif - ), - false); +#if PG_VERSION_NUM >= 90500 + , false +#endif + ), + false); } PG_CATCH(); { @@ -1896,16 +1916,16 @@ pglogical_queue_truncate(PG_FUNCTION_ARGS) /* Make sure this is being called as an AFTER TRUNCTATE trigger. */ if (!CALLED_AS_TRIGGER(fcinfo)) ereport(ERROR, - (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), - errmsg("function \"%s\" was not called by trigger manager", - funcname))); + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" was not called by trigger manager", + funcname))); if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) || - !TRIGGER_FIRED_BY_TRUNCATE(trigdata->tg_event)) + !TRIGGER_FIRED_BY_TRUNCATE(trigdata->tg_event)) ereport(ERROR, - (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), - errmsg("function \"%s\" must be fired AFTER TRUNCATE", - funcname))); + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired AFTER TRUNCATE", + funcname))); /* If this is not pglogical node, don't do anything. */ local_node = get_local_node(false, true); @@ -1915,7 +1935,7 @@ pglogical_queue_truncate(PG_FUNCTION_ARGS) /* Make sure the list change survives the trigger call. 
*/ oldcontext = MemoryContextSwitchTo(TopTransactionContext); pglogical_truncated_tables = lappend_oid(pglogical_truncated_tables, - RelationGetRelid(trigdata->tg_relation)); + RelationGetRelid(trigdata->tg_relation)); MemoryContextSwitchTo(oldcontext); PG_RETURN_VOID(); @@ -1951,7 +1971,7 @@ pglogical_node_info(PG_FUNCTION_ARGS) node = get_local_node(false, false); snprintf(sysid, sizeof(sysid), UINT64_FORMAT, - GetSystemIdentifier()); + GetSystemIdentifier()); repsets = get_node_replication_sets(node->node->id); memset(nulls, 0, sizeof(nulls)); @@ -1977,15 +1997,15 @@ Datum pglogical_show_repset_table_info(PG_FUNCTION_ARGS) { Oid reloid = PG_GETARG_OID(0); - ArrayType *rep_set_names = PG_GETARG_ARRAYTYPE_P(1); + ArrayType *rep_set_names = PG_GETARG_ARRAYTYPE_P(1); Relation rel; List *replication_sets; TupleDesc reldesc; TupleDesc rettupdesc; int i; List *att_list = NIL; - Datum values[5]; - bool nulls[5]; + Datum values[6]; + bool nulls[6]; char *nspname; char *relname; HeapTuple htup; @@ -2003,15 +2023,15 @@ pglogical_show_repset_table_info(PG_FUNCTION_ARGS) reldesc = RelationGetDescr(rel); replication_sets = textarray_to_list(rep_set_names); replication_sets = get_replication_sets(node->node->id, - replication_sets, - false); + replication_sets, + false); nspname = get_namespace_name(RelationGetNamespace(rel)); relname = RelationGetRelationName(rel); /* Build the replication info for the table. */ tableinfo = get_table_replication_info(node->node->id, rel, - replication_sets); + replication_sets); /* Build the column list. */ for (i = 0; i < reldesc->natts; i++) @@ -2024,8 +2044,8 @@ pglogical_show_repset_table_info(PG_FUNCTION_ARGS) /* Skip filtered columns if any. */ if (tableinfo->att_list && - !bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber, - tableinfo->att_list)) + !bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber, + tableinfo->att_list)) continue; att_list = lappend(att_list, NameStr(att->attname)); @@ -2038,6 +2058,14 @@ pglogical_show_repset_table_info(PG_FUNCTION_ARGS) values[2] = CStringGetTextDatum(relname); values[3] = PointerGetDatum(strlist_to_textarray(att_list)); values[4] = BoolGetDatum(list_length(tableinfo->row_filter) > 0); + if ( tableinfo->sync_clear_filter != NULL ) + { + values[5] = CStringGetTextDatum( tableinfo->sync_clear_filter ); + } + else + { + nulls[5] = true; + } htup = heap_form_tuple(rettupdesc, values, nulls); @@ -2098,7 +2126,7 @@ pglogical_table_data_filtered(PG_FUNCTION_ARGS) { Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); Oid reloid; - ArrayType *rep_set_names; + ArrayType *rep_set_names; ReturnSetInfo *rsi; Relation rel; List *replication_sets; @@ -2121,31 +2149,31 @@ pglogical_table_data_filtered(PG_FUNCTION_ARGS) /* Validate parameter. 
*/ if (PG_ARGISNULL(1)) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("relation cannot be NULL"))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("relation cannot be NULL"))); if (PG_ARGISNULL(2)) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("repsets cannot be NULL"))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("repsets cannot be NULL"))); reloid = PG_GETARG_OID(1); rep_set_names = PG_GETARG_ARRAYTYPE_P(2); if (!type_is_rowtype(argtype)) ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("first argument of %s must be a row type", - "pglogical_table_data_filtered"))); + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("first argument of %s must be a row type", + "pglogical_table_data_filtered"))); rsi = (ReturnSetInfo *) fcinfo->resultinfo; if (!rsi || !IsA(rsi, ReturnSetInfo) || - (rsi->allowedModes & SFRM_Materialize) == 0 || - rsi->expectedDesc == NULL) + (rsi->allowedModes & SFRM_Materialize) == 0 || + rsi->expectedDesc == NULL) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("set-valued function called in context that " - "cannot accept a set"))); + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that " + "cannot accept a set"))); /* Switch into long-lived context to construct returned data structures */ per_query_ctx = rsi->econtext->ecxt_per_query_memory; @@ -2158,9 +2186,9 @@ pglogical_table_data_filtered(PG_FUNCTION_ARGS) */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); tupdesc = BlessTupleDesc(tupdesc); /* Prepare output tuple store. */ @@ -2176,17 +2204,17 @@ pglogical_table_data_filtered(PG_FUNCTION_ARGS) reltupdesc = RelationGetDescr(rel); if (!equalTupleDescs(tupdesc, reltupdesc)) ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("return type of %s must be same as row type of the relation", - "pglogical_table_data_filtered"))); + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("return type of %s must be same as row type of the relation", + "pglogical_table_data_filtered"))); /* Build the replication info for the table. */ replication_sets = textarray_to_list(rep_set_names); replication_sets = get_replication_sets(node->node->id, - replication_sets, - false); + replication_sets, + false); tableinfo = get_table_replication_info(node->node->id, rel, - replication_sets); + replication_sets); /* Prepare executor. */ estate = create_estate_for_relation(rel, false); @@ -2232,7 +2260,7 @@ pglogical_table_data_filtered(PG_FUNCTION_ARGS) * We have to play games with snapshots to achieve this, since we're looking at * pglogical tables in the future as far as our snapshot is concerned. 
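+ *
+ * Returns true once the subscription - and, when relname is given, that
+ * particular table, otherwise any separately synced tables - has reached
+ * SYNC_STATUS_READY.  Returns false when the wait is abandoned because the
+ * apply worker is not running for an enabled subscription or because no
+ * sync status could be found.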
 */
-static void
+static bool
 pglogical_wait_for_sync_complete(char *subscription_name, char *relnamespace, char *relname)
 {
 	PGLogicalSubscription *sub;
@@ -2246,36 +2274,60 @@ pglogical_wait_for_sync_complete(char *subscription_name, char *relnamespace, ch
 	sub = get_subscription_by_name(subscription_name, false);
 
+	bool isdone = false;
 	do
 	{
-		PGLogicalSyncStatus *subsync;
-		List	   *tables;
-		bool		isdone = false;
-		int			rc;
+		List	   *tables;
+
+		bool		failed = false;
 
 		/* We need to see the latest rows */
-		PushActiveSnapshot(GetLatestSnapshot());
+		PushActiveSnapshot( GetLatestSnapshot());
+
+		LWLockAcquire( PGLogicalCtx->lock, LW_EXCLUSIVE );
+
+		PGLogicalWorker *apply = pglogical_apply_find( MyDatabaseId, sub->id );
+		if ( pglogical_worker_running( apply ))
+		{
+			PGLogicalSyncStatus *sync = get_subscription_sync_status( sub->id, true );
+
+			if ( !sync )
+			{
+				/* no sync status found for the subscription, give up */
+				failed = true;
+			}
+			else if ( sync->status == SYNC_STATUS_READY )
+			{
+				/* initial sync is done, subscription is replicating */
+				isdone = true;
+			}
+
+			free_sync_status( sync );
+		}
+		else if ( sub->enabled )
+		{
+			/* subscription is enabled but its apply worker is not running */
+			failed = true;
+		}
+		LWLockRelease(PGLogicalCtx->lock);
 
-		subsync = get_subscription_sync_status(sub->id, true);
-		isdone = subsync && subsync->status == SYNC_STATUS_READY;
-		free_sync_status(subsync);
 
 		if (isdone)
 		{
 			/*
-			 * Subscription itself is synced, but what about separately
+			 * Subscription itself is synced, but what about separately
 			 * synced tables?
 			 */
 			if (relname != NULL)
 			{
-				PGLogicalSyncStatus *table = get_table_sync_status(sub->id, relnamespace, relname, false);
+				PGLogicalSyncStatus *table = get_table_sync_status(sub->id, relnamespace, relname, true);
 				isdone = table && table->status == SYNC_STATUS_READY;
 				free_sync_status(table);
 			}
 			else
 			{
 				/*
-				 * XXX This is plenty inefficient and we should probably just do a direct catalog
+				 * This is plenty inefficient and we should probably just do a direct catalog
 				 * scan, but meh, it hardly has to be fast.
*/ ListCell *lc; @@ -2292,20 +2344,19 @@ pglogical_wait_for_sync_complete(char *subscription_name, char *relnamespace, ch PopActiveSnapshot(); - if (isdone) + if (isdone || failed) break; CHECK_FOR_INTERRUPTS(); /* some kind of backoff could be useful here */ - rc = WaitLatch(&MyProc->procLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 200L); - - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); + (void) WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT, 200L); ResetLatch(&MyProc->procLatch); } while (1); + + return isdone; } Datum @@ -2313,9 +2364,7 @@ pglogical_wait_for_subscription_sync_complete(PG_FUNCTION_ARGS) { char *subscription_name = NameStr(*PG_GETARG_NAME(0)); - pglogical_wait_for_sync_complete(subscription_name, NULL, NULL); - - PG_RETURN_VOID(); + PG_RETURN_BOOL( pglogical_wait_for_sync_complete(subscription_name, NULL, NULL) ); } Datum @@ -2328,9 +2377,7 @@ pglogical_wait_for_table_sync_complete(PG_FUNCTION_ARGS) relname = get_rel_name(relid); relnamespace = get_namespace_name(get_rel_namespace(relid)); - pglogical_wait_for_sync_complete(subscription_name, relnamespace, relname); - - PG_RETURN_VOID(); + PG_RETURN_BOOL( pglogical_wait_for_sync_complete(subscription_name, relnamespace, relname) ); } /* @@ -2357,9 +2404,9 @@ pglogical_xact_commit_timestamp_origin(PG_FUNCTION_ARGS) */ tupdesc = CreateTemplateTupleDesc(2); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "timestamp", - TIMESTAMPTZOID, -1, 0); + TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "roident", - OIDOID, -1, 0); + OIDOID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); #ifdef HAVE_REPLICATION_ORIGINS @@ -2394,7 +2441,7 @@ pglogical_gen_slot_name(PG_FUNCTION_ARGS) slot_name = (Name) palloc0(NAMEDATALEN); gen_slot_name(slot_name, dbname, provider_node_name, - subscription_name); + subscription_name); PG_RETURN_NAME(slot_name); } @@ -2411,24 +2458,24 @@ pglogical_gen_slot_name(PG_FUNCTION_ARGS) */ static void gen_slot_name(Name slot_name, char *dbname, const char *provider_node, - const char *subscription_name) + const char *subscription_name) { char *cp; memset(NameStr(*slot_name), 0, NAMEDATALEN); snprintf(NameStr(*slot_name), NAMEDATALEN, - "pgl_%s_%s_%s", - shorten_hash(dbname, 16), - shorten_hash(provider_node, 16), - shorten_hash(subscription_name, 16)); + "pgl_%s_%s_%s", + shorten_hash(dbname, 16), + shorten_hash(provider_node, 16), + shorten_hash(subscription_name, 16)); NameStr(*slot_name)[NAMEDATALEN-1] = '\0'; /* Replace all the invalid characters in slot name with underscore. 
*/ for (cp = NameStr(*slot_name); *cp; cp++) { if (!((*cp >= 'a' && *cp <= 'z') - || (*cp >= '0' && *cp <= '9') - || (*cp == '_'))) + || (*cp >= '0' && *cp <= '9') + || (*cp == '_'))) { *cp = '_'; } diff --git a/pglogical_node.c b/pglogical_node.c index 0694685..e54e645 100644 --- a/pglogical_node.c +++ b/pglogical_node.c @@ -78,28 +78,31 @@ typedef struct NodeInterfaceTuple typedef struct SubscriptionTuple { Oid sub_id; - NameData sub_name; + NameData sub_name; Oid sub_origin; Oid sub_target; - Oid sub_origin_if; + Oid sub_origin_if; Oid sub_target_if; - bool sub_enabled; - NameData sub_slot_name; + bool sub_enabled; + bool sub_data_replace; + NameData sub_slot_name; } SubscriptionTuple; -#define Natts_subscription 12 +#define Natts_subscription 14 #define Anum_sub_id 1 #define Anum_sub_name 2 #define Anum_sub_origin 3 #define Anum_sub_target 4 #define Anum_sub_origin_if 5 #define Anum_sub_target_if 6 -#define Anum_sub_enabled 7 -#define Anum_sub_slot_name 8 -#define Anum_sub_replication_sets 9 -#define Anum_sub_forward_origins 10 -#define Anum_sub_apply_delay 11 -#define Anum_sub_force_text_transfer 12 +#define Anum_sub_enabled 7 +#define Anum_sub_data_replace 8 +#define Anum_sub_slot_name 9 +#define Anum_sub_replication_sets 10 +#define Anum_sub_forward_origins 11 +#define Anum_sub_apply_delay 12 +#define Anum_sub_force_text_transfer 13 +#define Anum_sub_after_sync_queries 14 /* * We impose same validation rules as replication slot name validation does. @@ -721,6 +724,7 @@ create_subscription(PGLogicalSubscription *sub) values[Anum_sub_origin_if - 1] = ObjectIdGetDatum(sub->origin_if->id); values[Anum_sub_target_if - 1] = ObjectIdGetDatum(sub->target_if->id); values[Anum_sub_enabled - 1] = BoolGetDatum(sub->enabled); + values[Anum_sub_data_replace - 1] = BoolGetDatum(sub->data_replace); namestrcpy(&sub_slot_name, sub->slot_name); values[Anum_sub_slot_name - 1] = NameGetDatum(&sub_slot_name); @@ -743,6 +747,12 @@ create_subscription(PGLogicalSubscription *sub) values[Anum_sub_force_text_transfer - 1] = BoolGetDatum(sub->force_text_transfer); + if (list_length(sub->after_sync_queries) > 0) + values[Anum_sub_after_sync_queries - 1] = + PointerGetDatum(strlist_to_textarray(sub->after_sync_queries)); + else + nulls[Anum_sub_after_sync_queries - 1] = true; + tup = heap_form_tuple(tupDesc, values, nulls); /* Insert the tuple to the catalog. */ @@ -810,6 +820,7 @@ alter_subscription(PGLogicalSubscription *sub) values[Anum_sub_origin_if - 1] = ObjectIdGetDatum(sub->origin_if->id); values[Anum_sub_target_if - 1] = ObjectIdGetDatum(sub->target_if->id); values[Anum_sub_enabled - 1] = BoolGetDatum(sub->enabled); + values[Anum_sub_data_replace - 1] = BoolGetDatum(sub->data_replace); namestrcpy(&sub_slot_name, sub->slot_name); values[Anum_sub_slot_name - 1] = NameGetDatum(&sub_slot_name); @@ -828,7 +839,14 @@ alter_subscription(PGLogicalSubscription *sub) values[Anum_sub_apply_delay - 1] = IntervalPGetDatum(sub->apply_delay); values[Anum_sub_force_text_transfer - 1] = BoolGetDatum(sub->force_text_transfer); - newtup = heap_modify_tuple(oldtup, tupDesc, values, nulls, replaces); + if (list_length(sub->after_sync_queries) > 0) + values[Anum_sub_after_sync_queries - 1] = + PointerGetDatum(strlist_to_textarray(sub->after_sync_queries)); + else + nulls[Anum_sub_after_sync_queries - 1] = true; + + + newtup = heap_modify_tuple(oldtup, tupDesc, values, nulls, replaces); /* Update the tuple in catalog. 
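+	 *
+	 * As in create_subscription() above, the new data_replace flag is written
+	 * as a plain boolean and the after_sync_queries list is stored as a
+	 * text[] built with strlist_to_textarray(), left NULL when the list is
+	 * empty; subscription_fromtuple() reads it back with textarray_to_list().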
*/ CatalogTupleUpdate(rel, &oldtup->t_self, newtup); @@ -894,6 +912,7 @@ subscription_fromtuple(HeapTuple tuple, TupleDesc desc) sub->id = subtup->sub_id; sub->name = pstrdup(NameStr(subtup->sub_name)); sub->enabled = subtup->sub_enabled; + sub->data_replace = subtup->sub_data_replace; sub->slot_name = pstrdup(NameStr(subtup->sub_slot_name)); sub->origin = get_node(subtup->sub_origin); @@ -937,6 +956,17 @@ subscription_fromtuple(HeapTuple tuple, TupleDesc desc) else sub->force_text_transfer = DatumGetBool(d); + /* Get replication sets. */ + d = heap_getattr(tuple, Anum_sub_after_sync_queries, desc, &isnull); + if (isnull) + sub->after_sync_queries = NIL; + else + { + List *after_sync_queries; + after_sync_queries = textarray_to_list(DatumGetArrayTypeP(d)); + sub->after_sync_queries = after_sync_queries; + } + return sub; } diff --git a/pglogical_node.h b/pglogical_node.h index 29c391a..978218f 100644 --- a/pglogical_node.h +++ b/pglogical_node.h @@ -44,11 +44,13 @@ typedef struct PGLogicalSubscription PGlogicalInterface *origin_if; PGlogicalInterface *target_if; bool enabled; + bool data_replace; Interval *apply_delay; char *slot_name; List *replication_sets; List *forward_origins; bool force_text_transfer; + List *after_sync_queries; } PGLogicalSubscription; extern void create_node(PGLogicalNode *node); diff --git a/pglogical_output_plugin.c b/pglogical_output_plugin.c index 8514388..c18cbbf 100644 --- a/pglogical_output_plugin.c +++ b/pglogical_output_plugin.c @@ -46,15 +46,15 @@ extern void _PG_output_plugin_init(OutputPluginCallbacks *cb); static void pg_decode_startup(LogicalDecodingContext * ctx, - OutputPluginOptions *opt, bool is_init); + OutputPluginOptions *opt, bool is_init); static void pg_decode_shutdown(LogicalDecodingContext * ctx); static void pg_decode_begin_txn(LogicalDecodingContext *ctx, - ReorderBufferTXN *txn); + ReorderBufferTXN *txn); static void pg_decode_commit_txn(LogicalDecodingContext *ctx, - ReorderBufferTXN *txn, XLogRecPtr commit_lsn); + ReorderBufferTXN *txn, XLogRecPtr commit_lsn); static void pg_decode_change(LogicalDecodingContext *ctx, - ReorderBufferTXN *txn, Relation rel, - ReorderBufferChange *change); + ReorderBufferTXN *txn, Relation rel, + ReorderBufferChange *change); #ifdef HAVE_REPLICATION_ORIGINS static bool pg_decode_origin_filter(LogicalDecodingContext *ctx, @@ -62,17 +62,17 @@ static bool pg_decode_origin_filter(LogicalDecodingContext *ctx, #endif static void send_startup_message(LogicalDecodingContext *ctx, - PGLogicalOutputData *data, bool last_message); + PGLogicalOutputData *data, bool last_message); static bool startup_message_sent = false; typedef struct PGLRelMetaCacheEntry { - Oid relid; - /* Does the client have this relation cached? */ - bool is_cached; - /* Entry is valid and not due to be purged */ - bool is_valid; + Oid relid; + /* Does the client have this relation cached? 
*/ + bool is_cached; + /* Entry is valid and not due to be purged */ + bool is_valid; } PGLRelMetaCacheEntry; #define RELMETACACHE_INITIAL_SIZE 128 @@ -82,7 +82,7 @@ static int InvalidRelMetaCacheCnt = 0; static void relmetacache_init(MemoryContext decoding_context); static PGLRelMetaCacheEntry *relmetacache_get_relation(PGLogicalOutputData *data, - Relation rel); + Relation rel); static void relmetacache_flush(void); static void relmetacache_prune(void); @@ -92,292 +92,292 @@ static void pglReorderBufferCleanSerializedTXNs(const char *slotname); void _PG_output_plugin_init(OutputPluginCallbacks *cb) { - AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit); + AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit); - cb->startup_cb = pg_decode_startup; - cb->begin_cb = pg_decode_begin_txn; - cb->change_cb = pg_decode_change; - cb->commit_cb = pg_decode_commit_txn; + cb->startup_cb = pg_decode_startup; + cb->begin_cb = pg_decode_begin_txn; + cb->change_cb = pg_decode_change; + cb->commit_cb = pg_decode_commit_txn; #ifdef HAVE_REPLICATION_ORIGINS - cb->filter_by_origin_cb = pg_decode_origin_filter; + cb->filter_by_origin_cb = pg_decode_origin_filter; #endif - cb->shutdown_cb = pg_decode_shutdown; + cb->shutdown_cb = pg_decode_shutdown; } static bool check_binary_compatibility(PGLogicalOutputData *data) { - if (data->client_binary_basetypes_major_version != PG_VERSION_NUM / 100) - return false; - - if (data->client_binary_bigendian_set - && data->client_binary_bigendian != server_bigendian()) - { - elog(DEBUG1, "Binary mode rejected: Server and client endian mismatch"); - return false; - } - - if (data->client_binary_sizeofdatum != 0 - && data->client_binary_sizeofdatum != sizeof(Datum)) - { - elog(DEBUG1, "Binary mode rejected: Server and client sizeof(Datum) mismatch"); - return false; - } - - if (data->client_binary_sizeofint != 0 - && data->client_binary_sizeofint != sizeof(int)) - { - elog(DEBUG1, "Binary mode rejected: Server and client sizeof(int) mismatch"); - return false; - } - - if (data->client_binary_sizeoflong != 0 - && data->client_binary_sizeoflong != sizeof(long)) - { - elog(DEBUG1, "Binary mode rejected: Server and client sizeof(long) mismatch"); - return false; - } - - if (data->client_binary_float4byval_set - && data->client_binary_float4byval != server_float4_byval()) - { - elog(DEBUG1, "Binary mode rejected: Server and client float4byval mismatch"); - return false; - } - - if (data->client_binary_float8byval_set - && data->client_binary_float8byval != server_float8_byval()) - { - elog(DEBUG1, "Binary mode rejected: Server and client float8byval mismatch"); - return false; - } - - if (data->client_binary_intdatetimes_set - && data->client_binary_intdatetimes != server_integer_datetimes()) - { - elog(DEBUG1, "Binary mode rejected: Server and client integer datetimes mismatch"); - return false; - } - - return true; + if (data->client_binary_basetypes_major_version != PG_VERSION_NUM / 100) + return false; + + if (data->client_binary_bigendian_set + && data->client_binary_bigendian != server_bigendian()) + { + elog(DEBUG1, "Binary mode rejected: Server and client endian mismatch"); + return false; + } + + if (data->client_binary_sizeofdatum != 0 + && data->client_binary_sizeofdatum != sizeof(Datum)) + { + elog(DEBUG1, "Binary mode rejected: Server and client sizeof(Datum) mismatch"); + return false; + } + + if (data->client_binary_sizeofint != 0 + && data->client_binary_sizeofint != sizeof(int)) + { + elog(DEBUG1, "Binary mode rejected: 
Server and client sizeof(int) mismatch"); + return false; + } + + if (data->client_binary_sizeoflong != 0 + && data->client_binary_sizeoflong != sizeof(long)) + { + elog(DEBUG1, "Binary mode rejected: Server and client sizeof(long) mismatch"); + return false; + } + + if (data->client_binary_float4byval_set + && data->client_binary_float4byval != server_float4_byval()) + { + elog(DEBUG1, "Binary mode rejected: Server and client float4byval mismatch"); + return false; + } + + if (data->client_binary_float8byval_set + && data->client_binary_float8byval != server_float8_byval()) + { + elog(DEBUG1, "Binary mode rejected: Server and client float8byval mismatch"); + return false; + } + + if (data->client_binary_intdatetimes_set + && data->client_binary_intdatetimes != server_integer_datetimes()) + { + elog(DEBUG1, "Binary mode rejected: Server and client integer datetimes mismatch"); + return false; + } + + return true; } /* initialize this plugin */ static void pg_decode_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt, - bool is_init) + bool is_init) { - PGLogicalOutputData *data = palloc0(sizeof(PGLogicalOutputData)); - - /* Short lived memory context for individual messages */ - data->context = AllocSetContextCreate(ctx->context, - "pglogical output msg context", - ALLOCSET_DEFAULT_SIZES); - data->allow_internal_basetypes = false; - data->allow_binary_basetypes = false; - - - ctx->output_plugin_private = data; - - /* - * This is replication start and not slot initialization. - * - * Parse and validate options passed by the client. - */ - if (!is_init) - { - int params_format; - bool started_tx = false; - PGLogicalLocalNode *node; - MemoryContext oldctx; - - /* - * There's a potential corruption bug in PostgreSQL 10.1, 9.6.6, 9.5.10 - * and 9.4.15 that can cause reorder buffers to accumulate duplicated - * transactions. See - * https://www.postgresql.org/message-id/CAMsr+YHdX=XECbZshDZ2CZNWGTyw-taYBnzqVfx4JzM4ExP5xg@mail.gmail.com - * - * We can defend against this by doing our own cleanup of any serialized - * txns in the reorder buffer on startup. - */ - pglReorderBufferCleanSerializedTXNs(NameStr(MyReplicationSlot->data.name)); - - if (!IsTransactionState()) - { - StartTransactionCommand(); - started_tx = true; - } - node = get_local_node(false, false); - data->local_node_id = node->node->id; - - /* - * Ideally we'd send the startup message immediately. That way - * it'd arrive before any error we emit if we see incompatible - * options sent by the client here. That way the client could - * possibly adjust its options and reconnect. It'd also make - * sure the client gets the startup message in a timely way if - * the server is idle, since otherwise it could be a while - * before the next callback. - * - * The decoding plugin API doesn't let us write to the stream - * from here, though, so we have to delay the startup message - * until the first change processed on the stream, in a begin - * callback. - * - * If we ERROR there, the startup message is buffered but not - * sent since the callback didn't finish. So we'd have to send - * the startup message, finish the callback and check in the - * next callback if we need to ERROR. - * - * That's a bit much hoop jumping, so for now ERRORs are - * immediate. A way to emit a message from the startup callback - * is really needed to change that. 
- */ - startup_message_sent = false; - - /* Now parse the rest of the params and ERROR if we see any we don't recognise */ - oldctx = MemoryContextSwitchTo(ctx->context); - params_format = process_parameters(ctx->output_plugin_options, data); - MemoryContextSwitchTo(oldctx); - - if (params_format != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client sent startup parameters in format %d but we only support format 1", - params_format))); - - if (data->client_min_proto_version > PGLOGICAL_PROTO_VERSION_NUM) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client sent min_proto_version=%d but we only support protocol %d or lower", - data->client_min_proto_version, PGLOGICAL_PROTO_VERSION_NUM))); - - if (data->client_max_proto_version < PGLOGICAL_PROTO_MIN_VERSION_NUM) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client sent max_proto_version=%d but we only support protocol %d or higher", - data->client_max_proto_version, PGLOGICAL_PROTO_MIN_VERSION_NUM))); - - /* - * Set correct protocol format. - * - * This is the output plugin protocol format, this is different - * from the individual fields binary vs textual format. - */ - if (data->client_protocol_format != NULL - && strcmp(data->client_protocol_format, "json") == 0) - { - oldctx = MemoryContextSwitchTo(ctx->context); - data->api = pglogical_init_api(PGLogicalProtoJson); - opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT; - MemoryContextSwitchTo(oldctx); - } - else if ((data->client_protocol_format != NULL - && strcmp(data->client_protocol_format, "native") == 0) - || data->client_protocol_format == NULL) - { - oldctx = MemoryContextSwitchTo(ctx->context); - data->api = pglogical_init_api(PGLogicalProtoNative); - opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT; - - if (data->client_no_txinfo) - { - elog(WARNING, "no_txinfo option ignored for protocols other than json"); - data->client_no_txinfo = false; - } - MemoryContextSwitchTo(oldctx); - } - else - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("client requested protocol %s but only \"json\" or \"native\" are supported", - data->client_protocol_format))); - } - - /* check for encoding match if specific encoding demanded by client */ - if (data->client_expected_encoding != NULL - && strlen(data->client_expected_encoding) != 0) - { - int wanted_encoding = pg_char_to_encoding(data->client_expected_encoding); - - if (wanted_encoding == -1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognised encoding name %s passed to expected_encoding", - data->client_expected_encoding))); - - if (opt->output_type == OUTPUT_PLUGIN_TEXTUAL_OUTPUT) - { - /* - * datum encoding must match assigned client_encoding in text - * proto, since everything is subject to client_encoding - * conversion. - */ - if (wanted_encoding != pg_get_client_encoding()) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("expected_encoding must be unset or match client_encoding in text protocols"))); - } - else - { - /* - * currently in the binary protocol we can only emit encoded - * datums in the server encoding. There's no support for encoding - * conversion. 
- */ - if (wanted_encoding != GetDatabaseEncoding()) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("encoding conversion for binary datum not supported yet"), - errdetail("expected_encoding %s must be unset or match server_encoding %s", - data->client_expected_encoding, GetDatabaseEncodingName()))); - } - - data->field_datum_encoding = wanted_encoding; - } - - /* - * It's obviously not possible to send binary representation of data - * unless we use the binary output. - */ - if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && - data->client_want_internal_basetypes) - { - data->allow_internal_basetypes = - check_binary_compatibility(data); - } - - if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && - data->client_want_binary_basetypes && - data->client_binary_basetypes_major_version == PG_VERSION_NUM / 100) - { - data->allow_binary_basetypes = true; - } - - /* - * 9.4 lacks origins info so don't forward it. - * - * There's currently no knob for clients to use to suppress - * this info and it's sent if it's supported and available. - */ - if (PG_VERSION_NUM/100 == 904) - data->forward_changeset_origins = false; - else - data->forward_changeset_origins = true; - - if (started_tx) - CommitTransactionCommand(); - - relmetacache_init(ctx->context); - } - - /* So we can identify the process type in Valgrind logs */ - VALGRIND_PRINTF("PGLOGICAL: pglogical worker output_plugin\n"); - /* For incremental leak checking */ - VALGRIND_DISABLE_ERROR_REPORTING; - VALGRIND_DO_LEAK_CHECK; - VALGRIND_ENABLE_ERROR_REPORTING; + PGLogicalOutputData *data = palloc0(sizeof(PGLogicalOutputData)); + + /* Short lived memory context for individual messages */ + data->context = AllocSetContextCreate(ctx->context, + "pglogical output msg context", + ALLOCSET_DEFAULT_SIZES); + data->allow_internal_basetypes = false; + data->allow_binary_basetypes = false; + + + ctx->output_plugin_private = data; + + /* + * This is replication start and not slot initialization. + * + * Parse and validate options passed by the client. + */ + if (!is_init) + { + int params_format; + bool started_tx = false; + PGLogicalLocalNode *node; + MemoryContext oldctx; + + /* + * There's a potential corruption bug in PostgreSQL 10.1, 9.6.6, 9.5.10 + * and 9.4.15 that can cause reorder buffers to accumulate duplicated + * transactions. See + * https://www.postgresql.org/message-id/CAMsr+YHdX=XECbZshDZ2CZNWGTyw-taYBnzqVfx4JzM4ExP5xg@mail.gmail.com + * + * We can defend against this by doing our own cleanup of any serialized + * txns in the reorder buffer on startup. + */ + pglReorderBufferCleanSerializedTXNs(NameStr(MyReplicationSlot->data.name)); + + if (!IsTransactionState()) + { + StartTransactionCommand(); + started_tx = true; + } + node = get_local_node(false, false); + data->local_node_id = node->node->id; + + /* + * Ideally we'd send the startup message immediately. That way + * it'd arrive before any error we emit if we see incompatible + * options sent by the client here. That way the client could + * possibly adjust its options and reconnect. It'd also make + * sure the client gets the startup message in a timely way if + * the server is idle, since otherwise it could be a while + * before the next callback. + * + * The decoding plugin API doesn't let us write to the stream + * from here, though, so we have to delay the startup message + * until the first change processed on the stream, in a begin + * callback. 
+ * + * If we ERROR there, the startup message is buffered but not + * sent since the callback didn't finish. So we'd have to send + * the startup message, finish the callback and check in the + * next callback if we need to ERROR. + * + * That's a bit much hoop jumping, so for now ERRORs are + * immediate. A way to emit a message from the startup callback + * is really needed to change that. + */ + startup_message_sent = false; + + /* Now parse the rest of the params and ERROR if we see any we don't recognise */ + oldctx = MemoryContextSwitchTo(ctx->context); + params_format = process_parameters(ctx->output_plugin_options, data); + MemoryContextSwitchTo(oldctx); + + if (params_format != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client sent startup parameters in format %d but we only support format 1", + params_format))); + + if (data->client_min_proto_version > PGLOGICAL_PROTO_VERSION_NUM) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client sent min_proto_version=%d but we only support protocol %d or lower", + data->client_min_proto_version, PGLOGICAL_PROTO_VERSION_NUM))); + + if (data->client_max_proto_version < PGLOGICAL_PROTO_MIN_VERSION_NUM) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client sent max_proto_version=%d but we only support protocol %d or higher", + data->client_max_proto_version, PGLOGICAL_PROTO_MIN_VERSION_NUM))); + + /* + * Set correct protocol format. + * + * This is the output plugin protocol format, this is different + * from the individual fields binary vs textual format. + */ + if (data->client_protocol_format != NULL + && strcmp(data->client_protocol_format, "json") == 0) + { + oldctx = MemoryContextSwitchTo(ctx->context); + data->api = pglogical_init_api(PGLogicalProtoJson); + opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT; + MemoryContextSwitchTo(oldctx); + } + else if ((data->client_protocol_format != NULL + && strcmp(data->client_protocol_format, "native") == 0) + || data->client_protocol_format == NULL) + { + oldctx = MemoryContextSwitchTo(ctx->context); + data->api = pglogical_init_api(PGLogicalProtoNative); + opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT; + + if (data->client_no_txinfo) + { + elog(WARNING, "no_txinfo option ignored for protocols other than json"); + data->client_no_txinfo = false; + } + MemoryContextSwitchTo(oldctx); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("client requested protocol %s but only \"json\" or \"native\" are supported", + data->client_protocol_format))); + } + + /* check for encoding match if specific encoding demanded by client */ + if (data->client_expected_encoding != NULL + && strlen(data->client_expected_encoding) != 0) + { + int wanted_encoding = pg_char_to_encoding(data->client_expected_encoding); + + if (wanted_encoding == -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognised encoding name %s passed to expected_encoding", + data->client_expected_encoding))); + + if (opt->output_type == OUTPUT_PLUGIN_TEXTUAL_OUTPUT) + { + /* + * datum encoding must match assigned client_encoding in text + * proto, since everything is subject to client_encoding + * conversion. 
+ */ + if (wanted_encoding != pg_get_client_encoding()) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("expected_encoding must be unset or match client_encoding in text protocols"))); + } + else + { + /* + * currently in the binary protocol we can only emit encoded + * datums in the server encoding. There's no support for encoding + * conversion. + */ + if (wanted_encoding != GetDatabaseEncoding()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("encoding conversion for binary datum not supported yet"), + errdetail("expected_encoding %s must be unset or match server_encoding %s", + data->client_expected_encoding, GetDatabaseEncodingName()))); + } + + data->field_datum_encoding = wanted_encoding; + } + + /* + * It's obviously not possible to send binary representation of data + * unless we use the binary output. + */ + if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && + data->client_want_internal_basetypes) + { + data->allow_internal_basetypes = + check_binary_compatibility(data); + } + + if (opt->output_type == OUTPUT_PLUGIN_BINARY_OUTPUT && + data->client_want_binary_basetypes && + data->client_binary_basetypes_major_version == PG_VERSION_NUM / 100) + { + data->allow_binary_basetypes = true; + } + + /* + * 9.4 lacks origins info so don't forward it. + * + * There's currently no knob for clients to use to suppress + * this info and it's sent if it's supported and available. + */ + if (PG_VERSION_NUM/100 == 904) + data->forward_changeset_origins = false; + else + data->forward_changeset_origins = true; + + if (started_tx) + CommitTransactionCommand(); + + relmetacache_init(ctx->context); + } + + /* So we can identify the process type in Valgrind logs */ + VALGRIND_PRINTF("PGLOGICAL: pglogical worker output_plugin\n"); + /* For incremental leak checking */ + VALGRIND_DISABLE_ERROR_REPORTING; + VALGRIND_DO_LEAK_CHECK; + VALGRIND_ENABLE_ERROR_REPORTING; } /* @@ -386,27 +386,27 @@ pg_decode_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt, static void pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) { - PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; - bool send_replication_origin = data->forward_changeset_origins; - MemoryContext old_ctx; + PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; + bool send_replication_origin = data->forward_changeset_origins; + MemoryContext old_ctx; - old_ctx = MemoryContextSwitchTo(data->context); + old_ctx = MemoryContextSwitchTo(data->context); - VALGRIND_DO_ADDED_LEAK_CHECK; + VALGRIND_DO_ADDED_LEAK_CHECK; - if (!startup_message_sent) - send_startup_message(ctx, data, false /* can't be last message */); + if (!startup_message_sent) + send_startup_message(ctx, data, false /* can't be last message */); #ifdef HAVE_REPLICATION_ORIGINS - /* If the record didn't originate locally, send origin info */ + /* If the record didn't originate locally, send origin info */ send_replication_origin &= txn->origin_id != InvalidRepOriginId; #endif - OutputPluginPrepareWrite(ctx, !send_replication_origin); - data->api->write_begin(ctx->out, data, txn); + OutputPluginPrepareWrite(ctx, !send_replication_origin); + data->api->write_begin(ctx->out, data, txn); #ifdef HAVE_REPLICATION_ORIGINS - if (send_replication_origin) + if (send_replication_origin) { char *origin; @@ -429,10 +429,10 @@ pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) } #endif - OutputPluginWrite(ctx, true); + OutputPluginWrite(ctx, true); - 
Assert(CurrentMemoryContext == data->context); - MemoryContextSwitchTo(old_ctx); + Assert(CurrentMemoryContext == data->context); + MemoryContextSwitchTo(old_ctx); } /* @@ -440,295 +440,295 @@ pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) */ static void pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, - XLogRecPtr commit_lsn) + XLogRecPtr commit_lsn) { - PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; - MemoryContext old_ctx; + PGLogicalOutputData* data = (PGLogicalOutputData*)ctx->output_plugin_private; + MemoryContext old_ctx; - old_ctx = MemoryContextSwitchTo(data->context); + old_ctx = MemoryContextSwitchTo(data->context); - OutputPluginPrepareWrite(ctx, true); - data->api->write_commit(ctx->out, data, txn, commit_lsn); - OutputPluginWrite(ctx, true); + OutputPluginPrepareWrite(ctx, true); + data->api->write_commit(ctx->out, data, txn, commit_lsn); + OutputPluginWrite(ctx, true); - /* - * Now is a good time to get rid of invalidated relation - * metadata entries since nothing will be referencing them - * at the moment. - */ - relmetacache_prune(); + /* + * Now is a good time to get rid of invalidated relation + * metadata entries since nothing will be referencing them + * at the moment. + */ + relmetacache_prune(); - Assert(CurrentMemoryContext == data->context); - MemoryContextSwitchTo(old_ctx); - MemoryContextReset(data->context); + Assert(CurrentMemoryContext == data->context); + MemoryContextSwitchTo(old_ctx); + MemoryContextReset(data->context); - VALGRIND_DO_ADDED_LEAK_CHECK; + VALGRIND_DO_ADDED_LEAK_CHECK; } static bool pglogical_change_filter(PGLogicalOutputData *data, Relation relation, - ReorderBufferChange *change, Bitmapset **att_list) + ReorderBufferChange *change, Bitmapset **att_list) { - PGLogicalTableRepInfo *tblinfo; - ListCell *lc; - - if (data->replicate_only_table) - { - /* - * Special case - we are catching up just one table. - * TODO: performance - */ - return strcmp(RelationGetRelationName(relation), - data->replicate_only_table->relname) == 0 && - RelationGetNamespace(relation) == - get_namespace_oid(data->replicate_only_table->schemaname, true); - } - else if (RelationGetRelid(relation) == get_queue_table_oid()) - { - /* Special case - queue table */ - if (change->action == REORDER_BUFFER_CHANGE_INSERT) - { - HeapTuple tup = &change->data.tp.newtuple->tuple; - QueuedMessage *q; - ListCell *qlc; - - LockRelation(relation, AccessShareLock); - q = queued_message_from_tuple(tup); - UnlockRelation(relation, AccessShareLock); - - /* - * No replication set means global message, those are always - * replicated. - */ - if (q->replication_sets == NULL) - return true; - - foreach (qlc, q->replication_sets) - { - char *queue_set = (char *) lfirst(qlc); - ListCell *plc; - - foreach (plc, data->replication_sets) - { - PGLogicalRepSet *rs = lfirst(plc); - - /* TODO: this is somewhat ugly. */ - if (strcmp(queue_set, rs->name) == 0 && - (q->message_type != QUEUE_COMMAND_TYPE_TRUNCATE || - rs->replicate_truncate)) - return true; - } - } - } - - return false; - } - else if (RelationGetRelid(relation) == get_replication_set_rel_oid()) - { - /* - * Special case - replication set table. - * - * We can use this to update our cached replication set info, without - * having to deal with cache invalidation callbacks. 
- */ - HeapTuple tup; - PGLogicalRepSet *replicated_set; - ListCell *plc; - - if (change->action == REORDER_BUFFER_CHANGE_UPDATE) - tup = &change->data.tp.newtuple->tuple; - else if (change->action == REORDER_BUFFER_CHANGE_DELETE) - tup = &change->data.tp.oldtuple->tuple; - else - return false; - - replicated_set = replication_set_from_tuple(tup); - foreach (plc, data->replication_sets) - { - PGLogicalRepSet *rs = lfirst(plc); - - /* Check if the changed repset is used by us. */ - if (rs->id == replicated_set->id) - { - /* - * In case this was delete, somebody deleted one of our - * rep sets, bail here and let reconnect logic handle any - * potential issues. - */ - if (change->action == REORDER_BUFFER_CHANGE_DELETE) - elog(ERROR, "replication set \"%s\" used by this connection was deleted, existing", - rs->name); - - /* This was update of our repset, update the cache. */ - rs->replicate_insert = replicated_set->replicate_insert; - rs->replicate_update = replicated_set->replicate_update; - rs->replicate_delete = replicated_set->replicate_delete; - rs->replicate_truncate = replicated_set->replicate_truncate; - - return false; - } - } - - return false; - } - - /* Normal case - use replication set membership. */ - tblinfo = get_table_replication_info(data->local_node_id, relation, - data->replication_sets); - - /* First try filter out by change type. */ - switch (change->action) - { - case REORDER_BUFFER_CHANGE_INSERT: - if (!tblinfo->replicate_insert) - return false; - break; - case REORDER_BUFFER_CHANGE_UPDATE: - if (!tblinfo->replicate_update) - return false; - break; - case REORDER_BUFFER_CHANGE_DELETE: - if (!tblinfo->replicate_delete) - return false; - break; - default: - elog(ERROR, "Unhandled reorder buffer change type %d", - change->action); - return false; /* shut compiler up */ - } - - /* - * Proccess row filters. - * XXX: we could probably cache some of the executor stuff. - */ - if (list_length(tblinfo->row_filter) > 0) - { - EState *estate; - ExprContext *econtext; - TupleDesc tupdesc = RelationGetDescr(relation); - HeapTuple oldtup = change->data.tp.oldtuple ? - &change->data.tp.oldtuple->tuple : NULL; - HeapTuple newtup = change->data.tp.newtuple ? - &change->data.tp.newtuple->tuple : NULL; - - /* Skip empty changes. */ - if (!newtup && !oldtup) - { - elog(DEBUG1, "pglogical output got empty change"); - return false; - } - - estate = create_estate_for_relation(relation, false); - econtext = prepare_per_tuple_econtext(estate, tupdesc); - - ExecStoreHeapTuple(newtup ? newtup : oldtup, econtext->ecxt_scantuple, false); - - /* Next try the row_filters if there are any. */ - foreach (lc, tblinfo->row_filter) - { - Node *row_filter = (Node *) lfirst(lc); - ExprState *exprstate = pglogical_prepare_row_filter(row_filter); - Datum res; - bool isnull; - - res = ExecEvalExpr(exprstate, econtext, &isnull, NULL); - - /* NULL is same as false for our use. */ - if (isnull) - return false; - - if (!DatumGetBool(res)) - return false; - } - - ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); - FreeExecutorState(estate); - } - - /* Make sure caller is aware of any attribute filter. */ - *att_list = tblinfo->att_list; - - return true; + PGLogicalTableRepInfo *tblinfo; + ListCell *lc; + + if (data->replicate_only_table) + { + /* + * Special case - we are catching up just one table. 
+ * TODO: performance + */ + return strcmp(RelationGetRelationName(relation), + data->replicate_only_table->relname) == 0 && + RelationGetNamespace(relation) == + get_namespace_oid(data->replicate_only_table->schemaname, true); + } + else if (RelationGetRelid(relation) == get_queue_table_oid()) + { + /* Special case - queue table */ + if (change->action == REORDER_BUFFER_CHANGE_INSERT) + { + HeapTuple tup = &change->data.tp.newtuple->tuple; + QueuedMessage *q; + ListCell *qlc; + + LockRelation(relation, AccessShareLock); + q = queued_message_from_tuple(tup); + UnlockRelation(relation, AccessShareLock); + + /* + * No replication set means global message, those are always + * replicated. + */ + if (q->replication_sets == NULL) + return true; + + foreach (qlc, q->replication_sets) + { + char *queue_set = (char *) lfirst(qlc); + ListCell *plc; + + foreach (plc, data->replication_sets) + { + PGLogicalRepSet *rs = lfirst(plc); + + /* TODO: this is somewhat ugly. */ + if (strcmp(queue_set, rs->name) == 0 && + (q->message_type != QUEUE_COMMAND_TYPE_TRUNCATE || + rs->replicate_truncate)) + return true; + } + } + } + + return false; + } + else if (RelationGetRelid(relation) == get_replication_set_rel_oid()) + { + /* + * Special case - replication set table. + * + * We can use this to update our cached replication set info, without + * having to deal with cache invalidation callbacks. + */ + HeapTuple tup; + PGLogicalRepSet *replicated_set; + ListCell *plc; + + if (change->action == REORDER_BUFFER_CHANGE_UPDATE) + tup = &change->data.tp.newtuple->tuple; + else if (change->action == REORDER_BUFFER_CHANGE_DELETE) + tup = &change->data.tp.oldtuple->tuple; + else + return false; + + replicated_set = replication_set_from_tuple(tup); + foreach (plc, data->replication_sets) + { + PGLogicalRepSet *rs = lfirst(plc); + + /* Check if the changed repset is used by us. */ + if (rs->id == replicated_set->id) + { + /* + * In case this was delete, somebody deleted one of our + * rep sets, bail here and let reconnect logic handle any + * potential issues. + */ + if (change->action == REORDER_BUFFER_CHANGE_DELETE) + elog(ERROR, "replication set \"%s\" used by this connection was deleted, existing", + rs->name); + + /* This was update of our repset, update the cache. */ + rs->replicate_insert = replicated_set->replicate_insert; + rs->replicate_update = replicated_set->replicate_update; + rs->replicate_delete = replicated_set->replicate_delete; + rs->replicate_truncate = replicated_set->replicate_truncate; + + return false; + } + } + + return false; + } + + /* Normal case - use replication set membership. */ + tblinfo = get_table_replication_info(data->local_node_id, relation, + data->replication_sets); + + /* First try filter out by change type. */ + switch (change->action) + { + case REORDER_BUFFER_CHANGE_INSERT: + if (!tblinfo->replicate_insert) + return false; + break; + case REORDER_BUFFER_CHANGE_UPDATE: + if (!tblinfo->replicate_update) + return false; + break; + case REORDER_BUFFER_CHANGE_DELETE: + if (!tblinfo->replicate_delete) + return false; + break; + default: + elog(ERROR, "Unhandled reorder buffer change type %d", + change->action); + return false; /* shut compiler up */ + } + + /* + * Proccess row filters. + * XXX: we could probably cache some of the executor stuff. + */ + if (list_length(tblinfo->row_filter) > 0) + { + EState *estate; + ExprContext *econtext; + TupleDesc tupdesc = RelationGetDescr(relation); + HeapTuple oldtup = change->data.tp.oldtuple ? 
+ &change->data.tp.oldtuple->tuple : NULL; + HeapTuple newtup = change->data.tp.newtuple ? + &change->data.tp.newtuple->tuple : NULL; + + /* Skip empty changes. */ + if (!newtup && !oldtup) + { + elog(DEBUG1, "pglogical output got empty change"); + return false; + } + + estate = create_estate_for_relation(relation, false); + econtext = prepare_per_tuple_econtext(estate, tupdesc); + + ExecStoreHeapTuple(newtup ? newtup : oldtup, econtext->ecxt_scantuple, false); + + /* Next try the row_filters if there are any. */ + foreach (lc, tblinfo->row_filter) + { + Node *row_filter = (Node *) lfirst(lc); + ExprState *exprstate = pglogical_prepare_row_filter(row_filter); + Datum res; + bool isnull; + + res = ExecEvalExpr(exprstate, econtext, &isnull, NULL); + + /* NULL is same as false for our use. */ + if (isnull) + return false; + + if (!DatumGetBool(res)) + return false; + } + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(estate); + } + + /* Make sure caller is aware of any attribute filter. */ + *att_list = tblinfo->att_list; + + return true; } static void pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, - Relation relation, ReorderBufferChange *change) + Relation relation, ReorderBufferChange *change) { - PGLogicalOutputData *data = ctx->output_plugin_private; - MemoryContext old; - Bitmapset *att_list = NULL; - - /* Avoid leaking memory by using and resetting our own context */ - old = MemoryContextSwitchTo(data->context); - - /* First check the table filter */ - if (!pglogical_change_filter(data, relation, change, &att_list)) - return; - - /* - * If the protocol wants to write relation information and the client - * isn't known to have metadata cached for this relation already, - * send relation metadata. - * - * TODO: track hit/miss stats - */ - if (data->api->write_rel != NULL) - { - PGLRelMetaCacheEntry *cached_relmeta; - cached_relmeta = relmetacache_get_relation(data, relation); - - if (!cached_relmeta->is_cached) - { - OutputPluginPrepareWrite(ctx, false); - data->api->write_rel(ctx->out, data, relation, att_list); - OutputPluginWrite(ctx, false); - cached_relmeta->is_cached = true; - } - } - - /* Send the data */ - switch (change->action) - { - case REORDER_BUFFER_CHANGE_INSERT: - OutputPluginPrepareWrite(ctx, true); - data->api->write_insert(ctx->out, data, relation, - &change->data.tp.newtuple->tuple, - att_list); - OutputPluginWrite(ctx, true); - break; - case REORDER_BUFFER_CHANGE_UPDATE: - { - HeapTuple oldtuple = change->data.tp.oldtuple ? 
- &change->data.tp.oldtuple->tuple : NULL; - - OutputPluginPrepareWrite(ctx, true); - data->api->write_update(ctx->out, data, relation, oldtuple, - &change->data.tp.newtuple->tuple, - att_list); - OutputPluginWrite(ctx, true); - break; - } - case REORDER_BUFFER_CHANGE_DELETE: - if (change->data.tp.oldtuple) - { - OutputPluginPrepareWrite(ctx, true); - data->api->write_delete(ctx->out, data, relation, - &change->data.tp.oldtuple->tuple, - att_list); - OutputPluginWrite(ctx, true); - } - else - elog(DEBUG1, "didn't send DELETE change because of missing oldtuple"); - break; - default: - Assert(false); - } - - /* Cleanup */ - Assert(CurrentMemoryContext == data->context); - MemoryContextSwitchTo(old); - MemoryContextReset(data->context); + PGLogicalOutputData *data = ctx->output_plugin_private; + MemoryContext old; + Bitmapset *att_list = NULL; + + /* Avoid leaking memory by using and resetting our own context */ + old = MemoryContextSwitchTo(data->context); + + /* First check the table filter */ + if (!pglogical_change_filter(data, relation, change, &att_list)) + return; + + /* + * If the protocol wants to write relation information and the client + * isn't known to have metadata cached for this relation already, + * send relation metadata. + * + * TODO: track hit/miss stats + */ + if (data->api->write_rel != NULL) + { + PGLRelMetaCacheEntry *cached_relmeta; + cached_relmeta = relmetacache_get_relation(data, relation); + + if (!cached_relmeta->is_cached) + { + OutputPluginPrepareWrite(ctx, false); + data->api->write_rel(ctx->out, data, relation, att_list); + OutputPluginWrite(ctx, false); + cached_relmeta->is_cached = true; + } + } + + /* Send the data */ + switch (change->action) + { + case REORDER_BUFFER_CHANGE_INSERT: + OutputPluginPrepareWrite(ctx, true); + data->api->write_insert(ctx->out, data, relation, + &change->data.tp.newtuple->tuple, + att_list); + OutputPluginWrite(ctx, true); + break; + case REORDER_BUFFER_CHANGE_UPDATE: + { + HeapTuple oldtuple = change->data.tp.oldtuple ? + &change->data.tp.oldtuple->tuple : NULL; + + OutputPluginPrepareWrite(ctx, true); + data->api->write_update(ctx->out, data, relation, oldtuple, + &change->data.tp.newtuple->tuple, + att_list); + OutputPluginWrite(ctx, true); + break; + } + case REORDER_BUFFER_CHANGE_DELETE: + if (change->data.tp.oldtuple) + { + OutputPluginPrepareWrite(ctx, true); + data->api->write_delete(ctx->out, data, relation, + &change->data.tp.oldtuple->tuple, + att_list); + OutputPluginWrite(ctx, true); + } + else + elog(DEBUG1, "didn't send DELETE change because of missing oldtuple"); + break; + default: + Assert(false); + } + + /* Cleanup */ + Assert(CurrentMemoryContext == data->context); + MemoryContextSwitchTo(old); + MemoryContextReset(data->context); } #ifdef HAVE_REPLICATION_ORIGINS @@ -760,28 +760,28 @@ pg_decode_origin_filter(LogicalDecodingContext *ctx, static void send_startup_message(LogicalDecodingContext *ctx, - PGLogicalOutputData *data, bool last_message) + PGLogicalOutputData *data, bool last_message) { - List *msg; + List *msg; - Assert(!startup_message_sent); + Assert(!startup_message_sent); - msg = prepare_startup_message(data); + msg = prepare_startup_message(data); - /* - * We could free the extra_startup_params DefElem list here, but it's - * pretty harmless to just ignore it, since it's in the decoding memory - * context anyway, and we don't know if it's safe to free the defnames or - * not. 
- */ + /* + * We could free the extra_startup_params DefElem list here, but it's + * pretty harmless to just ignore it, since it's in the decoding memory + * context anyway, and we don't know if it's safe to free the defnames or + * not. + */ - OutputPluginPrepareWrite(ctx, last_message); - data->api->write_startup_message(ctx->out, msg); - OutputPluginWrite(ctx, last_message); + OutputPluginPrepareWrite(ctx, last_message); + data->api->write_startup_message(ctx->out, msg); + OutputPluginWrite(ctx, last_message); - list_free_deep(msg); + list_free_deep(msg); - startup_message_sent = true; + startup_message_sent = true; } @@ -791,14 +791,14 @@ send_startup_message(LogicalDecodingContext *ctx, static void pg_decode_shutdown(LogicalDecodingContext * ctx) { - relmetacache_flush(); + relmetacache_flush(); - VALGRIND_PRINTF("PGLOGICAL: output plugin shutdown\n"); + VALGRIND_PRINTF("PGLOGICAL: output plugin shutdown\n"); - /* - * no need to delete data->context as it's child of ctx->context which - * will expire on return. - */ + /* + * no need to delete data->context as it's child of ctx->context which + * will expire on return. + */ } @@ -808,30 +808,30 @@ pg_decode_shutdown(LogicalDecodingContext * ctx) */ static void relmetacache_invalidation_cb(Datum arg, Oid relid) -{ - struct PGLRelMetaCacheEntry *hentry; - Assert (RelMetaCache != NULL); - - /* - * Nobody keeps pointers to entries in this hash table around outside - * logical decoding callback calls - but invalidation events can come in - * *during* a callback if we access the relcache in the callback. Because - * of that we must mark the cache entry as invalid but not remove it from - * the hash while it could still be referenced, then prune it at a later - * safe point. - * - * Getting invalidations for relations that aren't in the table is - * entirely normal, since there's no way to unregister for an - * invalidation event. So we don't care if it's found or not. - */ - hentry = (struct PGLRelMetaCacheEntry *) - hash_search(RelMetaCache, &relid, HASH_FIND, NULL); - - if (hentry != NULL) - { - hentry->is_valid = false; - InvalidRelMetaCacheCnt++; - } + { + struct PGLRelMetaCacheEntry *hentry; + Assert (RelMetaCache != NULL); + + /* + * Nobody keeps pointers to entries in this hash table around outside + * logical decoding callback calls - but invalidation events can come in + * *during* a callback if we access the relcache in the callback. Because + * of that we must mark the cache entry as invalid but not remove it from + * the hash while it could still be referenced, then prune it at a later + * safe point. + * + * Getting invalidations for relations that aren't in the table is + * entirely normal, since there's no way to unregister for an + * invalidation event. So we don't care if it's found or not. 
+ */ + hentry = (struct PGLRelMetaCacheEntry *) + hash_search(RelMetaCache, &relid, HASH_FIND, NULL); + + if (hentry != NULL) + { + hentry->is_valid = false; + InvalidRelMetaCacheCnt++; + } } /* @@ -844,44 +844,44 @@ relmetacache_invalidation_cb(Datum arg, Oid relid) static void relmetacache_init(MemoryContext decoding_context) { - HASHCTL ctl; - int hash_flags; + HASHCTL ctl; + int hash_flags; - InvalidRelMetaCacheCnt = 0; + InvalidRelMetaCacheCnt = 0; - if (RelMetaCache == NULL) - { - MemoryContext old_ctxt; + if (RelMetaCache == NULL) + { + MemoryContext old_ctxt; - RelMetaCacheContext = AllocSetContextCreate(TopMemoryContext, - "pglogical output relmetacache", - ALLOCSET_DEFAULT_SIZES); + RelMetaCacheContext = AllocSetContextCreate(TopMemoryContext, + "pglogical output relmetacache", + ALLOCSET_DEFAULT_SIZES); - /* Make a new hash table for the cache */ - hash_flags = HASH_ELEM | HASH_CONTEXT; + /* Make a new hash table for the cache */ + hash_flags = HASH_ELEM | HASH_CONTEXT; - MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(struct PGLRelMetaCacheEntry); - ctl.hcxt = RelMetaCacheContext; + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(struct PGLRelMetaCacheEntry); + ctl.hcxt = RelMetaCacheContext; #if PG_VERSION_NUM >= 90500 - hash_flags |= HASH_BLOBS; + hash_flags |= HASH_BLOBS; #else - ctl.hash = tag_hash; - hash_flags |= HASH_FUNCTION; + ctl.hash = tag_hash; + hash_flags |= HASH_FUNCTION; #endif - old_ctxt = MemoryContextSwitchTo(RelMetaCacheContext); - RelMetaCache = hash_create("pglogical relation metadata cache", - RELMETACACHE_INITIAL_SIZE, - &ctl, hash_flags); - (void) MemoryContextSwitchTo(old_ctxt); + old_ctxt = MemoryContextSwitchTo(RelMetaCacheContext); + RelMetaCache = hash_create("pglogical relation metadata cache", + RELMETACACHE_INITIAL_SIZE, + &ctl, hash_flags); + (void) MemoryContextSwitchTo(old_ctxt); - Assert(RelMetaCache != NULL); + Assert(RelMetaCache != NULL); - CacheRegisterRelcacheCallback(relmetacache_invalidation_cb, (Datum)0); - } + CacheRegisterRelcacheCallback(relmetacache_invalidation_cb, (Datum)0); + } } @@ -896,31 +896,31 @@ relmetacache_init(MemoryContext decoding_context) */ static PGLRelMetaCacheEntry * relmetacache_get_relation(struct PGLogicalOutputData *data, - Relation rel) + Relation rel) { - struct PGLRelMetaCacheEntry *hentry; - bool found; - MemoryContext old_mctx; - - /* Find cached function info, creating if not found */ - old_mctx = MemoryContextSwitchTo(RelMetaCacheContext); - hentry = (struct PGLRelMetaCacheEntry*) hash_search(RelMetaCache, - (void *)(&RelationGetRelid(rel)), - HASH_ENTER, &found); - (void) MemoryContextSwitchTo(old_mctx); - - /* If not found or not valid, it can't be cached. */ - if (!found || !hentry->is_valid) - { - Assert(hentry->relid = RelationGetRelid(rel)); - hentry->is_cached = false; - /* Only used for lazy purging of invalidations */ - hentry->is_valid = true; - } - - Assert(hentry != NULL); - - return hentry; + struct PGLRelMetaCacheEntry *hentry; + bool found; + MemoryContext old_mctx; + + /* Find cached function info, creating if not found */ + old_mctx = MemoryContextSwitchTo(RelMetaCacheContext); + hentry = (struct PGLRelMetaCacheEntry*) hash_search(RelMetaCache, + (void *)(&RelationGetRelid(rel)), + HASH_ENTER, &found); + (void) MemoryContextSwitchTo(old_mctx); + + /* If not found or not valid, it can't be cached. 
 */
+	if (!found || !hentry->is_valid)
+	{
+		Assert(hentry->relid == RelationGetRelid(rel));
+		hentry->is_cached = false;
+		/* Only used for lazy purging of invalidations */
+		hentry->is_valid = true;
+	}
+
+	Assert(hentry != NULL);
+
+	return hentry;
 }
 
 
@@ -934,21 +934,21 @@ relmetacache_get_relation(struct PGLogicalOutputData *data,
 static void
 relmetacache_flush(void)
 {
-	HASH_SEQ_STATUS status;
-	struct PGLRelMetaCacheEntry *hentry;
-
-	if (RelMetaCache != NULL)
-	{
-		hash_seq_init(&status, RelMetaCache);
-
-		while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL)
-		{
-			if (hash_search(RelMetaCache,
-							(void *) &hentry->relid,
-							HASH_REMOVE, NULL) == NULL)
-				elog(ERROR, "hash table corrupted");
-		}
-	}
+	HASH_SEQ_STATUS status;
+	struct PGLRelMetaCacheEntry *hentry;
+
+	if (RelMetaCache != NULL)
+	{
+		hash_seq_init(&status, RelMetaCache);
+
+		while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL)
+		{
+			if (hash_search(RelMetaCache,
+							(void *) &hentry->relid,
+							HASH_REMOVE, NULL) == NULL)
+				elog(ERROR, "hash table corrupted");
+		}
+	}
 }
 
 /*
@@ -960,30 +960,30 @@ relmetacache_flush(void)
 static void
 relmetacache_prune(void)
 {
-	HASH_SEQ_STATUS status;
-	struct PGLRelMetaCacheEntry *hentry;
-
-	/*
-	 * Since the pruning can be expensive, do it only if ig we invalidated
-	 * at least half of initial cache size.
-	 */
-	if (InvalidRelMetaCacheCnt < RELMETACACHE_INITIAL_SIZE/2)
-		return;
-
-	hash_seq_init(&status, RelMetaCache);
-
-	while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL)
-	{
-		if (!hentry->is_valid)
-		{
-			if (hash_search(RelMetaCache,
-							(void *) &hentry->relid,
-							HASH_REMOVE, NULL) == NULL)
-				elog(ERROR, "hash table corrupted");
-		}
-	}
-
-	InvalidRelMetaCacheCnt = 0;
+	HASH_SEQ_STATUS status;
+	struct PGLRelMetaCacheEntry *hentry;
+
+	/*
+	 * Since the pruning can be expensive, do it only if we invalidated
+	 * at least half of the initial cache size.
+ */ + if (InvalidRelMetaCacheCnt < RELMETACACHE_INITIAL_SIZE/2) + return; + + hash_seq_init(&status, RelMetaCache); + + while ((hentry = (struct PGLRelMetaCacheEntry*) hash_seq_search(&status)) != NULL) + { + if (!hentry->is_valid) + { + if (hash_search(RelMetaCache, + (void *) &hentry->relid, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } + } + + InvalidRelMetaCacheCnt = 0; } /* @@ -993,33 +993,34 @@ relmetacache_prune(void) static void pglReorderBufferCleanSerializedTXNs(const char *slotname) { - DIR *spill_dir; - struct dirent *spill_de; - struct stat statbuf; - char path[MAXPGPATH * 2 + 12]; - - sprintf(path, "pg_replslot/%s", slotname); - - /* we're only handling directories here, skip if it's not ours */ - if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) - return; - - spill_dir = AllocateDir(path); - while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL) - { - /* only look at names that can be ours */ - if (strncmp(spill_de->d_name, "xid", 3) == 0) - { - snprintf(path, sizeof(path), - "pg_replslot/%s/%s", slotname, - spill_de->d_name); - - if (unlink(path) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not remove file \"%s\" during removal of pg_replslot/%s/*.xid: %m", - path, slotname))); - } - } - FreeDir(spill_dir); + DIR *spill_dir; + struct dirent *spill_de; + struct stat statbuf; + char path[MAXPGPATH * 2 + 12]; + + sprintf(path, "pg_replslot/%s", slotname); + + /* we're only handling directories here, skip if it's not ours */ + if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) + return; + + spill_dir = AllocateDir(path); + while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL) + { + /* only look at names that can be ours */ + if (strncmp(spill_de->d_name, "xid", 3) == 0) + { + snprintf(path, sizeof(path), + "pg_replslot/%s/%s", slotname, + spill_de->d_name); + + if (unlink(path) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\" during removal of pg_replslot/%s/*.xid: %m", + path, slotname))); + } + } + FreeDir(spill_dir); } + diff --git a/pglogical_relcache.h b/pglogical_relcache.h index b76cf19..d99951a 100644 --- a/pglogical_relcache.h +++ b/pglogical_relcache.h @@ -25,6 +25,7 @@ typedef struct PGLogicalRemoteRel /* Only returned by info function, not protocol. 
*/ bool hasRowFilter; + char *sync_clear_filter; } PGLogicalRemoteRel; typedef struct PGLogicalRelation diff --git a/pglogical_repset.c b/pglogical_repset.c index b7b68ed..bc036d9 100644 --- a/pglogical_repset.c +++ b/pglogical_repset.c @@ -94,11 +94,12 @@ typedef struct RepSetTableTuple #endif } RepSetTableTuple; -#define Natts_repset_table 4 +#define Natts_repset_table 5 #define Anum_repset_table_setid 1 #define Anum_repset_table_reloid 2 #define Anum_repset_table_att_list 3 #define Anum_repset_table_row_filter 4 +#define Anum_repset_table_sync_clear_filter 5 #define REPSETTABLEHASH_INITIAL_SIZE 128 @@ -412,6 +413,7 @@ get_table_replication_info(Oid nodeid, Relation table, entry->replicate_delete = false; entry->att_list = NULL; entry->row_filter = NIL; + entry->sync_clear_filter = NULL; /* * Check for match between table's replication sets and the subscription @@ -503,6 +505,16 @@ get_table_replication_info(Oid nodeid, Relation table, entry->row_filter = lappend(entry->row_filter, row_filter); MemoryContextSwitchTo(olctx); } + + d = heap_getattr(tuple, Anum_repset_table_sync_clear_filter, + repset_rel_desc, &isnull); + + if (!isnull) + { + MemoryContext olctx = MemoryContextSwitchTo(CacheMemoryContext); + entry->sync_clear_filter = TextDatumGetCString(d); + MemoryContextSwitchTo(olctx); + } } } } @@ -1014,7 +1026,7 @@ drop_node_replication_sets(Oid nodeid) */ void replication_set_add_table(Oid setid, Oid reloid, List *att_list, - Node *row_filter) + Node *row_filter, char *sync_clear_filter) { RangeVar *rv; Relation rel; @@ -1076,6 +1088,12 @@ replication_set_add_table(Oid setid, Oid reloid, List *att_list, else nulls[Anum_repset_table_row_filter - 1] = true; + if (sync_clear_filter) + values[Anum_repset_table_sync_clear_filter - 1] = + CStringGetTextDatum( sync_clear_filter ); + else + nulls[Anum_repset_table_sync_clear_filter - 1] = true; + tup = heap_form_tuple(tupDesc, values, nulls); /* Insert the tuple to the catalog. 
 */
diff --git a/pglogical_repset.h b/pglogical_repset.h
index 2c70569..7caf1a2 100644
--- a/pglogical_repset.h
+++ b/pglogical_repset.h
@@ -46,6 +46,7 @@ typedef struct PGLogicalTableRepInfo
 									otherwise each replicated column
 									is a member */
 	List	   *row_filter;		/* compiled row_filter nodes */
+	char	   *sync_clear_filter;
 } PGLogicalTableRepInfo;
 
 extern PGLogicalRepSet *get_replication_set(Oid setid);
@@ -66,7 +67,8 @@ extern void drop_replication_set(Oid setid);
 extern void drop_node_replication_sets(Oid nodeid);
 
 extern void replication_set_add_table(Oid setid, Oid reloid,
-						  List *att_list, Node *row_filter);
+						  List *att_list, Node *row_filter, char *sync_clear_filter);
+
 extern void replication_set_add_seq(Oid setid, Oid seqoid);
 extern List *replication_set_get_tables(Oid setid);
 extern List *replication_set_get_seqs(Oid setid);
diff --git a/pglogical_rpc.c b/pglogical_rpc.c
index ddb2c12..9411f7c 100644
--- a/pglogical_rpc.c
+++ b/pglogical_rpc.c
@@ -64,7 +64,7 @@ pg_logical_get_remote_repset_tables(PGconn *conn, List *replication_sets)
 	/* PGLogical 2.0+ */
 	appendStringInfo(&query,
 					 "SELECT i.relid, i.nspname, i.relname, i.att_list,"
-					 "       i.has_row_filter"
+					 "       i.has_row_filter, i.sync_clear_filter"
 					 "  FROM (SELECT DISTINCT relid FROM pglogical.tables WHERE set_name = ANY(ARRAY[%s])) t,"
 					 "       LATERAL pglogical.show_repset_table_info(t.relid, ARRAY[%s]) i",
 					 repsetarr.data, repsetarr.data);
@@ -97,6 +97,11 @@ pg_logical_get_remote_repset_tables(PGconn *conn, List *replication_sets)
 			elog(ERROR, "could not parse column list for table");
 
 		remoterel->hasRowFilter = (strcmp(PQgetvalue(res, i, 4), "t") == 0);
+		if( !PQgetisnull(res, i, 5))
+		{
+			remoterel->sync_clear_filter = pstrdup(PQgetvalue(res, i, 5));
+		}
+
 		tables = lappend(tables, remoterel);
 	}
 
@@ -145,7 +150,7 @@ pg_logical_get_remote_repset_table(PGconn *conn, RangeVar *rv,
 	/* PGLogical 2.0+ */
 	appendStringInfo(&query,
 					 "SELECT i.relid, i.nspname, i.relname, i.att_list,"
-					 "       i.has_row_filter"
+					 "       i.has_row_filter, i.sync_clear_filter"
 					 "  FROM pglogical.show_repset_table_info(%s::regclass, ARRAY[%s]) i",
 					 PQescapeLiteral(conn, relname.data, relname.len),
 					 repsetarr.data);
@@ -175,6 +180,10 @@ pg_logical_get_remote_repset_table(PGconn *conn, RangeVar *rv,
 		elog(ERROR, "could not parse column list for table");
 
 	remoterel->hasRowFilter = (strcmp(PQgetvalue(res, 0, 4), "t") == 0);
+	if( !PQgetisnull(res, 0, 5))
+	{
+		remoterel->sync_clear_filter = pstrdup(PQgetvalue(res, 0, 5));
+	}
 
 	PQclear(res);
 
diff --git a/pglogical_sync.c b/pglogical_sync.c
index 84e622e..19ca247 100644
--- a/pglogical_sync.c
+++ b/pglogical_sync.c
@@ -494,6 +494,66 @@ make_copy_attnamelist(PGLogicalRelation *rel)
 	return attnamelist;
 }
 
+/*
+ * Execute one user-provided after-sync query on the target node.
+ */
+static void exec_after_sync_queries(PGconn *target_conn, char *single_query)
+{
+	StringInfoData query;
+	initStringInfo(&query);
+
+	/* Copy the query text verbatim; it is not a format string. */
+	appendStringInfoString(&query, single_query);
+
+	elog(LOG, "AFTER SYNC QUERY: %s", query.data);
+
+	/* Execute the query on the target node. */
+	PGresult *res = PQexec(target_conn, query.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		ereport(ERROR,
+				(errmsg("failed to execute after-sync query"),
+				 errdetail("Query '%s': %s", query.data,
+						   PQresultErrorMessage(res))));
+	}
+
+	PQclear(res);
+}
+
+/*
+ * Delete existing data from the target table before it is resynchronized.
+ */
+static void clear_table_data(PGconn *origin_conn, PGconn *target_conn, PGLogicalRemoteRel *remoterel)
+{
+	StringInfoData query;
+	initStringInfo(&query);
+
+	/* Build the DELETE statement for the target table. */
+	appendStringInfo(&query, "delete from %s.%s",
+		PQescapeIdentifier(target_conn, remoterel->nspname, strlen(remoterel->nspname)),
+		PQescapeIdentifier(target_conn, remoterel->relname, strlen(remoterel->relname)));
+
+	if ( remoterel->sync_clear_filter != NULL )
+	{
+		appendStringInfo(&query, " where %s", remoterel->sync_clear_filter);
+	}
+
+	elog(LOG, "CLEAR TABLE QUERY: %s", query.data);
+
+	/* Execute the DELETE on the target node. */
+	PGresult *res = PQexec(target_conn, query.data);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		ereport(ERROR,
+				(errmsg("failed to clear table data"),
+				 errdetail("Query '%s': %s", query.data,
+						   PQresultErrorMessage(res))));
+	}
+
+	PQclear(res);
+}
+
+
 /*
  * COPY single table over wire.
  */
@@ -595,6 +655,7 @@ copy_table_data(PGconn *origin_conn, PGconn *target_conn,
 	}
 	appendStringInfoString(&query, "TO stdout");
+	elog(LOG, "COPY TABLE QUERY: %s", query.data);
 
 	/* Execute COPY TO. */
 	res = PQexec(origin_conn, query.data);
@@ -670,22 +731,38 @@ copy_table_data(PGconn *origin_conn, PGconn *target_conn,
  * Creates new connection to origin and target.
  */
 static void
-copy_tables_data(char *sub_name, const char *origin_dsn,
-				 const char *target_dsn, const char *origin_snapshot,
-				 List *tables, List *replication_sets,
-				 const char *origin_name)
+copy_tables_data(char *sub_name, bool data_replace, const char *origin_dsn,
+				 const char *target_dsn, const char *origin_snapshot,
+				 List *tables, List *replication_sets,
+				 const char *origin_name, List *after_sync_queries)
 {
-	PGconn	   *origin_conn;
-	PGconn	   *target_conn;
-	ListCell   *lc;
+	PGconn	   *origin_conn;
+	PGconn	   *target_conn;
+	ListCell   *lc;
 
 	/* Connect to origin node. */
-	origin_conn = pglogical_connect(origin_dsn, sub_name, "copy");
-	start_copy_origin_tx(origin_conn, origin_snapshot);
+	origin_conn = pglogical_connect( origin_dsn, sub_name, "copy" );
+	start_copy_origin_tx( origin_conn, origin_snapshot );
 
 	/* Connect to target node. */
-	target_conn = pglogical_connect(target_dsn, sub_name, "copy");
-	start_copy_target_tx(target_conn, origin_name);
+	target_conn = pglogical_connect( target_dsn, sub_name, "copy" );
+	start_copy_target_tx( target_conn, origin_name );
+
+	if( data_replace )
+	{
+		foreach( lc, tables )
+		{
+			RangeVar	   *rv = lfirst( lc );
+			PGLogicalRemoteRel *remoterel;
+
+			remoterel = pg_logical_get_remote_repset_table( origin_conn, rv,
+											replication_sets );
+
+			clear_table_data( origin_conn, target_conn, remoterel );
+
+			CHECK_FOR_INTERRUPTS();
+		}
+	}
 
 	/* Copy every table. */
 	foreach (lc, tables)
@@ -694,13 +771,22 @@ copy_tables_data(char *sub_name, const char *origin_dsn,
 		PGLogicalRemoteRel *remoterel;
 
 		remoterel = pg_logical_get_remote_repset_table(origin_conn, rv,
-														replication_sets);
+											replication_sets);
 
 		copy_table_data(origin_conn, target_conn, remoterel, replication_sets);
 
 		CHECK_FOR_INTERRUPTS();
 	}
 
+	if ( after_sync_queries )
+	{
+		foreach (lc, after_sync_queries)
+		{
+			char *query_str = (char *) lfirst(lc);
+			exec_after_sync_queries( target_conn, query_str );
+		}
+	}
+
 	/* Finish the transactions and disconnect. */
 	finish_copy_origin_tx(origin_conn);
 	finish_copy_target_tx(target_conn);
@@ -716,10 +802,11 @@ copy_tables_data(char *sub_name, const char *origin_dsn,
  * the transaction is bound to a snapshot.
*/ static List * -copy_replication_sets_data(char *sub_name, const char *origin_dsn, - const char *target_dsn, - const char *origin_snapshot, - List *replication_sets, const char *origin_name) +copy_replication_sets_data(char *sub_name, bool data_replace, const char *origin_dsn, + const char *target_dsn, + const char *origin_snapshot, + List *replication_sets, const char *origin_name, + List *after_sync_queries) { PGconn *origin_conn; PGconn *target_conn; @@ -731,13 +818,24 @@ copy_replication_sets_data(char *sub_name, const char *origin_dsn, start_copy_origin_tx(origin_conn, origin_snapshot); /* Get tables to copy from origin node. */ - tables = pg_logical_get_remote_repset_tables(origin_conn, - replication_sets); + tables = pg_logical_get_remote_repset_tables(origin_conn, replication_sets); /* Connect to target node. */ target_conn = pglogical_connect(target_dsn, sub_name, "copy"); start_copy_target_tx(target_conn, origin_name); + if( data_replace ) + { + foreach( lc, tables ) + { + PGLogicalRemoteRel *remoterel = lfirst( lc ); + + clear_table_data( origin_conn, target_conn, remoterel ); + + CHECK_FOR_INTERRUPTS(); + } + } + /* Copy every table. */ foreach (lc, tables) { @@ -748,6 +846,15 @@ copy_replication_sets_data(char *sub_name, const char *origin_dsn, CHECK_FOR_INTERRUPTS(); } + if ( after_sync_queries ) + { + foreach (lc, after_sync_queries) + { + char *query_str = (char *) lfirst(lc); + exec_after_sync_queries( target_conn, query_str ); + } + } + /* Finish the transactions and disconnect. */ finish_copy_origin_tx(origin_conn); finish_copy_target_tx(target_conn); @@ -775,7 +882,7 @@ pglogical_sync_worker_cleanup(PGLogicalSubscription *sub) WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 1000L); - ResetLatch(&MyProc->procLatch); + ResetLatch(&MyProc->procLatch); /* emergency bailout if postmaster has died */ if (rc & WL_POSTMASTER_DEATH) @@ -941,12 +1048,13 @@ pglogical_sync_subscription(PGLogicalSubscription *sub) set_subscription_sync_status(sub->id, status); CommitTransactionCommand(); - tables = copy_replication_sets_data(sub->name, - sub->origin_if->dsn, - sub->target_if->dsn, - snapshot, - sub->replication_sets, - sub->slot_name); + tables = copy_replication_sets_data(sub->name, sub->data_replace, + sub->origin_if->dsn, + sub->target_if->dsn, + snapshot, + sub->replication_sets, + sub->slot_name, + sub->after_sync_queries); /* Store info about all the synchronized tables. */ StartTransactionCommand(); @@ -1096,9 +1204,9 @@ pglogical_sync_table(PGLogicalSubscription *sub, RangeVar *table, CommitTransactionCommand(); /* Copy data. */ - copy_tables_data(sub->name, sub->origin_if->dsn,sub->target_if->dsn, + copy_tables_data(sub->name, sub->data_replace, sub->origin_if->dsn,sub->target_if->dsn, snapshot, list_make1(table), sub->replication_sets, - sub->slot_name); + sub->slot_name, sub->after_sync_queries); } PG_END_ENSURE_ERROR_CLEANUP(pglogical_sync_worker_cleanup_error_cb, PointerGetDatum(sub));
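/*
 * Illustrative sketch, not part of the patches above (function and parameter
 * names are hypothetical): clear_table_data() builds its DELETE from strings
 * returned by PQescapeIdentifier(), which libpq allocates with malloc() and
 * expects the caller to release with PQfreemem().  The standalone libpq
 * fragment below shows the same statement construction with the escaped
 * identifiers freed afterwards and with escaping done against the connection
 * the statement actually runs on.
 */
#include <stdio.h>
#include <string.h>
#include <libpq-fe.h>

int
delete_table_rows(PGconn *target_conn, const char *nspname,
				  const char *relname, const char *clear_filter)
{
	char	   *nsp;
	char	   *rel;
	char		query[8192];
	PGresult   *res;
	int			ok;

	/* Escape against the connection the DELETE will actually run on. */
	nsp = PQescapeIdentifier(target_conn, nspname, strlen(nspname));
	rel = PQescapeIdentifier(target_conn, relname, strlen(relname));
	if (nsp == NULL || rel == NULL)
	{
		fprintf(stderr, "escaping identifier failed: %s",
				PQerrorMessage(target_conn));
		PQfreemem(nsp);
		PQfreemem(rel);
		return 0;
	}

	if (clear_filter != NULL)
		snprintf(query, sizeof(query), "DELETE FROM %s.%s WHERE %s",
				 nsp, rel, clear_filter);
	else
		snprintf(query, sizeof(query), "DELETE FROM %s.%s", nsp, rel);

	/* The escaped identifiers are malloc'd by libpq; release them. */
	PQfreemem(nsp);
	PQfreemem(rel);

	res = PQexec(target_conn, query);
	ok = (PQresultStatus(res) == PGRES_COMMAND_OK);
	if (!ok)
		fprintf(stderr, "DELETE failed: %s", PQresultErrorMessage(res));
	PQclear(res);

	return ok;
}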