From a236ce1e160ea5382cc07af48fae5869f68567c8 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 16 Jun 2014 15:49:40 +0000 Subject: [PATCH] Document refactoring to match pro console menus --- symmetric-assemble/pom.xml | 35 + .../src/docbook/administration.xml | 513 --- .../src/docbook/advanced-topics.xml | 45 +- .../src/docbook/configuration.xml | 3767 +++++++---------- symmetric-assemble/src/docbook/developer.xml | 39 + .../src/docbook/introduction.xml | 130 +- symmetric-assemble/src/docbook/manage.xml | 1121 +++++ symmetric-assemble/src/docbook/planning.xml | 447 -- .../docbook/{tutorial.xml => quick-start.xml} | 0 symmetric-assemble/src/docbook/setup.xml | 270 ++ symmetric-assemble/src/docbook/user-guide.xml | 11 +- 11 files changed, 2945 insertions(+), 3433 deletions(-) delete mode 100644 symmetric-assemble/src/docbook/administration.xml create mode 100644 symmetric-assemble/src/docbook/developer.xml create mode 100644 symmetric-assemble/src/docbook/manage.xml delete mode 100644 symmetric-assemble/src/docbook/planning.xml rename symmetric-assemble/src/docbook/{tutorial.xml => quick-start.xml} (100%) create mode 100644 symmetric-assemble/src/docbook/setup.xml diff --git a/symmetric-assemble/pom.xml b/symmetric-assemble/pom.xml index 86e984c4fc..d9e19a7422 100644 --- a/symmetric-assemble/pom.xml +++ b/symmetric-assemble/pom.xml @@ -329,6 +329,41 @@ + + build-quickstart-singlehtml-doc + generate-sources + + generate-html + + + ${docbook.build} + ${docbook.target}/quick-start/html-single + + true + quick-start.xml + css/docbook-style.css + ${docbook.build}/resources/xsl/docbook-html.xsl + + false + + + + + + + + + + + + + + + + + + + diff --git a/symmetric-assemble/src/docbook/administration.xml b/symmetric-assemble/src/docbook/administration.xml deleted file mode 100644 index a621d40d8d..0000000000 --- a/symmetric-assemble/src/docbook/administration.xml +++ /dev/null @@ -1,513 +0,0 @@ - - - - Administration - -
- Solving Synchronization Issues

By design, whenever SymmetricDS encounters an issue with a synchronization, the batch containing the error is marked as being in an error state, and all subsequent batches for that particular channel to that particular node are held and not synchronized until the error batch is resolved. SymmetricDS will retry the batch in error until the situation creating the error is resolved (or the data for the batch itself is changed).

Analyzing and resolving issues can take place on either the outgoing or the incoming side. The techniques for analysis differ slightly between the two cases, because the node with the outgoing batch also has the data and data events associated with the batch in its database, whereas on the incoming node all that is available is the incoming batch header and the data present in an incoming error table.
- Analyzing the Issue - Outgoing Batches

The first step in analyzing the cause of a failed batch is to locate information about the data in the batch, starting with the outgoing batch table itself. To locate batches in error, use:

select * from sym_outgoing_batch where error_flag=1;

Several useful pieces of information are available from this query:

- The batch number of the failed batch, available in column BATCH_ID.
- The node to which the batch is being sent, available in column NODE_ID.
- The channel to which the batch belongs, available in column CHANNEL_ID. All subsequent batches on this channel to this node will be held until the error condition is resolved.
- The specific data id in the batch which is causing the failure, available in column FAILED_DATA_ID.
- Any SQL message, SQL state, and SQL code returned during the synchronization attempt, available in columns SQL_MESSAGE, SQL_STATE, and SQL_CODE, respectively.

Using the error_flag on the batch table, as shown above, is more reliable than using the status column. The status column can change from 'ER' to a different status temporarily as the batch is retried.

The query above will also show you any recent batches that were originally in error and were changed to be manually skipped. See the discussion of skipping batches below for more details.

To get a full picture of the batch, you can query for the complete list of data changes associated with the failed batch by joining sym_data and sym_data_event, such as:

select * from sym_data where data_id in
  (select data_id from sym_data_event where batch_id='XXXXXX');

where XXXXXX is the batch id of the failing batch.

This query returns a wealth of information about each data change in a batch, including:

- The table involved in each data change, available in column TABLE_NAME.
- The event type (Update [U], Insert [I], or Delete [D]), available in column EVENT_TYPE.
- A comma separated list of the new data and (optionally) the old data, available in columns ROW_DATA and OLD_DATA, respectively.
- The primary key data, available in column PK_DATA.
- The channel id, trigger history information, transaction id if available, and other information.

More importantly, if you narrow your query to just the failed data id, you can determine the exact data change that is causing the failure:

select * from sym_data where data_id in
  (select failed_data_id from sym_outgoing_batch where batch_id='XXXXXX'
   and node_id='YYYYY');

where XXXXXX is the batch id and YYYYY is the node id of the batch that is failing.

The queries above usually yield enough information to determine why a particular batch is failing. Common reasons a batch might fail include:

- The schema at the destination has a column that is not nullable, yet the source has the column defined as nullable and a data change was sent with the column as null.
- A foreign key constraint at the destination is preventing an insertion or update, which could be caused by data having been deleted at the destination or by the foreign key constraint not being in place at the source.
- The data size of a column on the destination is smaller than the data size in the source, and data that is too large for the destination has been synced.
- -
- Analyzing the Issue - Incoming Batches

Analysis of an incoming batch differs from that of outgoing batches. For incoming batches, you will rely on two tables, sym_incoming_batch and sym_incoming_error.

The first step in analyzing the cause of a failed incoming batch is to locate information about the batch, starting with the incoming batch table. To locate batches in error, use:

select * from sym_incoming_batch where error_flag=1;

Several useful pieces of information are available from this query:

- The batch number of the failed batch, available in column BATCH_ID. Note that this is the batch number of the outgoing batch on the outgoing node.
- The node the batch is being sent from, available in column NODE_ID.
- The channel to which the batch belongs, available in column CHANNEL_ID. All subsequent batches on this channel from this node will be held until the error condition is resolved.
- The data_id that was being processed when the batch failed, available in column FAILED_DATA_ID.
- Any SQL message, SQL state, and SQL code returned during the synchronization attempt, available in columns SQL_MESSAGE, SQL_STATE, and SQL_CODE, respectively.

For incoming batches, we do not have data and data event entries in the database to query. We do, however, have a table, sym_incoming_error, which provides some information about the batch:

select * from sym_incoming_error
  where batch_id='XXXXXX' and node_id='YYYYY';

where XXXXXX is the batch id and YYYYY is the node id of the failing batch.

This query returns a wealth of information about each data change in a batch, including:

- The table involved in each data change, available in column TARGET_TABLE_NAME.
- The event type (Update [U], Insert [I], or Delete [D]), available in column EVENT_TYPE.
- A comma separated list of the new data and (optionally) the old data, available in columns ROW_DATA and OLD_DATA, respectively.
- The column names of the table, available in column COLUMN_NAMES.
- The primary key column names of the table, available in column PK_COLUMN_NAMES.
- -
- Resolving the Issue - Outgoing Batches

Once you have decided upon the cause of the issue, you'll have to decide the best course of action to fix it. If, for example, the problem is due to a database schema mismatch, one possible solution would be to alter the destination database in such a way that the SQL error no longer occurs. Whatever approach you take to remedy the issue, once you have made the change, SymmetricDS will retry the batch on the next push or pull and the channel's data will start flowing again.

If you have instead decided that the batch itself is wrong, does not need to be synchronized, or you wish to remove a particular data change from a batch, you do have the option of changing the data associated with the batch directly.

Be cautious when using the following two approaches to resolve synchronization issues. By far, the best approach to solving a synchronization error is to resolve what is truly causing the error at the destination database. Skipping a batch or removing a data id as discussed below should be your solution of last resort, since doing so results in differences between the source and destination databases.

Now that you've read the warning, if you still want to change the batch data itself, you have several options, including:

- Causing SymmetricDS to skip the batch completely. This is accomplished by setting the batch's status to 'OK', as in:

update sym_outgoing_batch set status='OK' where batch_id='XXXXXX'

where XXXXXX is the failing batch. On the next pull or push, SymmetricDS will skip this batch since it now thinks the batch has already been synchronized. Note that you can still distinguish between successful batches and ones that you've artificially marked as 'OK', since the error_flag column on the failed batch will still be set to '1' (in error).

- Removing the failing data id from the batch by deleting the corresponding row in sym_data_event. Eliminating the data id from the list of data ids in the batch will cause future synchronization attempts of the batch to no longer include that particular data change as part of the batch. For example:

delete from sym_data_event where batch_id='XXXXXX' and data_id='YYYYYY'

where XXXXXX is the failing batch and YYYYYY is the data id that should no longer be included in the batch.
- -
- Resolving the Issue - Incoming Batches

For batches in error on the incoming side, you'll also have to decide the best course of action to fix the issue. Incoming batch errors that are in conflict can be fixed by taking advantage of two columns in sym_incoming_error which are examined each time batches are processed. The first column, resolve_data, if filled in, will be used in place of row_data. The second column, resolve_ignore, if set, will cause this particular data item to be ignored and batch processing to continue. These are the same two columns used when a manual conflict resolution strategy is chosen, as discussed in the section on conflict detection and resolution.
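As a sketch, resolving a single failed row might look like the following. This example assumes the failed row in sym_incoming_error is identified by batch id, node id, and a failed row number column; verify the exact key columns against your version's data model.

update sym_incoming_error
   set resolve_ignore = 1  -- skip this row and let the rest of the batch process
 where batch_id = 'XXXXXX' and node_id = 'YYYYY' and failed_row_number = 1;

Alternatively, set resolve_data to a corrected comma-separated row image (and leave resolve_ignore unset), and the corrected data will be loaded in place of row_data on the next retry.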
-
-
- Changing Triggers

A trigger row may be updated using SQL to change a synchronization definition. SymmetricDS will look for changes each night or whenever the Sync Triggers Job is run (see below). For example, a change to place the table price_changes into the price channel could be accomplished with an update statement like the sketch at the end of this section.

All configuration changes should be managed centrally at the registration node. If enabled, configuration changes will be synchronized out to client nodes. When trigger changes reach the client nodes, the Sync Triggers Job will run automatically.

Centrally, the trigger changes will not take effect until the Sync Triggers Job runs. Instead of waiting for the Sync Triggers Job to run overnight after making a trigger change, you can invoke the syncTriggers() method over JMX or simply restart the SymmetricDS server. A complete record of trigger changes is kept in the table sym_trigger_hist, which was discussed earlier.
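A minimal sketch of such an update, using the channel and table names from the example above (the original statement is not shown here, so column choices are assumptions based on the standard sym_trigger layout):

update SYM_TRIGGER
   set channel_id = 'price',
       last_update_time = current_timestamp  -- bump so the change is noticed
 where source_table_name = 'price_changes';

Bumping last_update_time helps the Sync Triggers Job notice that the definition changed on its next run.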
- -
- Maintaining multiple synchronization configurations through Grouplets

As you probably know by now, SymmetricDS stores its configuration centrally and distributes it to all nodes. By default, a trigger-router is in effect for all nodes in the source node group or target node group. Triggers will be established on each node that is a member of the source node group, and changes will be routed to all relevant nodes that are members of the target node group. If, for example, the router routes to "all" nodes, "all" means every node that is in the target node group. This is the default behavior of SymmetricDS.

Once in production, however, you will likely find you need or want to make configuration changes to triggers and routers as new features are rolled out to your network of SymmetricDS nodes. You may, for example, wish to "pilot" a new configuration, containing new synchronizations, only on specific nodes initially, and then increase the size of the pilot over time. For this purpose, SymmetricDS provides the ability to specify that only particular trigger-router combinations are applicable to particular nodes. It does this by allowing you to define an arbitrary collection of nodes, called a "grouplet", and then choose which trigger-routers apply to the normal set of nodes (the default behavior) and which apply just to nodes in one or more grouplets. This allows you, essentially, to filter the list of nodes that would otherwise be included as source nodes and/or target nodes. Through the use of grouplets, you can, for example, specify a subset of nodes on which a given trigger would be created. It also allows you to specify a subset of the normal set of nodes a change would be routed to. This behavior is in addition to, and occurs before, any subsetting or filtering the router might otherwise do.

In its simplest form, a grouplet is just an arbitrary collection of nodes. To define a grouplet, you start by creating a grouplet with a unique id, a description, and a link policy in sym_grouplet. To define which nodes are members of (or are not members of) a grouplet, you provide a list of external ids of the nodes in sym_grouplet_link. How those external ids are used varies based on the grouplet link policy. The grouplet_link_policy can be either I or E, representing an "inclusive" list of nodes or an "exclusive" list of nodes, respectively. In the case of "inclusive", you'll be listing each external id to be included in the grouplet. In the case of "exclusive", all nodes will be included in the grouplet except ones which have an external id in the list of external ids.

Once you have defined your grouplet and its member nodes, you can tie the grouplet to a given trigger-router through the use of sym_trigger_router_grouplet. If a particular trigger-router does not appear in this table, SymmetricDS behaves as normal. If, however, an entry for a particular trigger-router appears in this table, the default behavior is overridden based on the grouplet_id and applies_when settings. The grouplet id provides the node list, and the applies_when indicates whether the grouplet nodes are to be used to filter the source node list, the target node list, or both (settings are "S", "T", and "B", respectively). Nodes that survive the filtering process as a source will have a trigger defined, and nodes that survive the filtering process as a target are eligible nodes that can be routed to.
- Grouplet Example

At this point, an example would probably be useful. Picture the case where you have 100 retail stores (each containing one database, and each a member of the "store" node group) and a central office database (external id of corp, and a member of the "corp" node group). You wish to pilot two new trigger-routers for a new feature on your point-of-sale software (one which moves data from corp to store, and one which moves data from store to corp), but you only want the triggers to be installed on 10 specific stores that represent your "pilot" stores. In this case, the simplest approach would be to define a grouplet with, say, a grouplet id of "pilot". We'd use a grouplet link policy of "inclusive" and list each of the 10 external ids in the sym_grouplet_link table.

For the trigger-router meant to send data from corp to store, we'd create an entry in sym_trigger_router_grouplet for our grouplet id of "pilot", and we'd specify "T" (target) as the applies_when setting. In this way, the source node list is not filtered, but the target node list used during routing will be filtered down to just our pilot stores. For the trigger-router meant to send data from a pilot store back to corp, we would have the grouplet apply when the node is in the source node list (i.e., applies_when will be "S"). This will cause the trigger to only be created for stores in the pilot list and not other stores.

An important thing to mention in this example: since your grouplet only included the store nodes, you can't simply specify "both" for the applies_when setting. For the corp-to-store trigger, for example, if you had said "both", no trigger would have been installed in corp, since the grouplet nodes represent all possible source nodes as well as target nodes, and "corp" is not in the list! The same is true for the store-to-corp trigger-router as well. You could, however, use "both" as the applies_when if you had included the "corp" external id along with the list of the 10 pilot store external ids.
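In SQL, the pilot setup above might look like the following sketch. The column layouts are the standard ones for sym_grouplet, sym_grouplet_link, and sym_trigger_router_grouplet, but verify them against your version's data model; the 'pos_feature' trigger id and 'corp-2-store' router id are hypothetical names for the new trigger-router.

insert into SYM_GROUPLET (grouplet_id, grouplet_link_policy, description, create_time, last_update_time)
  values ('pilot', 'I', 'Pilot stores for the new POS feature', current_timestamp, current_timestamp);

-- one row per pilot store
insert into SYM_GROUPLET_LINK (grouplet_id, external_id, create_time, last_update_time)
  values ('pilot', '00001', current_timestamp, current_timestamp);

-- filter the target node list for the corp-to-store trigger-router
insert into SYM_TRIGGER_ROUTER_GROUPLET (grouplet_id, trigger_id, router_id, applies_when, create_time, last_update_time)
  values ('pilot', 'pos_feature', 'corp-2-store', 'T', current_timestamp, current_timestamp);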
-
- -
- Re-synchronizing Data

There may be times where you find you need to re-send or re-synchronize data when the change itself was not captured. This could be needed, for example, if the data changes occurred prior to SymmetricDS placing triggers on the data tables themselves, or if the data at the destination was accidentally deleted, or for some other reason. Two approaches are commonly taken to re-send the data, both of which are discussed below.

Be careful when re-sending data using either of these two techniques. Be sure you are only sending the rows you intend to send and, more importantly, be sure to re-send the data in a way that won't cause foreign key constraint issues at the destination. In other words, if more than one table is involved, be sure to send any tables which are referred to by other tables via foreign keys first. Otherwise, the channel's synchronization will block, because SymmetricDS is unable to insert or update a row whose foreign key relationship refers to a non-existent row in the destination!

One possible approach would be to "touch" the rows in individual tables that need to be re-sent. By "touch", we mean to alter the row data in such a way that SymmetricDS detects a data change and therefore includes the data change in the batching and synchronizing steps. Note that you have to change the data in some meaningful way (e.g., update a time stamp); setting a column to its current value is not sufficient (by default, if there's not an actual data value change, SymmetricDS won't treat the change as something which needs to be synchronized).

A second approach would be to take advantage of SymmetricDS' built-in functionality by simulating a partial "initial load" of the data. The approach is to manually create "reload" events in sym_data for the necessary tables, thereby re-sending the desired rows for the given tables. Again, foreign key constraints must be kept in mind when creating these reload events. These reload events are created in the source database itself, and the necessary table, trigger-router combination, and channel are included to indicate the direction of synchronization.

To create a reload event, you create a sym_data row, using:

- data_id: null
- table_name: name of table to be sent
- event_type: 'R', for reload
- row_data: a "where" clause (minus the word 'where') which defines the subset of rows from the table to be sent. To send all rows, one can use 1=1 for this value.
- pk_data: null
- old_data: null
- trigger_hist_id: use the id of the most recent entry (i.e., max(trigger_hist_id)) in sym_trigger_hist for the trigger-router combination for your table and router.
- channel_id: the channel in which the table is routed
- transaction_id: pick a value, for example '1'
- source_node_id: null
- external_data: null
- create_time: current_timestamp

By way of example, take our retail hands-on tutorial covered in the quick-start tutorial. Let's say we need to re-send a particular sales transaction from the store to corp because we lost the data in corp due to an overzealous delete. For the tutorial, all transaction-related tables start with sale_, use the sale_transaction channel, and are routed using the store_corp_identity router. In addition, the trigger-routers have been set up with an initial load order based on the necessary foreign key relationships (i.e., transaction tables which are "parents" have a lower initial load order than those of their "children").
An insert statement that would create the necessary "reload" events (three in this case, one for each table) would be as follows (where MISSING-ID is changed to the needed transaction id):

insert into sym_data (
  select null, t.source_table_name, 'R', 'tran_id=''MISSING-ID''', null, null,
    h.trigger_hist_id, t.channel_id, '1', null, null, current_timestamp
  from sym_trigger t inner join sym_trigger_router tr on
    t.trigger_id=tr.trigger_id inner join sym_trigger_hist h on
    h.trigger_hist_id=(select max(trigger_hist_id) from sym_trigger_hist
      where trigger_id=t.trigger_id)
  where channel_id='sale_transaction' and
    tr.router_id like 'store_corp_identity' and
    (t.source_table_name like 'sale_%')
  order by tr.initial_load_order asc);

This insert statement generates three rows, one for each configured sale table. It uses the most recent trigger history id for the corresponding table. Finally, it takes advantage of the initial load order of each trigger-router to create the three rows in the correct order (the order in which the tables would have been initially loaded).
-
- Changing Configuration - - The configuration of your system as defined in the sym_* tables may be modified at runtime. By default, any changes made to - the sym_* tables (with the exception of sym_node) should be made at the registration server. The changes will - be synchronized out to the leaf nodes by SymmetricDS triggers that are automatically created on the tables. - - - If this behavior is not desired, the feature can be turned off using a parameter. Custom triggers may be added - to the sym_* tables when the auto syncing feature is disabled. - -
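As a sketch, disabling the automatic synchronization of configuration changes would be a one-line change in the engine's properties file. The parameter name here, auto.sync.configuration, is the standard one, but confirm it against your version's parameter list:

auto.sync.configuration=false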
- -
- Logging Configuration

The standalone SymmetricDS installation uses Log4J for logging. The configuration file is conf/log4j.xml. The log4j.xml file has hints as to what logging can be enabled for useful, finer-grained logging.

There is a command line option to turn on preconfigured debugging levels. When the --debug option is used, conf/debug-log4j.xml is used instead of log4j.xml.

SymmetricDS proxies all of its logging through SLF4J. When deploying to an application server, or if Log4J is not being leveraged, then the general rules for SLF4J logging apply.
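As an example of the finer-grained logging mentioned above, a logger element can be added to conf/log4j.xml. This is a sketch in standard Log4J 1.x syntax; the level choice is up to you:

<!-- raise SymmetricDS logging to DEBUG -->
<logger name="org.jumpmind.symmetric">
    <level value="DEBUG" />
</logger>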
- -
- Java Management Extensions - - Monitoring and administrative operations can be performed using Java Management Extensions (JMX). - SymmetricDS uses MX4J to expose JMX attributes and operations that can be accessed - from the built-in web console, Java's jconsole, or an application server. - By default, the web management console can be opened from the following address: - - - - In order to use jconsole, you must enable JMX remote management in the JVM. You can edit the startup scripts to set the following system - parameters. - - - - More details about enabling JMX for JConsole can be found here. - - - Using the Java jconsole command, SymmetricDS is listed as a local process named SymmetricLauncher. - In jconsole, SymmetricDS appears under the MBeans tab under the name defined by the engine.name - property. The default value is SymmetricDS. - - - The management interfaces under SymmetricDS are organized as follows: - - - - Node - administrative operations - - - Parameters - access to properties set through the parameter service - - - - -
- - - -
- Temporary Files - - SymmetricDS creates temporary extraction and data load files with the CSV payload of a synchronization when - the value of the stream.to.file.threshold.bytes SymmetricDS property has been reached. Before reaching the threshold, files - are streamed to/from memory. The default threshold value is 32,767 bytes. This feature may be turned off by setting the stream.to.file.enabled - property to false. - - - SymmetricDS creates these temporary files in the directory specified by the java.io.tmpdir Java System property. - - - The location of the temporary directory may be changed by setting the Java System property passed into the Java program at startup. For example, - - -Djava.io.tmpdir=/home/.symmetricds/tmp - - -
- - -
diff --git a/symmetric-assemble/src/docbook/advanced-topics.xml b/symmetric-assemble/src/docbook/advanced-topics.xml index a86ddae7ee..e06a73f417 100644 --- a/symmetric-assemble/src/docbook/advanced-topics.xml +++ b/symmetric-assemble/src/docbook/advanced-topics.xml @@ -21,7 +21,7 @@ under the License. --> - -
Advanced Synchronization
@@ -703,6 +702,44 @@ Enter key password for basic authentication is set up with the standard configuration in the WEB.xml file.
- - +
+ Java Management Extensions + + Monitoring and administrative operations can be performed using Java Management Extensions (JMX). + SymmetricDS uses MX4J to expose JMX attributes and operations that can be accessed + from the built-in web console, Java's jconsole, or an application server. + By default, the web management console can be opened from the following address: + + + + In order to use jconsole, you must enable JMX remote management in the JVM. You can edit the startup scripts to set the following system + parameters. + + + + More details about enabling JMX for JConsole can be found here. + + + Using the Java jconsole command, SymmetricDS is listed as a local process named SymmetricLauncher. + In jconsole, SymmetricDS appears under the MBeans tab under the name defined by the engine.name + property. The default value is SymmetricDS. + + + The management interfaces under SymmetricDS are organized as follows: + + + + Node - administrative operations + + + Parameters - access to properties set through the parameter service + + + + +
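The remote-management system parameters referred to earlier in this section are the standard JVM flags for unauthenticated remote JMX; a sketch (the port number here is arbitrary, and disabling authentication and SSL is only appropriate on a trusted network):

-Dcom.sun.management.jmxremote
-Dcom.sun.management.jmxremote.port=31417
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false

With these set, jconsole can attach remotely using hostname:31417.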
diff --git a/symmetric-assemble/src/docbook/configuration.xml b/symmetric-assemble/src/docbook/configuration.xml index 4e348cc06a..058a3a0906 100644 --- a/symmetric-assemble/src/docbook/configuration.xml +++ b/symmetric-assemble/src/docbook/configuration.xml @@ -21,7 +21,7 @@ under the License. -->

-Configuration

The previous chapters introduced numerous concepts and the analysis and design needed to create an implementation of SymmetricDS. This chapter revisits each analysis step and documents how to turn a SymmetricDS design into reality through configuration of the various SymmetricDS tables. In addition, several advanced configuration options, not presented previously, will also be covered.
-Node Properties

To get a SymmetricDS node running, it needs to be given an identity and it needs to know how to connect to the database it will be synchronizing. The preferred way to configure a SymmetricDS engine is to create a properties file in the engines directory. The SymmetricDS server will create an engine for each properties file found in the engines directory. When started up, SymmetricDS reads the synchronization configuration and state from the database. If the configuration tables are missing, they are created automatically (auto creation can be disabled). Basic configuration is described by inserting into the following tables (the complete data model is defined in the data model appendix):

- sym_node_group: specifies the tiers that exist in a SymmetricDS network
- sym_node_group_link: links two node groups together for synchronization
- sym_channel: grouping and priority of synchronizations
- sym_trigger: specifies tables, channels, and conditions for which changes in the database should be captured
- sym_router: specifies the routers defined for synchronization, along with other routing details
- sym_trigger_router: provides mappings of routers and triggers

During start up, triggers are verified against the database, and database triggers are installed on tables that require data changes to be captured. The Route, Pull, and Push Jobs begin running to synchronize changes with other nodes.

Each node requires properties that allow it to connect to a database and register with a parent node. Properties are configured in a file named xxxxx.properties that is placed in the engines directory of the SymmetricDS install. The file is usually named according to the engine.name, but this is not a requirement.

To give a node its identity, the following properties are required. Any other properties found in conf/symmetric.properties can be overridden for a specific engine in an engine's properties file. If the properties are changed in conf/symmetric.properties, they will take effect across all engines deployed to the server. Note that you can use the variable $(hostName) to represent the host name of the machine when defining these properties (for example, external.id=$(hostName)).

- engine.name: This is an arbitrary name that is used to access a specific engine using an HTTP URL. Each node configured in the engines directory must have a unique engine name. The engine name is also used for the domain name of registered JMX beans.

- group.id: The node group that this node is a member of. Synchronization is specified between node groups, which means you only need to specify it once for multiple nodes in the same group.

- external.id: The external id for this node has meaning to the user and provides integration into the system where it is deployed. For example, it might be a retail store number or a region number. The external id can be used in expressions for conditional and subset data synchronization. Behind the scenes, each node has a unique sequence number for tracking synchronization events. That makes it possible to assign the same external id to multiple nodes, if desired.

- sync.url: The URL where this node can be contacted for synchronization. At startup and during each heartbeat, the node updates its entry in the database with this URL. The sync url is of the format: http://{hostname}:{port}/{webcontext}/sync/{engine.name}. The {webcontext} is blank for a standalone deployment; it will typically be the name of the war file for an application server deployment. The {engine.name} can be left blank if there is only one engine deployed in a SymmetricDS server.

When a new node is first started, it has no information about synchronizing. It contacts the registration server in order to join the network and receive its configuration. The configuration for all nodes is stored on the registration server, and the URL must be specified in the following property:

- registration.url: The URL where this node can connect for registration to receive its configuration. The registration server is part of SymmetricDS and is enabled as part of the deployment. This is typically equal to the value of the sync.url of the registration server.

Note that a registration server node is defined as one whose registration.url is either (a) blank, or (b) identical to its sync.url.

For a deployment where the database connection pool should be created using a JDBC driver, set the following properties:

- db.driver: The class name of the JDBC driver.
- db.url: The JDBC URL used to connect to the database.
- db.user: The database username, which is used to login, create, and update SymmetricDS tables.
- db.password: The password for the database user.
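Putting these together, a minimal engine properties file for a hypothetical store node might look like the sketch below. All ids, hostnames, and credentials here are made up; 31415 is the default SymmetricDS port.

engine.name=store-001
group.id=store
external.id=001
registration.url=http://corp-host:31415/sync/corp-000
sync.url=http://store-001-host:31415/sync/store-001
db.driver=com.mysql.jdbc.Driver
db.url=jdbc:mysql://localhost/store001
db.user=symmetric
db.password=changeme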
- -
-Node

A node, a single instance of SymmetricDS, is defined in the sym_node table. Two other tables play a direct role in defining a node as well. The first is sym_node_identity. The only row in this table is inserted in the database when the node first registers with a parent node. In the case of a root node, the row is entered by the user. The row is used by a node instance to determine its node identity.

The following SQL statements set up a top-level registration server as a node identified as "00000" in the "corp" node group.

insert into SYM_NODE (node_id, node_group_id, external_id, sync_enabled)
  values ('00000', 'corp', '00000', 1);
insert into SYM_NODE_IDENTITY values ('00000');

The second table, sym_node_security, has rows created for each child node that registers with the node, assuming auto-registration is enabled. If auto registration is not enabled, you must create a row in sym_node and sym_node_security for the node to be able to register. You can also, with this table, manually cause a node to re-register or perform another initial load by setting the corresponding columns in the table itself. Registration is discussed in more detail later in this guide.
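As a sketch, the re-register and re-initial-load operations mentioned above come down to flipping sym_node_security columns. The node id is hypothetical, and the column names are the standard ones but worth verifying against your version's data model:

-- allow node 00001 to register again
update sym_node_security
   set registration_enabled = 1, registration_time = null
 where node_id = '00001';

-- queue another initial load for node 00001
update sym_node_security
   set initial_load_enabled = 1, initial_load_time = null
 where node_id = '00001';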
- -
-Node Group

Node Groups are straightforward to configure and are defined in the sym_node_group table. The following SQL statements would create node groups for "corp" and "store" based on our retail store example.

insert into SYM_NODE_GROUP (node_group_id, description)
  values ('store', 'A retail store node');
insert into SYM_NODE_GROUP (node_group_id, description)
  values ('corp', 'A corporate node');
- - - -
-Channel

By categorizing data into channels and assigning them to triggers, the user gains more control and visibility into the flow of data. In addition, SymmetricDS allows for synchronization to be enabled, suspended, or scheduled by channel as well. The frequency of synchronization and the order in which data gets synchronized are also controlled at the channel level.

The following SQL statements set up channels for a retail store. An "item" channel includes data for items and their prices, while a "sale_transaction" channel includes data for ringing sales at a register.

insert into SYM_CHANNEL (channel_id, processing_order, max_batch_size, max_batch_to_send,
    extract_period_millis, batch_algorithm, enabled, description)
  values ('item', 10, 1000, 10, 0, 'default', 1, 'Item and pricing data');
insert into SYM_CHANNEL (channel_id, processing_order, max_batch_size, max_batch_to_send,
    extract_period_millis, batch_algorithm, enabled, description)
  values ('sale_transaction', 1, 1000, 10, 60000,
    'transactional', 1, 'retail sale transactions from register');

Batching is the grouping of data, by channel, to be transferred and committed at the client together. There are three different out-of-the-box batching algorithms which may be configured in the batch_algorithm column on channel:

- default: All changes that happen in a transaction are guaranteed to be batched together. Multiple transactions will be batched and committed together until there is no more data to be sent or the max_batch_size is reached.

- transactional: Batches will map directly to database transactions. If there are many small database transactions, then there will be many batches. The max_batch_size column has no effect.

- nontransactional: Multiple transactions will be batched and committed together until there is no more data to be sent or the max_batch_size is reached. The batch will be cut off at the max_batch_size regardless of whether it is in the middle of a transaction.

If a channel contains only tables that will be synchronized in one direction and data is routed to all the nodes in the target node groups, then batching on the channel can be optimized to share batches across nodes. This is an important feature when data needs to be routed to thousands of nodes. When this mode is detected, you will see batches created in sym_outgoing_batch with the common_flag set to 1.

There are also several size-related parameters that can be set by channel. They include:

- max_batch_size: Specifies the maximum number of data events to process within a batch for this channel.

- max_batch_to_send: Specifies the maximum number of batches to send for a given channel during a 'synchronization' between two nodes. A 'synchronization' is equivalent to a push or a pull. For example, if there are 12 batches ready to be sent for a channel and max_batch_to_send is equal to 10, then only the first 10 batches will be sent even though 12 batches are ready.

- max_data_to_route: Specifies the maximum number of data rows to route for a channel at a time.

Based on your particular synchronization requirements, you can also specify whether old, new, and primary key data should be read and included during routing for a given channel. These are controlled by the columns use_old_data_to_route, use_row_data_to_route, and use_pk_data_to_route, respectively. By default, they are all 1 (true).

Finally, if data on a particular channel contains big lobs, you can set the column contains_big_lob to 1 (true) to provide SymmetricDS the hint that the channel contains big lobs. Some databases have shortcuts that SymmetricDS can take advantage of if it knows that the lob columns in sym_data aren't going to contain large lobs. The definition of how large a 'big' lob is varies from database to database.
- -
-Triggers, Routers, and Trigger/Router Mappings

In order to synchronize data, you must define at least one trigger, at least one router, and provide at least one link between the two (known as a trigger-router).
-Trigger

SymmetricDS captures synchronization data using database triggers. SymmetricDS' triggers are defined in the sym_trigger table. Each record is used by SymmetricDS when generating database triggers. Database triggers are only generated when a trigger is associated with a router whose source_node_group_id matches the node group id of the current node.

The source_table_name may contain the asterisk ('*') wildcard character so that one sym_trigger table entry can define synchronization for many tables. System tables and any tables that start with the SymmetricDS table prefix will be excluded. A list of wildcard tokens can also be supplied. If there are multiple tokens, they should be delimited with a comma. A wildcard token can also start with a bang ('!') to indicate an exclusive match. Tokens are always evaluated from left to right. When a table match is made, the table is either added to or removed from the list of tables. If another trigger already exists for a table, then that table is not included in the wildcard match (the explicitly defined trigger entry takes precedence). A wildcard example is shown below.

When determining whether a data change has occurred or not, by default the triggers will record a change even if the data was updated to the same value(s) it had originally. For example, a data change will be captured if an update of one column in a row updated the value to the same value it already was. There is a global property, trigger.update.capture.changed.data.only.enabled (false by default), that allows you to override this behavior. When set to true, SymmetricDS will only capture a change if the data has truly changed (i.e., when the new column data is not equal to the old column data).

The property trigger.update.capture.changed.data.only.enabled is currently only supported in the MySQL, DB2 and Oracle dialects.

The following SQL statement defines a trigger that will capture data for a table named "item" whenever data is inserted, updated, or deleted. The trigger is assigned to a channel also called 'item'.

insert into SYM_TRIGGER (trigger_id, source_table_name, channel_id, last_update_time, create_time)
  values ('item', 'item', 'item', current_timestamp, current_timestamp);

Note that many databases allow for multiple triggers of the same type to be defined on a table. Each database defines the order in which the triggers fire differently. If you have additional triggers beyond those SymmetricDS installs on your table, please consult your database documentation to determine if there will be issues with the ordering of the triggers.
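Building on the wildcard rules above, here is a sketch of a single trigger row that covers every sale_ table except a hypothetical sale_audit table (the table names are invented for illustration; the channel is the sale_transaction channel from the earlier examples):

insert into SYM_TRIGGER (trigger_id, source_table_name, channel_id, last_update_time, create_time)
  values ('sale-tables', 'sale_*,!sale_audit', 'sale_transaction', current_timestamp, current_timestamp);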
-Large Objects

Two lob-related settings are also available on sym_trigger:

- use_stream_lobs: Specifies whether to capture lob data as the trigger is firing or to stream lob columns from the source tables using callbacks during extraction. A value of 1 indicates to stream from the source via callback; with a value of 0, lob data is captured by the trigger.

- use_capture_lobs: Provides a hint as to whether this trigger will capture big lob data. If set to 1, every effort will be made during data capture in the trigger and during data selection for initial load to use lob facilities to extract and store data in the database.
- -
-External Select

Occasionally, you may find that you need to capture and save away a piece of data present in another table when a trigger is firing. This data is typically needed for the purposes of determining where to 'route' the data to once routing takes place. Each trigger definition contains an optional external_select field which can be used to specify the data to be captured. Once captured, this data is available during routing in sym_data's external_data field. For these cases, place a SQL select statement which returns the data item you need for routing in external_select. An example of the use of external select can be found later in this chapter.
-
- -
-Router

Routers provided in the base implementation currently include:

- Default Router: a router that sends all data to all nodes that belong to the target node group defined in the router.
- Column Match Router: a router that compares old or new column values to a constant value or the value of a node's external_id or node_id.
- Lookup Router: a router which can be configured to determine routing based on an existing or ancillary table specifically for the purpose of routing data.
- Subselect Router: a router that executes a SQL expression against the database to select nodes to route to. This SQL expression can be passed values of old and new column values.
- Scripted Router: a router that executes a Bean Shell script expression in order to select nodes to route to. The script can use the old and new column values.
- Xml Publishing Router: a router that publishes data changes directly to a messaging solution instead of transmitting changes to registered nodes. This router must be configured manually in XML as an extension point.
- Audit Table Router: a router that inserts into an automatically created audit table. It records captured changes to tables that it is linked to.

The mapping between the set of triggers and the set of routers is many-to-many. This means that one trigger can capture changes and route to multiple locations. It also means that one router can be defined and associated with many different triggers.
-Default Router

The simplest router is a router that sends all the data that is captured by its associated triggers to all the nodes that belong to the target node group defined in the router. A router is defined as a row in the sym_router table. It is then linked to triggers in the sym_trigger_router table.

The following SQL statement defines a router that will send data from the 'corp' group to the 'store' group.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, create_time, last_update_time)
  values ('corp-2-store','corp', 'store', current_timestamp, current_timestamp);

The following SQL statement maps the 'corp-2-store' router to the item trigger.

insert into SYM_TRIGGER_ROUTER (trigger_id, router_id, initial_load_order, create_time, last_update_time)
  values ('item', 'corp-2-store', 1, current_timestamp, current_timestamp);
- -
-Column Match Router

Sometimes requirements may exist that require data to be routed based on the current value or the old value of a column in the table that is being routed. Column routers are configured by setting the router_type column on the sym_router table to column and setting the router_expression column to an equality expression that represents the expected value of the column.

The first part of the expression is always the column name. The column name should always be defined in upper case. The upper case column name prefixed by OLD_ can be used for a comparison being done with the old column data value.

The second part of the expression can be a constant value, a token that represents another column, or a token that represents some other SymmetricDS concept. Token values always begin with a colon (:).

Consider a table that needs to be routed to all nodes in the target group only when a status column is set to 'READY TO SEND.' The following SQL statement will insert a column router to accomplish that.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type,
    router_expression, create_time, last_update_time)
  values ('corp-2-store-ok','corp', 'store', 'column', 'STATUS=READY TO SEND',
    current_timestamp, current_timestamp);

Consider a table that needs to be routed to all nodes in the target group only when a status column changes values. The following SQL statement will insert a column router to accomplish that. Note the use of OLD_STATUS, where the OLD_ prefix gives access to the old column value.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type,
    router_expression, create_time, last_update_time)
  values ('corp-2-store-status','corp', 'store', 'column', 'STATUS!=:OLD_STATUS',
    current_timestamp, current_timestamp);

Consider a table that needs to be routed only to nodes in the target group whose STORE_ID column matches the external id of a node. The following SQL statement will insert a column router to accomplish that.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type,
    router_expression, create_time, last_update_time)
  values ('corp-2-store-id','corp', 'store', 'column', 'STORE_ID=:EXTERNAL_ID',
    current_timestamp, current_timestamp);

Attributes on a node that can be referenced with tokens include:

- :NODE_ID
- :EXTERNAL_ID
- :NODE_GROUP_ID

Captured EXTERNAL_DATA is also available for routing as a virtual column.

Consider a table that needs to be routed to a redirect node defined by its external id in the sym_registration_redirect table. The following SQL statement will insert a column router to accomplish that.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id,
    router_type, router_expression, create_time, last_update_time)
  values ('corp-2-store-redirect','corp', 'store', 'column',
    'STORE_ID=:REDIRECT_NODE', current_timestamp, current_timestamp);

More than one column may be configured in a router_expression. When more than one column is configured, all matches are added to the list of nodes to route to. The following is an example where the STORE_ID column may contain the STORE_ID to route to or the constant of ALL, which indicates that all nodes should receive the update.
insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type,
    router_expression, create_time, last_update_time)
  values ('corp-2-store-multiple-matches','corp', 'store', 'column',
    'STORE_ID=ALL or STORE_ID=:EXTERNAL_ID', current_timestamp, current_timestamp);

The NULL keyword may be used to check if a column is null. If the column is null, then data will be routed to all nodes which qualify for the update. The following is an example where the STORE_ID column is used to route to a set of nodes who have a STORE_ID equal to their EXTERNAL_ID, or to all nodes if the STORE_ID is null.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type,
    router_expression, create_time, last_update_time)
  values ('corp-2-store-multiple-matches','corp', 'store', 'column',
    'STORE_ID=NULL or STORE_ID=:EXTERNAL_ID', current_timestamp, current_timestamp);
- -
-Lookup Table Router

A lookup table may contain the id of the node where data needs to be routed. This could be an existing table or an ancillary table that is added specifically for the purpose of routing data. Lookup table routers are configured by setting the router_type column on the sym_router table to lookuptable and setting a list of configuration parameters in the router_expression column.

Each of the following configuration parameters is required:

- LOOKUP_TABLE: This is the name of the lookup table.
- KEY_COLUMN: This is the name of the column on the table that is being routed. It will be used as a key into the lookup table.
- LOOKUP_KEY_COLUMN: This is the name of the column that is the key on the lookup table.
- EXTERNAL_ID_COLUMN: This is the name of the column that contains the external_id of the node to route to on the lookup table.

Note that the lookup table will be read into memory and cached for the duration of a routing pass for a single channel.

Consider a table that needs to be routed to a specific store, but the data in the changing table only contains brand information. In this case, the STORE table may be used as a lookup table.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type,
    router_expression, create_time, last_update_time)
  values ('corp-2-store-ok','corp', 'store', 'lookuptable', 'LOOKUP_TABLE=STORE
    KEY_COLUMN=BRAND_ID LOOKUP_KEY_COLUMN=BRAND_ID
    EXTERNAL_ID_COLUMN=STORE_ID', current_timestamp, current_timestamp);
- -
-Subselect Router

Sometimes routing decisions need to be made based on data that is not in the current row being synchronized. A 'subselect' router can be used in these cases. A 'subselect' is configured with a router_expression that is a SQL select statement which returns a result set of the node ids that need to be routed to. Column tokens can be used in the SQL expression and will be replaced with row column data. The overhead of using this router type is high because the 'subselect' statement runs for each row that is routed. It should not be used for tables that have a lot of rows that are updated. It also has the disadvantage that if the data being relied on to determine the node id has been deleted before routing takes place, then no results will be returned and routing will not happen.

The router_expression you specify is appended to the following SQL statement in order to select the node ids:

select c.node_id from sym_node c where
  c.node_group_id=:NODE_GROUP_ID and c.sync_enabled=1 and ...

As you can see, you have access to information about the node currently under consideration for routing through the 'c' alias, for example c.external_id. There are two node-related tokens you can use in your expression:

- :NODE_GROUP_ID
- :EXTERNAL_DATA

Column names representing data for the row in question are prefixed with a colon as well, for example :EMPLOYEE_ID, or :OLD_EMPLOYEE_ID. Here, the OLD_ prefix indicates the value before the change in cases where the old data has been captured.

For example, consider the case where an Order table and an OrderLineItem table need to be routed to a specific store. The Order table has a column named order_id and STORE_ID. A store node has an external_id that is equal to the STORE_ID on the Order table. OrderLineItem, however, only has a foreign key to its Order of order_id. To route OrderLineItems to the same nodes that the Order will be routed to, we need to reference the master Order record.

There are two possible ways to solve this in SymmetricDS. One is to configure a 'subselect' router_type on the sym_router table, shown below. (The other possible approach is to use an external_select to capture the data via a trigger for use in a column match router, demonstrated later in this chapter.)

Our solution utilizing subselect compares the external id of the current node with the store id from the Order table where the order id matches the order id of the current row being routed:

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id,
    router_type, router_expression, create_time, last_update_time)
  values ('corp-2-store','corp', 'store', 'subselect',
    'c.external_id in (select STORE_ID from order where order_id=:ORDER_ID)',
    current_timestamp, current_timestamp);

As a final note, the parent row in Order must still exist at the moment of routing for the child rows (OrderLineItem) to route, since the select statement is run when routing is occurring, not when the change data is first captured.
- -
-Scripted Router

When more flexibility is needed in the logic to choose the nodes to route to, a scripted router may be used. The currently available scripting language is Bean Shell. Bean Shell is a Java-like scripting language. Documentation for the Bean Shell scripting language can be found at http://www.beanshell.org.

The router_type for a Bean Shell scripted router is 'bsh'. The router_expression is a valid Bean Shell script that:

- adds node ids to the targetNodes collection which is bound to the script,
- returns a new collection of node ids,
- returns a single node id, or
- returns true to indicate that all nodes should be routed to, or returns false to indicate that no nodes should be routed to.

Also bound to the script evaluation is a list of nodes. The list of nodes is a list of eligible org.jumpmind.symmetric.model.Node objects. The current data column values and the old data column values are bound to the script evaluation as Java object representations of the column data. The columns are bound using the uppercase names of the columns. Old values are bound to uppercase representations that are prefixed with 'OLD_'.

If you need access to any of the SymmetricDS services, then the instance of org.jumpmind.symmetric.ISymmetricEngine is accessible via the bound engine variable.

In the following example, the node_id is a combination of STORE_ID and WORKSTATION_NUMBER, both of which are columns on the table that is being routed.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id,
    router_type, router_expression, create_time, last_update_time)
  values ('corp-2-store-bsh','corp', 'store', 'bsh',
    'targetNodes.add(STORE_ID + "-" + WORKSTATION_NUMBER);', current_timestamp, current_timestamp);

The same could also be accomplished by simply returning the node id. The last line of a bsh script is always the return value.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type,
    router_expression, create_time, last_update_time)
  values ('corp-2-store-bsh','corp', 'store', 'bsh',
    'STORE_ID + "-" + WORKSTATION_NUMBER', current_timestamp, current_timestamp);

The following example will synchronize to all nodes if the FLAG column has changed; otherwise, no nodes will be synchronized. Note that here we make use of OLD_, which provides access to the old column value.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id,
    router_type, router_expression, create_time, last_update_time)
  values ('corp-2-store-flag-changed','corp', 'store', 'bsh',
    'FLAG != null && !FLAG.equals(OLD_FLAG)',
    current_timestamp, current_timestamp);

The next example shows a script that iterates over each eligible node and checks to see if the trimmed value of the column named STATION equals the external_id.

insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id,
    router_type, router_expression, create_time, last_update_time)
  values ('corp-2-store-trimmed-station','corp', 'store', 'bsh',
    'for (org.jumpmind.symmetric.model.Node node : nodes) { if (STATION != null
       && node.getExternalId().equals(STATION.trim())) {
       targetNodes.add(node.getNodeId()); } }',
    current_timestamp, current_timestamp);
- -
-Audit Table Router - - -This router audits captured data by recording the change in an audit -table that the router creates and keeps up to date (as long as -auto.config.database -is set to true.) The router creates a table named the same as the table -for which data was captured with the suffix of _AUDIT. It will contain -all of the same columns as the original table with the same data types -only each column is nullable with no default values. - - - -Three extra "AUDIT" columns are added to the table: - -AUDIT_ID - the primary key of the table. -AUDIT_TIME - the time at which the change occurred. -AUDIT_EVENT - the DML type that happened to the -row. - - - - -The following is an example of an audit router - insert into SYM_ROUTER (router_id, -source_node_group_id, target_node_group_id, router_type, create_time, -last_update_time) values ('audit_at_corp','corp', 'local', 'audit', -current_timestamp, current_timestamp); - - -The audit router captures data for a group link. For the -audit router to work it must be associated with a node_group_link with -an action of type 'R'. The 'R' stands for 'only routes to'. In the above -example, we refer to a 'corp to local' group link. Here, local is a new -node_group created for the audit router. No nodes belong to the 'local' -node_group. If a trigger linked to an audit router fires on the corp -node, a new audit table will be created at the corp node with the new -data inserted. -
- - - - -
-Utilizing External Select when Routing - - - - -There may be times when you wish to route based on a piece of data that -exists in a table other than the one being routed. The approach, first -discussed in - -, is to utilize an -external_select -to save away data in -external_data -, which can then be referenced during routing. - - -Reconsider subselect's Order / OrderLineItem example (found in - -), where routing for the line item is accomplished by linking to the -"header" Order row. As an alternate way of solving the problem, we will -now use External Select combined with a column match router. - - -In this version of the solution, the STORE_ID is captured from the Order -table in the EXTERNAL_DATA column when the trigger fires. The router is -configured to route based on the captured EXTERNAL_DATA to all nodes -whose external id matches the captured external data. - -insert into SYM_TRIGGER (trigger_id, source_table_name, channel_id, - external_select, last_update_time,create_time) - values ('orderlineitem', 'orderlineitem', 'orderlineitem', - 'select STORE_ID from order where order_id=$(curTriggerValue).$(curColumnPrefix)order_id', - current_timestamp, current_timestamp); -insert into SYM_ROUTER (router_id, source_node_group_id, target_node_group_id, router_type, - router_expression, create_time, last_update_time) - values ('corp-2-store-ext','corp', 'store', 'column', 'EXTERNAL_DATA=:EXTERNAL_ID', - current_timestamp, current_timestamp); - - -The following variables can be used with the external select: - - - - - $(curTriggerValue) - - - - Variable to be replaced with the NEW or OLD column alias provided by the trigger context, which is platform specific. - For insert and update triggers, the NEW alias is used; for delete triggers, the OLD alias is used. - For example, "$(curTriggerValue).COLUMN" becomes ":new.COLUMN" for an insert trigger on Oracle. - - - - - - - $(curColumnPrefix) - - - - Variable to be replaced with the NEW_ or OLD_ column prefix for platforms that don't support column aliases. - This is currently only used by the H2 database. All other platforms will replace the variable with an empty string. - For example "$(curColumnPrefix)COLUMN" becomes "NEW_COLUMN" on H2 and "COLUMN" on Oracle. - - - - - -The advantage of this approach over the 'subselect' -approach is that it guards against the (somewhat unlikely) possibility -that the master Order table row might have been deleted before routing -has taken place. This external select solution also is a bit more -efficient than the 'subselect' approach, although the triggers produced -do run the extra external_select SQL inline with application database -updates. - -
- -
- -
-Trigger / Router Mappings - - -The - -table is used to define which specific combinations of triggers and -routers are needed for your configuration. The relationship between -triggers and routers is many-to-many, so this table serves as the join -table to define which combinations are valid, as well as to define -settings available at the trigger-router level of granularity. - - -Three important controls can be configured for a specific Trigger / -Router combination: Enabled, Initial Loads and Ping Back. The parameters -for these can be found in the Trigger / Router mapping table, - -. - - -
-Enable / disable trigger router - - -Each individual trigger-router combination can be disabled or enabled if -needed. By default, a trigger router is enabled, but if you have a -reason you wish to define a trigger router combination prior to it being -active, you can set the -enabled -flag to 0. This will cause the trigger-router mapping to be sent to all -nodes, but the trigger-router mapping will not be considered active or -enabled for the purposes of capturing data changes or routing. - -
-
- -Initial Loads - -An initial load is the process of seeding tables at a -target node with data from its parent node. When a node connects and -data is extracted, after it is registered and if an initial load was -requested, each table that is configured to synchronize to the target -node group will be given a reload event in the order defined by the end -user. A SQL statement is run against each table to get the data load -that will be streamed to the target node. The selected data is filtered -through the configured router for the table being loaded. If the data -set is going to be large, then SQL criteria can optionally be provided -to pare down the data that is selected out of the database. - - -An initial load cannot occur until after a node is registered. An -initial load is requested by setting the -initial_load_enabled -column on - -to -1 -on the row for the target node in the parent node's database. You can -configure SymmetricDS to automatically perform an initial load when a -node registers by setting the parameter -auto.reload -to true. Regardless of how the initial load is initiated, the next time -the source node routes data, reload batches will be inserted. At the -same time reload batches are inserted, all previously pending batches -for the node are marked as successfully sent. - - - - -Note that if the parent node that a node is registering with is -not -a registration server node (as can happen with a registration redirect -or certain non-tree structure node configurations) the parent node's - -entry must exist at the parent node and have a non-null value for column -initial_load_time -. Nodes can't be registered to non-registration-server nodes without -this value being set one way or another (i.e., manually, or as a result -of an initial load occurring at the parent node). - - - - -SymmetricDS recognizes that an initial load has completed when the -initial_load_time -column on the target node is set to a non-null value. - - - -An initial load is accomplished by inserting reload batches in a defined -order according to the -initial_load_order -column on - -. If the -initial_load_order -column contains a negative value the associated table will -NOT -be loaded. If the -initial_load_order -column contains the same value for multiple tables, SymmetricDS will -attempt to order the tables according to foreign key constraints. If -there are cyclical constraints, then foreign keys might need to be -turned off or the initial load will need to be manually configured based -on knowledge of how the data is structured. - - -Initial load data is always queried from the source -database table. All data is passed through the configured router to -filter out data that might not be targeted at a node. - -
-Target table prep for initial load -There are several parameters that can be used to specify -what, if anything, should be done to the table on the target database -just prior to loading the data. Note that the parameters below specify -the desired behavior for all tables in the initial load, not just one. - - -initial.load.delete.first / -initial.load.delete.first.sql - -By default, an initial load will not delete existing rows from a target -table before loading the data. If a delete is desired, the parameter -initial.load.delete.first -can be set to true. If true, the command found in -initial.load.delete.first.sql -will be run on each table prior to loading the data. The default value -for -initial.load.delete.first.sql -is -delete from %s -, but could be changed if needed. Note that additional reload batches -are created, in the correct order, to achieve the delete. - - - -initial.load.create.first - -By default, an initial load will not create the table on the target if -it doesn't alleady exist. If the desired behavior is to create the table -on the target if it is not present, set the parameter -intial.load.create.first -to true. SymmetricDS will attempt to create the table and indexes on the -target database before doing the initial load. (Additional batches are -created to represent the table schema). - - - -
-
-Loading subsets of data - - -An efficient way to select a subset of data from a table for an initial -load is to provide an -initial_load_select -clause on - -. This clause, if present, is applied as a -where -clause to the SQL used to select the data to be loaded. The clause may -use "t" as an alias for the table being loaded, if needed. The -$(externalId) -token can be used for subsetting the data in the where clause. - - - -In cases where routing is done using a feature like -Subselect Router -, an -initial_load_select -clause matching the subselect's criteria would be a more efficient -approach. Some routers will check to see if the -initial_load_select -clause is provided, and they will -not -execute assuming that the more optimal path is using the -initial_load_select -statement. - - - -One example of the use of an initial load select would be if you wished -to only load data created more recently than the start of year 2011. -Say, for example, the column -created_time -contains the creation date. Your -initial_load_select -would read -created_time > ts {'2011-01-01 00:00:00.0000'} -(using whatever timestamp format works for your database). This then -gets applied as a -where -clause when selecting data from the table. - - - - -When providing an -initial_load_select -be sure to test out the criteria against production data in a query -browser. Do an explain plan to make sure you are properly using indexes. - - -
- -
-Splitting an Initial Load for a Table Across Multiple Batches - -By default, all data for a given table will be initial loaded in a single batch, regardless -of the max batch size parameter on the reload channel. That is, for a table with one million -rows, all rows for that table will be initial loaded and sent to the destination node in a -single batch. For large tables, this can result in a batch that can take a long time to -extract and load. - - - -Initial loads for a table can be broken into multiple batches by specifying -initial.load.use.extract.job.enabled to true. This parameter allows -SymmetricDS to pre-extract initial load batches versus having them extracted when -the batch is pulled or pushed. When using this parameter, there are two ways to tell -SymmetricDS the number of batches to create for a given table. The first is to specify -a positive integer in the initial_load_batch_count column on -. This -number will dictate the number of batches created for the initial load of the given table. -The second way is to specify 0 for initial_load_batch_count on - and -specify a max_batch_size on the reload channel in . -When 0 is specified for -initial_load_batch_count, SymmetricDS will execute a count(*) query on the table during -the extract process and create N batches based on the total number of records found -in the table divided by the max_batch_size on the reload channel. - - -
- -
-Reverse Initial Loads - -The default behavior for initial loads is to load data from the -registration server or parent node, to a client node. Occasionally, -there may be need to do a one-time intial load of data in the opposite -or "reverse" direction, namely from a client node to the registration -node. To achieve this, set the parameter -auto.reload.reverse -to be true, -but only for the specific node group representing -the client nodes -. This will cause a onetime reverse load of data, for tables configured -with non-negative initial load orders, to be batched at the point when -registration of the client node is occurring. These batches are then -sent to the parent or registration node. This capability might be -needed, for example, if there is data already present in the client that -doesn't exist in the parent but needs to. - -
-
-
-Dead Triggers - - -Occasionally the decision of what data to load initially results in -additional triggers. These triggers, known as -Dead Triggers -, are configured such that they do not capture any data changes. A -"dead" Trigger is one that does not capture data changes. In other -words, the -sync_on_insert -, -sync_on_update -, and -sync_on_delete -properties for the Trigger are all set to false. However, since the -Trigger is specified, it -will -be included in the initial load of data for target Nodes. - - -Why might you need a Dead Trigger? A dead Trigger might be -used to load a read-only lookup table, for example. It could also be -used to load a table that needs populated with example or default data. -Another use is a recovery load of data for tables that have a single -direction of synchronization. For example, a retail store records sales -transactions that synchronize in one direction by trickling back to the -central office. If the retail store needs to recover all the sales -transactions from the central office, they can be sent are part of an -initial load from the central office by setting up dead Triggers that -"sync" in that direction. - - -The following SQL statement sets up a non-syncing dead Trigger that -sends the -sale_transaction -table to the "store" Node Group from the "corp" Node Group during an -initial load. - insert into sym_trigger -(TRIGGER_ID,SOURCE_CATALOG_NAME, -SOURCE_SCHEMA_NAME,SOURCE_TABLE_NAME,CHANNEL_ID, -SYNC_ON_UPDATE,SYNC_ON_INSERT,SYNC_ON_DELETE, -SYNC_ON_INCOMING_BATCH,NAME_FOR_UPDATE_TRIGGER, -NAME_FOR_INSERT_TRIGGER,NAME_FOR_DELETE_TRIGGER, -SYNC_ON_UPDATE_CONDITION,SYNC_ON_INSERT_CONDITION, -SYNC_ON_DELETE_CONDITION,EXTERNAL_SELECT, -TX_ID_EXPRESSION,EXCLUDED_COLUMN_NAMES, -CREATE_TIME,LAST_UPDATE_BY,LAST_UPDATE_TIME) values -('SALE_TRANSACTION_DEAD',null,null, 'SALE_TRANSACTION','transaction', -0,0,0,0,null,null,null,null,null,null,null,null,null, -current_timestamp,'demo',current_timestamp); insert into sym_router -(ROUTER_ID,TARGET_CATALOG_NAME,TARGET_SCHEMA_NAME, -TARGET_TABLE_NAME,SOURCE_NODE_GROUP_ID,TARGET_NODE_GROUP_ID,ROUTER_TYPE, -ROUTER_EXPRESSION,SYNC_ON_UPDATE,SYNC_ON_INSERT,SYNC_ON_DELETE, -CREATE_TIME,LAST_UPDATE_BY,LAST_UPDATE_TIME) values -('CORP_2_STORE',null,null,null, 'corp','store',null,null,1,1,1, -current_timestamp,'demo',current_timestamp); insert into -sym_trigger_router (TRIGGER_ID,ROUTER_ID,INITIAL_LOAD_ORDER, -INITIAL_LOAD_SELECT,CREATE_TIME,LAST_UPDATE_BY,LAST_UPDATE_TIME) values -('SALE_TRANSACTION_DEAD','CORP_2_REGION',100,null, -current_timestamp,'demo',current_timestamp); - -
- -
-Enabling "Ping Back" - - -As discussed in - -SymmetricDS, by default, avoids circular data changes. When a trigger -fires as a result of SymmetricDS itself (such as the case when sync on -incoming batch is set), it records the originating source node of the -data change in -source_node_id -. During routing, if routing results in sending the data back to the -originating source node, the data is not routed by default. If instead -you wish to route the data back to the originating node, you can set the -ping_back_enabled -column for the needed particular trigger / router combination. This will -cause the router to "ping" the data back to the originating node when it -usually would not. - -
-
-
- -
-Opening Registration - - -Node registration is the act of setting up a new - -and - -so that when the new node is brought online it is allowed to join the -system. Nodes are only allowed to register if rows exist for the node -and the -registration_enabled -flag is set to 1. If the -auto.registration -SymmetricDS property is set to true, then when a node attempts to -register, if registration has not already occurred, the node will -automatically be registered. - - - -SymmetricDS allows you to have multiple nodes with the same -external_id -. Out of the box, openRegistration will open a new registration if a -registration already exists for a node with the same external_id. A new -registration means a new node with a new -node_id -and the same -external_id -will be created. If you want to re-register the same node you can use -the -reOpenRegistration() -JMX method which takes a -node_id -as an argument. - -
- -
-Transforming Data - + + Configuration + +
+ Groups + + Node Groups are straightforward to configure and are defined in the + + table. The following SQL statements would create node groups for "corp" + and "store" based on our retail store example. + insert into SYM_NODE_GROUP + (node_group_id, description) values ('store', 'A retail store node'); + + insert into SYM_NODE_GROUP (node_group_id, description) values ('corp', + 'A corporate node'); + +
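+
+        Node groups by themselves do not define how data moves between nodes;
+        that is the job of node group links. As a minimal sketch only (assuming
+        the standard retail example and the SYM_NODE_GROUP_LINK table, where a
+        data_event_action of 'P' means the source group pushes to the target
+        and 'W' means the source waits for the target to pull), the two groups
+        above might be linked as follows:
+        insert into SYM_NODE_GROUP_LINK (source_node_group_id,
+        target_node_group_id, data_event_action) values ('store', 'corp', 'P');
+        insert into SYM_NODE_GROUP_LINK (source_node_group_id,
+        target_node_group_id, data_event_action) values ('corp', 'store', 'W');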
+ +
+      Channels
+
+        By categorizing data into channels and assigning them to
+        
+        s, the user gains more control and visibility into the flow of data.
+        SymmetricDS also allows synchronization to be enabled, suspended, or
+        scheduled by channel. The frequency of synchronization and the order in
+        which data gets synchronized are also controlled
+        at the channel level.
+
+
+
+        The following SQL statements set up channels for a retail store. An
+        "item" channel includes data for items and their prices, while a
+        "sale_transaction" channel includes data for ringing sales at a
+        register.
+        insert into SYM_CHANNEL (channel_id,
+        processing_order, max_batch_size, max_batch_to_send,
+        extract_period_millis, batch_algorithm, enabled, description) values
+        ('item', 10, 1000, 10, 0, 'default', 1, 'Item and pricing data'); insert
+        into SYM_CHANNEL (channel_id, processing_order, max_batch_size,
+        max_batch_to_send, extract_period_millis, batch_algorithm, enabled,
+        description) values ('sale_transaction', 1, 1000, 10, 60000,
+        'transactional', 1, 'retail sale transactions from register');
+
+
+
+        Batching is the grouping of data, by channel, to be transferred and
+        committed at the client together. There are three different
+        out-of-the-box batching algorithms which may be configured in the
+        batch_algorithm column on channel.
+
+
+
+            default
+
+
+
+                All changes that happen in a transaction are guaranteed to
+                be batched together. Multiple transactions will be batched and committed
+                together until there is no more data to be sent or the max_batch_size is
+                reached.
+
+
+
+
+
+            transactional
+
+
+
+                Batches will map directly to database transactions. If
+                there are many small database transactions, then there will be many
+                batches. The max_batch_size column has no effect.
+
+
+
+
+
+            nontransactional
+
+
+
+                Multiple transactions will be batched and committed
+                together until there is no more data to be sent or the max_batch_size is
+                reached. The batch will be cut off at the max_batch_size regardless of
+                whether it is in the middle of a transaction.
+
+
+
+
+
+        If a channel contains
+        only
+        tables that will be synchronized in one direction and data is routed
+        to all the nodes in the target node groups, then batching on the channel
+        can be optimized to share batches across nodes. This is an important
+        feature when data needs to be routed to thousands of nodes. When this
+        mode is detected, you will see batches created in
+        
+        with the
+        common_flag
+        set to 1.
+
+
+
+        There are also several size-related parameters that can be set by
+        channel. They include:
+
+
+
+            max_batch_size
+
+
+
+                Specifies the maximum number of data events to process
+                within a batch for this channel.
+
+
+
+
+
+            max_batch_to_send
+
+
+
+                Specifies the maximum number of batches to send for a given
+                channel during a 'synchronization' between two nodes. A
+                'synchronization' is equivalent to a push or a pull. For example, if
+                there are 12 batches ready to be sent for a channel and
+                max_batch_to_send is equal to 10, then only the first 10 batches will be
+                sent even though 12 batches are ready.
+
+
+
+
+
+            max_data_to_route
+
+
+
+                Specifies the maximum number of data rows to route for a
+                channel at a time.
+
+
+
+
+
+        Based on your particular synchronization requirements, you
+        can also specify whether old, new, and primary key data should be read
+        and included during routing for a given channel. 
These are controlled by
+        the columns use_old_data_to_route, use_row_data_to_route, and
+        use_pk_data_to_route, respectively. By default, they are all 1 (true).
+
+
+        Finally, if data on a particular channel contains big lobs, you can set
+        the column contains_big_lob to 1 (true) as a hint to SymmetricDS. Some
+        databases have shortcuts that SymmetricDS can take advantage of if it
+        knows that the lob columns in
+        
+        aren't going to contain large lobs. The definition of how large a 'big'
+        lob is varies from database to database.
+
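+
+        As an illustrative sketch of adjusting these settings (the column names
+        come from the descriptions above; the values shown are examples only),
+        the 'item' channel created earlier could be tuned as follows:
+        update SYM_CHANNEL set max_batch_to_send = 50,
+        max_data_to_route = 100000, use_old_data_to_route = 0,
+        contains_big_lob = 0 where channel_id = 'item';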
+
+      Table Triggers
+
+
+        SymmetricDS captures synchronization data using database triggers.
+        SymmetricDS' Triggers are defined in the
+        
+        table. Each record is used by SymmetricDS when generating database
+        triggers. Database triggers are only generated when a trigger is
+        associated with a
+        
+        whose
+        source_node_group_id
+        matches the node group id of the current node.
+
+
+
+        The
+        source_table_name
+        may contain the asterisk ('*') wildcard character so that one
+        
+        table entry can define synchronization for many tables. System tables
+        and any tables that start with the SymmetricDS table prefix will be
+        excluded. A list of wildcard tokens can also be supplied. If there are
+        multiple tokens, they should be delimited with a comma. A wildcard token
+        can also start with a bang ('!') to indicate an exclusive match. Tokens
+        are always evaluated from left to right. When a table match is made,
+        the table is either added to or removed from the list of tables. If
+        another trigger already exists for a table, then that table is not
+        included in the wildcard match (the explicitly defined trigger entry
+        takes precedence).
+
+
+
+        When determining whether a data change has occurred or not, by default
+        the triggers will record a change even if the data was updated to the
+        same value(s) it contained originally. For example, a data change will
+        be captured if an update of one column in a row set the column to the
+        value it already held. There is a global property,
+        trigger.update.capture.changed.data.only.enabled
+        (false by default), that allows you to override this behavior. When set
+        to true, SymmetricDS will only capture a change if the data has truly
+        changed (i.e., when the new column data is not equal to the old column
+        data).
+
+
+
+        The property
+        trigger.update.capture.changed.data.only.enabled
+        is currently only supported in the MySQL, DB2 and Oracle dialects.
+
+
+
+        The following SQL statement defines a trigger that will capture data for
+        a table named "item" whenever data is inserted, updated, or deleted. The
+        trigger is assigned to a channel also called 'item'.
+        insert into SYM_TRIGGER
+        (trigger_id,source_table_name,channel_id,last_update_time,create_time)
+        values ('item', 'item', 'item', current_timestamp, current_timestamp);
+
+
+
+        Note that many databases allow for multiple triggers of the
+        same type to be defined. Each database defines the order in which the
+        triggers fire differently. If you have additional triggers beyond those
+        SymmetricDS installs on your table, please consult your database
+        documentation to determine if there will be issues with the ordering of
+        the triggers.
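+
+        To illustrate the wildcard support described above (the table names
+        here are examples only), the following statement would capture changes
+        for every table whose name starts with "sale" except the sale_audit
+        table, assigning them all to the 'sale_transaction' channel:
+        insert into SYM_TRIGGER
+        (trigger_id,source_table_name,channel_id,last_update_time,create_time)
+        values ('sale_tables', 'sale*,!sale_audit', 'sale_transaction',
+        current_timestamp, current_timestamp);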
+ Linking Triggers + + + The + + table is used to define which specific combinations of triggers and + routers are needed for your configuration. The relationship between + triggers and routers is many-to-many, so this table serves as the join + table to define which combinations are valid, as well as to define + settings available at the trigger-router level of granularity. + + + Three important controls can be configured for a specific Trigger / + Router combination: Enabled, Initial Loads and Ping Back. The parameters + for these can be found in the Trigger / Router mapping table, + + . + + +
+      Enable / disable trigger router
+
+
+        Each individual trigger-router combination can be disabled or enabled if
+        needed. By default, a trigger router is enabled, but if you wish to
+        define a trigger-router combination before it becomes active, you can
+        set the
+        enabled
+        flag to 0. This will cause the trigger-router mapping to be sent to all
+        nodes, but the trigger-router mapping will not be considered active or
+        enabled for the purposes of capturing data changes or routing.
+
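+
+        As a minimal sketch (assuming the 'item' trigger and the 'corp-2-store'
+        router defined in this chapter), a mapping could be defined but left
+        inactive with a statement such as:
+        update SYM_TRIGGER_ROUTER set enabled = 0,
+        last_update_time = current_timestamp
+        where trigger_id = 'item' and router_id = 'corp-2-store';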
+
+      Enabling "Ping Back"
+
+
+        As discussed in
+        
+        SymmetricDS, by default, avoids circular data changes. When a trigger
+        fires as a result of SymmetricDS itself (such as the case when sync on
+        incoming batch is set), it records the originating source node of the
+        data change in
+        source_node_id
+        . During routing, if the data would be sent back to the
+        originating source node, it is not routed by default. If instead
+        you wish to route the data back to the originating node, you can set the
+        ping_back_enabled
+        column for the particular trigger / router combination that needs it.
+        This will cause the router to "ping" the data back to the originating
+        node when it usually would not.
+
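+
+        A minimal sketch, again assuming the 'item' trigger and 'corp-2-store'
+        router from the earlier examples:
+        update SYM_TRIGGER_ROUTER set ping_back_enabled = 1,
+        last_update_time = current_timestamp
+        where trigger_id = 'item' and router_id = 'corp-2-store';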
+
+
+      Large Objects
+
+        Two lob-related settings are also available on
+        
+        :
+
+
+
+            use_stream_lobs
+
+
+
+                Specifies whether to capture lob data as the trigger is
+                firing or to stream lob columns from the source tables using callbacks
+                during extraction. A value of 1 indicates that lobs should be streamed
+                from the source via callback; with a value of 0, lob data is captured
+                by the trigger.
+
+
+
+
+
+            use_capture_lobs
+
+
+
+                Provides a hint as to whether this trigger will capture big
+                lob data. If set to 1, every effort will be made during data capture in
+                the trigger and during data selection for the initial load to use lob
+                facilities to extract and store data in the database.
+
+
+
+
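+
+        Both settings are simple flags on the trigger row. An illustrative
+        sketch, reusing the 'item' trigger from the earlier example:
+        update SYM_TRIGGER set use_stream_lobs = 1, use_capture_lobs = 1,
+        last_update_time = current_timestamp where trigger_id = 'item';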
+ +
+      External Select
+
+
+        Occasionally, you may find that you need to capture and save away a
+        piece of data present in another table when a trigger is firing. This
+        data is typically needed to determine where to 'route' the data once
+        routing takes place. Each trigger definition
+        contains an optional
+        external_select
+        field which can be used to specify the data to be captured. Once
+        captured, this data is available during routing in
+        
+        's
+        external_data
+        field. For these cases, place a SQL select statement which returns the
+        data item you need for routing in
+        external_select
+        . An example of the use of external select can be found in
+        
+        .
+
+
+      Dead Triggers
+
+
+        Occasionally the decision of what data to load initially results in
+        additional triggers. These triggers, known as
+        Dead Triggers
+        , are configured such that they do not capture any data changes. In
+        other words, the
+        sync_on_insert
+        ,
+        sync_on_update
+        , and
+        sync_on_delete
+        properties for the Trigger are all set to false. However, since the
+        Trigger is specified, it
+        will
+        be included in the initial load of data for target Nodes.
+
+
+        Why might you need a Dead Trigger? A dead Trigger might be
+        used to load a read-only lookup table, for example. It could also be
+        used to load a table that needs to be populated with example or default
+        data. Another use is a recovery load of data for tables that have a
+        single direction of synchronization. For example, a retail store records
+        sales transactions that synchronize in one direction by trickling back
+        to the central office. If the retail store needs to recover all the
+        sales transactions from the central office, they can be sent as part of
+        an initial load from the central office by setting up dead Triggers that
+        "sync" in that direction.
+
+
+        The following SQL statement sets up a non-syncing dead Trigger that
+        sends the
+        sale_transaction
+        table to the "store" Node Group from the "corp" Node Group during an
+        initial load.
+        insert into sym_trigger
+        (TRIGGER_ID,SOURCE_CATALOG_NAME,
+        SOURCE_SCHEMA_NAME,SOURCE_TABLE_NAME,CHANNEL_ID,
+        SYNC_ON_UPDATE,SYNC_ON_INSERT,SYNC_ON_DELETE,
+        SYNC_ON_INCOMING_BATCH,NAME_FOR_UPDATE_TRIGGER,
+        NAME_FOR_INSERT_TRIGGER,NAME_FOR_DELETE_TRIGGER,
+        SYNC_ON_UPDATE_CONDITION,SYNC_ON_INSERT_CONDITION,
+        SYNC_ON_DELETE_CONDITION,EXTERNAL_SELECT,
+        TX_ID_EXPRESSION,EXCLUDED_COLUMN_NAMES,
+        CREATE_TIME,LAST_UPDATE_BY,LAST_UPDATE_TIME) values
+        ('SALE_TRANSACTION_DEAD',null,null, 'SALE_TRANSACTION','transaction',
+        0,0,0,0,null,null,null,null,null,null,null,null,null,
+        current_timestamp,'demo',current_timestamp); insert into sym_router
+        (ROUTER_ID,TARGET_CATALOG_NAME,TARGET_SCHEMA_NAME,
+        TARGET_TABLE_NAME,SOURCE_NODE_GROUP_ID,TARGET_NODE_GROUP_ID,ROUTER_TYPE,
+        ROUTER_EXPRESSION,SYNC_ON_UPDATE,SYNC_ON_INSERT,SYNC_ON_DELETE,
+        CREATE_TIME,LAST_UPDATE_BY,LAST_UPDATE_TIME) values
+        ('CORP_2_STORE',null,null,null, 'corp','store',null,null,1,1,1,
+        current_timestamp,'demo',current_timestamp); insert into
+        sym_trigger_router (TRIGGER_ID,ROUTER_ID,INITIAL_LOAD_ORDER,
+        INITIAL_LOAD_SELECT,CREATE_TIME,LAST_UPDATE_BY,LAST_UPDATE_TIME) values
+        ('SALE_TRANSACTION_DEAD','CORP_2_STORE',100,null,
+        current_timestamp,'demo',current_timestamp);
+
+
+      Changing Triggers
+
+        A trigger row may be updated using SQL to change a synchronization definition.
+        SymmetricDS will look for changes each night or whenever the Sync Triggers Job
+        is run (see below). For example, a change to place the table price_changes
+        into the price channel could be accomplished with a statement like the one
+        shown at the end of this section.
+
+
+        All configuration changes should be managed centrally at the registration node. If enabled, configuration
+        changes will be synchronized out to client nodes. When trigger changes reach the client
+        nodes, the Sync Triggers Job will run automatically.
+
+
+        Centrally, the trigger changes will not take effect until the Sync Triggers Job runs.
+        Instead of waiting for the Sync Triggers Job to run overnight after making a Trigger
+        change, you can invoke the syncTriggers() method over JMX or simply restart the SymmetricDS
+        server. A complete record of trigger changes is kept in the table ,
+        which was discussed in .
+
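+
+        A minimal sketch of the price_changes statement promised above, assuming
+        the table already has a trigger row and that a 'price' channel exists
+        (updating last_update_time lets the Sync Triggers Job detect the change):
+        update SYM_TRIGGER set channel_id = 'price',
+        last_update_time = current_timestamp
+        where source_table_name = 'price_changes';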
+
+
+ File Triggers + +
+
+      Routers
+
+        Routers provided in the base implementation currently include:
+
+        Default Router - a router that sends all data to
+        all nodes that belong to the target node group defined in the router.
+
+        Column Match Router - a router that compares old or
+        new column values to a constant value or the value of a node's
+        external_id or node_id.
+
+        Lookup Router - a router which can be configured to
+        determine routing based on an existing or ancillary table specifically
+        for the purpose of routing data.
+
+        Subselect Router - a router that executes a SQL
+        expression against the database to select nodes to route to. This SQL
+        expression can be passed values of old and new column values.
+
+        Scripted Router - a router that executes a Bean
+        Shell script expression in order to select nodes to route to. The script
+        can use the old and new column values.
+
+        Xml Publishing Router - a router that publishes data
+        changes directly to a messaging solution instead of transmitting changes
+        to registered nodes. This router must be configured manually in XML as
+        an extension point.
+
+        Audit Table Router - a router that inserts into an
+        automatically created audit table. It records captured changes to tables
+        that it is linked to.
+
+        The mapping between the set of triggers and set of routers is
+        many-to-many. This means that one trigger can capture changes and route
+        to multiple locations. It also means that one router can be defined and
+        associated with many different triggers.
+
+ Default Router + + + The simplest router is a router that sends all the data that is captured + by its associated triggers to all the nodes that belong to the target + node group defined in the router. A router is defined as a row in the + + table. It is then linked to triggers in the + + table. + + + + The following SQL statement defines a router that will send data from + the 'corp' group to the 'store' group. + insert into SYM_ROUTER (router_id, + source_node_group_id, target_node_group_id, create_time, + last_update_time) values ('corp-2-store','corp', 'store', + current_timestamp, current_timestamp); + + + + The following SQL statement maps the 'corp-2-store' router to the item + trigger. + insert into SYM_TRIGGER_ROUTER + (trigger_id, router_id, initial_load_order, create_time, + last_update_time) values ('item', 'corp-2-store', 1, current_timestamp, + current_timestamp); + +
+ +
+      Column Match Router
+
+
+        Sometimes data needs to be routed based on
+        the current value or the old value of a column in the table that is
+        being routed. Column routers are configured by setting the
+        router_type
+        column on the
+        
+        table to
+        column
+        and setting the
+        router_expression
+        column to an equality expression that represents the expected value of
+        the column.
+
+        The first part of the expression is always the column name.
+        The column name should always be defined in upper case. The upper case
+        column name prefixed by OLD_ can be used for a comparison against
+        the old column data value.
+
+        The second part of the expression can be a constant value,
+        a token that represents another column, or a token that represents some
+        other SymmetricDS concept. Token values always begin with a colon (:).
+
+
+        Consider a table that needs to be routed to all nodes in the target
+        group only when a status column is set to 'READY TO SEND.' The following
+        SQL statement will insert a column router to accomplish that.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-ok','corp', 'store', 'column', 'STATUS=READY TO SEND',
+        current_timestamp, current_timestamp);
+
+
+
+        Consider a table that needs to be routed to all nodes in the target
+        group only when a status column changes values. The following SQL
+        statement will insert a column router to accomplish that. Note the use
+        of OLD_STATUS, where the OLD_ prefix gives access to the old column
+        value.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-status','corp', 'store', 'column', 'STATUS!=:OLD_STATUS',
+        current_timestamp, current_timestamp);
+
+
+
+        Consider a table that needs to be routed to only nodes in the target
+        group whose STORE_ID column matches the external id of a node. The
+        following SQL statement will insert a column router to accomplish that.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-id','corp', 'store', 'column', 'STORE_ID=:EXTERNAL_ID',
+        current_timestamp, current_timestamp);
+        Attributes on a
+        
+        that can be referenced with tokens include:
+
+        :NODE_ID
+
+        :EXTERNAL_ID
+
+        :NODE_GROUP_ID
+
+        Captured EXTERNAL_DATA is also available for routing as a virtual
+        column.
+
+
+
+        Consider a table that needs to be routed to a redirect node defined by
+        its external id in the
+        
+        table. The following SQL statement will insert a column router to
+        accomplish that.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-redirect','corp', 'store', 'column',
+        'STORE_ID=:REDIRECT_NODE', current_timestamp, current_timestamp);
+
+
+
+        More than one column may be configured in a router_expression. When more
+        than one column is configured, all matches are added to the list of
+        nodes to route to. The following is an example where the STORE_ID column
+        may contain the STORE_ID to route to or the constant of ALL which
+        indicates that all nodes should receive the update. 
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-multiple-matches','corp', 'store', 'column',
+        'STORE_ID=ALL or STORE_ID=:EXTERNAL_ID', current_timestamp,
+        current_timestamp);
+
+
+
+        The NULL keyword may be used to check if a column is null. If the column
+        is null, then data will be routed to all nodes that qualify for the
+        update. The following is an example where the STORE_ID column is used
+        to route to a set of nodes that have a STORE_ID equal to their
+        EXTERNAL_ID, or to all nodes if the STORE_ID is null. Note that the
+        router_id must be unique, so this router is given its own id.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-null-check','corp', 'store', 'column',
+        'STORE_ID=NULL or STORE_ID=:EXTERNAL_ID', current_timestamp,
+        current_timestamp);
+
+ +
+      Lookup Table Router
+
+
+        A lookup table may contain the id of the node where data needs to be
+        routed. This could be an existing table or an ancillary table that is
+        added specifically for the purpose of routing data. Lookup table routers
+        are configured by setting the
+        router_type
+        column on the
+        
+        table to
+        lookuptable
+        and setting a list of configuration parameters in the
+        router_expression
+        column.
+
+
+
+        Each of the following configuration parameters is required.
+
+
+
+            LOOKUP_TABLE
+
+
+
+                This is the name of the lookup table.
+
+
+
+
+
+            KEY_COLUMN
+
+
+
+                This is the name of the column on the table that is being
+                routed. It will be used as a key into the lookup table.
+
+
+
+
+
+            LOOKUP_KEY_COLUMN
+
+
+
+                This is the name of the column that is the key on the
+                lookup table.
+
+
+
+
+
+            EXTERNAL_ID_COLUMN
+
+
+
+                This is the name of the column that contains the
+                external_id of the node to route to on the lookup table.
+
+
+
+
+
+        Note that the lookup table will be read into memory and
+        cached for the duration of a routing pass for a single channel.
+
+
+        Consider a table that needs to be routed to a specific store, but the
+        data in the changing table only contains brand information. In this
+        case, the STORE table may be used as a lookup table. Note that this
+        router is given its own unique router_id.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-lookup','corp', 'store', 'lookuptable', 'LOOKUP_TABLE=STORE
+        KEY_COLUMN=BRAND_ID LOOKUP_KEY_COLUMN=BRAND_ID
+        EXTERNAL_ID_COLUMN=STORE_ID', current_timestamp, current_timestamp);
+
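+
+        To make the example concrete, a hypothetical STORE lookup table for
+        this configuration might look like the following (illustrative DDL
+        only; any existing or ancillary table with these columns would work):
+        create table STORE (STORE_ID varchar(50) not null,
+        BRAND_ID varchar(50) not null, primary key (STORE_ID, BRAND_ID));
+        Each row maps a BRAND_ID found on the routed table to the STORE_ID
+        (and therefore the node external_id) that should receive the change.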
+ +
+      Subselect Router
+
+
+        Sometimes routing decisions need to be made based on data that is not in
+        the current row being synchronized. A 'subselect' router can be used in
+        these cases. A 'subselect' is configured with a
+        router_expression
+        that is a SQL select statement which returns a result set of the node
+        ids that need to be routed to. Column tokens can be used in the SQL
+        expression and will be replaced with row column data. The overhead of
+        using this router type is high because the 'subselect' statement runs
+        for each row that is routed. It should not be used for tables that have
+        a large number of rows being updated. It also has the disadvantage that
+        if the data being relied on to determine the node id has been deleted
+        before routing takes place, then no results would be returned and
+        routing would not happen.
+
+
+        The
+        router_expression
+        you specify is appended to the following SQL statement in order to
+        select the node ids:
+        select c.node_id from sym_node c where
+        c.node_group_id=:NODE_GROUP_ID and c.sync_enabled=1 and ...
+
+        As you can see, you have access to information about the node currently
+        under consideration for routing through the 'c' alias, for example
+        c.external_id
+        . There are two node-related tokens you can use in your expression:
+
+        :NODE_GROUP_ID
+        :EXTERNAL_DATA
+
+
+        Column names representing data for the row in question are prefixed with
+        a colon as well, for example:
+
+        :EMPLOYEE_ID
+        , or
+        :OLD_EMPLOYEE_ID
+        . Here, the OLD_ prefix indicates the value before the change in cases
+        where the old data has been captured.
+
+
+        For an example, consider the case where an Order table and
+        an OrderLineItem table need to be routed to a specific store. The Order
+        table has columns named order_id and STORE_ID. A store node has an
+        external_id that is equal to the STORE_ID on the Order table.
+        OrderLineItem, however, only has a foreign key to its Order of order_id.
+        To route OrderLineItems to the same nodes that the Order will be routed
+        to, we need to reference the master Order record.
+
+
+        There are two possible ways to solve this in SymmetricDS. One is to
+        configure a 'subselect' router_type on the
+        
+        table, shown below (the other possible approach is to use an
+        external_select
+        to capture the data via a trigger for use in a column match router,
+        demonstrated in
+        
+        ).
+
+
+
+        Our solution utilizing subselect compares the external id of the current
+        node with the store id from the Order table where the order id matches
+        the order id of the current row being routed:
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store','corp', 'store', 'subselect', 'c.external_id in (select
+        STORE_ID from order where order_id=:ORDER_ID)', current_timestamp,
+        current_timestamp);
+
+
+        As a final note, observe in this example that the
+        parent row in Order must still exist at the moment of routing for the
+        child rows (OrderLineItem) to route, since the select statement is run
+        when routing is occurring, not when the change data is first captured.
+
+ +
+      Scripted Router
+
+
+        When more flexibility is needed in the logic to choose the nodes to
+        route to, a scripted router may be used. The currently
+        available scripting language is Bean Shell. Bean Shell is a Java-like
+        scripting language. Documentation for the Bean Shell scripting language
+        can be found at
+        http://www.beanshell.org
+        .
+
+
+
+        The router_type for a Bean Shell scripted router is 'bsh'. The
+        router_expression is a valid Bean Shell script that does one of the
+        following:
+
+
+        adds node ids to the
+        targetNodes
+        collection which is bound to the script
+
+
+        returns a new collection of node ids
+
+        returns a single node id
+
+        returns true to indicate that all nodes should be
+        routed or returns false to indicate that no nodes should be routed
+
+        Also bound to the script evaluation is a list of
+        nodes
+        . The list of
+        nodes
+        is a list of eligible
+        org.jumpmind.symmetric.model.Node
+        objects. The current data column values and the old data column values
+        are bound to the script evaluation as Java object representations of the
+        column data. The columns are bound using the uppercase names of the
+        columns. Old values are bound to uppercase representations that are
+        prefixed with 'OLD_'.
+
+
+
+        If you need access to any of the SymmetricDS services, then the instance
+        of
+        org.jumpmind.symmetric.ISymmetricEngine
+        is accessible via the bound
+        engine
+        variable.
+
+
+
+        In the following example, the node_id is a combination of STORE_ID and
+        WORKSTATION_NUMBER, both of which are columns on the table that is being
+        routed.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-bsh','corp', 'store', 'bsh', 'targetNodes.add(STORE_ID +
+        "-" + WORKSTATION_NUMBER);', current_timestamp, current_timestamp);
+
+
+
+        The same could also be accomplished by simply returning the node id. The
+        last line of a bsh script is always the return value.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-bsh','corp', 'store', 'bsh', 'STORE_ID + "-" +
+        WORKSTATION_NUMBER', current_timestamp, current_timestamp);
+
+
+
+        The following example will synchronize to all nodes if the FLAG column
+        has changed, otherwise no nodes will be synchronized. Note that here we
+        make use of OLD_, which provides access to the old column value.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-flag-changed','corp', 'store', 'bsh', 'FLAG != null
+        && !FLAG.equals(OLD_FLAG)', current_timestamp,
+        current_timestamp);
+
+
+
+        The next example shows a script that iterates over each eligible node
+        and checks to see if the trimmed value of the column named STATION
+        equals the external_id.
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type,
+        router_expression, create_time, last_update_time) values
+        ('corp-2-store-trimmed-station','corp', 'store', 'bsh', 'for
+        (org.jumpmind.symmetric.model.Node node : nodes) { if (STATION != null
+        && node.getExternalId().equals(STATION.trim())) {
+        targetNodes.add(node.getNodeId()); } }', current_timestamp,
+        current_timestamp);
+
+ +
+      Audit Table Router
+
+
+        This router audits captured data by recording the change in an audit
+        table that the router creates and keeps up to date (as long as
+        auto.config.database
+        is set to true). The router creates a table named the same as the table
+        for which data was captured with the suffix of _AUDIT. It will contain
+        all of the same columns as the original table with the same data types,
+        except that each column is nullable and has no default values.
+
+
+
+        Three extra "AUDIT" columns are added to the table:
+
+        AUDIT_ID - the primary key of the table.
+        AUDIT_TIME - the time at which the change occurred.
+        AUDIT_EVENT - the DML type that happened to the
+        row.
+
+
+
+
+        The following is an example of an audit router:
+        insert into SYM_ROUTER (router_id,
+        source_node_group_id, target_node_group_id, router_type, create_time,
+        last_update_time) values ('audit_at_corp','corp', 'local', 'audit',
+        current_timestamp, current_timestamp);
+
+
+        The audit router captures data for a group link. For the
+        audit router to work, it must be associated with a node_group_link with
+        an action of type 'R'. The 'R' stands for 'only routes to'. In the above
+        example, we refer to a 'corp to local' group link. Here, local is a new
+        node_group created for the audit router. No nodes belong to the 'local'
+        node_group. If a trigger linked to an audit router fires on the corp
+        node, a new audit table will be created at the corp node with the new
+        data inserted.
+ + + + +
+ Utilizing External Select when Routing + + + + + There may be times when you wish to route based on a piece of data that + exists in a table other than the one being routed. The approach, first + discussed in + + , is to utilize an + external_select + to save away data in + external_data + , which can then be referenced during routing. + + + Reconsider subselect's Order / OrderLineItem example (found in + + ), where routing for the line item is accomplished by linking to the + "header" Order row. As an alternate way of solving the problem, we will + now use External Select combined with a column match router. + + + In this version of the solution, the STORE_ID is captured from the Order + table in the EXTERNAL_DATA column when the trigger fires. The router is + configured to route based on the captured EXTERNAL_DATA to all nodes + whose external id matches the captured external data. + insert into SYM_TRIGGER + (trigger_id,source_table_name,channel_id,external_select, + last_update_time,create_time) values ('orderlineitem', 'orderlineitem', + 'orderlineitem','select STORE_ID from order where + order_id=$(curTriggerValue).$(curColumnPrefix)order_id', + current_timestamp, current_timestamp); insert into SYM_ROUTER + (router_id, source_node_group_id, target_node_group_id, router_type, + router_expression, create_time, last_update_time) values + ('corp-2-store-ext','corp', 'store', 'column', + 'EXTERNAL_DATA=:EXTERNAL_ID', current_timestamp, current_timestamp); + + + The following variables can be used with the external select: + + + + + $(curTriggerValue) + + + + Variable to be replaced with the NEW or OLD column alias provided by the trigger context, which is platform specific. + For insert and update triggers, the NEW alias is used; for delete triggers, the OLD alias is used. + For example, "$(curTriggerValue).COLUMN" becomes ":new.COLUMN" for an insert trigger on Oracle. + + + + + + + $(curColumnPrefix) + + + + Variable to be replaced with the NEW_ or OLD_ column prefix for platforms that don't support column aliases. + This is currently only used by the H2 database. All other platforms will replace the variable with an empty string. + For example "$(curColumnPrefix)COLUMN" becomes "NEW_COLUMN" on H2 and "COLUMN" on Oracle. + + + + + + The advantage of this approach over the 'subselect' + approach is that it guards against the (somewhat unlikely) possibility + that the master Order table row might have been deleted before routing + has taken place. This external select solution also is a bit more + efficient than the 'subselect' approach, although the triggers produced + do run the extra external_select SQL inline with application database + updates. + +
+ +
+
+ Conflicts + +
+
+
+    Transforms
+    As of SymmetricDS 2.4, SymmetricDS is able to transform
+    synchronized data by way of configuration (previously, for most cases a
+    custom data loader would need to have been written). This
@@ -1813,757 +1277,386 @@
 transformations are applied.
Transformation Types - - -There are several pre-defined transform types available in SymmetricDS. -Additional ones can be defined by creating and configuring an extension -point which implements the -IColumnTransform -interface. The pre-defined transform types include the following (the -transform_type entry is shown in parentheses): - -Copy Column Transform ('copy'): This transformation -type copies the source column value to the target column. This is the -default behavior. - -Remove Column Transform ('remove'): This -transformation type removes the source column. This transform type is -only valid for a table transformation type of 'IMPLIED' where all the -columns from the source are automatically copied to the target. - -Constant Transform ('const'): This transformation -type allows you to map a constant value to the given target column. The -constant itself is placed in transform_expression. - - -Variable Transform ('variable'): This transformation type allows you to -map a built-in dynamic variable to the given target column. The variable -name is placed in transform_expression. The following variables are -available: -system_date -is the current system date, -system_timestamp -is the current system date and time, -source_node_id -is the node id of the source, -target_node_id -is the node id of the target, -null -is a null value, and old_column_value is the column's old value prior to the DML operation. - - -Additive Transform ('additive'): This -transformation type is used for numeric data. It computes the change -between the old and new values on the source and then adds the change to -the existing value in the target column. That is, target = target + -multiplier (source_new - source_old), where multiplier is a constant -found in the transform_expression (default is 1 if not specified). For -example, if the source column changed from a 2 to a 4, the target column -is currently 10, and the multiplier is 3, the effect of the transform -will be to change the target column to a value of 16 ( 10+3*(4-2) => -16 ). Note that, in the case of deletes, the new column value is -considered 0 for the purposes of the calculation. - - -Substring Transform ('substr'): This transformation computes a substring -of the source column data and uses the substring as the target column -value. The transform_expression can be a single integer ( -n -, the beginning index), or a pair of comma-separated integers ( -n,m -- the beginning and ending index). The transform behaves as the Java -substring function would using the specified values in -transform_expression. - - -Multiplier Transform ('multiply'): This -transformation allows for the creation of multiple rows in the target -table based on the transform_expression. This transform type can only be -used on a primary key column. The transform_expression is a SQL -statement that returns the list to be used to create the multiple -targets. - -Lookup Transform ('lookup'): This transformation -determines the target column value by using a query, contained in -transform_expression to lookup the value in another table. The query -must return a single row, and the first column of the query is used as -the value. Your query references source column names by prefixing with a -colon (e.g., :MY_COLUMN). - - -Shell Script Transform ('bsh'): This transformation allows you to -provide a Bean Shell script in transform_expression and executes the -script at the time of transformation. 
Some variables are provided to the -script: -COLUMN_NAME -is a variable for a source column in the row, where the variable name is -the column name in uppercase; -currentValue -is the value of the current source column; -oldValue -is the old value of the source column for an updated row; -sqlTemplate -is a -org.jumpmind.db.sql.ISqlTemplate -object for querying or updating the database; -channelId -is a reference to the channel on which the transformation is happening; -sourceNode -is a -org.jumpmind.symmetric.model.Node -object that represents the node from where the data came; -targetNode -is a -org.jumpmind.symmetric.model.Node -object that represents the node where the data is being loaded. - - -Identity Transform ('identity'): This -transformation allows you to insert into an identity column by computing -a new identity, not copying the actual identity value from the source. - - - -Mathematical Transform ('math'): This transformation allows you to -perform mathematical equations in the transform expression. Some -variables are provided to the script: -#{COLUMN_NAME} -is a variable for a source column in the row, where the variable name -is the column name in uppercase; -#{currentValue} -is the value of the current source column; -#{oldValue} -is the old value of the source column for an updated row. - - - -Copy If Changed Transform ('copyIfChanged'): This transformation will copy the value to the target column if the source value has changed. More -specifically, the copy will occur if the the old value of the source does not equal the new value. If the old and new are, in fact, equal, then either -the column will be ignored or the row will be ignored, based on the setting of the transform expression. If the transform expression is euqal -to the string 'IgnoreColumn', the column will be ignored; otherwise, the row will be ignored. - - - - -Value Map Transform ('valueMap'): This transformation allows for simple value substitutions through use of the transform expression. -The transform expresion should consist of a space separated list of value pairs of the format sourceValue=TargetValue. The column value is used to -locate the correct sourceValue, and the transform will change the value into the corresponding targetValue. A sourceValue of * can be used to -represent a default target value in the event that the sourceValue is not found. Otherwise, if no default value is found, -the result will be null. For example, consider the following transform expression: s1=t1 s2=t2 s3=t3 *=t4. A source value of -s1 will be transformed to t1, s2 to t2, s3 to t3, s4 to t4, s5 to t4, null to t4, etc. - - - -Clarion Date Time ('clarionDateTime'): Convert a Clarion date with optional time into a timestamp. Clarion dates are stored as the number of days -since December 28, 1800, while Clarion times are stored as hundredths of a second since midnight, plus one. Use a source column of the Clarion date -and a target column of the timestamp. Optionally, in the transform expression, enter the name of the Clarion time column. - - - -Columns To Rows ('columnsToRowsKey' and 'columnsToRowsValue'): Convert column values from a single source row into a row per column value -at the target. Two column mappings are needed to complete the work: use "columnsToRowsKey" to map which source column is used, -and use "columnsToRowsValue" to map the value. 
The "columnsToRowsKey" mapping requires an expression in the format of -"column1=key1,column2=key2" to list the source column names and which key value is stored in the target column. -The "columnsToRowsValue" mapping sets the column's value at the target and allows an optional expression: -"changesOnly=true" to convert only rows when the old and new values have changed; "ignoreNulls=true" to convert only rows that are not null. -For example, column "fieldid" mapped as "columnsToRowsKey" with expression of "user1=1,user2=2" and column "color" mapped as -"columnsToRowsValue" would convert a row with columns named "user1" and "user2" containing values "red" and "blue" into two rows with columns -"fieldid" and "color" containing a row of "1" and "red" and a row of "2" and "blue". - - - - -
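
As a concrete sketch of wiring up one of these types, the following
statements configure the value map transform described above. The
transform id, node groups, table, and status codes are illustrative, and
the column lists are trimmed to the descriptive columns (audit columns
such as create_time are omitted):

insert into SYM_TRANSFORM_TABLE
(transform_id, source_node_group_id, target_node_group_id,
transform_point, source_table_name, target_table_name, column_policy)
values
('order_status_map', 'corp', 'store', 'LOAD',
'ORDER_HDR', 'ORDER_HDR', 'IMPLIED');

insert into SYM_TRANSFORM_COLUMN
(transform_id, include_on, source_column_name, target_column_name,
pk, transform_type, transform_expression, transform_order)
values
('order_status_map', '*', 'STATUS', 'STATUS', 0,
'valueMap', 'N=NEW S=SHIPPED C=CLOSED *=UNKNOWN', 1);

With this in place, a STATUS of 'N' extracted at corp is loaded at the
store as 'NEW', and any unmapped code falls back to 'UNKNOWN' through the
* entry.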
- - -
-
Data Load Filters


New as of SymmetricDS 3.1, SymmetricDS is now capable of taking actions
upon the load of certain data via configurable load filters. This new
configurable option is in addition to the already existing option of
writing a class that implements

. A configurable load filter watches for specific data that is being
loaded and then takes action based on the load of that data.


Specifying which data to act on is done by specifying a
source and target node group (data extracted from this node group, and
loaded into that node group), and a target catalog, schema and table
name. You can decide to take action on rows that are inserted, updated
and/or deleted, and can also further delineate which rows of the target
table to take action on by specifying additional criteria in the bean
shell script that is executed in response to the loaded data. As an
example, old and new values for the row of data being loaded are
available in the bean shell script, so you can act on rows with a
certain column value in old or new data.

The action taken is based on a bean shell script that you
can provide as part of the configuration. Actions can be taken at
different points in the load process including before write, after
write, at batch complete, at batch commit and/or at batch rollback.
Load Filter Configuration Table


SymmetricDS stores its load filter configuration in a single table
called

. The load filter table allows you to specify the following:

Load Filter Type ('load_filter_type'): The type of
load filter. Today only Bean Shell is supported ('BSH'), but SQL scripts
may be added in a future release.

Source Node Group ('source_node_group_id'): The
source node group for which you would like to watch for changes.

Target Node Group ('target_node_group_id'): The
target node group for which you would like to watch for changes. The
source and target node groups are used together to identify the node
group link for which you would like to watch for changes (i.e., when the
Server node group sends data to a Client node group).

Target Catalog ('target_catalog_name'): The name of
the target catalog for which you would like to watch for changes.

Target Schema ('target_schema_name'): The name of
the target schema for which you would like to watch for changes.

Target Table ('target_table_name'): The name of the
target table for which you would like to watch for changes. The target
catalog, target schema and target table name are used together to fully
qualify the table for which you would like to watch for changes.

Filter on Update ('filter_on_update'): Determines
whether the load filter takes action (executes) on a database update
statement.

Filter on Insert ('filter_on_insert'): Determines
whether the load filter takes action (executes) on a database insert
statement.

Filter on Delete ('filter_on_delete'): Determines
whether the load filter takes action (executes) on a database delete
statement.

Before Write Script ('before_write_script'): The
script to execute before the database write occurs.

After Write Script ('after_write_script'): The
script to execute after the database write occurs.

Batch Complete Script ('batch_complete_script'):
The script to execute after the entire batch completes.

Batch Commit Script ('batch_commit_script'): The
script to execute after the entire batch is committed.

Batch Rollback Script ('batch_rollback_script'):
The script to execute if the batch rolls back.

Handle Error Script ('handle_error_script'): A
script to execute if data cannot be processed.

Load Filter Order ('load_filter_order'): The order
in which load filters should execute if there are multiple scripts
pertaining to the same source and target data.
- -
Variables available to Data Load Filters


As part of the bean shell load filters, SymmetricDS provides certain
variables for use in the bean shell script. Those variables include:

Symmetric Engine ('ENGINE'): The Symmetric engine
object.

Source Values ('<COLUMN_NAME>'): The source
values for the row being inserted, updated or deleted.

Old Values ('OLD_<COLUMN_NAME>'): The old
values for the row being inserted, updated or deleted.

Data Context ('CONTEXT'): The data context object
for the data being inserted, updated or deleted.

Table Data ('TABLE'): The table object for the
table being inserted, updated or deleted.
- -
Data Load Filter Example


The following is an example of a load filter that watches a table named
TABLE_TO_WATCH being loaded from the Server node group to the Client
node group for inserts or updates. When a matching row is loaded, it
requests an initial load of a table named TABLE_TO_RELOAD, limited to
rows whose KEY_FIELD matches the KEY_FIELD value of the row just loaded.
(The example uses Oracle's sysdate; substitute your database's current
timestamp function as needed.)
 insert into sym_load_filter
(LOAD_FILTER_ID, LOAD_FILTER_TYPE, SOURCE_NODE_GROUP_ID,
TARGET_NODE_GROUP_ID, TARGET_CATALOG_NAME, TARGET_SCHEMA_NAME,
TARGET_TABLE_NAME, FILTER_ON_UPDATE, FILTER_ON_INSERT, FILTER_ON_DELETE,
BEFORE_WRITE_SCRIPT, AFTER_WRITE_SCRIPT, BATCH_COMPLETE_SCRIPT,
BATCH_COMMIT_SCRIPT, BATCH_ROLLBACK_SCRIPT, HANDLE_ERROR_SCRIPT,
CREATE_TIME, LAST_UPDATE_BY, LAST_UPDATE_TIME, LOAD_FILTER_ORDER,
FAIL_ON_ERROR) values
('TABLE_TO_RELOAD','BSH','Server','Client',NULL,NULL,
'TABLE_TO_WATCH',1,1,0,null,
'engine.getDataService().reloadTable(context.getBatch().getSourceNodeId(),
table.getCatalog(), table.getSchema(), "TABLE_TO_RELOAD","KEY_FIELD=''"
+ KEY_FIELD + "''");'
,null,null,null,null,sysdate,'userid',sysdate,1,1);
-
- - - - - -
Jobs

Work done by SymmetricDS is initiated by jobs. Jobs are tasks that are
started and scheduled by a job manager. Jobs are enabled by the
start.{name}.job
property. Most jobs are enabled by default. The frequency at which a job
runs is controlled by one of two properties:
job.{name}.period.time.ms
or
job.{name}.cron
. If a valid cron property exists in the configuration, then it will be
used to schedule the job. Otherwise, the job manager will attempt to use
the period.time.ms property.


The frequency of jobs can be configured in either the engine's properties
file or in

. When managed in

the frequency properties can be changed on the registration server, and
when the updated settings sync to the nodes in the system the job
manager will restart the jobs at the new frequency settings.


SymmetricDS utilizes Spring's CRON support, which includes seconds as
the first parameter. This differs from the typical Unix-based
implementation, where the first parameter is usually minutes. For
example,
*/15 * * * * *
means every 15 seconds, not every 15 minutes. See
Spring's
documentation
for more details.


Some jobs cannot be run in parallel against a single node. When running
on a cluster these jobs use the

table to get an exclusive semaphore to run the job. In order to use this
table the
cluster.lock.enabled
parameter must be set to true.

 The three main jobs in SymmetricDS are the route, push and
pull jobs. The route job decides what captured data changes should be
sent to which nodes. It also decides what captured data changes should
be transported and loaded together in a batch. The push and pull jobs
are responsible for initiating HTTP communication with linked nodes to
push or pull data changes that have been routed.
Route Job

After data is captured in the

table, it is routed to specific nodes in batches by the
Route Job
. It is a single background task that inserts into

and

.


The job processes each enabled channel, one at a time, collecting a list
of data ids from

which have not been routed (see

for much more detail about this step), up to a limit specified by the
channel configuration (
max_data_to_route
, on

). The data is then batched based on the
batch_algorithm
defined for the channel and as documented in

. Note that, for the
default
and
transactional
algorithm, there may actually be more than
max_data_to_route
included depending on the transaction boundaries. The mapping of data to
specific nodes, organized into batches, is then recorded in

with a status of "RT" in each case (representing the fact that the Route
Job is still running). Once the routing algorithms and batching are
completed, the batches are organized with their corresponding data ids
and saved in

. Once

is updated, the rows in

are updated to a status of New "NE".


The route job will respect the
max_batch_size
on

. If the max batch size is reached before the end of a database
transaction and the batch algorithm is set to something other than
nontransactional
the batch may exceed the specified max size.


The route job delegates to a router defined by the
router_type
and configured by the
router_expression
in the

table. Each router that has a
source_node_group_id
that matches the current node's source node group id and is linked to
the

that captured the data gets an opportunity to choose a list of nodes the
data should be sent to. Data can only be routed to nodes that belong to
the router's
target_node_group_id
.
Data Gaps

On the surface, the first Route Job step of collecting unrouted data ids
seems simple: assign sequential data ids for each data row as it's
inserted and keep track of which data id was last routed and start from
there. The difficulty arises, however, due to the fact that there can be
multiple transactions inserting into

simultaneously. As such, a given section of rows in the

table may actually contain "gaps" in the data ids when the Route Job is
executing. Most of these gaps are only temporary and fill in at some
point after routing, so they need to be picked up with the next run of the
Route Job. Thus, the Route Job needs to remember to route the filled-in
gaps. Worse yet, some of these gaps are actually permanent and result
from a transaction that is rolled back for some reason. In this case,
the Route Job must continue to watch for the gap to fill in and, at some
point, eventually gives up and assumes the gap is permanent and can be
skipped. All of this must be done in some fashion that guarantees that
gaps are routed when they fill in while also keeping routing as
efficient as possible.


SymmetricDS handles the issue of data gaps by making use of a table,

, to record gaps found in the data ids. In fact, this table completely
defines the entire range of data that can be routed at any point in time.
For a brand new instance of SymmetricDS, this table is empty and
SymmetricDS creates a gap starting from data id of zero and ending with
a very large number (defined by
routing.largest.gap.size
). At the start of a Route Job, the list of valid gaps (gaps with status
of 'GP') is collected, and each gap is evaluated in turn. If a gap is
sufficiently old (as defined by
routing.stale.dataid.gap.time.ms
), the gap is marked as skipped (status of 'SK') and will no longer be
evaluated in future Route Jobs (note that the 'last' gap (the one with
the highest starting data id) is never skipped). If not skipped, then

is searched for data ids present in the gap. If one or more data ids are
found in

, then the current gap is marked with a status of OK, and new gap(s) are
created to represent the data ids still missing in the gap's range. This
process is done for all gaps. If the very last gap contained data, a new
gap starting from the highest data id and ending at (highest data id +
routing.largest.gap.size
) is then created. This process has resulted in an updated list of gaps
which may contain new data to be routed.
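
The gap bookkeeping described above can be observed directly; a minimal
monitoring sketch against the gap table (assuming the start_id, end_id,
and status columns of sym_data_gap):

select start_id, end_id, status, create_time
from sym_data_gap
where status = 'GP'
order by start_id;

A small number of 'GP' rows plus the single large trailing gap is the
normal state; a steadily growing list can point to long-running or
rolled-back transactions on the source.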
-
-
Push and Pull Jobs for Database changes

After database-change data is routed, it awaits transport to the target nodes. Transport
can occur when a client node is configured to pull data or when the host
node is configured to push data. These events are controlled by the
push
and the
pull jobs
. When the
start.pull.job
SymmetricDS property is set to
true
, the frequency that data is pulled is controlled by the
job.pull.period.time.ms
. When the
start.push.job
SymmetricDS property is set to
true
, the frequency that data is pushed is controlled by the
job.push.period.time.ms
.


Data is extracted by channel from the source database's

table at an interval controlled by the
extract_period_millis
column on the

table. The
last_extract_time
is always recorded, by channel, on the

table for the host node's id. When the Pull and Push Jobs run, if the
extract period has not passed according to the last extract time, then
the channel will be skipped for this run. If the
extract_period_millis
is set to zero, data extraction will happen every time the jobs run.


The maximum number of batches to extract per synchronization is
controlled by
max_batch_to_send
on the

table. There is also a setting that controls the max number of bytes to
send in one synchronization. If SymmetricDS has extracted more than
the number of bytes configured by the
transport.max.bytes.to.sync
parameter, then it will finish extracting the current batch and finish
synchronization so the client has a chance to process and acknowledge the
"big" batch. This may happen before the configured max number of batches
has been reached.


Both the push and pull jobs can be configured to push and pull multiple
nodes in parallel. In order to take advantage of this the
pull.thread.per.server.count
or
push.thread.per.server.count
should be adjusted (from their default value of 10) to the number of
concurrent push/pull operations you want to occur per period on each
SymmetricDS instance. Push and pull activity is recorded in the

table. This table is also used to lock push and pull activity across
multiple servers in a cluster.


SymmetricDS also provides the ability to configure windows of time when
synchronization is allowed. This is done using the

table. A list of allowed time windows can be specified for a node group
and a channel. If one or more windows exist, then data will only be
extracted and transported if the time of day falls within the window of
time specified. The configured times are always for the target node's
local time. If the
start_time
is greater than the
end_time
, then the window crosses over to the next day.


All data loading may be disabled by setting the
dataloader.enable
property to false. This has the effect of not allowing incoming
synchronizations, while allowing outgoing synchronizations. All data
extractions may be disabled by setting the
dataextractor.enable
property to false. These properties can be controlled by inserting into
the root server's

table. These properties affect every channel with the exception of the
'config' channel.

 Node communication over HTTP is represented in the
following figure.
-Node Communication - - - - - -
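
As a sketch of the synchronization windows described above, the following
insert restricts a channel to an overnight window for the target node
group (the group, channel, and times are illustrative):

insert into sym_node_group_channel_wnd
(node_group_id, channel_id, start_time, end_time, enabled)
values ('store', 'sale_transaction', '22:00:00', '04:00:00', 1);

Because start_time is greater than end_time here, the window crosses over
midnight, per the rule described above.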
-
-
- -
File Sync Push and Pull Jobs

The File Sync Push and Pull jobs (introduced in version 3.5) are responsible for synchronizing file changes.
These jobs work with batches on the filesync channel and create ZIP files of changed files
to be sent and applied on other nodes.
The parameters job.file.sync.push.period.time.ms and job.file.sync.pull.period.time.ms
control how often the jobs run, which default to every 60 seconds.
See also and .
- -
-File System Tracker Job - -The File System Tracker job (introduced in version 3.5) is responsible for monitoring and -recording the events of files being created, modified, or deleted. -It records the current state of files to the table. -The parameter job.file.sync.tracker.cron controls how often the job runs, -which defaults to every 5 minutes. -See also and . - -
- -
Sync Triggers Job

SymmetricDS examines the current configuration, corresponding database
triggers, and the underlying tables to determine if database triggers
need to be created or updated. The change activity is recorded on the

table with a reason for the change. The following reasons for a change
are possible:



N - New trigger that has not been created before


S - Schema changes in the table were detected


C - Configuration changes in Trigger


T - Trigger was missing



A configuration entry in Trigger without any history in Trigger Hist
results in a new trigger being created (N). The Trigger Hist stores a
hash of the underlying table, so any alteration to the table causes the
trigger to be rebuilt (S). When the
last_update_time
is changed on the Trigger entry, the configuration change causes the
trigger to be rebuilt (C). If an entry in Trigger Hist is missing the
corresponding database trigger, the trigger is created (T).


The process of examining triggers and rebuilding them is automatically
run during startup and each night by the SyncTriggersJob. The user can
also manually run the process at any time by invoking the
syncTriggers()
method over JMX.
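
The recorded reasons can be inspected directly in the trigger history
table; a small sketch (last_trigger_build_reason holds the N/S/C/T code
described above):

select trigger_id, source_table_name, last_trigger_build_reason,
create_time
from sym_trigger_hist
order by create_time desc;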
-
Purge Jobs

Purging is the act of cleaning up captured data that is no longer needed
in SymmetricDS's runtime tables. Data is purged through delete
statements by the
Purge Job
. Only data that has been successfully synchronized will be purged.
Purged tables include:

















The purge job is enabled by the
start.purge.job
SymmetricDS property. The timing of the three purge jobs (incoming,
outgoing, and data gaps) is controlled by a cron expression as specified
by the following properties:
job.purge.outgoing.cron
,
job.purge.incoming.cron
, and
job.purge.datagaps.cron
. The default is
0 0 0 * * *
, or once per day at midnight.



Two retention period properties indicate how much history SymmetricDS
will retain before purging. The
purge.retention.minutes
property indicates the period of history to keep for synchronization
tables. The default value is 5 days. The
statistic.retention.minutes
property indicates the period of history to keep for statistics. The
default value is also 5 days.

 The purge properties should be adjusted according to how
much data is flowing through the system and the amount of storage space
the database has. For an initial deployment it is recommended that the
purge properties be kept at the defaults, since it is often helpful to
be able to look at the captured data in order to triage problems and
profile the synchronization patterns. When scaling up to more nodes, it
is recommended that the purge parameters be scaled back to 24 hours or
less.
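
As a sketch of scaling retention back centrally, the override can be
inserted into the parameter table on the registration server (the
'ALL'/'ALL' scoping and user id are illustrative):

-- keep 24 hours (1440 minutes) of synchronization history on all nodes
insert into sym_parameter
(external_id, node_group_id, param_key, param_value,
create_time, last_update_by, last_update_time)
values ('ALL', 'ALL', 'purge.retention.minutes', '1440',
current_timestamp, 'admin', current_timestamp);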
- -
+
+
+ There are several pre-defined transform types available in SymmetricDS.
+ Additional ones can be defined by creating and configuring an extension
+ point which implements the
+ IColumnTransform
+ interface. The pre-defined transform types include the following (the
+ transform_type entry is shown in parentheses):
+
+ Copy Column Transform ('copy'): This transformation
+ type copies the source column value to the target column. This is the
+ default behavior.
+
+ Remove Column Transform ('remove'): This
+ transformation type removes the source column. This transform type is
+ only valid for a table transformation type of 'IMPLIED' where all the
+ columns from the source are automatically copied to the target.
+
+ Constant Transform ('const'): This transformation
+ type allows you to map a constant value to the given target column. The
+ constant itself is placed in transform_expression.
+
+
+ Variable Transform ('variable'): This transformation type allows you to
+ map a built-in dynamic variable to the given target column. The variable
+ name is placed in transform_expression. The following variables are
+ available:
+ system_date
+ is the current system date,
+ system_timestamp
+ is the current system date and time,
+ source_node_id
+ is the node id of the source,
+ target_node_id
+ is the node id of the target,
+ null
+ is a null value, and old_column_value is the column's old value prior to the DML operation.
+
+
+ Additive Transform ('additive'): This
+ transformation type is used for numeric data. It computes the change
+ between the old and new values on the source and then adds the change to
+ the existing value in the target column. That is, target = target +
+ multiplier (source_new - source_old), where multiplier is a constant
+ found in the transform_expression (default is 1 if not specified). For
+ example, if the source column changed from a 2 to a 4, the target column
+ is currently 10, and the multiplier is 3, the effect of the transform
+ will be to change the target column to a value of 16 ( 10+3*(4-2) =>
+ 16 ). Note that, in the case of deletes, the new column value is
+ considered 0 for the purposes of the calculation.
+
+
+ Substring Transform ('substr'): This transformation computes a substring
+ of the source column data and uses the substring as the target column
+ value. The transform_expression can be a single integer (
+ n
+ , the beginning index), or a pair of comma-separated integers (
+ n,m
+ - the beginning and ending index). The transform behaves as the Java
+ substring function would using the specified values in
+ transform_expression.
+
+
+ Multiplier Transform ('multiply'): This
+ transformation allows for the creation of multiple rows in the target
+ table based on the transform_expression. This transform type can only be
+ used on a primary key column. The transform_expression is a SQL
+ statement that returns the list to be used to create the multiple
+ targets.
+
+ Lookup Transform ('lookup'): This transformation
+ determines the target column value by using a query, contained in
+ transform_expression, to look up the value in another table. The query
+ must return a single row, and the first column of the query is used as
+ the value. Your query references source column names by prefixing with a
+ colon (e.g., :MY_COLUMN).
+
+
+ Shell Script Transform ('bsh'): This transformation allows you to
+ provide a Bean Shell script in transform_expression and executes the
+ script at the time of transformation.
Some variables are provided to the
+ script:
+ COLUMN_NAME
+ is a variable for a source column in the row, where the variable name is
+ the column name in uppercase;
+ currentValue
+ is the value of the current source column;
+ oldValue
+ is the old value of the source column for an updated row;
+ sqlTemplate
+ is a
+ org.jumpmind.db.sql.ISqlTemplate
+ object for querying or updating the database;
+ channelId
+ is a reference to the channel on which the transformation is happening;
+ sourceNode
+ is a
+ org.jumpmind.symmetric.model.Node
+ object that represents the node from where the data came;
+ targetNode
+ is a
+ org.jumpmind.symmetric.model.Node
+ object that represents the node where the data is being loaded.
+
+
+ Identity Transform ('identity'): This
+ transformation allows you to insert into an identity column by computing
+ a new identity, not copying the actual identity value from the source.
+
+
+
+ Mathematical Transform ('math'): This transformation allows you to
+ perform mathematical equations in the transform expression. Some
+ variables are provided to the script:
+ #{COLUMN_NAME}
+ is a variable for a source column in the row, where the variable name
+ is the column name in uppercase;
+ #{currentValue}
+ is the value of the current source column;
+ #{oldValue}
+ is the old value of the source column for an updated row.
+
+
+
+ Copy If Changed Transform ('copyIfChanged'): This transformation will copy the value to the target column if the source value has changed. More
+ specifically, the copy will occur if the old value of the source does not equal the new value. If the old and new are, in fact, equal, then either
+ the column will be ignored or the row will be ignored, based on the setting of the transform expression. If the transform expression is equal
+ to the string 'IgnoreColumn', the column will be ignored; otherwise, the row will be ignored.
+
+
+
+
+ Value Map Transform ('valueMap'): This transformation allows for simple value substitutions through use of the transform expression.
+ The transform expression should consist of a space-separated list of value pairs of the format sourceValue=targetValue. The column value is used to
+ locate the correct sourceValue, and the transform will change the value into the corresponding targetValue. A sourceValue of * can be used to
+ represent a default target value in the event that the sourceValue is not found. Otherwise, if no default value is found,
+ the result will be null. For example, consider the following transform expression: s1=t1 s2=t2 s3=t3 *=t4. A source value of
+ s1 will be transformed to t1, s2 to t2, s3 to t3, s4 to t4, s5 to t4, null to t4, etc.
+
+
+
+ Clarion Date Time ('clarionDateTime'): Convert a Clarion date with optional time into a timestamp. Clarion dates are stored as the number of days
+ since December 28, 1800, while Clarion times are stored as hundredths of a second since midnight, plus one. Use a source column of the Clarion date
+ and a target column of the timestamp. Optionally, in the transform expression, enter the name of the Clarion time column.
+
+
+
+ Columns To Rows ('columnsToRowsKey' and 'columnsToRowsValue'): Convert column values from a single source row into a row per column value
+ at the target. Two column mappings are needed to complete the work: use "columnsToRowsKey" to map which source column is used,
+ and use "columnsToRowsValue" to map the value.
The "columnsToRowsKey" mapping requires an expression in the format of + "column1=key1,column2=key2" to list the source column names and which key value is stored in the target column. + The "columnsToRowsValue" mapping sets the column's value at the target and allows an optional expression: + "changesOnly=true" to convert only rows when the old and new values have changed; "ignoreNulls=true" to convert only rows that are not null. + For example, column "fieldid" mapped as "columnsToRowsKey" with expression of "user1=1,user2=2" and column "color" mapped as + "columnsToRowsValue" would convert a row with columns named "user1" and "user2" containing values "red" and "blue" into two rows with columns + "fieldid" and "color" containing a row of "1" and "red" and a row of "2" and "blue". + + + + +
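+
+ As a sketch of a script-based mapping, the following configures a 'bsh'
+ column transform that computes a target column from a source column. The
+ transform id and column names are hypothetical, the column list is trimmed
+ to the descriptive columns, and it assumes a matching row already exists
+ in SYM_TRANSFORM_TABLE:
+
+ insert into SYM_TRANSFORM_COLUMN
+ (transform_id, include_on, source_column_name, target_column_name,
+ pk, transform_type, transform_expression, transform_order)
+ values
+ ('order_xform', '*', 'PRICE', 'PRICE_WITH_TAX', 0, 'bsh',
+ 'return currentValue == null ? null :
+ new java.math.BigDecimal(currentValue)
+ .multiply(new java.math.BigDecimal("1.08")).toString();',
+ 1);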
+
+
+ Load Filters
+
+ New as of SymmetricDS 3.1, SymmetricDS is now capable of taking actions
+ upon the load of certain data via configurable load filters. This new
+ configurable option is in addition to the already existing option of
+ writing a class that implements
+
+ . A configurable load filter watches for specific data that is being
+ loaded and then takes action based on the load of that data.
+
+
+ Specifying which data to act on is done by specifying a
+ source and target node group (data extracted from this node group, and
+ loaded into that node group), and a target catalog, schema and table
+ name. You can decide to take action on rows that are inserted, updated
+ and/or deleted, and can also further delineate which rows of the target
+ table to take action on by specifying additional criteria in the bean
+ shell script that is executed in response to the loaded data. As an
+ example, old and new values for the row of data being loaded are
+ available in the bean shell script, so you can act on rows with a
+ certain column value in old or new data.
+
+ The action taken is based on a bean shell script that you
+ can provide as part of the configuration. Actions can be taken at
+ different points in the load process including before write, after
+ write, at batch complete, at batch commit and/or at batch rollback.
+
+ Load Filter Configuration Table
+
+
+ SymmetricDS stores its load filter configuration in a single table
+ called
+
+ . The load filter table allows you to specify the following:
+
+ Load Filter Type ('load_filter_type'): The type of
+ load filter. Today only Bean Shell is supported ('BSH'), but SQL scripts
+ may be added in a future release.
+
+ Source Node Group ('source_node_group_id'): The
+ source node group for which you would like to watch for changes.
+
+ Target Node Group ('target_node_group_id'): The
+ target node group for which you would like to watch for changes. The
+ source and target node groups are used together to identify the node
+ group link for which you would like to watch for changes (i.e., when the
+ Server node group sends data to a Client node group).
+
+ Target Catalog ('target_catalog_name'): The name of
+ the target catalog for which you would like to watch for changes.
+
+ Target Schema ('target_schema_name'): The name of
+ the target schema for which you would like to watch for changes.
+
+ Target Table ('target_table_name'): The name of the
+ target table for which you would like to watch for changes. The target
+ catalog, target schema and target table name are used together to fully
+ qualify the table for which you would like to watch for changes.
+
+ Filter on Update ('filter_on_update'): Determines
+ whether the load filter takes action (executes) on a database update
+ statement.
+
+ Filter on Insert ('filter_on_insert'): Determines
+ whether the load filter takes action (executes) on a database insert
+ statement.
+
+ Filter on Delete ('filter_on_delete'): Determines
+ whether the load filter takes action (executes) on a database delete
+ statement.
+
+ Before Write Script ('before_write_script'): The
+ script to execute before the database write occurs.
+
+ After Write Script ('after_write_script'): The
+ script to execute after the database write occurs.
+
+ Batch Complete Script ('batch_complete_script'):
+ The script to execute after the entire batch completes.
+
+ Batch Commit Script ('batch_commit_script'): The
+ script to execute after the entire batch is committed.
+
+ Batch Rollback Script ('batch_rollback_script'):
+ The script to execute if the batch rolls back.
+
+ Handle Error Script ('handle_error_script'): A
+ script to execute if data cannot be processed.
+
+ Load Filter Order ('load_filter_order'): The order
+ in which load filters should execute if there are multiple scripts
+ pertaining to the same source and target data.
+
+ +
+ Variables available to Data Load Filters
+
+
+ As part of the bean shell load filters, SymmetricDS provides certain
+ variables for use in the bean shell script. Those variables include:
+
+ Symmetric Engine ('ENGINE'): The Symmetric engine
+ object.
+
+ Source Values ('<COLUMN_NAME>'): The source
+ values for the row being inserted, updated or deleted.
+
+ Old Values ('OLD_<COLUMN_NAME>'): The old
+ values for the row being inserted, updated or deleted.
+
+ Data Context ('CONTEXT'): The data context object
+ for the data being inserted, updated or deleted.
+
+ Table Data ('TABLE'): The table object for the
+ table being inserted, updated or deleted.
+
+ +
+ Data Load Filter Example
+
+
+ The following is an example of a load filter that watches a table named
+ TABLE_TO_WATCH being loaded from the Server node group to the Client
+ node group for inserts or updates. When a matching row is loaded, it
+ requests an initial load of a table named TABLE_TO_RELOAD, limited to
+ rows whose KEY_FIELD matches the KEY_FIELD value of the row just loaded.
+ (The example uses Oracle's sysdate; substitute your database's current
+ timestamp function as needed.)
+ insert into sym_load_filter
+ (LOAD_FILTER_ID, LOAD_FILTER_TYPE, SOURCE_NODE_GROUP_ID,
+ TARGET_NODE_GROUP_ID, TARGET_CATALOG_NAME, TARGET_SCHEMA_NAME,
+ TARGET_TABLE_NAME, FILTER_ON_UPDATE, FILTER_ON_INSERT, FILTER_ON_DELETE,
+ BEFORE_WRITE_SCRIPT, AFTER_WRITE_SCRIPT, BATCH_COMPLETE_SCRIPT,
+ BATCH_COMMIT_SCRIPT, BATCH_ROLLBACK_SCRIPT, HANDLE_ERROR_SCRIPT,
+ CREATE_TIME, LAST_UPDATE_BY, LAST_UPDATE_TIME, LOAD_FILTER_ORDER,
+ FAIL_ON_ERROR) values
+ ('TABLE_TO_RELOAD','BSH','Server','Client',NULL,NULL,
+ 'TABLE_TO_WATCH',1,1,0,null,
+ 'engine.getDataService().reloadTable(context.getBatch().getSourceNodeId(),
+ table.getCatalog(), table.getSchema(), "TABLE_TO_RELOAD","KEY_FIELD=''"
+ + KEY_FIELD + "''");'
+ ,null,null,null,null,sysdate,'userid',sysdate,1,1);
+
+
+
+ Grouplets
+
+ As you probably know by now, SymmetricDS stores its single configuration centrally and distributes it to all nodes. By default, a trigger-router is in effect for all nodes in the source node group or target node group. Triggers will be established
+ on each node that is a member of the source node group, and changes will be routed to all relevant nodes that are members of the target node group. If, for example, the router routes to "all" nodes,
+ "all" means every node that is in the target node group. This is the default behavior of SymmetricDS.
+
+
+ Once in production, however, you will likely find you need or want to make configuration changes to triggers and routers as new features are rolled out to your network of SymmetricDS nodes.
+ You may, for example, wish to "pilot" a new configuration, containing new synchronizations, only on specific nodes initially, and then increase the size of the pilot over time.
+ SymmetricDS does provide the ability to specify that only particular trigger-router combinations are applicable to particular nodes for this purpose. It does this
+ by allowing you to define an arbitrary collection of nodes, called a "grouplet", and then choosing which trigger-routers apply to the normal set of nodes (the default behavior)
+ and which apply just to nodes in one or more "grouplets". This allows you, essentially, to filter the list of nodes that would otherwise be included as source nodes and/or target nodes.
+ Through the use of grouplets, you can, for example, specify a subset of nodes on which a given trigger would be created. It also allows you to
+ specify a subset of the normal set of nodes a change would be routed to. This behavior is in addition to, and occurs before, any subsetting or filtering the router might otherwise do.
+
+
+ In its simplest form, a grouplet is just an arbitrary collection of nodes. To define a grouplet, you start by creating a grouplet with a unique id, a description, and a link policy,
+ as defined in . To define which nodes are members of (or are not members of) a grouplet, you provide a list of external ids of the nodes
+ in . How those external ids are used varies based on the grouplet link policy.
+ The grouplet_link_policy can be either I or E, representing an "inclusive" list of nodes or an "exclusive" list of
+ nodes, respectively. In the case of "inclusive", you'll be listing each external id to be included in the grouplet. In the case of exclusive, all nodes will be included in
+ the grouplet except ones which have an external id in the list of external ids.
+
+
+
+ Once you have defined your grouplet and which nodes are members of a grouplet, you can tie a grouplet to a given trigger-router through
+ the use of .
+ If a particular trigger-router does not appear in this table, SymmetricDS behaves as normal.
+ If, however, an entry for a particular trigger-router appears in this table, the default behavior is overridden based on the grouplet_id and applies_when settings.
+ The grouplet id provides the node list, and the applies_when indicates whether the grouplet nodes are to be used to filter the source node list, the target node list,
+ or both (settings are "S", "T", and "B", respectively). Nodes that survive the filtering process as a source will have a trigger defined, and nodes that survive the filtering process
+ as a target are eligible nodes that can be routed to.
+ Grouplet Example + + + + At this point, an example would probably be useful. Picture the case where you have 100 retail stores (each containing one database, and each a member of the "store" node group) + and a central office database (external id of corp, and a member of the "corp" node group ). You wish to pilot two new trigger and routers + for a new feature on your point-of-sale software (one which moves data from corp to store, and one which moves data from store to corp), but you only want the triggers to be installed on 10 specific stores that represent your "pilot" stores. In this case, + the simplest approach would be to define a grouplet with, say, a grouplet id of "pilot". We'd use a grouplet link policy of "inclusive", and list each of the 10 external ids + in the table. + + + For the trigger-router meant to send data from corp to store, we'd create an entry in for + our grouplet id of "pilot", and we'd specify "T" (target) as the applies-when setting. In this way, the source node list is not filtered, but the target node list used during routing + will filter the potential target nodes to just our pilot stores. For the trigger-router meant to send data from a pilot store back to corp, we would have the grouplet apply when + the node is in the source node list (i.e., applies_when will be "S"). This will cause the trigger to only be created for stores in the pilot list and not other stores. + + An important thing to mention in this example: Since your grouplet only included the store nodes, you can't simply specify "both" for the applies when setting. For the corp-to-store trigger, + for example, if you had said "both", no trigger would have been installed in corp since the grouplet nodes represent all possible source nodes as well as target nodes, and "corp" is not in the list! + The same is true for the store to corp trigger-router as well. You could, however, use "both" as the applies when if you had included the "corp" external id in with the list of the 10 pilot store external ids. + +
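+
+ A sketch of the configuration behind this pilot (the grouplet, trigger,
+ and router ids are illustrative, one sym_grouplet_link row is shown for
+ brevity, and audit columns such as create_time are omitted from the
+ column lists):
+
+ insert into sym_grouplet (grouplet_id, grouplet_link_policy, description)
+ values ('pilot', 'I', 'Stores piloting the new POS feature');
+
+ -- repeat for each of the 10 pilot stores
+ insert into sym_grouplet_link (grouplet_id, external_id)
+ values ('pilot', 'store0001');
+
+ -- corp-to-store trigger-router: filter the target node list ('T')
+ insert into sym_trigger_router_grouplet
+ (grouplet_id, trigger_id, router_id, applies_when)
+ values ('pilot', 'new_feature_trigger', 'corp_to_store', 'T');
+
+ -- store-to-corp trigger-router: filter the source node list ('S')
+ insert into sym_trigger_router_grouplet
+ (grouplet_id, trigger_id, router_id, applies_when)
+ values ('pilot', 'new_feature_trigger', 'store_to_corp', 'S');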
+
+
+ Parameters + Parameters can be used to help tune and configure your SymmetricDS configuration. Parameters can be set for an individual node or for all nodes in your network. + See , for a complete list of parameters. +
+
+ Export +
+
+ Import +
+
+ Uninstall +
+
diff --git a/symmetric-assemble/src/docbook/developer.xml b/symmetric-assemble/src/docbook/developer.xml new file mode 100644 index 0000000000..60fcacdbd5 --- /dev/null +++ b/symmetric-assemble/src/docbook/developer.xml @@ -0,0 +1,39 @@ + + + + Developer + + This chapter focuses on a variety of ways for developers to build upon and extend some of the existing features found within SymmetricDS. + + + + + + diff --git a/symmetric-assemble/src/docbook/introduction.xml b/symmetric-assemble/src/docbook/introduction.xml index f770e67c78..299f308450 100644 --- a/symmetric-assemble/src/docbook/introduction.xml +++ b/symmetric-assemble/src/docbook/introduction.xml @@ -120,11 +120,7 @@
- Overview - - The following is an overview of how SymmetricDS works. - - + Architecture
A Node is Born @@ -469,49 +465,7 @@
-
- Origins - - The idea of SymmetricDS was born from a real-world need. Several of - the original developers were, several years ago, implementing a commercial - Point of Sale (POS) system for a large retailer. The development team came - to the conclusion that the software available for trickling back - transactions to corporate headquarters (frequently known as the 'central - office' or 'general office') did not meet the project needs. The list of - project requirements made finding the ideal solution difficult: - - - - Sending and receiving data with up to 2000 stores during peak - holiday loads. - - - - Supporting one database platform at the store and a different - one at the central office. - - - - Synchronizing some data in one direction, and other data in both - directions. - - - - Filtering out sensitive data and re-routing it to a protected - database. - - - - Preparing the store database with an initial load of data from - the central office. - - - - The team ultimately created a custom solution that met the - requirements and led to a successful project. From this work came the - knowledge and experience that SymmetricDS benefits from today. -
- +
Why Database Triggers? @@ -565,83 +519,5 @@ require deployment on the database server itself.
-
- Support - - SymmetricDS is backed by JumpMind, Inc. - - SymmetricDS is, and always will be, open source, which means free - community support is available online, through the forums and the issue - tracker. In a production environment, we have found that clients demand - fast, more experienced help from the original architects and engineers — - people who have the knowledge and experience to design, tune, - troubleshoot, and shape future versions of the product. - - To meet this demand, JumpMind provides Support Subscriptions - designed to provide your organization with expert, dependable support from - development to mission critical production support. -
- -
- What's New in SymmetricDS 3 - - SymmetricDS 3 builds upon the existing SymmetricDS 2.x software base - and incorporates a number of architectural changes and performance - improvements. If you are brand new to SymmetricDS, you can safely skip - this section. If you have used SymmetricDS 2.x in the past, this section - summarizes the key differences you will encounter when moving to - SymmetricDS 3. - - One optimization that effects both routing and data extraction is a - change to the routing process to reuse batches across nodes if all of the - data in the batches is going to be the same. SymmetricDS will - automatically reuse batches if the default router is being used and there - are NO inbound routers that have sync_on_incoming_batch turned on. If the - same data is being sent to all nodes then a great deal of processing, - during both routing and extraction, can be avoided. This is especially - useful when data is being delivered to thousands of nodes. As a result of - this change, the primary key of has changed. This means that during an upgrade the - table will be rebuilt. - - Another optimization that effects data transport is the change to - load batches as soon as they have been delivered to a target node. In 2.x - all batches for a synchronization run were delivered, and then data was - loaded. When errors occurred early on and there were several big batches - or hundreds of batches to deliver, this was inefficient because all the - batches were transported before the loading started. - - Yet another optimization allows SymmetricDS to scale better when it - is initiating communication with nodes. The pulling and pushing of data - now happens from a configurable, but fixed size thread pool so that - multiple nodes can be pulled and pushed to concurrently. This means that - now, a centralized node can reach out to many child nodes in an efficient - manner where in the past, the child nodes were relied upon to initiate - communication. - - The 2.x series allowed multiple nodes to be hosted in one standalone - SymmetricDS instance. This feature (called multiServerMode) was off by - default. In SymmetricDS 3 this feature is now the preferred mode of - operation. It formalizes where properties file are configured and allows - multiple nodes to be hosted on one JVM which saves on system - resources. - - SymmetricDS 3 introduces a long anticipated feature: Conflict - Detection and Resolution. Please see - for more information. - - Transformations are now friendlier. They allow columns to be - implied. This means that when configuring transformations, not all of the - columns have to be specified which makes transformations much more - maintainable. - - An architectural change to the data loader subsystem allows the data - loader to now be pluggable by channel. This will allow more efficient data - loaders to be built if necessary. It will also make it straight forward to - load data into non-relational data stores. - - Several properties and extension points have been deprecated or - renamed. Please see for a list of deprecated - features. -
+ diff --git a/symmetric-assemble/src/docbook/manage.xml b/symmetric-assemble/src/docbook/manage.xml new file mode 100644 index 0000000000..4983c1277d --- /dev/null +++ b/symmetric-assemble/src/docbook/manage.xml @@ -0,0 +1,1121 @@ + + + + + Manage + + +
+ Nodes
+
+ A
+ node
+ , a single instance of SymmetricDS, is defined in the
+
+ table. Two other tables play a direct role in defining a node as well.
+ The first is
+
+ . The
+ only
+ row in this table is inserted in the database when the node first
+ registers
+ with a parent node. In the case of a root node, the row is entered by
+ the user. The row is used by a node instance to determine its node
+ identity.
+
+
+
+ The following SQL statements set up a top-level registration server as a
+ node identified as "00000" in the "corp" node group.
+ insert into SYM_NODE (node_id,
+ node_group_id, external_id, sync_enabled) values ('00000', 'corp',
+ '00000', 1); insert into SYM_NODE_IDENTITY values ('00000');
+
+
+
+ The second table,
+
+ , has rows created for each
+ child
+ node that registers with the node, assuming auto-registration is
+ enabled. If auto-registration is not enabled, you must create a row in
+
+ and
+
+ for the node to be able to register. You can also, with this table,
+ manually cause a node to re-register or do a re-initial load by setting
+ the corresponding columns in the table itself. Registration is discussed
+ in more detail in
+
+ .
+
+ Registration + + Node registration is the act of setting up a new + + and + + so that when the new node is brought online it is allowed to join the + system. Nodes are only allowed to register if rows exist for the node + and the + registration_enabled + flag is set to 1. If the + auto.registration + SymmetricDS property is set to true, then when a node attempts to + register, if registration has not already occurred, the node will + automatically be registered. + + + + SymmetricDS allows you to have multiple nodes with the same + external_id + . Out of the box, openRegistration will open a new registration if a + registration already exists for a node with the same external_id. A new + registration means a new node with a new + node_id + and the same + external_id + will be created. If you want to re-register the same node you can use + the + reOpenRegistration() + JMX method which takes a + node_id + as an argument. + +
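+
+ When auto-registration is off, pre-registering a node by hand amounts to
+ two inserts on the parent; a sketch (node id, group, and password are
+ illustrative, and only the registration-related columns of
+ sym_node_security are shown):
+
+ insert into sym_node (node_id, node_group_id, external_id, sync_enabled)
+ values ('001', 'store', '001', 1);
+
+ insert into sym_node_security
+ (node_id, node_password, registration_enabled, initial_load_enabled,
+ created_at_node_id)
+ values ('001', 'changeme', 1, 0, '00000');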
+ +
+ + Initial Loads + + An initial load is the process of seeding tables at a + target node with data from its parent node. When a node connects and + data is extracted, after it is registered and if an initial load was + requested, each table that is configured to synchronize to the target + node group will be given a reload event in the order defined by the end + user. A SQL statement is run against each table to get the data load + that will be streamed to the target node. The selected data is filtered + through the configured router for the table being loaded. If the data + set is going to be large, then SQL criteria can optionally be provided + to pare down the data that is selected out of the database. + + + An initial load cannot occur until after a node is registered. An + initial load is requested by setting the + initial_load_enabled + column on + + to + 1 + on the row for the target node in the parent node's database. You can + configure SymmetricDS to automatically perform an initial load when a + node registers by setting the parameter + auto.reload + to true. Regardless of how the initial load is initiated, the next time + the source node routes data, reload batches will be inserted. At the + same time reload batches are inserted, all previously pending batches + for the node are marked as successfully sent. + + + + + Note that if the parent node that a node is registering with is + not + a registration server node (as can happen with a registration redirect + or certain non-tree structure node configurations) the parent node's + + entry must exist at the parent node and have a non-null value for column + initial_load_time + . Nodes can't be registered to non-registration-server nodes without + this value being set one way or another (i.e., manually, or as a result + of an initial load occurring at the parent node). + + + + + SymmetricDS recognizes that an initial load has completed when the + initial_load_time + column on the target node is set to a non-null value. + + + + An initial load is accomplished by inserting reload batches in a defined + order according to the + initial_load_order + column on + + . If the + initial_load_order + column contains a negative value the associated table will + NOT + be loaded. If the + initial_load_order + column contains the same value for multiple tables, SymmetricDS will + attempt to order the tables according to foreign key constraints. If + there are cyclical constraints, then foreign keys might need to be + turned off or the initial load will need to be manually configured based + on knowledge of how the data is structured. + + + Initial load data is always queried from the source + database table. All data is passed through the configured router to + filter out data that might not be targeted at a node. + +
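+
+ A sketch of requesting an initial load by hand, run against the parent
+ node's database (the node id is illustrative):
+
+ update sym_node_security
+ set initial_load_enabled = 1
+ where node_id = '001';
+
+ The next time the source node routes data, the reload batches for node
+ '001' will be inserted.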
+ Target table prep for initial load
+ There are several parameters that can be used to specify
+ what, if anything, should be done to the table on the target database
+ just prior to loading the data. Note that the parameters below specify
+ the desired behavior for all tables in the initial load, not just one.
+
+
+ initial.load.delete.first /
+ initial.load.delete.first.sql
+
+ By default, an initial load will not delete existing rows from a target
+ table before loading the data. If a delete is desired, the parameter
+ initial.load.delete.first
+ can be set to true. If true, the command found in
+ initial.load.delete.first.sql
+ will be run on each table prior to loading the data. The default value
+ for
+ initial.load.delete.first.sql
+ is
+ delete from %s
+ , but could be changed if needed. Note that additional reload batches
+ are created, in the correct order, to achieve the delete.
+
+
+ initial.load.create.first
+
+ By default, an initial load will not create the table on the target if
+ it doesn't already exist. If the desired behavior is to create the table
+ on the target if it is not present, set the parameter
+ initial.load.create.first
+ to true. SymmetricDS will attempt to create the table and indexes on the
+ target database before doing the initial load. (Additional batches are
+ created to represent the table schema).
+
+
+
+
+ Loading subsets of data + + + An efficient way to select a subset of data from a table for an initial + load is to provide an + initial_load_select + clause on + + . This clause, if present, is applied as a + where + clause to the SQL used to select the data to be loaded. The clause may + use "t" as an alias for the table being loaded, if needed. The + $(externalId) + token can be used for subsetting the data in the where clause. + + + + In cases where routing is done using a feature like + Subselect Router + , an + initial_load_select + clause matching the subselect's criteria would be a more efficient + approach. Some routers will check to see if the + initial_load_select + clause is provided, and they will + not + execute assuming that the more optimal path is using the + initial_load_select + statement. + + + + One example of the use of an initial load select would be if you wished + to only load data created more recently than the start of year 2011. + Say, for example, the column + created_time + contains the creation date. Your + initial_load_select + would read + created_time > ts {'2011-01-01 00:00:00.0000'} + (using whatever timestamp format works for your database). This then + gets applied as a + where + clause when selecting data from the table. + + + + + When providing an + initial_load_select + be sure to test out the criteria against production data in a query + browser. Do an explain plan to make sure you are properly using indexes. + + +
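+
+ A sketch of storing the year-2011 criteria above (the trigger and router
+ ids are illustrative; the doubled quotes are standard SQL escaping):
+
+ update sym_trigger_router
+ set initial_load_select = 'created_time > ts {''2011-01-01 00:00:00.0000''}'
+ where trigger_id = 'sale_transaction' and router_id = 'corp_to_store';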
+ +
+ Splitting an Initial Load for a Table Across Multiple Batches + + By default, all data for a given table will be initial loaded in a single batch, regardless + of the max batch size parameter on the reload channel. That is, for a table with one million + rows, all rows for that table will be initial loaded and sent to the destination node in a + single batch. For large tables, this can result in a batch that can take a long time to + extract and load. + + + + Initial loads for a table can be broken into multiple batches by specifying + initial.load.use.extract.job.enabled to true. This parameter allows + SymmetricDS to pre-extract initial load batches versus having them extracted when + the batch is pulled or pushed. When using this parameter, there are two ways to tell + SymmetricDS the number of batches to create for a given table. The first is to specify + a positive integer in the initial_load_batch_count column on + . This + number will dictate the number of batches created for the initial load of the given table. + The second way is to specify 0 for initial_load_batch_count on + and + specify a max_batch_size on the reload channel in . + When 0 is specified for + initial_load_batch_count, SymmetricDS will execute a count(*) query on the table during + the extract process and create N batches based on the total number of records found + in the table divided by the max_batch_size on the reload channel. + + +
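+
+ A sketch of both options (trigger and router ids are illustrative;
+ remember that initial.load.use.extract.job.enabled must be true for
+ either to split batches):
+
+ -- explicitly split this table's initial load into 10 batches
+ update sym_trigger_router
+ set initial_load_batch_count = 10
+ where trigger_id = 'sale_transaction' and router_id = 'corp_to_store';
+
+ -- or size batches from the reload channel's max_batch_size
+ update sym_trigger_router
+ set initial_load_batch_count = 0
+ where trigger_id = 'sale_transaction' and router_id = 'corp_to_store';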
+ +
+ Reverse Initial Loads
+
+ The default behavior for initial loads is to load data from the
+ registration server or parent node, to a client node. Occasionally,
+ there may be a need to do a one-time initial load of data in the opposite
+ or "reverse" direction, namely from a client node to the registration
+ node. To achieve this, set the parameter
+ auto.reload.reverse
+ to be true,
+ but only for the specific node group representing
+ the client nodes
+ . This will cause a one-time reverse load of data, for tables configured
+ with non-negative initial load orders, to be batched at the point when
+ registration of the client node is occurring. These batches are then
+ sent to the parent or registration node. This capability might be
+ needed, for example, if there is data already present in the client that
+ doesn't exist in the parent but needs to.
+
+
+
+ Re-synchronizing Data
+
+ There may be times when you find you need to re-send or re-synchronize data when the change itself was not captured. This could be needed, for example,
+ if the data changes occurred prior to SymmetricDS placing triggers on the data tables themselves, or if the data at the destination was accidentally deleted, or for
+ some other reason. Two approaches are commonly taken to re-send the data, both of which are discussed below.
+
+
+ Be careful when re-sending data using either of these two techniques. Be sure you are only sending the rows you intend to send and,
+ more importantly, be sure to re-send the data in a way that won't cause foreign key constraint issues at the destination. In other words,
+ if more than one table is involved, be sure to send any tables which are referred to by other tables by foreign keys first. Otherwise,
+ the channel's synchronization will block because SymmetricDS is unable to insert or update the row because the foreign key relationship refers to
+ a non-existent row in the destination!
+
+
+ One possible approach would be to "touch" the rows in individual tables that need to be re-sent. By "touch", we mean to alter the row data in such a way
+ that SymmetricDS detects a data change and therefore includes the data change in the batching and synchronizing steps. Note that you have to
+ change the data in some meaningful way (e.g., update a time stamp); setting a column to its current value is not sufficient (by default, if there's not an actual data
+ value change, SymmetricDS won't treat the change as something which needs to be synchronized).
+
+ A second approach would be to take advantage of SymmetricDS's built-in functionality by simulating a partial "initial load" of the data. The approach
+ is to manually create "reload" events in for the necessary tables, thereby resending the desired rows for the given tables.
+ Again, foreign key constraints must be kept in mind when creating these reload events. These reload events are created in the source database itself, and
+ the necessary table, trigger-router combination, and channel are included to indicate the direction of synchronization.
+
+ To create a reload event, you create a row, using:
+
+ data_id: null
+ table_name: name of table to be sent
+ event_type: 'R', for reload
+ row_data: a "where" clause (minus the word 'where') which defines the subset of rows from the table to be sent. To send all rows, one can use 1=1 for this value.
+ pk_data: null
+ old_data: null
+ trigger_hist_id: use the id of the most recent entry (i.e., max(trigger_hist_id) ) in
+ for the trigger-router combination for your table and router.
+ channel_id: the channel in which the table is routed
+ transaction_id: pick a value, for example '1'
+ source_node_id: null
+ external_data: null
+ create_time: current_timestamp
+
+
+
+ By way of example, take our retail hands-on tutorial covered in . Let's say
+ we need to re-send a particular sales transaction from the store to corp over again because we lost the data in corp due to
+ an overzealous delete. For the tutorial, all transaction-related tables start with sale_,
+ use the sale_transaction channel, and are routed using the store_corp_identity
+ router. In addition, the trigger-routers have been set up with an initial load order based on the necessary
+ foreign key relationships (i.e., transaction tables which are "parents" have a lower initial load order than those of their
+ "children").
An insert statement that would create the necessary "reload" events (three in this case, one for each table) would be as follows
+ (where MISSING-ID is changed to the needed transaction id):
+
+
+ insert into sym_data (
+ select null, t.source_table_name, 'R', 'tran_id=''MISSING-ID''', null, null,
+ h.trigger_hist_id, t.channel_id, '1', null, null, current_timestamp
+ from sym_trigger t inner join sym_trigger_router tr on
+ t.trigger_id=tr.trigger_id inner join sym_trigger_hist h on
+ h.trigger_hist_id=(select max(trigger_hist_id) from sym_trigger_hist
+ where trigger_id=t.trigger_id)
+ where channel_id='sale_transaction' and
+ tr.router_id like 'store_corp_identity' and
+ (t.source_table_name like 'sale_%')
+ order by tr.initial_load_order asc);
+
+
+ This insert statement generates three rows, one for each configured sale table. It uses the most recent
+ trigger history id for the corresponding table. Finally, it takes advantage of the initial load order for each trigger-router to
+ create the three rows in the correct order (the order corresponding to the order in which the tables would have been initially loaded).
+
+
+
+
+ Jobs
+
+ Work done by SymmetricDS is initiated by jobs. Jobs are tasks that are
+ started and scheduled by a job manager. Jobs are enabled by the
+ start.{name}.job
+ property. Most jobs are enabled by default. The frequency at which a job
+ runs is controlled by one of two properties:
+ job.{name}.period.time.ms
+ or
+ job.{name}.cron
+ . If a valid cron property exists in the configuration, then it will be
+ used to schedule the job. Otherwise, the job manager will attempt to use
+ the period.time.ms property.
+
+
+ The frequency of jobs can be configured in either the engine's properties
+ file or in
+
+ . When managed in
+
+ , the frequency properties can be changed in the registration server, and
+ when the updated settings sync to the nodes in the system, the job
+ manager will restart the jobs at the new frequency settings.
+
+
+ SymmetricDS utilizes Spring's CRON support, which includes seconds as
+ the first parameter. This differs from the typical Unix-based
+ implementation, where the first parameter is usually minutes. For
+ example,
+ */15 * * * * *
+ means every 15 seconds, not every 15 minutes. See
+ Spring's
+ documentation
+ for more details.
+
+
+ Some jobs cannot be run in parallel against a single node. When running
+ on a cluster, these jobs use the
+
+ table to get an exclusive semaphore to run the job. In order to use this
+ table, the
+ cluster.lock.enabled
+ property must be set to true.
+
+ The three main jobs in SymmetricDS are the route, push and
+ pull jobs. The route job decides what captured data changes should be
+ sent to which nodes. It also decides what captured data changes should
+ be transported and loaded together in a batch. The push and pull jobs
+ are responsible for initiating HTTP communication with linked nodes to
+ push or pull data changes that have been routed.
+
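+ As an illustration, the two scheduling styles might look like this in an
+ engine's properties file, using two job properties that this chapter
+ already names (the values shown are placeholders, not recommended
+ settings):
+
+ # period-based scheduling: pull every 60 seconds
+ job.pull.period.time.ms=60000
+ # cron-based scheduling: purge outgoing data nightly at midnight
+ # (remember that Spring cron expressions start with a seconds field)
+ job.purge.outgoing.cron=0 0 0 * * *
+
+ If both properties were set for the same job, the cron expression would
+ win, per the rule above.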
+ Route Job
+
+ After data is captured in the
+
+ table, it is routed to specific nodes in batches by the
+ Route Job
+ . It is a single background task that inserts into
+
+ and
+
+ .
+
+
+ The job processes each enabled channel, one at a time, collecting a list
+ of data ids from
+
+ which have not been routed (see
+
+ for much more detail about this step), up to a limit specified by the
+ channel configuration (
+ max_data_to_route
+ , on
+
+ ). The data is then batched based on the
+ batch_algorithm
+ defined for the channel and as documented in
+
+ . Note that, for the
+ default
+ and
+ transactional
+ algorithms, there may actually be more than
+ max_data_to_route
+ included depending on the transaction boundaries. The mapping of data to
+ specific nodes, organized into batches, is then recorded in
+
+ with a status of "RT" in each case (representing the fact that the Route
+ Job is still running). Once the routing algorithms and batching are
+ completed, the batches are organized with their corresponding data ids
+ and saved in
+
+ . Once
+
+ is updated, the rows in
+
+ are updated to a status of "NE" (New).
+
+
+ The route job will respect the
+ max_batch_size
+ on
+
+ . If the max batch size is reached before the end of a database
+ transaction and the batch algorithm is set to something other than
+ nontransactional
+ , the batch may exceed the specified max size.
+
+
+ The route job delegates to a router defined by the
+ router_type
+ and configured by the
+ router_expression
+ in the
+
+ table. Each router that has a
+ source_node_group_id
+ that matches the current node's source node group id and is linked to
+ the
+
+ that captured the data gets an opportunity to choose a list of nodes the
+ data should be sent to. Data can only be routed to nodes that belong to
+ the router's
+ target_node_group_id
+ .
+
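+ One quick (illustrative) way to observe routing progress is to count
+ outgoing batches by status, using columns that appear in the queries
+ later in this chapter; "RT" means routing is still in progress and "NE"
+ means the batch is routed and awaiting transport:
+
+ select node_id, channel_id, status, count(*)
+ from sym_outgoing_batch
+ group by node_id, channel_id, status;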
+ Data Gaps
+
+ On the surface, the first Route Job step of collecting unrouted data ids
+ seems simple: assign sequential data ids for each data row as it's
+ inserted, keep track of which data id was last routed, and start from
+ there. The difficulty arises, however, due to the fact that there can be
+ multiple transactions inserting into
+
+ simultaneously. As such, a given section of rows in the
+
+ table may actually contain "gaps" in the data ids when the Route Job is
+ executing. Most of these gaps are only temporary and fill in at some
+ point after routing, and they need to be picked up with the next run of the
+ Route Job. Thus, the Route Job needs to remember to route the filled-in
+ gaps. Worse yet, some of these gaps are actually permanent and result
+ from a transaction that is rolled back for some reason. In this case,
+ the Route Job must continue to watch for the gap to fill in and, at some
+ point, eventually gives up and assumes the gap is permanent and can be
+ skipped. All of this must be done in some fashion that guarantees that
+ gaps are routed when they fill in while also keeping routing as
+ efficient as possible.
+
+
+ SymmetricDS handles the issue of data gaps by making use of a table,
+
+ , to record gaps found in the data ids. In fact, this table completely
+ defines the entire range of data that can be routed at any point in time.
+ For a brand new instance of SymmetricDS, this table is empty and
+ SymmetricDS creates a gap starting from a data id of zero and ending with
+ a very large number (defined by
+ routing.largest.gap.size
+ ). At the start of a Route Job, the list of valid gaps (gaps with status
+ of 'GP') is collected, and each gap is evaluated in turn. If a gap is
+ sufficiently old (as defined by
+ routing.stale.dataid.gap.time.ms
+ ), the gap is marked as skipped (status of 'SK') and will no longer be
+ evaluated in future Route Jobs (note that the 'last' gap (the one with
+ the highest starting data id) is never skipped). If not skipped, then
+
+ is searched for data ids present in the gap. If one or more data ids are
+ found in
+
+ , then the current gap is marked with a status of 'OK', and new gap(s) are
+ created to represent the data ids still missing in the gap's range. This
+ process is done for all gaps. If the very last gap contained data, a new
+ gap starting from the highest data id and ending at (highest data id +
+ routing.largest.gap.size
+ ) is then created. The result of this process is an updated list of gaps
+ which may contain new data to be routed.
+
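+ As a sketch of how the gap list can be inspected, assuming the gap
+ table is named sym_data_gap and exposes start_id, end_id, and status
+ columns (an assumption; the table is only referenced indirectly above):
+
+ select start_id, end_id, status
+ from sym_data_gap
+ order by start_id;
+
+ The statuses correspond to the description above: 'GP' for a gap still
+ being watched, 'SK' for a gap that has been given up on, and 'OK' for a
+ gap in which data was found and routed.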
+
+
+ Push and Pull Jobs for Database Changes
+
+ After database-change data is routed, it awaits transport to the target nodes. Transport
+ can occur when a client node is configured to pull data or when the host
+ node is configured to push data. These events are controlled by the
+ push
+ and the
+ pull jobs
+ . When the
+ start.pull.job
+ SymmetricDS property is set to
+ true
+ , the frequency that data is pulled is controlled by the
+ job.pull.period.time.ms
+ property. When the
+ start.push.job
+ SymmetricDS property is set to
+ true
+ , the frequency that data is pushed is controlled by the
+ job.push.period.time.ms
+ property.
+
+
+ Data is extracted by channel from the source database's
+
+ table at an interval controlled by the
+ extract_period_millis
+ column on the
+
+ table. The
+ last_extract_time
+ is always recorded, by channel, on the
+
+ table for the host node's id. When the Pull and Push Jobs run, if the
+ extract period has not passed according to the last extract time, then
+ the channel will be skipped for this run. If the
+ extract_period_millis
+ is set to zero, data extraction will happen every time the jobs run.
+
+
+ The maximum number of batches to extract per synchronization is
+ controlled by
+ max_batch_to_send
+ on the
+
+ table. There is also a setting that controls the max number of bytes to
+ send in one synchronization. If SymmetricDS has extracted more than
+ the number of bytes configured by the
+ transport.max.bytes.to.sync
+ parameter, then it will finish extracting the current batch and finish
+ synchronization so the client has a chance to process and acknowledge the
+ "big" batch. This may happen before the configured max number of batches
+ has been reached.
+
+
+ Both the push and pull jobs can be configured to push and pull multiple
+ nodes in parallel. In order to take advantage of this, the
+ pull.thread.per.server.count
+ or
+ push.thread.per.server.count
+ should be adjusted (from their default value of 10) to the number of
+ concurrent push/pulls you want to occur per period on each
+ SymmetricDS instance. Push and pull activity is recorded in the
+
+ table. This table is also used to lock push and pull activity across
+ multiple servers in a cluster.
+
+
+ SymmetricDS also provides the ability to configure windows of time when
+ synchronization is allowed. This is done using the
+
+ table. A list of allowed time windows can be specified for a node group
+ and a channel. If one or more windows exist, then data will only be
+ extracted and transported if the time of day falls within the window of
+ time specified. The configured times are always for the target node's
+ local time. If the
+ start_time
+ is greater than the
+ end_time
+ , then the window crosses over to the next day.
+
+
+ All data loading may be disabled by setting the
+ dataloader.enable
+ property to false. This has the effect of not allowing incoming
+ synchronizations, while allowing outgoing synchronizations. All data
+ extractions may be disabled by setting the
+ dataextractor.enable
+ property to false. These properties can be controlled by inserting into
+ the root server's
+
+ table. These properties affect every channel with the exception of the
+ 'config' channel. An example insert is shown after the figure below.
+
+ Node communication over HTTP is represented in the
+ following figure.
+
+ Node Communication + + + + + +
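+ As an example of controlling the loading behavior described above, here
+ is a sketch of disabling all data loading for every node, assuming the
+ root server's parameter table is named sym_parameter with the column
+ layout shown (both are assumptions; 'ALL' scopes the setting to all
+ nodes):
+
+ insert into sym_parameter
+ (external_id, node_group_id, param_key, param_value, create_time)
+ values ('ALL', 'ALL', 'dataloader.enable', 'false', current_timestamp);
+
+ As noted above, this blocks incoming synchronizations on every channel
+ except 'config' while outgoing changes continue to be extracted.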
+
+
+ +
+ File Sync Push and Pull Jobs
+
+ The File Sync Push and Pull jobs (introduced in version 3.5) are responsible for synchronizing file changes.
+ These jobs work with batches on the filesync channel and create ZIP files of changed files
+ to be sent and applied on other nodes.
+ The parameters job.file.sync.push.period.time.ms and job.file.sync.pull.period.time.ms
+ control how often the jobs run; both default to every 60 seconds.
+ See also and .
+
+ +
+ File System Tracker Job + + The File System Tracker job (introduced in version 3.5) is responsible for monitoring and + recording the events of files being created, modified, or deleted. + It records the current state of files to the table. + The parameter job.file.sync.tracker.cron controls how often the job runs, + which defaults to every 5 minutes. + See also and . + +
+ +
+ Sync Triggers Job
+
+ SymmetricDS examines the current configuration, corresponding database
+ triggers, and the underlying tables to determine if database triggers
+ need to be created or updated. The change activity is recorded on the
+
+ table with a reason for the change. The following reasons for a change
+ are possible:
+
+
+
+ N - New trigger that has not been created before
+
+
+ S - Schema changes in the table were detected
+
+
+ C - Configuration changes in Trigger
+
+
+ T - Trigger was missing
+
+
+
+ A configuration entry in Trigger without any history in Trigger Hist
+ results in a new trigger being created (N). The Trigger Hist stores a
+ hash of the underlying table, so any alteration to the table causes the
+ trigger to be rebuilt (S). When the
+ last_update_time
+ is changed on the Trigger entry, the configuration change causes the
+ trigger to be rebuilt (C). If an entry in Trigger Hist is missing the
+ corresponding database trigger, the trigger is created (T).
+
+
+ The process of examining triggers and rebuilding them is automatically
+ run during startup and each night by the SyncTriggersJob. The user can
+ also manually run the process at any time by invoking the
+ syncTriggers()
+ method over JMX.
+
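+ Because a change to the last_update_time on a Trigger entry causes a
+ rebuild (reason C above), one simple way to force a specific trigger to
+ be rebuilt on the next run is an update like the following (the
+ trigger_id value here is a placeholder):
+
+ update sym_trigger
+ set last_update_time = current_timestamp
+ where trigger_id = 'item_trigger';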
+
+ Purge Jobs
+
+ Purging is the act of cleaning up captured data that is no longer needed
+ in SymmetricDS's runtime tables. Data is purged through delete
+ statements by the
+ Purge Job
+ . Only data that has been successfully synchronized will be purged.
+ Purged tables include:
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The purge job is enabled by the
+ start.purge.job
+ SymmetricDS property. The timing of the three purge jobs (incoming,
+ outgoing, and data gaps) is controlled by a cron expression as specified
+ by the following properties:
+ job.purge.outgoing.cron
+ ,
+ job.purge.incoming.cron
+ , and
+ job.purge.datagaps.cron
+ . The default is
+ 0 0 0 * * *
+ , or once per day at midnight.
+
+
+
+ Two retention period properties indicate how much history SymmetricDS
+ will retain before purging. The
+ purge.retention.minutes
+ property indicates the period of history to keep for synchronization
+ tables. The default value is 5 days. The
+ statistic.retention.minutes
+ property indicates the period of history to keep for statistics. The
+ default value is also 5 days.
+
+ The purge properties should be adjusted according to how
+ much data is flowing through the system and the amount of storage space
+ the database has. For an initial deployment it is recommended that the
+ purge properties be kept at the defaults, since it is often helpful to
+ be able to look at the captured data in order to triage problems and
+ profile the synchronization patterns. When scaling up to more nodes, it
+ is recommended that the purge parameters be scaled back to 24 hours or
+ less.
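+ For example, scaling synchronization-table retention back to 24 hours
+ would look like this in the engine's properties file (the property
+ value is in minutes, and 24 hours is 1440 minutes):
+
+ purge.retention.minutes=1440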
+
+ +
+ Outgoing Batches + + By design, whenever SymmetricDS encounters an issue with a synchronization, the batch containing the error is marked as being in + an error state, and all subsequent batches for that particular channel to that particular node are held and not + synchronized until the error batch is resolved. SymmetricDS will retry the batch in error until the situation creating the + error is resolved (or the data for the batch itself is changed). + + + + Analyzing and resolving issues can take place on the outgoing or incoming side. The techniques for analysis are slightly different in + the two cases, however, due to the fact that the node with outgoing batch data also has the data and data events associated with the batch in + the database. On the incoming node, however, all that is available is the incoming batch header and data present in an incoming error table. + +
+ Analyzing the Issue
+
+
+ The first step in analyzing the cause of a failed batch is to locate information about the data in the batch, starting with
+ the outgoing batch entry itself.
+ To locate batches in error, use:
+ select * from sym_outgoing_batch where error_flag=1;
+ Several useful pieces of information are available from this query:
+
+
+ The batch number of the failed batch, available in column BATCH_ID.
+
+
+ The node to which the batch is being sent, available in column NODE_ID.
+
+
+ The channel to which the batch belongs, available in column CHANNEL_ID.
+ All subsequent batches on this channel to this node will be held until the error condition is resolved.
+
+
+ The specific data id in the batch which is causing the failure, available in column FAILED_DATA_ID.
+
+
+ Any SQL message, SQL State, and SQL Codes being returned during the synchronization attempt, available in columns SQL_MESSAGE,
+ SQL_STATE, and SQL_CODE, respectively.
+
+
+
+
+ Using the error_flag on the batch table, as shown above, is more reliable than using the
+ status column. The status column can change from 'ER' to a different status temporarily as
+ the batch is retried.
+
+ The query above will also show you any recent batches that
+ were originally in error and were changed to be manually skipped. See the end of for more details.
+
+
+ To get a full picture of the batch, you can query for information representing the complete
+ list of all data changes associated with the failed batch by joining
+ and , such as:
+ select * from sym_data where data_id in
+ (select data_id from sym_data_event where batch_id='XXXXXX');
+ where XXXXXX is the batch id of the failing batch.
+
+
+ This query returns a wealth of information about each data change in a batch, including:
+
+
+ The table involved in each data change, available in column TABLE_NAME.
+
+ The event type (Update [U], Insert [I], or Delete [D]), available in column EVENT_TYPE.
+
+
+ A comma-separated list of the new data and (optionally) the old data, available in columns ROW_DATA and
+ OLD_DATA, respectively.
+
+
+ The primary key data, available in column PK_DATA.
+
+
+ The channel id, trigger history information, transaction id if available, and other information.
+
+
+
+
+ More importantly, if you narrow your query to just the failed data id you can determine the exact data change that is causing the failure:
+ select * from sym_data where data_id in
+ (select failed_data_id from sym_outgoing_batch where batch_id='XXXXXX'
+ and node_id='YYYYY');
+ where XXXXXX is the batch id and YYYYY is the node id of the batch that is failing.
+
+ The queries above usually yield enough information to be able to determine why a
+ particular batch is failing. Common reasons a batch might be failing include:
+
+
+ The schema at the destination has a column that is not nullable, yet the source
+ has the column defined as nullable and a data change was sent with the column as null.
+
+ A foreign key constraint at the destination is preventing an insertion or update, which could be caused
+ by data being deleted at the destination or by the foreign key constraint not being in place at the source.
+
+
+ The data size of a column on the destination is smaller than the data size in the source, and data that
+ is too large for the destination has been synced.
+
+
+
+
+ Resolving the Issue
+
+
+ Once you have determined the cause of the issue, you'll have to decide the best course of action to fix it. If, for example,
+ the problem is due to a database schema mismatch, one possible solution would be to alter the destination database
+ in such a way that the SQL error no longer occurs. Whatever approach you take to remedy the issue, once you have
+ made the change, on the next push or pull SymmetricDS will retry the batch
+ and the channel's data will start flowing again.
+
+
+ If you have instead decided that the batch itself is wrong, or does not need to be synchronized, or you wish to remove a
+ particular data change from a batch, you do have the option of changing the data associated with the batch directly.
+
+
+ Be cautious when using the following two approaches to resolve synchronization issues. By far, the
+ best approach to solving a synchronization error is to resolve what is truly causing the
+ error at the destination database. Skipping a batch or removing a data id as discussed below should be your
+ solution of last resort, since doing so results in differences between the source and destination databases.
+
+
+ Now that you've read the warning, if you still want to change the batch
+ data itself, you do have several options, including:
+
+ Causing SymmetricDS to skip the batch completely. This is accomplished by setting the
+ batch's status to 'OK', as in:
+ update sym_outgoing_batch set status='OK' where batch_id='XXXXXX'
+ where XXXXXX is the failing batch. On the next pull or push, SymmetricDS will skip this batch since
+ it now thinks the batch has already been synchronized. Note that you can still distinguish between successful
+ batches and ones that you've artificially marked as 'OK', since the error_flag column on
+ the failed batch will still be set to '1' (in error).
+
+
+ Removing the failing data id from the batch by deleting the corresponding row in .
+ Eliminating the data id from the list of data ids in the batch will cause future synchronization attempts
+ of the batch to no longer include that particular data change as part of the batch. For example:
+ delete from sym_data_event where batch_id='XXXXXX' and data_id='YYYYYY'
+ where XXXXXX is the failing batch and YYYYYY is the data id no longer to be included in the batch.
+
+
+
+ +
+
+ Incoming Batches +
+ Analyzing the Issue
+
+
+ Analyzing an incoming batch is somewhat different from analyzing an outgoing batch. For incoming batches, you will rely on two tables,
+ and .
+
+ The first step in analyzing the cause of an incoming failed batch is to locate information about the batch, starting with
+ the incoming batch entry itself.
+ To locate batches in error, use:
+ select * from sym_incoming_batch where error_flag=1;
+ Several useful pieces of information are available from this query:
+
+
+ The batch number of the failed batch, available in column BATCH_ID. Note that this is the batch number of the
+ outgoing batch on the outgoing node.
+
+
+ The node the batch is being sent from, available in column NODE_ID.
+
+
+ The channel to which the batch belongs, available in column CHANNEL_ID.
+ All subsequent batches on this channel from this node will be held until the error condition is resolved.
+
+
+ The data_id that was being processed when the batch failed, available in column FAILED_DATA_ID.
+
+
+ Any SQL message, SQL State, and SQL Codes being returned during the synchronization attempt, available in columns SQL_MESSAGE,
+ SQL_STATE, and SQL_CODE, respectively.
+
+
+
+
+
+ For incoming batches, we do not have data and data event entries in the database we can query.
+ We do, however, have a table, , which provides some information about the batch.
+
+
+ select * from sym_incoming_error
+ where batch_id='XXXXXX' and node_id='YYYYY';
+ where XXXXXX is the batch id and YYYYY is the node id of the failing batch.
+
+
+
+
+
+ This query returns a wealth of information about each data change in a batch, including:
+
+
+ The table involved in each data change, available in column TARGET_TABLE_NAME.
+
+ The event type (Update [U], Insert [I], or Delete [D]), available in column EVENT_TYPE.
+
+
+ A comma-separated list of the new data and (optionally) the old data, available in columns ROW_DATA and
+ OLD_DATA, respectively.
+
+ The column names of the table, available in column COLUMN_NAMES.
+
+ The primary key column names of the table, available in column PK_COLUMN_NAMES.
+
+
+
+
+
+ Resolving the Issue
+
+
+ For batches in error, from the incoming side you'll also have to decide the best course of action to fix the issue.
+ Incoming batch errors that are in conflict can be fixed by taking advantage of two columns in the incoming error table which are examined each time
+ batches are processed. The first column, resolve_data, if filled in, will be used in place of row_data.
+ The second column, resolve_ignore, if set, will cause this particular data item to be ignored and batch processing to continue. These are the same
+ two columns used when a manual conflict resolution strategy is chosen, as discussed in .
+
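+ A sketch of both resolutions, assuming the incoming error table is
+ named sym_incoming_error and is keyed by batch id, node id, and a
+ failed row number (the key column name and sample values are
+ assumptions):
+
+ -- supply corrected row data to be used in place of row_data
+ update sym_incoming_error
+ set resolve_data = '"1001","corrected value"'
+ where batch_id='XXXXXX' and node_id='YYYYY' and failed_row_number=1;
+
+ -- or ignore the failing data item and let the batch continue
+ update sym_incoming_error
+ set resolve_ignore = 1
+ where batch_id='XXXXXX' and node_id='YYYYY' and failed_row_number=1;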
+
+
+ Staging Area
+
+ SymmetricDS creates temporary extraction and data load files containing the CSV payload of a synchronization when
+ the value of the stream.to.file.threshold.bytes SymmetricDS property has been reached. Below the threshold, the payload
+ is streamed to/from memory. The default threshold value is 32,767 bytes. This feature may be turned off by setting the stream.to.file.enabled
+ property to false.
+
+
+ SymmetricDS creates these temporary files in the directory specified by the java.io.tmpdir Java System property.
+
+
+ The location of the temporary directory may be changed by setting the Java System property passed into the Java program at startup. For example:
+
+ -Djava.io.tmpdir=/home/.symmetricds/tmp
+
+
+
+ Logging
+
+ The standalone SymmetricDS installation uses Log4J for logging. The configuration file is conf/log4j.xml.
+ The log4j.xml file has hints as to what logging can be enabled for useful, finer-grained logging.
+
+
+ There is a command line option to turn on preconfigured debugging levels. When the --debug option is used, conf/debug-log4j.xml is used instead of log4j.xml.
+
+
+ SymmetricDS proxies all of its logging through SLF4J. When deploying to an application server, or if Log4J is not
+ being leveraged, the general rules for SLF4J logging apply.
+
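+ For example, assuming the standalone launcher script in the bin
+ directory of the installation, the debug configuration could be enabled
+ at startup as follows:
+
+ bin/sym --debug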
+ +
diff --git a/symmetric-assemble/src/docbook/planning.xml b/symmetric-assemble/src/docbook/planning.xml
deleted file mode 100644
index ace054051c..0000000000
--- a/symmetric-assemble/src/docbook/planning.xml
+++ /dev/null
@@ -1,447 +0,0 @@
-
-
-
- Planning
-
- In the previous Chapter we presented a high-level introduction to some basic concepts in SymmetricDS, some of the
- high-level features, and a tutorial demonstrating a basic, working example of SymmetricDS in action. This chapter
- will focus on the key considerations and decisions one must make when planning a SymmetricDS implementation. As
- needed, basic concepts will be reviewed or introduced throughout this Chapter. By the end of the chapter you should
- be able to proceed forward and implement your planned design. This Chapter will intentionally avoid discussing the
- underlying database tables that capture the configuration resulting from your analysis and design process.
- Implementation of your design, along with discussion of the tables backing each concept, is covered in
-
- .
-
- When needed, we will rely on an example of a typical use of SymmetricDS in retail situations. This example retail
- deployment of SymmetricDS might include many point-of-sale workstations located at stores that may have intermittent
- network connection to a central location. These workstations might have point-of-sale software that uses a local
- relational database. The database is populated with items, prices and tax information from a centralized database.
- The point-of-sale software looks up item information from the local database and also saves sale information to the
- same database. The persisted sales need to be propagated back to the centralized database.
-
- Identifying Nodes - - A - node - is a single instance of SymmetricDS. It can be thought of as a proxy for a database which manages the - synchronization of data to and/or from its database. For our example retail application, the following would be - SymmetricDS nodes: - - Each point-of-sale workstation. - The central office database server. - - Each node of SymmetricDS can be either embedded in another application, run stand-alone, or even run in the - background as a service. If desired, nodes can be clustered to help disperse load if they send and/or receive - large volumes of data to or from a large number of nodes. - - Individual nodes are easy to identify when planning your implementation. If a database exists in your domain - that needs to send or receive data, there needs to be a corresponding SymmetricDS instance (a node) responsible - for managing the synchronization for that database. - - -
-
- Organizing Nodes - Nodes in SymmetricDS are organized into an overall node network, with connections based on what data needs to - be synchronized where. The exact organization of your nodes will be very specific to your synchronization goals. - As a starting point, lay out your nodes in diagram form and draw connections between nodes to represent cases in - which data is to flow in some manner. Think in terms of what data is needed at which node, what data is in common - to more than one node, etc. If it is helpful, you could also show data flow into and out of external systems. As - you will discover later, SymmetricDS can publish data changes from a node as well using JMS. - - - Our retail example, as shown in - - , represents a tree hierarchy with a single central office node connected by lines to one or more children nodes - (the POS workstations). Information flows from the central office node to an individual register and vice versa, - but never flows between registers. - - -
- Two-Tier Retail Store Deployment Example - - - - - -
-
- - More complex organization can also be used. Consider, for example, if the same retail example is expanded to - include store - servers - in each store to perform tasks such as opening the store for the day, reconciling registers, assigning employees, - etc. One approach to this new configuration would be to create a three-tier hierarchy (see - - ). The highest tier, the centralized database, connects with each store server's database. The store servers, in - turn, communicate with the individual point-of-sale workstations at the store. In this way data from each - register could be accumulated at the store server, then sent on to the central office. Similarly, data from the - central office can be staged in the store server and then sent on to each register, filtering the register's data - based on which register it is. - - -
- Three-Tier, In-Store Server, Retail Store Deployment Example - - - - - -
-
-
- One final example, shown in
-
- , again extending our original two-tier retail use case, would be to organize stores by "region" in the world.
- This three-tier architecture would introduce new regional servers (and corresponding regional databases) which
- would consolidate information specific to the stores the regional server is responsible for. The tiers in this case
- are therefore the central office server, regional servers, and individual store registers.
-
- Three-Tier, Regional Server, Retail Store Deployment Example - - - - - -
-
- These are just three common examples of how one might organize nodes in SymmetricDS. While the examples above
- were drawn from the retail industry, the same organizational patterns could apply to a variety of application domains.
-
-
- Defining Node Groups
-
- Once the organization of your SymmetricDS nodes has been chosen, you will need to
- group
- your nodes based on which nodes share common functionality. This is accomplished in SymmetricDS through the
- concept of a
- Node Group
- . Frequently, an individual tier in your network will represent one Node Group. Much of SymmetricDS'
- functionality is specified by Node Group and not an individual node. For example, when it comes time to decide
- where to route data captured by SymmetricDS, the routing is configured by
- Node Group
- .
-
-
- For the examples above, we might define Node Groups of:
-
- "workstation", to represent each point-of-sale workstation
- "corp" or "central-office" to represent the centralized node.
- "store" to represent the store server that interacts with store workstations and sends and receives
- data from a central office server.
-
- "region" to represent a regional server that interacts with store workstations and sends and
- receives data from a central office server.
-
-
- Considerable thought should be given to how you define the Node Groups. Groups should be created for each set of
- nodes that synchronize common tables in a similar manner. Also, give your Node Groups meaningful names, as they
- will appear in many, many places in your implementation of SymmetricDS.
-
- Note that there are other mechanisms in SymmetricDS to route to individual nodes or smaller subsets of nodes
- within a Node Group, so do not choose Node Groups based on needing only subsets of data at specific nodes. For
- example, although you could, you would not want to create a Node Group for each store even though different tax
- rates need to be routed to each store. Each store needs to synchronize the same tables to the same groups, so
- 'store' would be a good choice for a Node Group.
-
-
- Linking Nodes - - Now that Node Groups have been chosen, the next step in planning is to document the individual links between Node - Groups. These - Node Group Links - establish a source Node Group, a target Node Group, and a - data event action - , namely whether the data changes are - pushed - or - pulled - . The push method causes the source Node Group to connect to the target, while a pull method causes it to wait - for the target to connect to it. - - For our retail store example, there are two Node Group Links defined. For the first link, the "store" Node - Group pushes data to the "corp" central office Node Group. The second defines a "corp" to "store" link as a pull. - Thus, the store nodes will periodically pull data from the central office, but when it comes time to send data to - the central office a store node will do a push. - -
-
- Choosing Data Channels
- When SymmetricDS captures data changes in the database, the changes are captured in the order in which they
- occur. In addition, that order is preserved when synchronizing the data to other nodes. Frequently, however, you
- will have cases where you have different "types" of data with differing priorities. Some data might, for example,
- need priority for synchronization despite the normal order of events. For example, in a retail environment, users
- may be waiting for inventory documents to update while a promotional sale event updates a large number of items.
-
-
- SymmetricDS supports this by allowing tables being synchronized to be grouped together into
- Channels
- of data. A number of synchronization behaviors are controlled at the Channel level.
- For example, Channels provide a processing order when synchronizing, a limit on the amount of data that will be
- batched together, and isolation from errors in other channels. By categorizing data into channels and assigning
- tables to them, the user gains more control and visibility into the flow of data. In addition, SymmetricDS allows for
- synchronization to be enabled, suspended, or scheduled by Channels as well. The frequency of synchronization can
- also be controlled at the channel level.
-
-
- Choosing Channels is fairly straightforward and can be changed over time, if needed. Think about the differing
- "types" of data present in your application, the volume of data in the various types, etc. What data is
- considered must-have and can't be delayed due to a high volume load of another type of data? For example, you
- might place employee-related data, such as clocking in or out, on one channel, but sales transactions on another.
- We will define which tables belong to which channels in the next sections.
-
-
- Be sure that, when defining Channels, all tables related by foreign keys are included in the same channel.
-
-
-
- Avoid deadlocks! If client database transactions include tables that update common rows along with different rows, then
- concurrent synchronization can cause database deadlocks. You can avoid this by using channels to segregate those tables
- that cause the deadlocks.
-
-
-
- Defining Data Changes to be Captured and Routed - - At this point, you have designed the node-related aspects of your implementation, namely choosing nodes, grouping - the nodes based on functionality, defining which node groups send and receive data to which others (and by what - method). You have defined data Channels based on the types and priority of data being synchronized. The largest - remaining task prior to starting your implementation is to define and document what data changes are to be - captured (by defining SymmetricDS - Triggers - ), to decide to which node(s) the data changes are to be - routed - to, and to decide which trigger applies to which router and under what conditions. We will also, in this section, discuss the concept of an - initial load - of data into a SymmetricDS node. - -
- Defining Triggers
-
- SymmetricDS uses
- database triggers
- to capture and record changes to be synchronized to other nodes. Based on the configuration you provide,
- SymmetricDS creates the needed database triggers automatically for you. There is a great deal of flexibility
- in terms of defining the exact conditions under which a data change is captured. SymmetricDS triggers are
- defined in a table named
-
- . Each trigger you define is associated with a particular table. Each trigger can also specify:
-
- whether to install a trigger for updates, inserts, and/or deletes
- conditions on which an insert, update, and/or delete fires
- a list of columns that should not be synchronized from this table
- a SQL select statement that can be used to hold data needed for routing (known as External
- Data)
-
-
- As you define your triggers, consider which data changes are relevant to your application and which ones
- are not. Consider under what special conditions you might want to route data, as well. For our retail example,
- we likely want to have triggers defined for updating, inserting, and deleting pricing information in the
- central office so that the data can be routed down to the stores. Similarly, we need triggers on sales
- transaction tables such that sales information can be sent back to the central office.
-
-
- Defining Routers
-
- The triggers that have been defined in the previous section only define
- when
- data changes are to be captured for synchronization. They do not define
- where
- the data changes are to be sent. Routers, plus a mapping between Triggers and Routers (
-
- ), define the process for determining which nodes receive the data changes.
-
-
- Before we discuss Routers and Trigger Routers, we should probably take a break and discuss the process
- SymmetricDS uses to keep track of the changes and routing. As we stated, SymmetricDS relies on auto-created
- database triggers to capture and record relevant data changes into a table, the
-
- table. After the data is captured, a background process chooses the nodes that the data will be synchronized
- to. This is called
- routing
- and it is performed by the Routing Job. Note that the Routing Job does not actually send any data. It just
- organizes and records the decisions on where to send data in "staging" tables called
-
- and
-
- .
-
-
- Now we are ready to discuss Routers. The router itself is what defines the configuration of where to send a
- data change. Each Router you define can be associated with or assigned to any number of Triggers through a
- join table that defines the relationship. Routers are defined in the SymmetricDS table named
-
- . For each router you define, you will need to specify:
-
- the target table on the destination node to route the data
- the source node group and target node group for the nodes to route the data to
-
- a router
- type
- and router
- expression
-
- whether to route updates, inserts, and/or deletes
-
-
-
- For now, do not worry about the specific routing types. They will be covered later. For your design, simply
- make notes of the information needed and decisions to determine the list of nodes to route to. You will find
- later that there is incredible flexibility and functionality available in routers. For example, you will find
- you can:
-
- send the changes to all nodes that belong to the target node group defined in the router.
-
- compare old or new column values to a constant value or the value of a node's identity.
-
- execute a SQL expression against the database to select nodes to route to. This SQL expression
- can be passed the old and new column values.
-
- execute a Bean Shell expression in order to select nodes to route to. The Bean Shell expression
- can use the old and new column values.
-
- publish data changes directly to a messaging solution instead of transmitting changes to
- registered nodes. (This router must be configured manually in XML as an extension point.)
-
-
-
-
-
- Mapping Triggers to Routers - - For each of your Triggers (which specify when a data change should be captured), - you will need to decide which Router(s) to pair with the Trigger such that the change is routed - to the desired target nodes. This needed mapping between Triggers and Routers, found in the table - - , defines configuration specific to a particular Trigger and Router combination that you need. - In addition to defining which triggers map to which routers, the table also has several - settings present to define various behaviors, including initial loads and ping back. - -
- Planning Initial Loads - - SymmetricDS provides the ability to "load" or "seed" a node's database with specific sets of data from - its parent node. This concept is known as an - Initial Load - of data and is used to start off most synchronization scenarios. The Trigger Router mapping defines how - initial loads can occur, so now is a good time to plan how your - Initial Loads - will work. Using our retail example, consider a new store being opened. Initially, you would like to - pre-populate a store database with all the item, pricing, and tax data for that specific store. This is - achieved through an initial load. As part of your planning, be sure to consider which tables, if any, will - need to be loaded initially. SymmetricDS can also perform an initial load on a table with just a subset - of data. Initial Loads are further discussed in - . - -
-
- Circular References and "Ping Back" - - When routing data, SymmetricDS by default checks each data change and will not route a data change back - to a node if it originated the change to begin with. This prevents the possibility of data changes - resulting in an infinite loop of changes under certain circumstances. You may find that, for some reason, - you need SymmetricDS to go ahead and send the data back to the originating node - a "ping back". As part - of the planning process, consider whether you have a special case for needing ping back. Ping Back - control is further discussed in - . - -
-
-
- Planning for Registering Nodes - - Our final step in planning an implementation of SymmetricDS involves deciding how a new node is connected to, - or - registered - with a parent node for the first time. - - - The following are some options on ways you might register nodes: - - The tutorial uses the command line utility to register each individual node. - A JMX interface provides the same interface that the command line utility does. JMX can be - invoked programmatically or via a web console. - Both the utility and the JMX method register a node by inserting into two tables. A script can - be written to directly register nodes by directly inserting into the database. - SymmetricDS can be configured to auto register nodes. This means that any node that asks for a - registration will be given one. - - -
-
-
- Planning Data Transformations
-
- SymmetricDS also provides the ability to
- transform
- synchronized data instead of simply synchronizing it. Your application might, for example, require a particular
- column in your source data to be mapped to two different target tables with possibly different column names. Or,
- you might need to "merge" one or more columns of data from two independent tables into one table on the target.
- Or, you may want to set default column values on a target table based on a particular event on the source
- database. All of these operations, and many more, can be accomplished using SymmetricDS' transformation
- capabilities.
-
-
- As you plan your SymmetricDS implementation, make notes of cases where a data transformation is needed. Include
- details such as when the transformation might occur (is it only on an insert, or a delete?), which tables or
- columns play a part, etc. Complete details of all the transformation features, including how to configure a
- transformation, are discussed in .
-
-
-
- Planning Conflict Detection and Resolution
- As a final step to planning an implementation, consider for a moment cases in which the same data may be
- modified at nearly the same time at more than one node. For example, can data representing a customer be modified
- at both a central office and a store location? Conflict detection is the act of determining if an insert, update
- or delete is in "conflict" due to the target data row not being consistent with the data at the source prior to
- the insert/update/delete. Conflict resolution is the act of figuring out what to do when a conflict is detected.
- Both detection and resolution behavior can be configured and customized in a number of ways. For example, a
- conflict can be "detected" based solely on a single column which has been modified to a different value, or a row
- can be considered in conflict if any data in the row has been changed from what was expected, even if the column
- being updated still held its expected value. There are also numerous ways to resolve the conflict, such as
- referencing a timestamp column and choosing whichever edit was "most recent", or perhaps having the conflict
- put the channel into error until a manual resolution takes place. A set of conflict detection /
- resolution rules is configured for a given node group link, but you can set the rules to be for a given channel
- or for a given table in a channel.
-
-
- For the purpose of planning your implementation, make a list of all tables that could have data being modified at
- more than one node at the same time. For each table, think through what should happen in each case if such an
- event occurs. If the tables on a given channel all have the same set of conflict resolution and detection rules,
- then you might be able to configure the rules for the channel instead of a series of table-level detections and
- resolutions. Complete details on how to configure conflict resolution and detection are discussed further in
- .
\ No newline at end of file diff --git a/symmetric-assemble/src/docbook/tutorial.xml b/symmetric-assemble/src/docbook/quick-start.xml similarity index 100% rename from symmetric-assemble/src/docbook/tutorial.xml rename to symmetric-assemble/src/docbook/quick-start.xml diff --git a/symmetric-assemble/src/docbook/setup.xml b/symmetric-assemble/src/docbook/setup.xml new file mode 100644 index 0000000000..f43f0a2edc --- /dev/null +++ b/symmetric-assemble/src/docbook/setup.xml @@ -0,0 +1,270 @@ + + + + + Setup + +
+
+ Engine Files
+
+ To get a SymmetricDS node running, it needs to be given an identity and
+ it needs to know how to connect to the database it will be
+ synchronizing. The preferred way to configure a SymmetricDS engine is to
+ create a properties file in the engines directory. The SymmetricDS
+ server will create an engine for each properties file found in the
+ engines directory. When started up, SymmetricDS reads the
+ synchronization configuration and state from the database. If the
+ configuration tables are missing, they are created automatically (auto
+ creation can be disabled). Basic configuration is defined by inserting
+ into the following tables (the complete data model is defined in
+
+ ).
+
+
+
+
+ - specifies the tiers that exist in a SymmetricDS network
+
+
+
+
+
+ - links two node groups together for synchronization
+
+
+
+
+
+ - grouping and priority of synchronizations
+
+
+
+
+
+ - specifies tables, channels, and conditions for which changes in the
+ database should be captured
+
+
+
+
+
+ - specifies the routers defined for synchronization, along with other
+ routing details
+
+
+
+
+
+ - provides mappings of routers and triggers
+
+
+
+ During startup, triggers are verified against the
+ database, and database triggers are installed on tables that require
+ data changes to be captured. The Route, Pull and Push Jobs begin running
+ to synchronize changes with other nodes.
+
+
+ Each node requires properties that allow it to connect to a database and
+ register with a parent node. Properties are configured in a file named
+ xxxxx.properties
+ that is placed in the engines directory of the SymmetricDS install. The
+ file is usually named according to the engine.name, but this is not a
+ requirement.
+
+
+
+ To give a node its identity, the following properties are required. Any
+ other properties found in
+ conf/symmetric.properties
+ can be overridden for a specific engine in an engine's properties file.
+ If the properties are changed in
+ conf/symmetric.properties
+ they will take effect across all engines deployed to the server. Note
+ that you can use the variable
+ $(hostName)
+ to represent the host name of the machine when defining these properties
+ (for example, external.id=$(hostName)).
+
+
+
+
+ engine.name
+
+
+
+ This is an arbitrary name that is used to access a specific
+ engine using an HTTP URL. Each node configured in the engines directory
+ must have a unique engine name. The engine name is also used for the
+ domain name of registered JMX beans.
+
+
+
+
+
+ group.id
+
+
+
+ The node group that this node is a member of.
+ Synchronization is specified between node groups, which means you only
+ need to specify it once for multiple nodes in the same group.
+
+
+
+
+
+ external.id
+
+
+
+ The external id for this node has meaning to the user and
+ provides integration into the system where it is deployed. For example,
+ it might be a retail store number or a region number. The external id
+ can be used in expressions for conditional and subset data
+ synchronization. Behind the scenes, each node has a unique sequence
+ number for tracking synchronization events. That makes it possible to
+ assign the same external id to multiple nodes, if desired.
+
+
+
+
+
+ sync.url
+
+
+
+
+ The URL where this node can be contacted for synchronization. At startup
+ and during each heartbeat, the node updates its entry in the database
+ with this URL. The sync url is of the format:
+ http://{hostname}:{port}/{webcontext}/sync/{engine.name}
+ . 
+
+
+ The {webcontext} is blank for a standalone deployment. It
+ will typically be the name of the war file for an application server
+ deployment.
+
+ The {engine.name} can be left blank if there is only one
+ engine deployed in a SymmetricDS server.
+
+
+
+
+ When a new node is first started, it has no information
+ about synchronizing. It contacts the registration server in order to
+ join the network and receive its configuration. The configuration for
+ all nodes is stored on the registration server, and the URL must be
+ specified in the following property:
+
+
+
+
+ registration.url
+
+
+
+ The URL where this node can connect for registration to
+ receive its configuration. The registration server is part of
+ SymmetricDS and is enabled as part of the deployment. This is typically
+ equal to the value of the sync.url of the registration server.
+
+
+
+
+
+
+ Note that a
+ registration server node
+ is defined as one whose
+ registration.url
+ is either (a) blank, or (b) identical to its
+ sync.url
+ .
+
+
+
+ For a deployment where the database connection pool should
+ be created using a JDBC driver, set the following properties:
+
+
+
+
+ db.driver
+
+
+
+ The class name of the JDBC driver.
+
+
+
+
+
+ db.url
+
+
+
+ The JDBC URL used to connect to the database.
+
+
+
+
+
+ db.user
+
+
+
+ The database username, which is used to log in and to create and
+ update SymmetricDS tables.
+
+
+
+
+
+ db.password
+
+
+
+ The password for the database user.
+
+
+
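+ Putting the registration and database properties together, a minimal
+ illustrative engine properties file (say, engines/store-001.properties)
+ might look like the following. Every value shown here is a placeholder
+ (the group and engine names, host names, port, and the H2 driver are
+ assumptions used purely for illustration):
+
+ engine.name=store-001
+ group.id=store
+ external.id=001
+ sync.url=http://store-001:31415/sync/store-001
+ registration.url=http://corp:31415/sync/corp-000
+ db.driver=org.h2.Driver
+ db.url=jdbc:h2:store001;AUTO_SERVER=TRUE
+ db.user=symmetric
+ db.password=changeme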
+
diff --git a/symmetric-assemble/src/docbook/user-guide.xml b/symmetric-assemble/src/docbook/user-guide.xml index ed4fc74386..16806e6a82 100644 --- a/symmetric-assemble/src/docbook/user-guide.xml +++ b/symmetric-assemble/src/docbook/user-guide.xml @@ -80,12 +80,13 @@ + - - - - - + + + + +