From ba8c39f6e472b53679f10fd8b5b8758747367f23 Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Thu, 17 Apr 2025 13:31:15 +0530 Subject: [PATCH 1/5] [mysql] basic doc for RDS MariaDB --- .../data-ingestion/clickpipes/mysql/index.md | 2 + .../clickpipes/mysql/source/aurora.md | 2 +- .../clickpipes/mysql/source/rds.md | 6 +- .../clickpipes/mysql/source/rds_maria.md | 113 ++++++++++++++++++ sidebars.js | 1 + 5 files changed, 120 insertions(+), 4 deletions(-) create mode 100644 docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/index.md b/docs/integrations/data-ingestion/clickpipes/mysql/index.md index 61f8520edff..6e4271abaf9 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/index.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/index.md @@ -34,6 +34,8 @@ To get started, you first need to make sure that your MySQL database is set up c 2. [Amazon Aurora MySQL](./mysql/source/aurora) +3. [Amazon RDS MariaDB](./mysql/source/rds_maria) + Once your source MySQL database is set up, you can continue creating your ClickPipe. ## Create your ClickPipe {#creating-your-clickpipe} diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md index fc4988e5371..155759e6994 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md @@ -22,7 +22,7 @@ import Image from '@theme/IdealImage'; This is a step-by-step guide on how to configure your Aurora MySQL instance for replicating its data via the MySQL ClickPipe.
:::info -We also recommend going through the MySQL FAQs [here](./mysql/faq). The FAQs page is being actively updated. +We also recommend going through the MySQL FAQs [here](/integrations/data-ingestion/clickpipes/mysql/faq.md). The FAQs page is being actively updated. ::: ## Enable binary log retention {#enable-binlog-retention-aurora} diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md index 9124b4867c4..0dc87974634 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md @@ -22,7 +22,7 @@ import Image from '@theme/IdealImage'; This is a step-by-step guide on how to configure your RDS MySQL instance for replicating its data via the MySQL ClickPipe.
:::info -We also recommend going through the MySQL FAQs [here](./mysql/faq). The FAQs page is being actively updated. +We also recommend going through the MySQL FAQs [here](/integrations/data-ingestion/clickpipes/mysql/faq.md). The FAQs page is being actively updated. ::: ## Enable binary log retention {#enable-binlog-retention-rds} @@ -61,11 +61,11 @@ The following settings need to be set as follows: 2. `binlog_row_metadata` to `FULL` -Binlog row metadata +Binlog row metadata to FULL 3. `binlog_row_image` to `FULL` -Binlog row image +Binlog row image to FULL Then click on `Save Changes` in the top-right. You may need to reboot your instance for the changes to take effect - a way of knowing this is if you see `Pending reboot` next to the parameter group link in the Configurations tab of the RDS instance. diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md new file mode 100644 index 00000000000..336b5a51307 --- /dev/null +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md @@ -0,0 +1,113 @@ +--- +sidebar_label: 'Amazon RDS MariaDB' +description: 'Step-by-step guide on how to set up Amazon RDS MariaDB as a source for ClickPipes' +slug: /integrations/clickpipes/mysql/source/rds_maria +title: 'RDS MariaDB source setup guide' +--- + +import rds_backups from '@site/static/images/integrations/data-ingestion/clickpipes/mysql/rds-backups.png'; +import rds_config from '@site/static/images/integrations/data-ingestion/clickpipes/mysql/parameter_group/rds_config.png'; +import edit_button from '@site/static/images/integrations/data-ingestion/clickpipes/mysql/parameter_group/edit_button.png'; +import binlog_format from '@site/static/images/integrations/data-ingestion/clickpipes/mysql/parameter_group/binlog_format.png'; +import binlog_row_image from '@site/static/images/integrations/data-ingestion/clickpipes/mysql/parameter_group/binlog_row_image.png'; 
+import binlog_row_metadata from '@site/static/images/integrations/data-ingestion/clickpipes/mysql/parameter_group/binlog_row_metadata.png'; +import security_group_in_rds_mysql from '@site/static/images/integrations/data-ingestion/clickpipes/mysql/source/rds/security-group-in-rds-mysql.png'; +import edit_inbound_rules from '@site/static/images/integrations/data-ingestion/clickpipes/postgres/source/rds/edit_inbound_rules.png'; +import Image from '@theme/IdealImage'; + +# RDS MariaDB source setup guide + +This is a step-by-step guide on how to configure your RDS MariaDB instance for replicating its data via the MySQL ClickPipe. +
+:::info +We also recommend going through the MySQL FAQs [here](/integrations/data-ingestion/clickpipes/mysql/faq.md). The FAQs page is being actively updated. +::: + +## Enable binary log retention {#enable-binlog-retention-rds} +The binary log is a set of log files that contain information about data modifications made to an MySQL server instance, and binary log files are required for replication. Both of the below steps must be followed: + +### 1. Enable binary logging via automated backup{#enable-binlog-logging-rds} +The automated backups feature determines whether binary logging is turned on or off for MySQL. It can be set in the AWS console: + +Enabling automated backups in RDS + +Setting backup retention to a reasonably long value depending on the replication use-case is advisable. + +### 2. Binlog retention hours{#binlog-retention-hours-rds} +The default value of binlog retention hours is NULL. For RDS for MariaDB, NULL means binary logs aren't retained. +To specify the number of hours to retain binary logs on a DB instance, use the mysql.rds_set_configuration with a period with enough time for replication to occur: + +```text +mysql=> call mysql.rds_set_configuration('binlog retention hours', 24); +``` + +## Configure binlog settings in the parameter group {#binlog-parameter-group-rds} + +The parameter group can be found when you click on your MariaDB instance in the RDS Console, and then heading over to the `Configurations` tab. + +Where to find parameter group in RDS + +Upon clicking on the parameter group link, you will be taken to the page for it. You will see an Edit button in the top-right. + +Edit parameter group + +The following settings need to be set as follows: + +1. `binlog_format` to `ROW`. + +Binlog format to ROW + +2. `binlog_row_metadata` to `FULL` + +Binlog row metadata to FULL + +3. `binlog_row_image` to `FULL` + +Binlog row image to FULL + +Then click on `Save Changes` in the top-right. 
You may need to reboot your instance for the changes to take effect - a way of knowing this is if you see `Pending reboot` next to the parameter group link in the Configurations tab of the RDS instance. + +
+:::tip +If you have a MariaDB cluster, the above parameters would be found in a [DB Cluster](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_WorkingWithParamGroups.CreatingCluster.html) parameter group and not the DB instance group. +::: + +## Enabling GTID Mode {#gtid-mode-rds} +Global Transaction Identifiers (GTIDs) are unique IDs assigned to each committed transaction in MySQL/MariaDB. They simplify binlog replication and make troubleshooting more straightforward. MariaDB enables GTID mode by default, so no user action is needed to use it. + +## Configure a database user {#configure-database-user-rds} + +Connect to your RDS MySQL instance as an admin user and execute the following commands: + +1. Create a dedicated user for ClickPipes: + + ```sql + CREATE USER 'clickpipes_user'@'host' IDENTIFIED BY 'some-password'; + ``` + +2. Grant schema permissions. The following example shows permissions for the `mysql` database. Repeat these commands for each database and host you want to replicate: + + ```sql + GRANT SELECT ON `mysql`.* TO 'clickpipes_user'@'host'; + ``` + +3. Grant replication permissions to the user: + + ```sql + GRANT REPLICATION CLIENT ON *.* TO 'clickpipes_user'@'%'; + GRANT REPLICATION SLAVE ON *.* TO 'clickpipes_user'@'%'; + +## Configure network access {#configure-network-access} + +### IP-based access control {#ip-based-access-control} + +If you want to restrict traffic to your RDS instance, please add the [documented static NAT IPs](../../index.md#list-of-static-ips) to the `Inbound rules` of your RDS security group. + +Where to find security group in RDS MySQL? + +Edit inbound rules for the above security group + +### Private access via AWS PrivateLink {#private-access-via-aws-privatelink} + +To connect to your RDS instance through a private network, you can use AWS PrivateLink. Follow our [AWS PrivateLink setup guide for ClickPipes](/knowledgebase/aws-privatelink-setup-for-clickpipes) to set up the connection. 
+ diff --git a/sidebars.js b/sidebars.js index c1f7bcfe09a..28e348b9f0f 100644 --- a/sidebars.js +++ b/sidebars.js @@ -729,6 +729,7 @@ const sidebars = { items: [ "integrations/data-ingestion/clickpipes/mysql/source/rds", "integrations/data-ingestion/clickpipes/mysql/source/aurora", + "integrations/data-ingestion/clickpipes/mysql/source/rds_maria" ], }, "integrations/data-ingestion/clickpipes/mysql/datatypes" From 704baaddfb03925088ecb726810c9232887e9049 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 17 Apr 2025 10:10:50 +0200 Subject: [PATCH 2/5] add clickpipes as exception, which is used in import statements --- scripts/aspell-dict-file.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/aspell-dict-file.txt b/scripts/aspell-dict-file.txt index 5a804a32a1b..f5d04cfe993 100644 --- a/scripts/aspell-dict-file.txt +++ b/scripts/aspell-dict-file.txt @@ -996,3 +996,9 @@ Citus microsoft sparkConfigViaNotebook sparkUICHSettings +--docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md-- +clickpipes +--docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md-- +clickpipes +--docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md-- +clickpipes From f7c414f78d5cec15e96a2c278800d123e5c859be Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Thu, 17 Apr 2025 14:02:34 +0530 Subject: [PATCH 3/5] minor changes --- .../data-ingestion/clickpipes/mysql/source/rds_maria.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md index 336b5a51307..1940b2a59c7 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md @@ -77,7 +77,7 @@ Global Transaction Identifiers (GTIDs) are unique IDs assigned to each committed ## Configure a 
database user {#configure-database-user-rds} -Connect to your RDS MySQL instance as an admin user and execute the following commands: +Connect to your RDS MariaDB instance as an admin user and execute the following commands: 1. Create a dedicated user for ClickPipes: @@ -103,7 +103,7 @@ Connect to your RDS MySQL instance as an admin user and execute the following co If you want to restrict traffic to your RDS instance, please add the [documented static NAT IPs](../../index.md#list-of-static-ips) to the `Inbound rules` of your RDS security group. -Where to find security group in RDS MySQL? +Where to find security group in RDS? Edit inbound rules for the above security group From 2e4cab27be1872792a10548ca073d6c8416c10a7 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 17 Apr 2025 10:45:06 +0200 Subject: [PATCH 4/5] Small language edits --- .../clickpipes/mysql/source/rds_maria.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md index 1940b2a59c7..53a0607a966 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md @@ -24,9 +24,10 @@ We also recommend going through the MySQL FAQs [here](/integrations/data-ingesti ::: ## Enable binary log retention {#enable-binlog-retention-rds} -The binary log is a set of log files that contain information about data modifications made to an MySQL server instance, and binary log files are required for replication. Both of the below steps must be followed: +The binary log is a set of log files that contain information about data modifications made to a MySQL server instance. Binary log files are required for replication. Both of the steps below must be followed: ### 1. 
Enable binary logging via automated backup{#enable-binlog-logging-rds} + The automated backups feature determines whether binary logging is turned on or off for MySQL. It can be set in the AWS console: Enabling automated backups in RDS @@ -34,8 +35,9 @@ The automated backups feature determines whether binary logging is turned on or Setting backup retention to a reasonably long value depending on the replication use-case is advisable. ### 2. Binlog retention hours{#binlog-retention-hours-rds} -The default value of binlog retention hours is NULL. For RDS for MariaDB, NULL means binary logs aren't retained. -To specify the number of hours to retain binary logs on a DB instance, use the mysql.rds_set_configuration with a period with enough time for replication to occur: + +The default value of binlog retention hours is `NULL`. For RDS for MariaDB, `NULL` means binary logs aren't retained. +To specify the number of hours to retain binary logs on a DB instance, use the `mysql.rds_set_configuration` with a period with enough time for replication to occur: ```text mysql=> call mysql.rds_set_configuration('binlog retention hours', 24); @@ -43,15 +45,15 @@ mysql=> call mysql.rds_set_configuration('binlog retention hours', 24); ## Configure binlog settings in the parameter group {#binlog-parameter-group-rds} -The parameter group can be found when you click on your MariaDB instance in the RDS Console, and then heading over to the `Configurations` tab. +The parameter group can be found when you click on your MariaDB instance in the RDS Console, and then navigate to the `Configurations` tab. Where to find parameter group in RDS -Upon clicking on the parameter group link, you will be taken to the page for it. You will see an Edit button in the top-right. +Upon clicking on the parameter group link, you will be taken to the parameter group's page. 
You will see an Edit button in the top-right: Edit parameter group -The following settings need to be set as follows: +Settings `binlog_format`, `binlog_row_metadata` and `binlog_row_image` need to be set as follows: 1. `binlog_format` to `ROW`. @@ -65,7 +67,7 @@ The following settings need to be set as follows: Binlog row image to FULL -Then click on `Save Changes` in the top-right. You may need to reboot your instance for the changes to take effect - a way of knowing this is if you see `Pending reboot` next to the parameter group link in the Configurations tab of the RDS instance. +Next, click on `Save Changes` in the top-right. You may need to reboot your instance for the changes to take effect. If you see `Pending reboot` next to the parameter group link in the Configurations tab of the RDS instance, this is a good indication that a reboot of your instance is needed.
:::tip @@ -85,7 +87,7 @@ Connect to your RDS MariaDB instance as an admin user and execute the following CREATE USER 'clickpipes_user'@'host' IDENTIFIED BY 'some-password'; ``` -2. Grant schema permissions. The following example shows permissions for the `mysql` database. Repeat these commands for each database and host you want to replicate: +2. Grant schema permissions. The following example shows permissions for the `mysql` database. Repeat these commands for each database and host that you want to replicate: ```sql GRANT SELECT ON `mysql`.* TO 'clickpipes_user'@'host'; From 16808c8b8f0d6474821fc1a9a4f1d064e5af81da Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Thu, 17 Apr 2025 22:40:49 +0530 Subject: [PATCH 5/5] address comment --- .../data-ingestion/clickpipes/mysql/source/rds.md | 5 +++-- .../data-ingestion/clickpipes/mysql/source/rds_maria.md | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md index 0dc87974634..419f3ead555 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md @@ -36,8 +36,9 @@ The automated backups feature determines whether binary logging is turned on or Setting backup retention to a reasonably long value depending on the replication use-case is advisable. ### 2. Binlog retention hours{#binlog-retention-hours-rds} -The default value of binlog retention hours is NULL. For RDS for MySQL, NULL means binary logs aren't retained. -To specify the number of hours to retain binary logs on a DB instance, use the mysql.rds_set_configuration with a period with enough time for replication to occur: +Amazon RDS for MySQL has a different method of setting binlog retention duration, which is the amount of time a binlog file containing changes is kept. 
If some changes are not read before the binlog file is removed, replication will be unable to continue. The default value of binlog retention hours is `NULL`, which means binary logs aren't retained. + +To specify the number of hours to retain binary logs on a DB instance, use the `mysql.rds_set_configuration` stored procedure with a binlog retention period long enough for replication to occur. `24 hours` is the recommended minimum. ```text mysql=> call mysql.rds_set_configuration('binlog retention hours', 24); diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md index 53a0607a966..6530fd9be93 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds_maria.md @@ -35,9 +35,9 @@ The automated backups feature determines whether binary logging is turned on or Setting backup retention to a reasonably long value depending on the replication use-case is advisable. ### 2. Binlog retention hours{#binlog-retention-hours-rds} +Amazon RDS for MariaDB has a different method of setting binlog retention duration, which is the amount of time a binlog file containing changes is kept. If some changes are not read before the binlog file is removed, replication will be unable to continue. The default value of binlog retention hours is `NULL`, which means binary logs aren't retained. -The default value of binlog retention hours is `NULL`. For RDS for MariaDB, `NULL` means binary logs aren't retained. -To specify the number of hours to retain binary logs on a DB instance, use the `mysql.rds_set_configuration` with a period with enough time for replication to occur: +To specify the number of hours to retain binary logs on a DB instance, use the `mysql.rds_set_configuration` stored procedure with a binlog retention period long enough for replication to occur. `24 hours` is the recommended minimum. 
```text mysql=> call mysql.rds_set_configuration('binlog retention hours', 24);