From 79b0b91043d768891fa39925f7383bfc88b3630b Mon Sep 17 00:00:00 2001 From: Kun <1amb4a@gmail.com> Date: Sat, 9 Sep 2023 23:21:04 +0900 Subject: [PATCH] feat: Support Flink CDC Pipeline --- Makefile | 2 +- ...-compose-cdc.yml => docker-compose-cdc.yml | 0 docker-compose.yml | 19 +++++++++++++------ docker/flink/Dockerfile-flink1.16 | 11 +++++++++++ docker/flink/Dockerfile-flink1.17 | 11 +++++++++++ 5 files changed, 36 insertions(+), 7 deletions(-) rename dokcer-compose-cdc.yml => docker-compose-cdc.yml (100%) diff --git a/Makefile b/Makefile index d1808c5..a532383 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ compose.dbt: .PHONY: compose.cdc compose.cdc: - COMPOSE_PROFILES=kafka docker-compose -f dokcer-compose-cdc.yml up + COMPOSE_PROFILES=flink,kafka docker-compose -f docker-compose.yml -f docker-compose-cdc.yml up .PHONY: compose.clean compose.clean: diff --git a/dokcer-compose-cdc.yml b/docker-compose-cdc.yml similarity index 100% rename from dokcer-compose-cdc.yml rename to docker-compose-cdc.yml diff --git a/docker-compose.yml b/docker-compose.yml index 7535f6a..ac2d2b0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -262,6 +262,10 @@ services: fs.s3a.endpoint: http://minio:9000 fs.s3a.path.style.access: true jobmanager.rpc.address: flink-jobmanager + state.backend: rocksdb + state.backend.incremental: true + state.checkpoints.dir: s3a://datalake/flink/cluster-common/checkpoints/ + state.savepoints.dir: s3a://datalake/flink/cluster-common/savepoints/ volumes: - ./docker/flink/hadoop-core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml @@ -275,7 +279,7 @@ services: profiles: [ "flink" ] build: dockerfile: ./docker/flink/Dockerfile-flink1.16 - image: 1ambda/lakehouse:flink-1.17 + image: 1ambda/lakehouse:flink-1.16 container_name: flink-taskmanager hostname: flink-taskmanager entrypoint: | @@ -299,13 +303,16 @@ services: fs.s3a.endpoint: http://minio:9000 fs.s3a.path.style.access: true jobmanager.rpc.address: flink-jobmanager - taskmanager.numberOfTaskSlots: 2 - parallelism.default: 2 + taskmanager.numberOfTaskSlots: 8 + parallelism.default: 1 + state.backend: rocksdb + state.backend.incremental: true + state.checkpoints.dir: s3a://datalake/flink/cluster-common/checkpoints/ + state.savepoints.dir: s3a://datalake/flink/cluster-common/savepoints/ volumes: - - ./docker/flink/hadoop-core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml - - ./docker/flink/hadoop-hive-site.xml:/opt/flink/conf/hive-site.xml - + - ./docker/flink/hadoop-core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml + - ./docker/flink/hadoop-hive-site.xml:/opt/flink/conf/hive-site.xml depends_on: - flink-jobmanager diff --git a/docker/flink/Dockerfile-flink1.16 b/docker/flink/Dockerfile-flink1.16 index e5074d0..37d8a1f 100644 --- a/docker/flink/Dockerfile-flink1.16 +++ b/docker/flink/Dockerfile-flink1.16 @@ -46,6 +46,17 @@ RUN curl -L https://repo1.maven.org/maven2/org/apache/hudi/hudi-flink${FLINK_VER RUN curl -L https://repo1.maven.org/maven2/org/apache/calcite/calcite-core/1.16.0/calcite-core-1.16.0.jar \ -o ${FLINK_HOME}/lib/calcite-core-1.16.0.jar +# Configure Plugins +ENV KAFKA_VERSION=3.5.0 +RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-kafka/${FLINK_VERSION}/flink-sql-connector-kafka-${FLINK_VERSION}.jar \ + -o ${FLINK_HOME}/lib/flink-sql-connector-kafka-${FLINK_VERSION}.jar +RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-avro-confluent-registry/${FLINK_VERSION}/flink-avro-confluent-registry-${FLINK_VERSION}.jar \ + -o ${FLINK_HOME}/lib/flink-avro-confluent-registry-${FLINK_VERSION}.jar +RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka/${FLINK_VERSION}/flink-connector-kafka-${FLINK_VERSION}.jar \ + -o ${FLINK_HOME}/lib/flink-connector-kafka-${FLINK_VERSION}.jar +RUN curl -L https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/${KAFKA_VERSION}/kafka-clients-${KAFKA_VERSION}.jar \ + -o ${FLINK_HOME}/lib/kafka-clients-${KAFKA_VERSION}.jar + # Configure FLINK CLI ENV SQL_CLIENT_HOME /opt/flink-client ENV PATH="/opt/flink-client:${PATH}" diff --git a/docker/flink/Dockerfile-flink1.17 b/docker/flink/Dockerfile-flink1.17 index f3a8675..47bf3b4 100644 --- a/docker/flink/Dockerfile-flink1.17 +++ b/docker/flink/Dockerfile-flink1.17 @@ -36,6 +36,17 @@ RUN curl -L https://repo1.maven.org/maven2/software/amazon/awssdk/url-connection RUN curl -L https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-flink-runtime-${FLINK_VERSION_SHORT}/${ICEBERG_VERSION}/iceberg-flink-runtime-${FLINK_VERSION_SHORT}-${ICEBERG_VERSION}.jar \ -o ${FLINK_HOME}/lib/iceberg-flink-runtime-${FLINK_VERSION_SHORT}-${ICEBERG_VERSION}.jar +# Configure Plugins +ENV KAFKA_VERSION=3.5.0 +RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-kafka/${FLINK_VERSION}/flink-sql-connector-kafka-${FLINK_VERSION}.jar \ + -o ${FLINK_HOME}/lib/flink-sql-connector-kafka-${FLINK_VERSION}.jar +RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-avro-confluent-registry/${FLINK_VERSION}/flink-avro-confluent-registry-${FLINK_VERSION}.jar \ + -o ${FLINK_HOME}/lib/flink-avro-confluent-registry-${FLINK_VERSION}.jar +RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka/${FLINK_VERSION}/flink-connector-kafka-${FLINK_VERSION}.jar \ + -o ${FLINK_HOME}/lib/flink-connector-kafka-${FLINK_VERSION}.jar +RUN curl -L https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/${KAFKA_VERSION}/kafka-clients-${KAFKA_VERSION}.jar \ + -o ${FLINK_HOME}/lib/kafka-clients-${KAFKA_VERSION}.jar + # Configure FLINK CLI ENV SQL_CLIENT_HOME /opt/flink-client ENV PATH="/opt/flink-client:${PATH}"