Skip to content

Commit

Permalink
feat: Support Flink CDC Pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
1ambda committed Sep 9, 2023
1 parent abbe1ec commit 79b0b91
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ compose.dbt:

.PHONY: compose.cdc
compose.cdc:
COMPOSE_PROFILES=kafka docker-compose -f dokcer-compose-cdc.yml up
COMPOSE_PROFILES=flink,kafka docker-compose -f docker-compose.yml -f docker-compose-cdc.yml up

.PHONY: compose.clean
compose.clean:
Expand Down
File renamed without changes.
19 changes: 13 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,10 @@ services:
fs.s3a.endpoint: http://minio:9000
fs.s3a.path.style.access: true
jobmanager.rpc.address: flink-jobmanager
state.backend: rocksdb
state.backend.incremental: true
state.checkpoints.dir: s3a://datalake/flink/cluster-common/checkpoints/
state.savepoints.dir: s3a://datalake/flink/cluster-common/savepoints/
volumes:
- ./docker/flink/hadoop-core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml
Expand All @@ -275,7 +279,7 @@ services:
profiles: [ "flink" ]
build:
dockerfile: ./docker/flink/Dockerfile-flink1.16
image: 1ambda/lakehouse:flink-1.17
image: 1ambda/lakehouse:flink-1.16
container_name: flink-taskmanager
hostname: flink-taskmanager
entrypoint: |
Expand All @@ -299,13 +303,16 @@ services:
fs.s3a.endpoint: http://minio:9000
fs.s3a.path.style.access: true
jobmanager.rpc.address: flink-jobmanager
taskmanager.numberOfTaskSlots: 2
parallelism.default: 2
taskmanager.numberOfTaskSlots: 8
parallelism.default: 1
state.backend: rocksdb
state.backend.incremental: true
state.checkpoints.dir: s3a://datalake/flink/cluster-common/checkpoints/
state.savepoints.dir: s3a://datalake/flink/cluster-common/savepoints/
volumes:
- ./docker/flink/hadoop-core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml
- ./docker/flink/hadoop-hive-site.xml:/opt/flink/conf/hive-site.xml

- ./docker/flink/hadoop-core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml
- ./docker/flink/hadoop-hive-site.xml:/opt/flink/conf/hive-site.xml
depends_on:
- flink-jobmanager

Expand Down
11 changes: 11 additions & 0 deletions docker/flink/Dockerfile-flink1.16
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ RUN curl -L https://repo1.maven.org/maven2/org/apache/hudi/hudi-flink${FLINK_VER
RUN curl -L https://repo1.maven.org/maven2/org/apache/calcite/calcite-core/1.16.0/calcite-core-1.16.0.jar \
-o ${FLINK_HOME}/lib/calcite-core-1.16.0.jar

# Configure Plugins
ENV KAFKA_VERSION=3.5.0
RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-kafka/${FLINK_VERSION}/flink-sql-connector-kafka-${FLINK_VERSION}.jar \
-o ${FLINK_HOME}/lib/flink-sql-connector-kafka-${FLINK_VERSION}.jar
RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-avro-confluent-registry/${FLINK_VERSION}/flink-avro-confluent-registry-${FLINK_VERSION}.jar \
-o ${FLINK_HOME}/lib/flink-avro-confluent-registry-${FLINK_VERSION}.jar
RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka/${FLINK_VERSION}/flink-connector-kafka-${FLINK_VERSION}.jar \
-o ${FLINK_HOME}/lib/flink-connector-kafka-${FLINK_VERSION}.jar
RUN curl -L https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/${KAFKA_VERSION}/kafka-clients-${KAFKA_VERSION}.jar \
-o ${FLINK_HOME}/lib/kafka-clients-${KAFKA_VERSION}.jar

# Configure FLINK CLI
ENV SQL_CLIENT_HOME /opt/flink-client
ENV PATH="/opt/flink-client:${PATH}"
11 changes: 11 additions & 0 deletions docker/flink/Dockerfile-flink1.17
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ RUN curl -L https://repo1.maven.org/maven2/software/amazon/awssdk/url-connection
RUN curl -L https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-flink-runtime-${FLINK_VERSION_SHORT}/${ICEBERG_VERSION}/iceberg-flink-runtime-${FLINK_VERSION_SHORT}-${ICEBERG_VERSION}.jar \
-o ${FLINK_HOME}/lib/iceberg-flink-runtime-${FLINK_VERSION_SHORT}-${ICEBERG_VERSION}.jar

# Configure Plugins
ENV KAFKA_VERSION=3.5.0
RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-kafka/${FLINK_VERSION}/flink-sql-connector-kafka-${FLINK_VERSION}.jar \
-o ${FLINK_HOME}/lib/flink-sql-connector-kafka-${FLINK_VERSION}.jar
RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-avro-confluent-registry/${FLINK_VERSION}/flink-avro-confluent-registry-${FLINK_VERSION}.jar \
-o ${FLINK_HOME}/lib/flink-avro-confluent-registry-${FLINK_VERSION}.jar
RUN curl -L https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka/${FLINK_VERSION}/flink-connector-kafka-${FLINK_VERSION}.jar \
-o ${FLINK_HOME}/lib/flink-connector-kafka-${FLINK_VERSION}.jar
RUN curl -L https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/${KAFKA_VERSION}/kafka-clients-${KAFKA_VERSION}.jar \
-o ${FLINK_HOME}/lib/kafka-clients-${KAFKA_VERSION}.jar

# Configure FLINK CLI
ENV SQL_CLIENT_HOME /opt/flink-client
ENV PATH="/opt/flink-client:${PATH}"

0 comments on commit 79b0b91

Please sign in to comment.