-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DOP-8208] Detect includeHeaders value automatically during write
- Loading branch information
Showing
12 changed files
with
202 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Automatically detect the value of the ``includeHeaders`` option for Kafka during the write process, based on the presence of a ``headers`` column in the input DataFrame. | ||
|
||
Passing ``includeHeaders`` option explicitly is prohibited. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add notes about reading from and writing to Kafka to the documentation |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
.. _kafka-read: | ||
|
||
Reading from Kafka | ||
================== | ||
|
||
For reading data from Kafka, use :obj:`DBReader <onetl.db.db_reader.db_reader.DBReader>` with specific options (see below). | ||
|
||
.. warning:: | ||
|
||
For onETL 0.9.0, Kafka does not support :ref:`strategy`. You can only read the whole topic. | ||
|
||
.. note:: | ||
|
||
Unlike other connection classes, Kafka always returns a dataframe with a fixed schema | ||
(see `documentation <https://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html>`_): | ||
|
||
.. dropdown:: DataFrame Schema | ||
|
||
.. code:: python | ||
from pyspark.sql.types import ( | ||
ArrayType, | ||
BinaryType, | ||
IntegerType, | ||
LongType, | ||
StringType, | ||
StructField, | ||
StructType, | ||
TimestampType, | ||
) | ||
schema = StructType( | ||
[ | ||
StructField("value", BinaryType(), nullable=True), | ||
StructField("key", BinaryType(), nullable=True), | ||
StructField("topic", StringType(), nullable=False), | ||
StructField("partition", IntegerType(), nullable=False), | ||
StructField("offset", LongType(), nullable=False), | ||
StructField("timestamp", TimestampType(), nullable=False), | ||
StructField("timestampType", IntegerType(), nullable=False), | ||
# this field is returned only with ``include_headers=True`` | ||
StructField( | ||
"headers", | ||
ArrayType( | ||
StructType( | ||
[ | ||
StructField("key", StringType(), nullable=False), | ||
StructField("value", BinaryType(), nullable=True), | ||
], | ||
), | ||
), | ||
nullable=True, | ||
), | ||
], | ||
) | ||
.. warning:: | ||
|
||
Columns: | ||
|
||
* ``value`` | ||
* ``key`` | ||
* ``headers[*].value`` | ||
|
||
are always returned as raw bytes. If they contain values of a custom type, these values should be deserialized manually. | ||
|
||
.. currentmodule:: onetl.connection.db_connection.kafka.options | ||
|
||
.. autopydantic_model:: KafkaReadOptions | ||
:member-order: bysource | ||
:model-show-field-summary: false | ||
:field-show-constraints: false |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
.. _kafka-write: | ||
|
||
Writing to Kafka | ||
================ | ||
|
||
For writing data to Kafka, use :obj:`DBWriter <onetl.db.db_writer.db_writer.DBWriter>` with specific options (see below). | ||
|
||
.. note:: | ||
|
||
Unlike other connection classes, Kafka only accepts a dataframe with a fixed schema | ||
(see `documentation <https://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html>`_): | ||
|
||
.. dropdown:: DataFrame Schema | ||
|
||
.. code:: python | ||
from pyspark.sql.types import ( | ||
ArrayType, | ||
BinaryType, | ||
IntegerType, | ||
StringType, | ||
StructField, | ||
StructType, | ||
) | ||
schema = StructType( | ||
[ | ||
# mandatory fields: | ||
StructField("value", BinaryType(), nullable=True), | ||
# optional fields, can be omitted: | ||
StructField("key", BinaryType(), nullable=True), | ||
StructField("partition", IntegerType(), nullable=True), | ||
StructField( | ||
"headers", | ||
ArrayType( | ||
StructType( | ||
[ | ||
StructField("key", StringType(), nullable=False), | ||
StructField("value", BinaryType(), nullable=True), | ||
], | ||
), | ||
), | ||
nullable=True, | ||
), | ||
], | ||
) | ||
You cannot pass a dataframe with other column names or types. | ||
|
||
.. warning:: | ||
|
||
Columns: | ||
|
||
* ``value`` | ||
* ``key`` | ||
* ``headers[*].value`` | ||
|
||
can only be strings or raw bytes. If they contain values of a custom type, these values should be serialized manually. | ||
|
||
.. currentmodule:: onetl.connection.db_connection.kafka.options | ||
|
||
.. autopydantic_model:: KafkaWriteOptions | ||
:member-order: bysource | ||
:model-show-field-summary: false | ||
:field-show-constraints: false |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,6 +88,7 @@ def kafka_schema_with_headers(): | |
], | ||
), | ||
), | ||
nullable=True, | ||
), | ||
], | ||
) | ||
|
Oops, something went wrong.