In [1]:
from pyflink.table import StreamTableEnvironment, EnvironmentSettings, DataTypes, CsvTableSink, ChangelogMode, Schema, TableDescriptor, WriteMode

In [2]:
from pyflink.table.window import Tumble

In [3]:
from pyflink.table.expressions import col as col_fk

In [4]:
from pyflink.table.expressions import lit as lit_fk

In [5]:
from pyflink.datastream import StreamExecutionEnvironment

In [6]:
# Obtain the DataStream execution environment; the table environment created
# later in this notebook is built on top of it.
stream_env = StreamExecutionEnvironment.get_execution_environment()

In [7]:
# Table API settings: run the planner in streaming mode.
env_set = (
    EnvironmentSettings.new_instance()
    .in_streaming_mode()
    .build()
)

In [8]:
# Table environment bound to the DataStream environment above, configured with
# the streaming-mode settings.
stream_tbl_env = StreamTableEnvironment.create(
    stream_execution_environment=stream_env,
    environment_settings=env_set,
)

In [9]:
# Set the default parallelism for Table/SQL jobs to 1 through the table
# environment's underlying configuration object. The setter's return value is
# left as the cell's last expression so the notebook echoes it.
flink_conf = stream_tbl_env.get_config().get_configuration()
flink_conf.set_string("parallelism.default", "1")

<pyflink.common.configuration.Configuration at 0x1c2ed68e8b0>

In [10]:
# Also set parallelism to 1 on the DataStream environment itself (the table
# config above was set to the same value).
stream_env.set_parallelism(1)

<pyflink.datastream.stream_execution_environment.StreamExecutionEnvironment at 0x1c2ca94db50>

In [11]:
# DDL for the Kafka-backed source table `kafka_tbl_source`.
# - Reads JSON records from topic 'salesitems' on localhost:9092 as consumer
#   group 'source-demo-1', starting from the earliest available offset.
# - `proct` is a computed column (PROCTIME()), i.e. a processing-time attribute
#   generated by Flink; it is not a field of the incoming message.
# - NOTE(review): with 'json.ignore-parse-errors' = 'true', records or fields
#   that fail to parse are presumably dropped/nulled instead of failing the
#   job — confirm against the Flink JSON format docs. A payload that does not
#   match the declared types (e.g. a `sale_ts` too large for INT) could then go
#   unnoticed.
kafka_tbl_query = """
CREATE TABLE kafka_tbl_source (
    seller_id VARCHAR,
    product VARCHAR,
    quantity INT,
    product_price DOUBLE,
    sale_ts INT,
    proct AS PROCTIME()
)
WITH (
    'connector' = 'kafka',
    'topic' = 'salesitems',
    'properties.group.id' = 'source-demo-1',
    'properties.bootstrap.servers' = 'localhost:9092',
    'scan.startup.mode' = 'earliest-offset',
    'format' = 'json',
    'json.ignore-parse-errors' = 'true'
)
"""

In [12]:
# Execute the statement currently held in kafka_tbl_query (the CREATE TABLE
# DDL at this point in the notebook) against the table environment.
stream_tbl_env.execute_sql(kafka_tbl_query)

<pyflink.table.table_result.TableResult at 0x1c2ed798e20>

In [13]:
# Windowed aggregation over `kafka_tbl_source`: for each seller, the sum of
# quantity * product_price per hopping window.
# - HOP(time_attr, slide, size): windows are 30 seconds long and a new window
#   starts every 10 seconds, so windows overlap.
# - The windows are defined on `proct` (processing time), not on `sale_ts`.
# - HOP_START / HOP_END must repeat the same arguments as the HOP in GROUP BY.
# NOTE: this rebinds `kafka_tbl_query`, which earlier held the CREATE TABLE DDL.
kafka_tbl_query = """
SELECT
    seller_id,
    HOP_START(proct, INTERVAL '10' SECONDS, INTERVAL '30' SECONDS) AS proct_start,
    HOP_END(proct, INTERVAL '10' SECONDS, INTERVAL '30' SECONDS) AS proct_end,
    SUM(quantity*product_price) AS proct_sales
FROM
    kafka_tbl_source
GROUP BY
    seller_id,
    HOP(proct, INTERVAL '10' SECONDS, INTERVAL '30' SECONDS)
"""

In [14]:
# Build a Table object from the SELECT held in kafka_tbl_query.
# NOTE: this Python variable shares its name with the SQL table
# `kafka_tbl_source`, but it refers to the aggregated query result, not the
# raw Kafka source table.
kafka_tbl_source = stream_tbl_env.sql_query(kafka_tbl_query)

In [15]:
# kafka_ds_source = stream_tbl_env.to_changelog_stream(table=kafka_tbl_source, 
#                                                      changelog_mode=ChangelogMode.all())

In [16]:
# kafka_tbl_source = stream_tbl_env.from_data_stream(kafka_ds_source)

In [17]:
# CSV sink whose columns mirror the schema of the aggregated result table.
# Output goes to a single file at the given path; an existing file is
# overwritten.
result_schema = kafka_tbl_source.get_schema()
csv_tbl_sink = CsvTableSink(
    field_names=result_schema.get_field_names(),
    field_types=result_schema.get_field_data_types(),
    path='./WindowCSV/sum_prices_5.csv',
    num_files=1,
    write_mode=WriteMode.OVERWRITE,
)

In [18]:
# Register the CSV sink under the name 'csv_tbl_sink' so it can be used as an
# insert target by name.
# NOTE(review): register_table_sink / CsvTableSink appear to be legacy APIs in
# recent PyFlink releases — confirm against the installed version.
stream_tbl_env.register_table_sink('csv_tbl_sink',csv_tbl_sink)

In [19]:
# Insert the windowed aggregates into the registered 'csv_tbl_sink'. The call
# returns a TableResult (echoed below).
# NOTE(review): the Kafka source is unbounded, so the job presumably keeps
# running after this cell returns — confirm, and keep a reference to the
# TableResult if the job needs to be monitored or cancelled.
kafka_tbl_source.execute_insert('csv_tbl_sink')

<pyflink.table.table_result.TableResult at 0x1c2ed798af0>